@warmdrift/kgauto-compiler 2.0.0-alpha.3 → 2.0.0-alpha.30
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +87 -3
- package/dist/chunk-JQGRWJZO.mjs +1216 -0
- package/dist/chunk-NBO4R5PC.mjs +313 -0
- package/dist/chunk-RO22VFIF.mjs +29 -0
- package/dist/chunk-WXCFWUCN.mjs +678 -0
- package/dist/glassbox/index.d.mts +59 -0
- package/dist/glassbox/index.d.ts +59 -0
- package/dist/glassbox/index.js +312 -0
- package/dist/glassbox/index.mjs +12 -0
- package/dist/glassbox-routes/index.d.mts +242 -0
- package/dist/glassbox-routes/index.d.ts +242 -0
- package/dist/glassbox-routes/index.js +2452 -0
- package/dist/glassbox-routes/index.mjs +652 -0
- package/dist/index.d.mts +1179 -11
- package/dist/index.d.ts +1179 -11
- package/dist/index.js +3475 -236
- package/dist/index.mjs +1560 -78
- package/dist/ir-BIAT9gJk.d.ts +1031 -0
- package/dist/ir-De2AQtlr.d.mts +1031 -0
- package/dist/profiles.d.mts +137 -2
- package/dist/profiles.d.ts +137 -2
- package/dist/profiles.js +820 -11
- package/dist/profiles.mjs +5 -1
- package/dist/types-BjrIFPGe.d.mts +131 -0
- package/dist/types-D_JAhCv4.d.ts +131 -0
- package/package.json +12 -2
- package/dist/chunk-MBEI5UOM.mjs +0 -409
- package/dist/profiles-BiyrF36f.d.mts +0 -489
- package/dist/profiles-C5lVqF8_.d.ts +0 -489
package/dist/index.js
CHANGED
|
@@ -20,12 +20,19 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
|
|
|
20
20
|
// src/index.ts
|
|
21
21
|
var index_exports = {};
|
|
22
22
|
__export(index_exports, {
|
|
23
|
+
ABSOLUTE_FLOOR: () => ABSOLUTE_FLOOR,
|
|
23
24
|
ALIASES: () => ALIASES,
|
|
24
25
|
ALL_ARCHETYPES: () => ALL_ARCHETYPES,
|
|
26
|
+
ARCHETYPE_FLOOR_DEFAULT: () => ARCHETYPE_FLOOR_DEFAULT,
|
|
25
27
|
CallError: () => CallError,
|
|
26
28
|
DIALECT_VERSION: () => DIALECT_VERSION,
|
|
27
29
|
INTENT_ARCHETYPES: () => INTENT_ARCHETYPES,
|
|
30
|
+
MEASURED_GROUNDING_MIN_N: () => MEASURED_GROUNDING_MIN_N,
|
|
31
|
+
PROVIDER_ENV_KEYS: () => PROVIDER_ENV_KEYS,
|
|
32
|
+
RULE_SEQUENTIAL_TOOL_CLIFF: () => RULE_SEQUENTIAL_TOOL_CLIFF,
|
|
33
|
+
TRANSLATOR_FLOOR: () => TRANSLATOR_FLOOR,
|
|
28
34
|
allProfiles: () => allProfiles,
|
|
35
|
+
applySectionRewrites: () => applySectionRewrites,
|
|
29
36
|
bucketContext: () => bucketContext,
|
|
30
37
|
bucketHistory: () => bucketHistory,
|
|
31
38
|
bucketToolCount: () => bucketToolCount,
|
|
@@ -36,13 +43,41 @@ __export(index_exports, {
|
|
|
36
43
|
configureBrain: () => configureBrain,
|
|
37
44
|
countTokens: () => countTokens,
|
|
38
45
|
execute: () => execute,
|
|
46
|
+
getActionableAdvisories: () => getActionableAdvisories,
|
|
47
|
+
getAllStarterChains: () => getAllStarterChains,
|
|
48
|
+
getAllStarterChainsWithGrounding: () => getAllStarterChainsWithGrounding,
|
|
49
|
+
getArchetypePerfScore: () => getArchetypePerfScore,
|
|
50
|
+
getDefaultFallbackChain: () => getDefaultFallbackChain,
|
|
51
|
+
getDefaultFallbackChainWithGrounding: () => getDefaultFallbackChainWithGrounding,
|
|
52
|
+
getModelCompatibility: () => getModelCompatibility,
|
|
53
|
+
getPerAxisMetrics: () => getPerAxisMetrics,
|
|
39
54
|
getProfile: () => getProfile,
|
|
55
|
+
getReachabilityDiagnostic: () => getReachabilityDiagnostic,
|
|
56
|
+
getSequentialStarterChain: () => getSequentialStarterChain,
|
|
57
|
+
getSequentialStarterChainWithGrounding: () => getSequentialStarterChainWithGrounding,
|
|
58
|
+
getStarterChain: () => getStarterChain,
|
|
59
|
+
getStarterChainWithGrounding: () => getStarterChainWithGrounding,
|
|
40
60
|
hashShape: () => hashShape,
|
|
41
61
|
isArchetype: () => isArchetype,
|
|
62
|
+
isBrainQueryActiveFor: () => isBrainQueryActiveFor,
|
|
63
|
+
isModelReachable: () => isModelReachable,
|
|
64
|
+
isProviderReachable: () => isProviderReachable,
|
|
42
65
|
learningKey: () => learningKey,
|
|
66
|
+
loadAliasesFromBrain: () => loadAliasesFromBrain,
|
|
67
|
+
loadArchetypePerfFromBrain: () => loadArchetypePerfFromBrain,
|
|
68
|
+
loadArchetypePerfNFromBrain: () => loadArchetypePerfNFromBrain,
|
|
69
|
+
loadChainsFromBrain: () => loadChainsFromBrain,
|
|
70
|
+
loadModelsFromBrain: () => loadModelsFromBrain,
|
|
71
|
+
loadPricingFromBrain: () => loadPricingFromBrain,
|
|
72
|
+
markAdvisoryResolved: () => markAdvisoryResolved,
|
|
73
|
+
profileToRow: () => profileToRow,
|
|
43
74
|
profilesByProvider: () => profilesByProvider,
|
|
44
75
|
record: () => record,
|
|
76
|
+
recordOutcome: () => recordOutcome,
|
|
45
77
|
resetTokenizer: () => resetTokenizer,
|
|
78
|
+
resolvePricingAt: () => resolvePricingAt,
|
|
79
|
+
resolveProviderKey: () => resolveProviderKey,
|
|
80
|
+
runAdvisor: () => runAdvisor,
|
|
46
81
|
setTokenizer: () => setTokenizer,
|
|
47
82
|
tryGetProfile: () => tryGetProfile
|
|
48
83
|
});
|
|
@@ -235,38 +270,96 @@ function passToolRelevance(ir, opts = {}) {
|
|
|
235
270
|
]
|
|
236
271
|
};
|
|
237
272
|
}
|
|
273
|
+
/**
 * Adds up the token counts of every history message whose `content` is a
 * plain string. Messages with any other kind of `content` contribute nothing.
 *
 * @param {Iterable<{content?: unknown}>} history - conversation turns.
 * @returns {number} total tokens as reported by `countTokens`.
 */
function totalHistoryTokens(history) {
  let sum = 0;
  for (const message of history) {
    const text = message.content;
    // Only string content is measurable here; skip everything else.
    if (typeof text !== "string") continue;
    sum += countTokens(text);
  }
  return sum;
}
|
|
238
280
|
/**
 * Static pass that shrinks `ir.history` when it grows too large.
 *
 * The pass triggers when the number of turns exceeds `opts.summarizeOlderThan`
 * (default 8) or, when `opts.summarizeAboveTokens` is supplied, when the total
 * history token count exceeds it. Two strategies are tried in order:
 *
 * 1. If there are more than `opts.keepRecent` (default 4) turns, everything
 *    older than the last `keepRecent` turns is replaced by one synthetic
 *    `system` summary message.
 * 2. Otherwise, if the token threshold fired, the single largest string
 *    message is replaced by a short stub, but only when it accounts for at
 *    least 30% of the history tokens.
 *
 * The input IR is never mutated; a shallow copy with a new `history` array is
 * returned when a compression happens.
 *
 * @param {object} ir - intermediate representation; only `ir.history` is read.
 * @param {object} [opts]
 * @param {number} [opts.keepRecent=4] - trailing turns preserved verbatim.
 * @param {number} [opts.summarizeOlderThan=8] - turn-count trigger.
 * @param {number} [opts.summarizeAboveTokens] - optional token-count trigger.
 * @returns {{value: object, mutations: object[], historyTokensTotal: number}}
 *   the (possibly rewritten) IR, the mutations applied, and the token total
 *   of the history as measured before compression.
 */
function passCompressHistory(ir, opts = {}) {
  const history = ir.history;
  if (!history || history.length === 0) {
    return { value: ir, mutations: [], historyTokensTotal: 0 };
  }
  const keepRecent = opts.keepRecent ?? 4;
  const summarizeOlderThan = opts.summarizeOlderThan ?? 8;
  const summarizeAboveTokens = opts.summarizeAboveTokens;
  const historyTokensTotal = totalHistoryTokens(history);
  const countThresholdHit = history.length > summarizeOlderThan;
  const tokenThresholdHit = summarizeAboveTokens !== void 0 && historyTokensTotal > summarizeAboveTokens;
  if (!countThresholdHit && !tokenThresholdHit) {
    return { value: ir, mutations: [], historyTokensTotal };
  }
  if (history.length > keepRecent) {
    const cutIndex = history.length - keepRecent;
    const old = history.slice(0, cutIndex);
    const recent = history.slice(cutIndex);
    const firstUserTurn = old.find((m) => m.role === "user");
    const firstUserContent = firstUserTurn?.content;
    // Content is not guaranteed to be a string (e.g. parts-only messages);
    // fall back to an empty preview instead of throwing on `.split`.
    const firstUserLine = typeof firstUserContent === "string" ? firstUserContent.split("\n")[0].slice(0, 200) : "";
    const oldTokens = totalHistoryTokens(old);
    // Report "tokens" only when the token threshold alone caused the trigger.
    const trigger = tokenThresholdHit && !countThresholdHit ? "tokens" : "count";
    const summary = {
      role: "system",
      content: `[Earlier conversation: ${old.length} turns omitted (~${oldTokens} tokens). First user message: "${firstUserLine}"]`
    };
    return {
      value: { ...ir, history: [summary, ...recent] },
      mutations: [
        {
          id: `compress-history-${old.length}`,
          source: "static_pass",
          passName: "compress_history",
          description: trigger === "tokens" ? `Compressed ${old.length} old turns (~${oldTokens} tokens) into 1 summary \u2014 token threshold ${summarizeAboveTokens} exceeded (kept ${keepRecent} recent)` : `Compressed ${old.length} old turns into 1 summary (kept ${keepRecent} recent)`
        }
      ],
      historyTokensTotal
    };
  }
  if (tokenThresholdHit) {
    // Too few turns to slice: look for one dominant string message instead.
    let fattestIdx = -1;
    let fattestTokens = 0;
    for (let i = 0; i < history.length; i++) {
      const m = history[i];
      if (!m || typeof m.content !== "string") continue;
      const t = countTokens(m.content);
      if (t > fattestTokens) {
        fattestTokens = t;
        fattestIdx = i;
      }
    }
    // Minimum share of total history tokens a message must hold to be stubbed.
    const FAT_DOMINANCE_FLOOR = 0.3;
    const fattest = fattestIdx >= 0 ? history[fattestIdx] : void 0;
    if (fattest && historyTokensTotal > 0 && fattestTokens / historyTokensTotal >= FAT_DOMINANCE_FLOOR) {
      const firstLine = fattest.content.split("\n")[0]?.slice(0, 200) ?? "";
      const newContent = `[Earlier ${fattest.role} message content omitted: ~${fattestTokens} tokens. Preview: "${firstLine}"]`;
      const newHistory = history.slice();
      newHistory[fattestIdx] = { ...fattest, content: newContent };
      return {
        value: { ...ir, history: newHistory },
        mutations: [
          {
            id: `compress-fat-message-${fattestIdx}`,
            source: "static_pass",
            passName: "compress_history",
            description: `Replaced fat ${fattest.role} message #${fattestIdx} content (~${fattestTokens} of ${historyTokensTotal} tokens, ${Math.round(fattestTokens / historyTokensTotal * 100)}% of history) with summary stub \u2014 token threshold ${summarizeAboveTokens} exceeded (history.length ${history.length} <= keepRecent ${keepRecent}, slice not possible)`
          }
        ],
        historyTokensTotal
      };
    }
  }
  return { value: ir, mutations: [], historyTokensTotal };
}
|
|
265
354
|
function passApplyCliffs(ir, profile, estimatedInputTokens) {
|
|
266
355
|
const mutations = [];
|
|
267
356
|
const hints = { qualityWarning: [] };
|
|
268
357
|
let nextIR = ir;
|
|
358
|
+
const sequentialMode = nextIR.constraints?.toolOrchestration === "sequential";
|
|
269
359
|
for (const cliff of profile.cliffs) {
|
|
360
|
+
if (sequentialMode && cliff.reason.includes("L-040")) {
|
|
361
|
+
continue;
|
|
362
|
+
}
|
|
270
363
|
let triggered = false;
|
|
271
364
|
switch (cliff.metric) {
|
|
272
365
|
case "input_tokens":
|
|
@@ -489,10 +582,16 @@ function lower(ir, profile, hints = {}) {
|
|
|
489
582
|
}
|
|
490
583
|
function lowerAnthropic(ir, profile, hints) {
|
|
491
584
|
const systemBlocks = buildAnthropicSystemBlocks(ir.sections, profile);
|
|
492
|
-
const
|
|
585
|
+
const history = (ir.history ?? []).filter((m) => m.role !== "system");
|
|
586
|
+
const policy = ir.historyCachePolicy;
|
|
587
|
+
const markIndex = resolveHistoryMarkIndex(history.length, policy);
|
|
588
|
+
const messages = buildAnthropicMessages(history, ir.currentTurn, markIndex);
|
|
493
589
|
const tools = ir.tools ? toAnthropicTools(ir.tools) : void 0;
|
|
494
590
|
const cacheableTokens = computeCacheableTokens(systemBlocks);
|
|
495
|
-
const
|
|
591
|
+
const historyCacheableTokens = markIndex >= 0 ? sumHistoryTokens(history, markIndex) : 0;
|
|
592
|
+
const totalCacheableTokens = cacheableTokens + historyCacheableTokens;
|
|
593
|
+
const cacheSavings = totalCacheableTokens / 1e6 * profile.costInputPer1m * (1 - (profile.lowering.cache.discount ?? 0.1));
|
|
594
|
+
const toolChoice = hints.wireOverrides?.parallelToolCalls === false && tools && tools.length > 0 ? { type: "auto", disable_parallel_tool_use: true } : void 0;
|
|
496
595
|
return {
|
|
497
596
|
request: {
|
|
498
597
|
provider: "anthropic",
|
|
@@ -500,10 +599,16 @@ function lowerAnthropic(ir, profile, hints) {
|
|
|
500
599
|
system: systemBlocks,
|
|
501
600
|
messages,
|
|
502
601
|
tools,
|
|
503
|
-
|
|
602
|
+
// alpha.8: trust profile.maxOutputTokens. The historical Math.min(_, 4096)
|
|
603
|
+
// cap surprised every consumer once (PB-Cairn contract-gaps brief, Gap 3).
|
|
604
|
+
// Profile is the single source of truth; consumers wanting a tighter
|
|
605
|
+
// budget can pass providerOverrides.anthropic.max_tokens explicitly.
|
|
606
|
+
max_tokens: hints.forceTerseOutput ? 200 : profile.maxOutputTokens,
|
|
607
|
+
tool_choice: toolChoice
|
|
504
608
|
},
|
|
505
609
|
diagnostics: {
|
|
506
610
|
cacheableTokens,
|
|
611
|
+
historyCacheableTokens,
|
|
507
612
|
estimatedCacheSavingsUsd: cacheSavings
|
|
508
613
|
}
|
|
509
614
|
};
|
|
@@ -536,17 +641,64 @@ function buildAnthropicSystemBlocks(sections, profile) {
|
|
|
536
641
|
}
|
|
537
642
|
return blocks;
|
|
538
643
|
}
|
|
539
|
-
function buildAnthropicMessages(history, currentTurn) {
|
|
644
|
+
/**
 * Converts history turns plus the optional current turn into the message
 * list sent in the Anthropic request.
 *
 * System-role entries are dropped. The history entry at `markIndex` has its
 * content produced by `attachAnthropicCacheControl`; every other entry uses
 * `parts` when present and `content` otherwise.
 *
 * @param {Array<object>} history - prior turns, in order.
 * @param {object} [currentTurn] - turn appended last, unless it is a system turn.
 * @param {number} markIndex - index into `history` to mark; -1 marks nothing.
 * @returns {Array<{role: string, content: unknown}>}
 */
function buildAnthropicMessages(history, currentTurn, markIndex) {
  const messages = [];
  for (const [position, turn] of history.entries()) {
    if (turn.role === "system") {
      continue;
    }
    const role = turn.role;
    let content;
    if (position === markIndex) {
      content = attachAnthropicCacheControl(turn);
    } else {
      content = turn.parts ?? turn.content;
    }
    messages.push({ role, content });
  }
  const hasSendableCurrentTurn = Boolean(currentTurn) && currentTurn.role !== "system";
  if (hasSendableCurrentTurn) {
    messages.push({
      role: currentTurn.role,
      content: currentTurn.parts ?? currentTurn.content
    });
  }
  return messages;
}
|
|
660
|
+
/**
 * Builds the content-block array for a message with an ephemeral
 * `cache_control` marker on its final block.
 *
 * When `m.parts` is a non-empty array, a copy is returned in which only the
 * last block is cloned and tagged; the input array is left untouched.
 * Otherwise `m.content` is wrapped in a single tagged text block.
 *
 * @param {{parts?: Array<object>, content?: unknown}} m - source message.
 * @returns {Array<object>} content blocks, the last one carrying the marker.
 */
function attachAnthropicCacheControl(m) {
  const parts = m.parts;
  const hasParts = Array.isArray(parts) && parts.length > 0;
  if (!hasParts) {
    return [
      {
        type: "text",
        text: m.content,
        cache_control: { type: "ephemeral" }
      }
    ];
  }
  const lastPosition = parts.length - 1;
  const tagged = parts.slice(0, lastPosition);
  tagged.push({ ...parts[lastPosition], cache_control: { type: "ephemeral" } });
  return tagged;
}
|
|
678
|
+
/**
 * Resolves which history message should carry the cache marker.
 *
 * - No policy, or strategy "none": nothing is marked (-1).
 * - Empty history: nothing is marked (-1).
 * - Strategy "all-but-latest": the last history message is marked.
 * - Any other strategy: `policy.suffix` trailing messages are left unmarked
 *   and the message just before them is marked.
 *
 * A `suffix` that is not a non-negative integer resolves to -1. Previously
 * such values produced an out-of-range or fractional index that no message
 * could match, while callers still summed "cacheable" tokens up to it and
 * over-reported the diagnostics.
 *
 * @param {number} historyLen - number of history messages under consideration.
 * @param {{strategy: string, suffix?: number}} [policy] - history cache policy.
 * @returns {number} index of the message to mark, or -1 when none applies.
 */
function resolveHistoryMarkIndex(historyLen, policy) {
  if (!policy || policy.strategy === "none") return -1;
  if (historyLen === 0) return -1;
  if (policy.strategy === "all-but-latest") {
    return historyLen - 1;
  }
  const suffix = policy.suffix;
  // Reject missing, fractional, or negative suffixes instead of emitting an
  // index that cannot correspond to a real message.
  if (!Number.isInteger(suffix) || suffix < 0) return -1;
  const idx = historyLen - 1 - suffix;
  return idx >= 0 ? idx : -1;
}
|
|
687
|
+
/**
 * Totals the tokens of history messages from index 0 through `throughIndex`
 * inclusive, never reading past the end of `history`.
 *
 * System-role messages are skipped. A message whose `parts` is an array
 * contributes the tokens of each part with a string `text`; otherwise a
 * string `content` is counted. Anything else contributes nothing.
 *
 * @param {Array<object>} history - conversation turns.
 * @param {number} throughIndex - last index (inclusive) to include.
 * @returns {number} token total as reported by `countTokens`.
 */
function sumHistoryTokens(history, throughIndex) {
  const lastPosition = Math.min(throughIndex, history.length - 1);
  let tokens = 0;
  for (let position = 0; position <= lastPosition; position += 1) {
    const turn = history[position];
    if (turn.role === "system") {
      continue;
    }
    if (Array.isArray(turn.parts)) {
      // Structured message: only parts carrying string text are counted.
      for (const part of turn.parts) {
        if (typeof part.text !== "string") continue;
        tokens += countTokens(part.text);
      }
      continue;
    }
    if (typeof turn.content === "string") {
      tokens += countTokens(turn.content);
    }
  }
  return tokens;
}
|
|
550
702
|
function toAnthropicTools(tools) {
|
|
551
703
|
return tools.map((t) => ({
|
|
552
704
|
name: t.name,
|
|
@@ -581,6 +733,9 @@ function lowerGoogle(ir, profile, hints) {
|
|
|
581
733
|
const minTokens = profile.lowering.cache.minTokens ?? 4096;
|
|
582
734
|
const meetsMin = cacheableTokens >= minTokens;
|
|
583
735
|
const cacheSavings = meetsMin ? cacheableTokens / 1e6 * profile.costInputPer1m * (1 - (profile.lowering.cache.discount ?? 0.25)) : 0;
|
|
736
|
+
const history = (ir.history ?? []).filter((m) => m.role !== "system");
|
|
737
|
+
const histMarkIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
|
|
738
|
+
const historyCacheableTokens = histMarkIndex >= 0 ? sumHistoryTokens(history, histMarkIndex) : 0;
|
|
584
739
|
return {
|
|
585
740
|
request: {
|
|
586
741
|
provider: "google",
|
|
@@ -592,6 +747,7 @@ function lowerGoogle(ir, profile, hints) {
|
|
|
592
747
|
},
|
|
593
748
|
diagnostics: {
|
|
594
749
|
cacheableTokens: meetsMin ? cacheableTokens : 0,
|
|
750
|
+
historyCacheableTokens,
|
|
595
751
|
estimatedCacheSavingsUsd: cacheSavings
|
|
596
752
|
}
|
|
597
753
|
};
|
|
@@ -639,6 +795,10 @@ function lowerOpenAI(ir, profile, hints) {
|
|
|
639
795
|
content: ir.currentTurn.parts ?? ir.currentTurn.content
|
|
640
796
|
});
|
|
641
797
|
}
|
|
798
|
+
const history = (ir.history ?? []).filter((m) => m.role !== "system");
|
|
799
|
+
const histMarkIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
|
|
800
|
+
const historyCacheableTokens = histMarkIndex >= 0 ? sumHistoryTokens(history, histMarkIndex) : 0;
|
|
801
|
+
const openaiParallelToolCalls = hints.wireOverrides?.parallelToolCalls === false && ir.tools && ir.tools.length > 0 ? false : void 0;
|
|
642
802
|
return {
|
|
643
803
|
request: {
|
|
644
804
|
provider: "openai",
|
|
@@ -646,9 +806,14 @@ function lowerOpenAI(ir, profile, hints) {
|
|
|
646
806
|
messages,
|
|
647
807
|
tools: ir.tools && ir.tools.length > 0 ? toOpenAITools(ir.tools) : void 0,
|
|
648
808
|
response_format: ir.constraints?.structuredOutput ? { type: "json_object" } : void 0,
|
|
649
|
-
reasoning_effort: hints.forceTerseOutput ? "low" : void 0
|
|
809
|
+
reasoning_effort: hints.forceTerseOutput ? "low" : void 0,
|
|
810
|
+
parallel_tool_calls: openaiParallelToolCalls
|
|
650
811
|
},
|
|
651
|
-
diagnostics: {
|
|
812
|
+
diagnostics: {
|
|
813
|
+
cacheableTokens: 0,
|
|
814
|
+
historyCacheableTokens,
|
|
815
|
+
estimatedCacheSavingsUsd: 0
|
|
816
|
+
}
|
|
652
817
|
};
|
|
653
818
|
}
|
|
654
819
|
function toOpenAITools(tools) {
|
|
@@ -675,6 +840,9 @@ function lowerDeepSeek(ir, profile) {
|
|
|
675
840
|
content: ir.currentTurn.parts ?? ir.currentTurn.content
|
|
676
841
|
});
|
|
677
842
|
}
|
|
843
|
+
const history = (ir.history ?? []).filter((m) => m.role !== "system");
|
|
844
|
+
const histMarkIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
|
|
845
|
+
const historyCacheableTokens = histMarkIndex >= 0 ? sumHistoryTokens(history, histMarkIndex) : 0;
|
|
678
846
|
return {
|
|
679
847
|
request: {
|
|
680
848
|
provider: "deepseek",
|
|
@@ -689,7 +857,11 @@ function lowerDeepSeek(ir, profile) {
|
|
|
689
857
|
}
|
|
690
858
|
})) : void 0
|
|
691
859
|
},
|
|
692
|
-
diagnostics: {
|
|
860
|
+
diagnostics: {
|
|
861
|
+
cacheableTokens: 0,
|
|
862
|
+
historyCacheableTokens,
|
|
863
|
+
estimatedCacheSavingsUsd: 0
|
|
864
|
+
}
|
|
693
865
|
};
|
|
694
866
|
}
|
|
695
867
|
function sortSections(sections) {
|
|
@@ -765,7 +937,24 @@ var PROFILES_RAW = [
|
|
|
765
937
|
],
|
|
766
938
|
strengths: ["reasoning", "agentic_coding", "long_context", "reliable_tool_use", "structured_output"],
|
|
767
939
|
weaknesses: ["cost", "latency"],
|
|
768
|
-
notes: "Frontier (2026-05). Step-change improvement over 4.6 in agentic coding. Adaptive thinking only \u2014 no extended-thinking toggle. 1M context, 128k max output."
|
|
940
|
+
notes: "Frontier (2026-05). Step-change improvement over 4.6 in agentic coding. Adaptive thinking only \u2014 no extended-thinking toggle. 1M context, 128k max output.",
|
|
941
|
+
// Frontier perf. Drops on archetypes where parallel-tool throughput
|
|
942
|
+
// (hunt) or low-budget cost-sensitivity (classify/summarize) matters
|
|
943
|
+
// more than reasoning depth.
|
|
944
|
+
archetypePerf: {
|
|
945
|
+
critique: 10,
|
|
946
|
+
plan: 10,
|
|
947
|
+
generate: 9,
|
|
948
|
+
ask: 9,
|
|
949
|
+
extract: 9,
|
|
950
|
+
transform: 9,
|
|
951
|
+
hunt: 8,
|
|
952
|
+
// strong but Flash dominates parallel tool throughput
|
|
953
|
+
summarize: 8,
|
|
954
|
+
// overkill for tolerant archetype; cost-out of frontier
|
|
955
|
+
classify: 8
|
|
956
|
+
// overkill; brain-validated cheaper models cover this
|
|
957
|
+
}
|
|
769
958
|
},
|
|
770
959
|
{
|
|
771
960
|
id: "claude-opus-4-6",
|
|
@@ -797,7 +986,20 @@ var PROFILES_RAW = [
|
|
|
797
986
|
],
|
|
798
987
|
strengths: ["reasoning", "long_context", "reliable_tool_use", "structured_output", "extended_thinking"],
|
|
799
988
|
weaknesses: ["cost", "latency"],
|
|
800
|
-
notes: "Predecessor to 4.7. Still current in Anthropic legacy table. Same pricing as 4.7 \u2014 choose 4.7 unless you need extended-thinking budget control (4.7 is adaptive-only)."
|
|
989
|
+
notes: "Predecessor to 4.7. Still current in Anthropic legacy table. Same pricing as 4.7 \u2014 choose 4.7 unless you need extended-thinking budget control (4.7 is adaptive-only).",
|
|
990
|
+
// One notch below 4.7 across the board — extended-thinking edge does
|
|
991
|
+
// not flip any archetype ranking. Legacy: chains should prefer 4.7.
|
|
992
|
+
archetypePerf: {
|
|
993
|
+
critique: 9,
|
|
994
|
+
plan: 9,
|
|
995
|
+
generate: 9,
|
|
996
|
+
ask: 9,
|
|
997
|
+
extract: 9,
|
|
998
|
+
transform: 9,
|
|
999
|
+
hunt: 7,
|
|
1000
|
+
summarize: 8,
|
|
1001
|
+
classify: 8
|
|
1002
|
+
}
|
|
801
1003
|
},
|
|
802
1004
|
{
|
|
803
1005
|
id: "claude-sonnet-4-6",
|
|
@@ -821,7 +1023,23 @@ var PROFILES_RAW = [
|
|
|
821
1023
|
],
|
|
822
1024
|
strengths: ["quality", "tool_use", "long_context", "cache_friendly", "extended_thinking"],
|
|
823
1025
|
weaknesses: [],
|
|
824
|
-
notes: "Workhorse. Best price/quality for most multi-turn agentic work. 1M context, 64k max output."
|
|
1026
|
+
notes: "Workhorse. Best price/quality for most multi-turn agentic work. 1M context, 64k max output.",
|
|
1027
|
+
// Master plan §6.2 anchor. Tier 0 for plan/generate/ask/extract/transform
|
|
1028
|
+
// in starter chains; tier 1 cross-provider for hunt/summarize/classify.
|
|
1029
|
+
archetypePerf: {
|
|
1030
|
+
ask: 9,
|
|
1031
|
+
generate: 9,
|
|
1032
|
+
plan: 9,
|
|
1033
|
+
critique: 9,
|
|
1034
|
+
extract: 9,
|
|
1035
|
+
transform: 9,
|
|
1036
|
+
hunt: 7,
|
|
1037
|
+
// strong but Flash beats on parallel tool throughput
|
|
1038
|
+
summarize: 8,
|
|
1039
|
+
// overkill for tolerant archetype
|
|
1040
|
+
classify: 8
|
|
1041
|
+
// overkill
|
|
1042
|
+
}
|
|
825
1043
|
},
|
|
826
1044
|
{
|
|
827
1045
|
id: "claude-haiku-4-5",
|
|
@@ -851,7 +1069,23 @@ var PROFILES_RAW = [
|
|
|
851
1069
|
],
|
|
852
1070
|
strengths: ["speed", "cost", "classification", "cache_friendly", "extended_thinking"],
|
|
853
1071
|
weaknesses: ["complex_reasoning", "large_tool_sets"],
|
|
854
|
-
notes: "Cheapest Anthropic. Great for classify, summarize, ask shapes. 200k context, 64k max output. API alias `claude-haiku-4-5` resolves to dated snapshot `claude-haiku-4-5-20251001`."
|
|
1072
|
+
notes: "Cheapest Anthropic. Great for classify, summarize, ask shapes. 200k context, 64k max output. API alias `claude-haiku-4-5` resolves to dated snapshot `claude-haiku-4-5-20251001`.",
|
|
1073
|
+
// Tier 1 cross-provider anchor for short-output chains (classify/
|
|
1074
|
+
// summarize/extract/transform). Falls off on plan/critique where
|
|
1075
|
+
// reasoning depth matters; competes with Pro on cost+latency.
|
|
1076
|
+
archetypePerf: {
|
|
1077
|
+
classify: 8,
|
|
1078
|
+
summarize: 8,
|
|
1079
|
+
ask: 7,
|
|
1080
|
+
transform: 7,
|
|
1081
|
+
extract: 7,
|
|
1082
|
+
hunt: 6,
|
|
1083
|
+
// tool reliability drops at 16 — cliff guard fires
|
|
1084
|
+
generate: 6,
|
|
1085
|
+
plan: 5,
|
|
1086
|
+
critique: 4
|
|
1087
|
+
// reasoning depth gap vs Sonnet/Opus
|
|
1088
|
+
}
|
|
855
1089
|
},
|
|
856
1090
|
// ── Google ──
|
|
857
1091
|
{
|
|
@@ -929,7 +1163,131 @@ var PROFILES_RAW = [
|
|
|
929
1163
|
],
|
|
930
1164
|
strengths: ["speed", "volume", "classification", "1m_context", "cost"],
|
|
931
1165
|
weaknesses: ["complex_schemas", "large_tool_sets", "high_context_quality"],
|
|
932
|
-
notes: "Fast and cheap with 1M context. Quality cliffs at 8K context and 20 tools \u2014 guard with cliffs."
|
|
1166
|
+
notes: "Fast and cheap with 1M context. Quality cliffs at 8K context and 20 tools \u2014 guard with cliffs.",
|
|
1167
|
+
// Master plan §6.2 anchor. Tier 0 for hunt (parallel tool throughput
|
|
1168
|
+
// 15-75 calls/step beats Sonnet — L-040), summarize, classify.
|
|
1169
|
+
archetypePerf: {
|
|
1170
|
+
hunt: 9,
|
|
1171
|
+
// L-040: parallel tool throughput 15-75/step
|
|
1172
|
+
classify: 7,
|
|
1173
|
+
// brain-validated, 218 rows
|
|
1174
|
+
summarize: 7,
|
|
1175
|
+
// brain-validated; cliff strips tools when present
|
|
1176
|
+
transform: 7,
|
|
1177
|
+
ask: 7,
|
|
1178
|
+
generate: 6,
|
|
1179
|
+
plan: 5,
|
|
1180
|
+
extract: 6,
|
|
1181
|
+
// alpha.8 MAX_TOKENS history on structured output
|
|
1182
|
+
critique: 4
|
|
1183
|
+
// reasoning shallower than Sonnet/Opus
|
|
1184
|
+
}
|
|
1185
|
+
},
|
|
1186
|
+
{
|
|
1187
|
+
// ── Gemini 2.5 Flash-Lite ──
|
|
1188
|
+
// Onboarded 2026-05-13 (s22) after the model-release watcher surfaced
|
|
1189
|
+
// it as a UNREGISTERED + NEW candidate. Released by Google July 2025,
|
|
1190
|
+
// stable. Positioned BELOW Flash on the cost/perf frontier:
|
|
1191
|
+
// input $0.10/M (Flash $0.30/M) — 3× cheaper
|
|
1192
|
+
// output $0.40/M (Flash $2.50/M) — 6× cheaper
|
|
1193
|
+
// cache $0.01/M — 1/10 of input (vs Flash 0.25 discount)
|
|
1194
|
+
// Cliffs are HYPOTHESIZED from Flash's known failure modes — Flash-Lite
|
|
1195
|
+
// is a smaller sibling, so we inherit Flash's cliffs at equal-or-tighter
|
|
1196
|
+
// thresholds. The brain will validate/relax these as evidence accumulates
|
|
1197
|
+
// per (archetype, model) tuple. Currently ZERO brain rows for this model.
|
|
1198
|
+
id: "gemini-2.5-flash-lite",
|
|
1199
|
+
verifiedAgainstDocs: "2026-05-13",
|
|
1200
|
+
provider: "google",
|
|
1201
|
+
status: "current",
|
|
1202
|
+
maxContextTokens: 1048576,
|
|
1203
|
+
maxOutputTokens: 65536,
|
|
1204
|
+
maxTools: 128,
|
|
1205
|
+
parallelToolCalls: true,
|
|
1206
|
+
structuredOutput: "native",
|
|
1207
|
+
systemPromptMode: "separate",
|
|
1208
|
+
streaming: true,
|
|
1209
|
+
cliffs: [
|
|
1210
|
+
{
|
|
1211
|
+
metric: "input_tokens",
|
|
1212
|
+
threshold: 8e3,
|
|
1213
|
+
action: "downgrade_quality_warning",
|
|
1214
|
+
reason: "Inherited from Flash: quality degrades above ~8K. Smaller model \u2014 likely degrades faster. Re-tune from brain after n\u226520."
|
|
1215
|
+
},
|
|
1216
|
+
{
|
|
1217
|
+
metric: "tool_count",
|
|
1218
|
+
threshold: 10,
|
|
1219
|
+
action: "drop_to_top_relevant",
|
|
1220
|
+
reason: "Conservative: Flash drops at 20, Flash-Lite is smaller \u2014 assume tighter ceiling until brain proves otherwise."
|
|
1221
|
+
},
|
|
1222
|
+
{
|
|
1223
|
+
metric: "thinking_with_short_output",
|
|
1224
|
+
threshold: 1,
|
|
1225
|
+
action: "force_thinking_budget_zero",
|
|
1226
|
+
reason: "Thinking enabled per Google API (thinking: true). Same drain risk as Flash \u2014 thinking tokens consume maxOutputTokens."
|
|
1227
|
+
},
|
|
1228
|
+
{
|
|
1229
|
+
// Strong prior: Flash hit 5/5 empty rate on summarize+tools (s11
|
|
1230
|
+
// trust artifact, kgauto commit 3872832). Flash-Lite shares the
|
|
1231
|
+
// same architectural family — almost certainly inherits this cliff.
|
|
1232
|
+
// Ship the guard preemptively; brain telemetry confirms or relaxes.
|
|
1233
|
+
metric: "tool_count",
|
|
1234
|
+
threshold: 1,
|
|
1235
|
+
whenIntent: "summarize",
|
|
1236
|
+
action: "strip_tools",
|
|
1237
|
+
reason: "Inherited from Flash s11 cliff: summarize+tools \u2192 empty response. Preemptive guard until brain evidence on Flash-Lite specifically."
|
|
1238
|
+
}
|
|
1239
|
+
],
|
|
1240
|
+
costInputPer1m: 0.1,
|
|
1241
|
+
costOutputPer1m: 0.4,
|
|
1242
|
+
lowering: {
|
|
1243
|
+
...GOOGLE_LOWERING_BASE,
|
|
1244
|
+
// Cache discount 10× (vs Flash 4×) — Google's spec is $0.01/M cache vs
|
|
1245
|
+
// $0.10/M input. Material for repeat-prompt workloads (classify shape).
|
|
1246
|
+
cache: { ...GOOGLE_LOWERING_BASE.cache, discount: 0.1 },
|
|
1247
|
+
thinking: { field: "generationConfig.thinkingConfig.thinkingBudget", default: "auto" }
|
|
1248
|
+
},
|
|
1249
|
+
recovery: [
|
|
1250
|
+
{
|
|
1251
|
+
signal: "empty_response_after_tool",
|
|
1252
|
+
action: "retry_with_params",
|
|
1253
|
+
retryParams: { "generationConfig.thinkingConfig.thinkingBudget": 0 },
|
|
1254
|
+
maxRetries: 1,
|
|
1255
|
+
reason: "Known on Flash family: empty after tool result \u2014 retry with thinking off."
|
|
1256
|
+
},
|
|
1257
|
+
{
|
|
1258
|
+
signal: "empty_response",
|
|
1259
|
+
action: "retry_with_params",
|
|
1260
|
+
retryParams: { "generationConfig.thinkingConfig.thinkingBudget": 0 },
|
|
1261
|
+
maxRetries: 1,
|
|
1262
|
+
reason: "Empty response \u2014 try with thinking off."
|
|
1263
|
+
},
|
|
1264
|
+
{
|
|
1265
|
+
signal: "malformed_function_call",
|
|
1266
|
+
action: "escalate",
|
|
1267
|
+
reason: "MALFORMED_FUNCTION_CALL maps to stop \u2014 escalate to next target."
|
|
1268
|
+
}
|
|
1269
|
+
],
|
|
1270
|
+
strengths: ["lowest_cost", "speed", "volume", "classification", "summarize", "1m_context", "cache_friendly"],
|
|
1271
|
+
weaknesses: ["complex_reasoning", "large_tool_sets", "complex_schemas", "structured_output_unproven", "long_context_quality"],
|
|
1272
|
+
notes: "Bottom-frontier anchor on cost: $0.10/$0.40 per 1M tokens, 1M context, 65K max output. Released July 2025 (stable). Positioned for classify / summarize / transform archetypes where quality bar is forgiving. Cliffs inherited from Flash at equal-or-tighter thresholds \u2014 re-tune per (archetype) once brain has n\u226520 rows. Alpha.8 contract layer handles MAX_TOKENS-on-structured-output via fallback chain, so structuredOutput=native is safe to declare even though Flash had alpha.8 history. Cache discount in spec: $0.01/M = 1/10 of input (richer than Flash 25%) \u2014 meaningful for repeat-prompt workloads.",
|
|
1273
|
+
// Tier 3 emergency floor for summarize/classify chains. ZERO brain
|
|
1274
|
+
// rows — all values are starter hypotheses anchored to "smaller
|
|
1275
|
+
// sibling of Flash, at-or-below Flash perf on every archetype." The
|
|
1276
|
+
// first 50 brain rows per archetype will validate or relax these.
|
|
1277
|
+
archetypePerf: {
|
|
1278
|
+
classify: 6,
|
|
1279
|
+
// starter hypothesis — verify (Flash is 7, lite likely ≤)
|
|
1280
|
+
summarize: 6,
|
|
1281
|
+
// starter hypothesis — verify; cliff strips tools
|
|
1282
|
+
transform: 6,
|
|
1283
|
+
// starter hypothesis — verify
|
|
1284
|
+
ask: 5,
|
|
1285
|
+
hunt: 5,
|
|
1286
|
+
generate: 4,
|
|
1287
|
+
extract: 4,
|
|
1288
|
+
plan: 3,
|
|
1289
|
+
critique: 3
|
|
1290
|
+
}
|
|
933
1291
|
},
|
|
934
1292
|
{
|
|
935
1293
|
id: "gemini-2.5-pro",
|
|
@@ -965,7 +1323,21 @@ var PROFILES_RAW = [
|
|
|
965
1323
|
}
|
|
966
1324
|
],
|
|
967
1325
|
strengths: ["reasoning", "1m_context", "structured_output", "tool_use"],
|
|
968
|
-
weaknesses: ["pricing_above_200k"]
|
|
1326
|
+
weaknesses: ["pricing_above_200k"],
|
|
1327
|
+
// Master plan §3.3 anchor: tier-2 cross-provider in almost every chain.
|
|
1328
|
+
// Sits on the frontier at perf-9 — close to Sonnet but cheaper input.
|
|
1329
|
+
archetypePerf: {
|
|
1330
|
+
critique: 9,
|
|
1331
|
+
plan: 9,
|
|
1332
|
+
ask: 8,
|
|
1333
|
+
generate: 8,
|
|
1334
|
+
extract: 8,
|
|
1335
|
+
transform: 8,
|
|
1336
|
+
hunt: 8,
|
|
1337
|
+
// tier 1 cross-provider for hunt chain
|
|
1338
|
+
summarize: 7,
|
|
1339
|
+
classify: 7
|
|
1340
|
+
}
|
|
969
1341
|
},
|
|
970
1342
|
{
|
|
971
1343
|
id: "gemini-3.1-pro-preview",
|
|
@@ -1003,7 +1375,23 @@ var PROFILES_RAW = [
|
|
|
1003
1375
|
],
|
|
1004
1376
|
strengths: ["reasoning", "1m_context", "agentic_coding", "structured_output", "tool_use"],
|
|
1005
1377
|
weaknesses: ["cost", "preview_status", "pricing_above_200k"],
|
|
1006
|
-
notes: "Frontier Gemini (preview, 2026-Q2). Step-change agentic coding per Google. Cache discount 10\xD7 (vs 4\xD7 for 2.5 Pro). Use status=preview to flag rollback path until GA."
|
|
1378
|
+
notes: "Frontier Gemini (preview, 2026-Q2). Step-change agentic coding per Google. Cache discount 10\xD7 (vs 4\xD7 for 2.5 Pro). Use status=preview to flag rollback path until GA.",
|
|
1379
|
+
// Frontier-Gemini preview — bumped one notch over 2.5 Pro on agentic
|
|
1380
|
+
// coding / reasoning per Google's release notes. Preview status:
|
|
1381
|
+
// chains should stay on 2.5 Pro until GA. Starter hypothesis.
|
|
1382
|
+
archetypePerf: {
|
|
1383
|
+
critique: 10,
|
|
1384
|
+
// Google claims step-change on reasoning
|
|
1385
|
+
plan: 10,
|
|
1386
|
+
ask: 9,
|
|
1387
|
+
generate: 9,
|
|
1388
|
+
extract: 9,
|
|
1389
|
+
transform: 8,
|
|
1390
|
+
hunt: 9,
|
|
1391
|
+
// step-change agentic per Google
|
|
1392
|
+
summarize: 8,
|
|
1393
|
+
classify: 7
|
|
1394
|
+
}
|
|
1007
1395
|
},
|
|
1008
1396
|
// ── DeepSeek ──
|
|
1009
1397
|
// 2026-05-08 audit (L-073): DeepSeek's `deepseek-chat` was silently aliased
|
|
@@ -1043,7 +1431,24 @@ var PROFILES_RAW = [
|
|
|
1043
1431
|
],
|
|
1044
1432
|
strengths: ["cost", "1m_context", "json_output", "code", "reasoning"],
|
|
1045
1433
|
weaknesses: ["parallel_tools", "large_tool_sets"],
|
|
1046
|
-
notes: "Cheap workhorse. 1M context, 384k max output. Cache-hit input $0.0028/M (1/50\xD7 of miss). Aliased as `deepseek-chat` (non-thinking) and `deepseek-reasoner` (thinking) \u2014 see ALIASES."
|
|
1434
|
+
notes: "Cheap workhorse. 1M context, 384k max output. Cache-hit input $0.0028/M (1/50\xD7 of miss). Aliased as `deepseek-chat` (non-thinking) and `deepseek-reasoner` (thinking) \u2014 see ALIASES.",
|
|
1435
|
+
// Master plan §6.2 anchor. Brain-validated tier 1 cross-provider for
|
|
1436
|
+
// classify (169 rows, 0% empty). Tier 0 for summarize-with-no-tools.
|
|
1437
|
+
// Falls off on hunt (sequential tools — L-040) and reasoning depth.
|
|
1438
|
+
archetypePerf: {
|
|
1439
|
+
classify: 7,
|
|
1440
|
+
// brain-validated, 169 rows
|
|
1441
|
+
summarize: 7,
|
|
1442
|
+
// archetype-tolerant, no brain evidence yet
|
|
1443
|
+
ask: 6,
|
|
1444
|
+
transform: 6,
|
|
1445
|
+
generate: 5,
|
|
1446
|
+
plan: 5,
|
|
1447
|
+
extract: 5,
|
|
1448
|
+
critique: 4,
|
|
1449
|
+
hunt: 4
|
|
1450
|
+
// sequential tool calls only — L-040
|
|
1451
|
+
}
|
|
1047
1452
|
},
|
|
1048
1453
|
{
|
|
1049
1454
|
id: "deepseek-v4-pro",
|
|
@@ -1079,141 +1484,1360 @@ var PROFILES_RAW = [
|
|
|
1079
1484
|
],
|
|
1080
1485
|
strengths: ["quality", "reasoning", "1m_context", "json_output", "code", "extended_thinking"],
|
|
1081
1486
|
weaknesses: ["parallel_tools", "large_tool_sets"],
|
|
1082
|
-
notes: "Pro tier. 1M context, 384k max output. Regular pricing $1.74/$3.48; 75% promo through 2026-05-31 ($0.435/$0.87). Default mode = thinking."
|
|
1083
|
-
|
|
1084
|
-
|
|
1085
|
-
|
|
1086
|
-
|
|
1087
|
-
|
|
1088
|
-
|
|
1089
|
-
|
|
1090
|
-
|
|
1091
|
-
|
|
1092
|
-
|
|
1093
|
-
|
|
1094
|
-
|
|
1095
|
-
|
|
1096
|
-
|
|
1097
|
-
);
|
|
1098
|
-
function getProfile(id) {
|
|
1099
|
-
const canonical = canonicalId(id);
|
|
1100
|
-
const p = PROFILE_INDEX.get(canonical);
|
|
1101
|
-
if (!p) {
|
|
1102
|
-
const known = [...PROFILE_INDEX.keys(), ...Object.keys(ALIASES)].join(", ");
|
|
1103
|
-
throw new Error(`Unknown model id: "${id}". Known: ${known}`);
|
|
1104
|
-
}
|
|
1105
|
-
return p;
|
|
1106
|
-
}
|
|
1107
|
-
function tryGetProfile(id) {
|
|
1108
|
-
return PROFILE_INDEX.get(canonicalId(id));
|
|
1109
|
-
}
|
|
1110
|
-
function allProfiles() {
|
|
1111
|
-
return PROFILES_RAW;
|
|
1112
|
-
}
|
|
1113
|
-
function profilesByProvider(provider) {
|
|
1114
|
-
return PROFILES_RAW.filter((p) => p.provider === provider);
|
|
1115
|
-
}
|
|
1116
|
-
|
|
1117
|
-
// src/compile.ts
|
|
1118
|
-
var counter = 0;
|
|
1119
|
-
function makeHandle() {
|
|
1120
|
-
counter = (counter + 1) % 1e6;
|
|
1121
|
-
return `c${Date.now().toString(36)}-${counter.toString(36)}-${Math.random().toString(36).slice(2, 6)}`;
|
|
1122
|
-
}
|
|
1123
|
-
function compile(ir, opts = {}) {
|
|
1124
|
-
const resolver = opts.profileResolver ?? getProfile;
|
|
1125
|
-
validateIR(ir);
|
|
1126
|
-
const sliced = passSlice(ir);
|
|
1127
|
-
const deduped = passDedupe(sliced.value);
|
|
1128
|
-
const toolFiltered = passToolRelevance(deduped.value, {
|
|
1129
|
-
threshold: opts.toolRelevanceThreshold
|
|
1130
|
-
});
|
|
1131
|
-
const compressed = passCompressHistory(toolFiltered.value, {
|
|
1132
|
-
summarizeOlderThan: opts.compressHistoryAfter
|
|
1133
|
-
});
|
|
1134
|
-
let workingIR = compressed.value;
|
|
1135
|
-
const accumulatedMutations = [
|
|
1136
|
-
...sliced.mutations,
|
|
1137
|
-
...deduped.mutations,
|
|
1138
|
-
...toolFiltered.mutations,
|
|
1139
|
-
...compressed.mutations
|
|
1140
|
-
];
|
|
1141
|
-
const inputTokens = estimateInputTokens(workingIR);
|
|
1142
|
-
const scores = passScoreTargets(workingIR, {
|
|
1143
|
-
estimatedInputTokens: inputTokens,
|
|
1144
|
-
profilesById: resolver,
|
|
1145
|
-
policy: opts.policy
|
|
1146
|
-
});
|
|
1147
|
-
accumulatedMutations.push(...scores.mutations);
|
|
1148
|
-
const target = pickTarget(workingIR, scores.value);
|
|
1149
|
-
if (!target) {
|
|
1150
|
-
throw new Error(
|
|
1151
|
-
`compile(): no allowed model fits the request. Scores: ${JSON.stringify(scores.value, null, 2)}`
|
|
1152
|
-
);
|
|
1153
|
-
}
|
|
1154
|
-
const profile = resolver(target.modelId);
|
|
1155
|
-
const fallbackChain = scores.value.filter((s) => s.modelId !== target.modelId && s.fits).sort((a, b) => b.rank - a.rank).map((s) => s.modelId);
|
|
1156
|
-
const cliffs = passApplyCliffs(workingIR, profile, inputTokens);
|
|
1157
|
-
workingIR = cliffs.value.ir;
|
|
1158
|
-
accumulatedMutations.push(...cliffs.mutations);
|
|
1159
|
-
const lowered = lower(workingIR, profile, {
|
|
1160
|
-
forceThinkingZero: cliffs.value.loweringHints.forceThinkingZero,
|
|
1161
|
-
forceTerseOutput: cliffs.value.loweringHints.forceTerseOutput
|
|
1162
|
-
});
|
|
1163
|
-
validateFinalFit(workingIR, profile, inputTokens);
|
|
1164
|
-
const handle = makeHandle();
|
|
1165
|
-
const finalShape = computeShape(workingIR, inputTokens);
|
|
1166
|
-
const _learningKey = learningKey(ir.intent.archetype, profile.id, finalShape);
|
|
1167
|
-
return {
|
|
1168
|
-
handle,
|
|
1169
|
-
target: profile.id,
|
|
1170
|
-
provider: profile.provider,
|
|
1171
|
-
request: lowered.request,
|
|
1172
|
-
tokensIn: inputTokens,
|
|
1173
|
-
estimatedCostUsd: target.estimatedCostUsd,
|
|
1174
|
-
mutationsApplied: accumulatedMutations,
|
|
1175
|
-
fallbackChain,
|
|
1176
|
-
diagnostics: {
|
|
1177
|
-
sectionsKept: workingIR.sections.length,
|
|
1178
|
-
sectionsDropped: ir.sections.length - workingIR.sections.length,
|
|
1179
|
-
toolsKept: workingIR.tools?.length ?? 0,
|
|
1180
|
-
toolsDropped: (ir.tools?.length ?? 0) - (workingIR.tools?.length ?? 0),
|
|
1181
|
-
historyKept: workingIR.history?.length ?? 0,
|
|
1182
|
-
historyDropped: (ir.history?.length ?? 0) - (workingIR.history?.length ?? 0),
|
|
1183
|
-
cacheableTokens: lowered.diagnostics.cacheableTokens,
|
|
1184
|
-
estimatedCacheSavingsUsd: lowered.diagnostics.estimatedCacheSavingsUsd
|
|
1487
|
+
notes: "Pro tier. 1M context, 384k max output. Regular pricing $1.74/$3.48; 75% promo through 2026-05-31 ($0.435/$0.87). Default mode = thinking.",
|
|
1488
|
+
// Master plan §3.3: tier 3 cross-provider for plan chain. Reasoning
|
|
1489
|
+
// bumped one notch over V4-Flash; same parallel-tool ceiling.
|
|
1490
|
+
archetypePerf: {
|
|
1491
|
+
plan: 7,
|
|
1492
|
+
// §3.3 tier 3 for plan
|
|
1493
|
+
critique: 6,
|
|
1494
|
+
ask: 7,
|
|
1495
|
+
generate: 6,
|
|
1496
|
+
classify: 7,
|
|
1497
|
+
summarize: 7,
|
|
1498
|
+
extract: 6,
|
|
1499
|
+
transform: 6,
|
|
1500
|
+
hunt: 4
|
|
1501
|
+
// sequential tools — same as V4-Flash
|
|
1185
1502
|
}
|
|
1186
|
-
}
|
|
1187
|
-
|
|
1188
|
-
|
|
1189
|
-
|
|
1190
|
-
|
|
1191
|
-
|
|
1192
|
-
|
|
1193
|
-
|
|
1194
|
-
|
|
1195
|
-
|
|
1196
|
-
|
|
1197
|
-
|
|
1198
|
-
}
|
|
1199
|
-
|
|
1200
|
-
|
|
1201
|
-
|
|
1202
|
-
|
|
1203
|
-
|
|
1204
|
-
|
|
1205
|
-
|
|
1206
|
-
|
|
1207
|
-
|
|
1503
|
+
},
|
|
1504
|
+
// ── OpenAI ──
|
|
1505
|
+
// alpha.16 (2026-05-17): close the half-supported provider gap. env.ts
|
|
1506
|
+
// already registered OPENAI_API_KEY, and executeOpenAI + normalizeOpenAILike
|
|
1507
|
+
// + lowerOpenAI all existed; profile entries were missing, so the
|
|
1508
|
+
// alpha.10 auto-filter would mark openai-keyed models reachable but
|
|
1509
|
+
// there were no profiles to filter IN. Half-supported is now fully
|
|
1510
|
+
// supported. PB request `openai-provider-profiles` (2026-05-16).
|
|
1511
|
+
//
|
|
1512
|
+
// Profile data verified against developers.openai.com/api/docs/pricing
|
|
1513
|
+
// + per-model pages 2026-05-17. L-049/L-081 step-zero: no AI-trained
|
|
1514
|
+
// numbers — fetched live from OpenAI's docs. As of 2026-05, OpenAI's
|
|
1515
|
+
// current flagship is gpt-5.5 (2025-12 cutoff); gpt-5.4-{base,mini,nano}
|
|
1516
|
+
// are the workhorse family. gpt-4.1 + gpt-4o are legacy.
|
|
1517
|
+
//
|
|
1518
|
+
// Both 5.5 and 5.4 carry a 272K input-token pricing cliff (2x input,
|
|
1519
|
+
// 1.5x output beyond that). Modeled as a `downgrade_quality_warning`
|
|
1520
|
+
// cliff because it ranks the model down at large-context shapes — the
|
|
1521
|
+
// semantics of "this model is now 2x more expensive" map onto the
|
|
1522
|
+
// existing penalty mechanism. Cost-watcher will catch high-context
|
|
1523
|
+
// spikes empirically; the cliff prevents naive routing into the doubled
|
|
1524
|
+
// pricing zone.
|
|
1525
|
+
{
|
|
1526
|
+
id: "gpt-5.5",
|
|
1527
|
+
verifiedAgainstDocs: "2026-05-17",
|
|
1528
|
+
provider: "openai",
|
|
1529
|
+
status: "current",
|
|
1530
|
+
maxContextTokens: 105e4,
|
|
1531
|
+
maxOutputTokens: 128e3,
|
|
1532
|
+
maxTools: 64,
|
|
1533
|
+
parallelToolCalls: true,
|
|
1534
|
+
structuredOutput: "native",
|
|
1535
|
+
systemPromptMode: "inline",
|
|
1536
|
+
streaming: true,
|
|
1537
|
+
cliffs: [
|
|
1538
|
+
{
|
|
1539
|
+
metric: "input_tokens",
|
|
1540
|
+
threshold: 272e3,
|
|
1541
|
+
action: "downgrade_quality_warning",
|
|
1542
|
+
reason: "OpenAI pricing tier shift: >272K input tokens billed at 2x input + 1.5x output rates"
|
|
1543
|
+
}
|
|
1544
|
+
],
|
|
1545
|
+
costInputPer1m: 5,
|
|
1546
|
+
costOutputPer1m: 30,
|
|
1547
|
+
lowering: {
|
|
1548
|
+
system: { mode: "inline" },
|
|
1549
|
+
// OpenAI caching is implicit (auto-applied to repeated prefixes
|
|
1550
|
+
// ≥1024 tokens; hits are reported in prompt_tokens_details.cached_tokens). No
|
|
1551
|
+
// wire-format marker. Discount: 10x for cached input ($0.50/$5.00).
|
|
1552
|
+
cache: { strategy: "unsupported", minTokens: 1024, discount: 0.1 },
|
|
1553
|
+
tools: { format: "openai" }
|
|
1554
|
+
},
|
|
1555
|
+
recovery: [
|
|
1556
|
+
{ signal: "rate_limit", action: "escalate", reason: "429 \u2014 escalate to fallback chain" },
|
|
1557
|
+
{ signal: "model_not_found", action: "escalate", reason: "Model deprecated/renamed \u2014 escalate (L-061)" }
|
|
1558
|
+
],
|
|
1559
|
+
strengths: ["reasoning", "agentic_coding", "long_context", "structured_output", "reliable_tool_use", "reasoning_effort_knob"],
|
|
1560
|
+
weaknesses: ["cost", "pricing_cliff_at_272k"],
|
|
1561
|
+
notes: "OpenAI frontier (2026-05). 1M context (1.05M total), 128K max output, 2025-12 cutoff. Reasoning effort knob (none/low/medium/high/xhigh). Pricing cliff at 272K input.",
|
|
1562
|
+
// Frontier-tier perf hypothesis. Anchored to Opus 4.7 row (similar
|
|
1563
|
+
// price/positioning). Brain evidence will refine; no telemetry yet.
|
|
1564
|
+
archetypePerf: {
|
|
1565
|
+
critique: 9,
|
|
1566
|
+
plan: 9,
|
|
1567
|
+
generate: 9,
|
|
1568
|
+
ask: 9,
|
|
1569
|
+
extract: 9,
|
|
1570
|
+
transform: 9,
|
|
1571
|
+
hunt: 8,
|
|
1572
|
+
// parallel tool support good but cliff at 272K hurts deep multi-step
|
|
1573
|
+
summarize: 7,
|
|
1574
|
+
// overkill for tolerant archetype
|
|
1575
|
+
classify: 7
|
|
1576
|
+
// overkill; cheaper models cover this
|
|
1208
1577
|
}
|
|
1209
|
-
}
|
|
1210
|
-
|
|
1211
|
-
|
|
1212
|
-
|
|
1213
|
-
|
|
1214
|
-
|
|
1215
|
-
|
|
1216
|
-
|
|
1578
|
+
},
|
|
1579
|
+
{
|
|
1580
|
+
id: "gpt-5.4",
|
|
1581
|
+
verifiedAgainstDocs: "2026-05-17",
|
|
1582
|
+
provider: "openai",
|
|
1583
|
+
status: "current",
|
|
1584
|
+
maxContextTokens: 105e4,
|
|
1585
|
+
maxOutputTokens: 128e3,
|
|
1586
|
+
maxTools: 64,
|
|
1587
|
+
parallelToolCalls: true,
|
|
1588
|
+
structuredOutput: "native",
|
|
1589
|
+
systemPromptMode: "inline",
|
|
1590
|
+
streaming: true,
|
|
1591
|
+
cliffs: [
|
|
1592
|
+
{
|
|
1593
|
+
metric: "input_tokens",
|
|
1594
|
+
threshold: 272e3,
|
|
1595
|
+
action: "downgrade_quality_warning",
|
|
1596
|
+
reason: "OpenAI pricing tier shift: >272K input tokens billed at 2x input + 1.5x output rates"
|
|
1597
|
+
}
|
|
1598
|
+
],
|
|
1599
|
+
costInputPer1m: 2.5,
|
|
1600
|
+
costOutputPer1m: 15,
|
|
1601
|
+
lowering: {
|
|
1602
|
+
system: { mode: "inline" },
|
|
1603
|
+
cache: { strategy: "unsupported", minTokens: 1024, discount: 0.1 },
|
|
1604
|
+
tools: { format: "openai" }
|
|
1605
|
+
},
|
|
1606
|
+
recovery: [
|
|
1607
|
+
{ signal: "rate_limit", action: "escalate", reason: "429 \u2014 escalate to fallback chain" },
|
|
1608
|
+
{ signal: "model_not_found", action: "escalate", reason: "Model deprecated/renamed \u2014 escalate (L-061)" }
|
|
1609
|
+
],
|
|
1610
|
+
strengths: ["reasoning", "long_context", "structured_output", "reliable_tool_use"],
|
|
1611
|
+
weaknesses: ["pricing_cliff_at_272k"],
|
|
1612
|
+
notes: "OpenAI workhorse (2026-05). 1M context (1.05M total), 128K max output, 2025-08 cutoff. Pricing cliff at 272K input. Pairs cleanly with Sonnet 4.6 on cost ($2.50/$15.00 vs $3.00/$15.00).",
|
|
1613
|
+
// Anchored to Sonnet 4.6 row (similar price/positioning). Slight
|
|
1614
|
+
// Anthropic-side edge on agentic coding, per the master plan's informal guidance.
|
|
1615
|
+
archetypePerf: {
|
|
1616
|
+
critique: 8,
|
|
1617
|
+
plan: 8,
|
|
1618
|
+
generate: 8,
|
|
1619
|
+
ask: 8,
|
|
1620
|
+
extract: 8,
|
|
1621
|
+
transform: 8,
|
|
1622
|
+
hunt: 7,
|
|
1623
|
+
summarize: 7,
|
|
1624
|
+
classify: 7
|
|
1625
|
+
}
|
|
1626
|
+
},
|
|
1627
|
+
{
|
|
1628
|
+
id: "gpt-5.4-mini",
|
|
1629
|
+
verifiedAgainstDocs: "2026-05-17",
|
|
1630
|
+
provider: "openai",
|
|
1631
|
+
status: "current",
|
|
1632
|
+
maxContextTokens: 4e5,
|
|
1633
|
+
maxOutputTokens: 128e3,
|
|
1634
|
+
maxTools: 64,
|
|
1635
|
+
parallelToolCalls: true,
|
|
1636
|
+
structuredOutput: "native",
|
|
1637
|
+
systemPromptMode: "inline",
|
|
1638
|
+
streaming: true,
|
|
1639
|
+
cliffs: [],
|
|
1640
|
+
costInputPer1m: 0.75,
|
|
1641
|
+
costOutputPer1m: 4.5,
|
|
1642
|
+
lowering: {
|
|
1643
|
+
system: { mode: "inline" },
|
|
1644
|
+
cache: { strategy: "unsupported", minTokens: 1024, discount: 0.1 },
|
|
1645
|
+
tools: { format: "openai" }
|
|
1646
|
+
},
|
|
1647
|
+
recovery: [
|
|
1648
|
+
{ signal: "rate_limit", action: "escalate", reason: "429 \u2014 escalate to fallback chain" },
|
|
1649
|
+
{ signal: "model_not_found", action: "escalate", reason: "Model deprecated/renamed \u2014 escalate (L-061)" }
|
|
1650
|
+
],
|
|
1651
|
+
strengths: ["cost", "speed", "agentic_coding", "structured_output", "reliable_tool_use"],
|
|
1652
|
+
weaknesses: ["reasoning_depth"],
|
|
1653
|
+
notes: "OpenAI mini-tier (2026-05). 400K context, 128K max output, 2025-08 cutoff. OpenAI describes as 'strongest mini model for coding, computer use, subagents.' Cache discount 10x ($0.075 input).",
|
|
1654
|
+
// Mini-tier hypothesis. Anchored to Haiku 4.5 + Flash row pricing.
|
|
1655
|
+
// Cost is slightly higher than Haiku ($0.75 vs $0.50 input) but
|
|
1656
|
+
// OpenAI claims strong coding/subagent perf.
|
|
1657
|
+
archetypePerf: {
|
|
1658
|
+
ask: 7,
|
|
1659
|
+
generate: 7,
|
|
1660
|
+
extract: 7,
|
|
1661
|
+
transform: 7,
|
|
1662
|
+
classify: 7,
|
|
1663
|
+
summarize: 7,
|
|
1664
|
+
hunt: 7,
|
|
1665
|
+
plan: 6,
|
|
1666
|
+
critique: 5
|
|
1667
|
+
// reasoning depth gap — frontier models handle this
|
|
1668
|
+
}
|
|
1669
|
+
},
|
|
1670
|
+
{
|
|
1671
|
+
id: "gpt-5.4-nano",
|
|
1672
|
+
verifiedAgainstDocs: "2026-05-17",
|
|
1673
|
+
provider: "openai",
|
|
1674
|
+
status: "current",
|
|
1675
|
+
maxContextTokens: 4e5,
|
|
1676
|
+
maxOutputTokens: 128e3,
|
|
1677
|
+
maxTools: 64,
|
|
1678
|
+
parallelToolCalls: true,
|
|
1679
|
+
structuredOutput: "native",
|
|
1680
|
+
systemPromptMode: "inline",
|
|
1681
|
+
streaming: true,
|
|
1682
|
+
cliffs: [],
|
|
1683
|
+
costInputPer1m: 0.2,
|
|
1684
|
+
costOutputPer1m: 1.25,
|
|
1685
|
+
lowering: {
|
|
1686
|
+
system: { mode: "inline" },
|
|
1687
|
+
cache: { strategy: "unsupported", minTokens: 1024, discount: 0.1 },
|
|
1688
|
+
tools: { format: "openai" }
|
|
1689
|
+
},
|
|
1690
|
+
recovery: [
|
|
1691
|
+
{ signal: "rate_limit", action: "escalate", reason: "429 \u2014 escalate to fallback chain" },
|
|
1692
|
+
{ signal: "model_not_found", action: "escalate", reason: "Model deprecated/renamed \u2014 escalate (L-061)" }
|
|
1693
|
+
],
|
|
1694
|
+
strengths: ["cost", "speed", "volume", "structured_output"],
|
|
1695
|
+
weaknesses: ["reasoning_depth", "no_computer_use"],
|
|
1696
|
+
notes: "OpenAI nano-tier (2026-05). 400K context, 128K max output, 2025-08 cutoff. 'Cheapest GPT-5.4-class for simple high-volume tasks.' No fine-tuning, no computer-use tools. Cache discount 10x.",
|
|
1697
|
+
// Nano-tier. Anchored to Flash-Lite row ($0.10/$0.40 vs nano's
|
|
1698
|
+
// $0.20/$1.25). Slightly more expensive than Flash-Lite but with
|
|
1699
|
+
// OpenAI brand reliability. Good fit for classify/summarize floor.
|
|
1700
|
+
archetypePerf: {
|
|
1701
|
+
classify: 7,
|
|
1702
|
+
summarize: 6,
|
|
1703
|
+
ask: 6,
|
|
1704
|
+
transform: 6,
|
|
1705
|
+
extract: 6,
|
|
1706
|
+
generate: 5,
|
|
1707
|
+
hunt: 5,
|
|
1708
|
+
plan: 4,
|
|
1709
|
+
critique: 3
|
|
1710
|
+
// not for reasoning archetypes
|
|
1711
|
+
}
|
|
1712
|
+
},
|
|
1713
|
+
// ── Auto-onboarded (UNVERIFIED unless the entry carries its own verification note) ──
|
|
1714
|
+
// Cloned by scripts/auto-onboard-models.mjs from a same-family template.
|
|
1715
|
+
// Each entry's pricing/context/cliffs/lowering reflects the template, NOT
|
|
1716
|
+
// provider docs. Verify before promoting status to 'current' (L-049/L-081).
|
|
1717
|
+
{
|
|
1718
|
+
// s37 (2026-05-21): UNVERIFIED-AUTO-ONBOARD → verified against
|
|
1719
|
+
// ai.google.dev/gemini-api/docs/models/gemini-3-flash-preview +
|
|
1720
|
+
// ai.google.dev/gemini-api/docs/pricing. L-081 catches:
|
|
1721
|
+
// maxOutputTokens 65_535 → 65_536 (off-by-one)
|
|
1722
|
+
// costInputPer1m 0.30 → 0.50 (template-cloned from 2.5-flash; actual is 1.67× more expensive)
|
|
1723
|
+
// costOutputPer1m 2.50 → 3.00 (template-cloned; actual 1.2× more expensive)
|
|
1724
|
+
// cache discount default 0.25 → 0.10 (10× discount, $0.05/$0.50 per docs)
|
|
1725
|
+
// Cliffs inherited from 2.5-flash conservatively. The 8K-context-quality
|
|
1726
|
+
// cliff was a 2.5-Flash observation — Google positions Gemini 3 as
|
|
1727
|
+
// sustained-frontier-on-long-context; brain evidence will validate/relax.
|
|
1728
|
+
// Kept as guard for now.
|
|
1729
|
+
id: "gemini-3-flash-preview",
|
|
1730
|
+
verifiedAgainstDocs: "2026-05-21",
|
|
1731
|
+
provider: "google",
|
|
1732
|
+
status: "preview",
|
|
1733
|
+
maxContextTokens: 1048576,
|
|
1734
|
+
maxOutputTokens: 65536,
|
|
1735
|
+
maxTools: 128,
|
|
1736
|
+
parallelToolCalls: true,
|
|
1737
|
+
structuredOutput: "native",
|
|
1738
|
+
systemPromptMode: "separate",
|
|
1739
|
+
streaming: true,
|
|
1740
|
+
cliffs: [
|
|
1741
|
+
{
|
|
1742
|
+
metric: "input_tokens",
|
|
1743
|
+
threshold: 8e3,
|
|
1744
|
+
action: "downgrade_quality_warning",
|
|
1745
|
+
reason: "Inherited from 2.5-flash guard; brain evidence on Gemini 3 long-context quality will validate/relax"
|
|
1746
|
+
},
|
|
1747
|
+
{
|
|
1748
|
+
metric: "tool_count",
|
|
1749
|
+
threshold: 20,
|
|
1750
|
+
action: "drop_to_top_relevant",
|
|
1751
|
+
reason: "Tool reliability drops above ~20 tools (despite 128 hard limit) \u2014 inherited from Flash family"
|
|
1752
|
+
},
|
|
1753
|
+
{
|
|
1754
|
+
metric: "thinking_with_short_output",
|
|
1755
|
+
threshold: 1,
|
|
1756
|
+
action: "force_thinking_budget_zero",
|
|
1757
|
+
reason: "Thinking tokens consume maxOutputTokens \u2014 empty response if drained"
|
|
1758
|
+
},
|
|
1759
|
+
{
|
|
1760
|
+
// Inherited from gemini-2.5-flash s11 trust artifact. Family-likely
|
|
1761
|
+
// failure mode for Flash architecture. Keep preemptively until brain
|
|
1762
|
+
// evidence on Gemini 3 specifically.
|
|
1763
|
+
metric: "tool_count",
|
|
1764
|
+
threshold: 1,
|
|
1765
|
+
whenIntent: "summarize",
|
|
1766
|
+
action: "strip_tools",
|
|
1767
|
+
reason: "Inherited from 2.5-flash s11 cliff: summarize+tools \u2192 empty response. Preemptive guard until brain evidence on 3-flash-preview specifically."
|
|
1768
|
+
}
|
|
1769
|
+
],
|
|
1770
|
+
costInputPer1m: 0.5,
|
|
1771
|
+
costOutputPer1m: 3,
|
|
1772
|
+
lowering: {
|
|
1773
|
+
...GOOGLE_LOWERING_BASE,
|
|
1774
|
+
// 10× cache discount per Google pricing: $0.05/M cached vs $0.50/M input.
|
|
1775
|
+
cache: { ...GOOGLE_LOWERING_BASE.cache, discount: 0.1 },
|
|
1776
|
+
thinking: { field: "generationConfig.thinkingConfig.thinkingBudget", default: "auto" }
|
|
1777
|
+
},
|
|
1778
|
+
recovery: [
|
|
1779
|
+
{
|
|
1780
|
+
signal: "empty_response_after_tool",
|
|
1781
|
+
action: "retry_with_params",
|
|
1782
|
+
retryParams: { "generationConfig.thinkingConfig.thinkingBudget": 0 },
|
|
1783
|
+
maxRetries: 1,
|
|
1784
|
+
reason: "Known: empty after tool result \u2014 retry with thinking off"
|
|
1785
|
+
},
|
|
1786
|
+
{
|
|
1787
|
+
signal: "empty_response",
|
|
1788
|
+
action: "retry_with_params",
|
|
1789
|
+
retryParams: { "generationConfig.thinkingConfig.thinkingBudget": 0 },
|
|
1790
|
+
maxRetries: 1,
|
|
1791
|
+
reason: "Empty response \u2014 try with thinking off"
|
|
1792
|
+
},
|
|
1793
|
+
{
|
|
1794
|
+
signal: "malformed_function_call",
|
|
1795
|
+
action: "escalate",
|
|
1796
|
+
reason: "MALFORMED_FUNCTION_CALL maps to stop \u2014 escalate to next target"
|
|
1797
|
+
}
|
|
1798
|
+
],
|
|
1799
|
+
strengths: ["speed", "volume", "classification", "1m_context", "cost"],
|
|
1800
|
+
weaknesses: ["complex_schemas", "large_tool_sets", "high_context_quality"],
|
|
1801
|
+
notes: "Verified s37 (2026-05-21) against Google docs. Step-change positioning vs 2.5-flash on agentic loops per Google's release notes (Dec 2025). Pricing 1.67\xD7/1.2\xD7 higher than 2.5-flash; cache discount 10\xD7 (vs 4\xD7 for 2.5). Status=preview until brain evidence accumulates.",
|
|
1802
|
+
// Anchored to 2.5-flash archetypePerf as starter, with judgment adjustments
|
|
1803
|
+
// for Google's "step-change on agentic" positioning. Brain evidence (zero
|
|
1804
|
+
// rows today) will replace these starter values.
|
|
1805
|
+
archetypePerf: {
|
|
1806
|
+
hunt: 9,
|
|
1807
|
+
// Inherits 2.5-flash L-040 parallel-tool tier; Google positions 3 as agentic-loop upgrade
|
|
1808
|
+
classify: 7,
|
|
1809
|
+
// Inherits 2.5-flash brain-validated tier (218 rows on 2.5)
|
|
1810
|
+
summarize: 7,
|
|
1811
|
+
// Inherits 2.5-flash; cliff strips tools when present
|
|
1812
|
+
transform: 7,
|
|
1813
|
+
ask: 8,
|
|
1814
|
+
// +1 vs 2.5-flash — sustained-frontier positioning
|
|
1815
|
+
generate: 7,
|
|
1816
|
+
// +1 vs 2.5-flash — agentic coding upgrade per Google
|
|
1817
|
+
plan: 6,
|
|
1818
|
+
// +1 vs 2.5-flash — complex iterations per positioning
|
|
1819
|
+
extract: 6,
|
|
1820
|
+
critique: 5
|
|
1821
|
+
// +1 vs 2.5-flash — but still below Sonnet/Opus reasoning floor
|
|
1822
|
+
}
|
|
1823
|
+
},
|
|
1824
|
+
{
|
|
1825
|
+
// ── Gemini 3.1 Flash-Lite ──
|
|
1826
|
+
// Onboarded 2026-05-16 by auto-onboarder; s37 (2026-05-21) verified
|
|
1827
|
+
// against ai.google.dev/gemini-api/docs/pricing.
|
|
1828
|
+
//
|
|
1829
|
+
// L-081 CATCHES (template clone from 2.5-flash-lite was 2.5-3.75× too cheap):
|
|
1830
|
+
// costInputPer1m 0.10 → 0.25 (template clone undervalued by 2.5×)
|
|
1831
|
+
// costOutputPer1m 0.40 → 1.50 (template clone undervalued by 3.75×)
|
|
1832
|
+
//
|
|
1833
|
+
// Real 3.1-flash-lite is NOT a cost-equivalent successor to 2.5-flash-lite —
|
|
1834
|
+
// it sits between 2.5-flash-lite ($0.10/$0.40) and 2.5-flash ($0.30/$2.50).
|
|
1835
|
+
// Cache discount 10× verified ($0.025/M cached vs $0.25/M input).
|
|
1836
|
+
//
|
|
1837
|
+
// Cliffs are HYPOTHESIZED from 2.5-flash family; brain evidence pending.
|
|
1838
|
+
id: "gemini-3.1-flash-lite",
|
|
1839
|
+
verifiedAgainstDocs: "2026-05-21",
|
|
1840
|
+
provider: "google",
|
|
1841
|
+
status: "preview",
|
|
1842
|
+
maxContextTokens: 1048576,
|
|
1843
|
+
maxOutputTokens: 65536,
|
|
1844
|
+
maxTools: 128,
|
|
1845
|
+
parallelToolCalls: true,
|
|
1846
|
+
structuredOutput: "native",
|
|
1847
|
+
systemPromptMode: "separate",
|
|
1848
|
+
streaming: true,
|
|
1849
|
+
cliffs: [
|
|
1850
|
+
{
|
|
1851
|
+
metric: "input_tokens",
|
|
1852
|
+
threshold: 8e3,
|
|
1853
|
+
action: "downgrade_quality_warning",
|
|
1854
|
+
reason: "Inherited from Flash: quality degrades above ~8K. Smaller model \u2014 likely degrades faster. Re-tune from brain after n\u226520."
|
|
1855
|
+
},
|
|
1856
|
+
{
|
|
1857
|
+
metric: "tool_count",
|
|
1858
|
+
threshold: 10,
|
|
1859
|
+
action: "drop_to_top_relevant",
|
|
1860
|
+
reason: "Conservative: Flash drops at 20, Flash-Lite is smaller \u2014 assume tighter ceiling until brain proves otherwise."
|
|
1861
|
+
},
|
|
1862
|
+
{
|
|
1863
|
+
metric: "thinking_with_short_output",
|
|
1864
|
+
threshold: 1,
|
|
1865
|
+
action: "force_thinking_budget_zero",
|
|
1866
|
+
reason: "Thinking enabled per Google API (thinking: true). Same drain risk as Flash \u2014 thinking tokens consume maxOutputTokens."
|
|
1867
|
+
},
|
|
1868
|
+
{
|
|
1869
|
+
// Strong prior: Flash hit 5/5 empty rate on summarize+tools (s11
|
|
1870
|
+
// trust artifact, kgauto commit 3872832). Flash-Lite shares the
|
|
1871
|
+
// same architectural family — almost certainly inherits this cliff.
|
|
1872
|
+
// Ship the guard preemptively; brain telemetry confirms or relaxes.
|
|
1873
|
+
metric: "tool_count",
|
|
1874
|
+
threshold: 1,
|
|
1875
|
+
whenIntent: "summarize",
|
|
1876
|
+
action: "strip_tools",
|
|
1877
|
+
reason: "Inherited from Flash s11 cliff: summarize+tools \u2192 empty response. Preemptive guard until brain evidence on Flash-Lite specifically."
|
|
1878
|
+
}
|
|
1879
|
+
],
|
|
1880
|
+
costInputPer1m: 0.25,
|
|
1881
|
+
costOutputPer1m: 1.5,
|
|
1882
|
+
lowering: {
|
|
1883
|
+
...GOOGLE_LOWERING_BASE,
|
|
1884
|
+
// Cache discount 10× (vs Flash 4×) — Google docs s37: $0.025/M cached vs
|
|
1885
|
+
// $0.25/M input. Material for repeat-prompt workloads (classify shape).
|
|
1886
|
+
cache: { ...GOOGLE_LOWERING_BASE.cache, discount: 0.1 },
|
|
1887
|
+
thinking: { field: "generationConfig.thinkingConfig.thinkingBudget", default: "auto" }
|
|
1888
|
+
},
|
|
1889
|
+
recovery: [
|
|
1890
|
+
{
|
|
1891
|
+
signal: "empty_response_after_tool",
|
|
1892
|
+
action: "retry_with_params",
|
|
1893
|
+
retryParams: { "generationConfig.thinkingConfig.thinkingBudget": 0 },
|
|
1894
|
+
maxRetries: 1,
|
|
1895
|
+
reason: "Known on Flash family: empty after tool result \u2014 retry with thinking off."
|
|
1896
|
+
},
|
|
1897
|
+
{
|
|
1898
|
+
signal: "empty_response",
|
|
1899
|
+
action: "retry_with_params",
|
|
1900
|
+
retryParams: { "generationConfig.thinkingConfig.thinkingBudget": 0 },
|
|
1901
|
+
maxRetries: 1,
|
|
1902
|
+
reason: "Empty response \u2014 try with thinking off."
|
|
1903
|
+
},
|
|
1904
|
+
{
|
|
1905
|
+
signal: "malformed_function_call",
|
|
1906
|
+
action: "escalate",
|
|
1907
|
+
reason: "MALFORMED_FUNCTION_CALL maps to stop \u2014 escalate to next target."
|
|
1908
|
+
}
|
|
1909
|
+
],
|
|
1910
|
+
strengths: ["low_cost", "speed", "volume", "classification", "summarize", "1m_context", "cache_friendly"],
|
|
1911
|
+
weaknesses: ["complex_reasoning", "large_tool_sets", "complex_schemas", "structured_output_unproven", "long_context_quality"],
|
|
1912
|
+
notes: "Verified s37 (2026-05-21) against Google docs. Sits between 2.5-flash-lite (cheaper) and 2.5-flash (more expensive) on cost frontier; 2.5\xD7 more expensive than initial template-clone. Cliffs hypothesized from Flash family \u2014 brain evidence pending.",
|
|
1913
|
+
// Tier 2-3 floor for summarize/classify chains at the new (verified) price
|
|
1914
|
+
// point. ZERO brain rows — values are starter hypotheses anchored to
|
|
1915
|
+
// "smaller sibling of Flash at higher cost than 2.5-flash-lite." The first
|
|
1916
|
+
// 50 brain rows per archetype will validate or relax these.
|
|
1917
|
+
archetypePerf: {
|
|
1918
|
+
classify: 6,
|
|
1919
|
+
// starter hypothesis — verify (Flash is 7; Lite likely ≤ 7)
|
|
1920
|
+
summarize: 6,
|
|
1921
|
+
// starter hypothesis — verify; cliff strips tools
|
|
1922
|
+
transform: 6,
|
|
1923
|
+
// starter hypothesis — verify
|
|
1924
|
+
ask: 5,
|
|
1925
|
+
hunt: 5,
|
|
1926
|
+
generate: 4,
|
|
1927
|
+
extract: 4,
|
|
1928
|
+
plan: 3,
|
|
1929
|
+
critique: 3
|
|
1930
|
+
}
|
|
1931
|
+
},
|
|
1932
|
+
// ─────────────────────────────────────────────────────────────────────────
|
|
1933
|
+
// Gemini 3.5 Flash — hand-onboarded s37 (2026-05-21)
|
|
1934
|
+
//
|
|
1935
|
+
// Google positioning ("Most intelligent for sustained frontier performance
|
|
1936
|
+
// on agentic and coding tasks" / "particularly effective for rapid agentic
|
|
1937
|
+
// loops involving complex coding cycles and iterations") suggests this is
|
|
1938
|
+
// the Flash-family upgrade specifically aimed at hunt-shape workloads.
|
|
1939
|
+
// Pricing 5× input / 3.6× output vs 2.5-flash — material cost premium.
|
|
1940
|
+
// archetypePerf adjusted +1 vs 2.5-flash on ask/generate/plan/critique
|
|
1941
|
+
// (sustained-frontier positioning); hunt held at 9 inherited from L-040
|
|
1942
|
+
// family parallel-tool tier; brain evidence will validate within 50 rows.
|
|
1943
|
+
//
|
|
1944
|
+
// Cliffs inherited conservatively from 2.5-flash. Google's "sustained
|
|
1945
|
+
// frontier on long-context" positioning suggests the 8K cliff may not
|
|
1946
|
+
// apply to 3.5 — keep as guard until brain evidence shows otherwise.
|
|
1947
|
+
//
|
|
1948
|
+
// Specs verified against:
|
|
1949
|
+
// ai.google.dev/gemini-api/docs/models/gemini-3.5-flash
|
|
1950
|
+
// ai.google.dev/gemini-api/docs/pricing (Standard tier)
|
|
1951
|
+
// ─────────────────────────────────────────────────────────────────────────
|
|
1952
|
+
{
|
|
1953
|
+
id: "gemini-3.5-flash",
|
|
1954
|
+
verifiedAgainstDocs: "2026-05-21",
|
|
1955
|
+
provider: "google",
|
|
1956
|
+
status: "current",
|
|
1957
|
+
maxContextTokens: 1048576,
|
|
1958
|
+
maxOutputTokens: 65536,
|
|
1959
|
+
maxTools: 128,
|
|
1960
|
+
parallelToolCalls: true,
|
|
1961
|
+
structuredOutput: "native",
|
|
1962
|
+
systemPromptMode: "separate",
|
|
1963
|
+
streaming: true,
|
|
1964
|
+
cliffs: [
|
|
1965
|
+
{
|
|
1966
|
+
metric: "input_tokens",
|
|
1967
|
+
threshold: 8e3,
|
|
1968
|
+
action: "downgrade_quality_warning",
|
|
1969
|
+
reason: "Inherited from 2.5-flash guard; Google positions 3.5 as sustained-frontier-on-long-context but brain evidence pending"
|
|
1970
|
+
},
|
|
1971
|
+
{
|
|
1972
|
+
metric: "tool_count",
|
|
1973
|
+
threshold: 20,
|
|
1974
|
+
action: "drop_to_top_relevant",
|
|
1975
|
+
reason: "Inherited from Flash family: tool reliability drops above ~20 (despite 128 hard limit). Validate per (archetype, model) after n\u226520."
|
|
1976
|
+
},
|
|
1977
|
+
{
|
|
1978
|
+
metric: "thinking_with_short_output",
|
|
1979
|
+
threshold: 1,
|
|
1980
|
+
action: "force_thinking_budget_zero",
|
|
1981
|
+
reason: "Thinking mode supported per Google docs; same drain risk as 2.5-flash \u2014 thinking tokens consume maxOutputTokens"
|
|
1982
|
+
},
|
|
1983
|
+
{
|
|
1984
|
+
// Inherited from 2.5-flash s11 trust artifact (5/5 empty rate on
|
|
1985
|
+
// tt-intelligence/summarize/gemini-2.5-flash with tools offered).
|
|
1986
|
+
// Family-likely failure mode for Flash architecture across versions.
|
|
1987
|
+
// Keep preemptively until brain evidence on 3.5-flash specifically.
|
|
1988
|
+
metric: "tool_count",
|
|
1989
|
+
threshold: 1,
|
|
1990
|
+
whenIntent: "summarize",
|
|
1991
|
+
action: "strip_tools",
|
|
1992
|
+
reason: "Inherited from 2.5-flash s11 cliff (kgauto commit 3872832): summarize+tools \u2192 empty response. Preemptive guard until brain evidence on 3.5-flash specifically."
|
|
1993
|
+
}
|
|
1994
|
+
],
|
|
1995
|
+
costInputPer1m: 1.5,
|
|
1996
|
+
costOutputPer1m: 9,
|
|
1997
|
+
lowering: {
|
|
1998
|
+
...GOOGLE_LOWERING_BASE,
|
|
1999
|
+
// 10× cache discount per Google pricing: $0.15/M cached vs $1.50/M input.
|
|
2000
|
+
cache: { ...GOOGLE_LOWERING_BASE.cache, discount: 0.1 },
|
|
2001
|
+
thinking: { field: "generationConfig.thinkingConfig.thinkingBudget", default: "auto" }
|
|
2002
|
+
},
|
|
2003
|
+
recovery: [
|
|
2004
|
+
{
|
|
2005
|
+
signal: "empty_response_after_tool",
|
|
2006
|
+
action: "retry_with_params",
|
|
2007
|
+
retryParams: { "generationConfig.thinkingConfig.thinkingBudget": 0 },
|
|
2008
|
+
maxRetries: 1,
|
|
2009
|
+
reason: "Inherited Flash-family pattern: empty after tool result \u2014 retry with thinking off"
|
|
2010
|
+
},
|
|
2011
|
+
{
|
|
2012
|
+
signal: "empty_response",
|
|
2013
|
+
action: "retry_with_params",
|
|
2014
|
+
retryParams: { "generationConfig.thinkingConfig.thinkingBudget": 0 },
|
|
2015
|
+
maxRetries: 1,
|
|
2016
|
+
reason: "Empty response \u2014 try with thinking off"
|
|
2017
|
+
},
|
|
2018
|
+
{
|
|
2019
|
+
signal: "malformed_function_call",
|
|
2020
|
+
action: "escalate",
|
|
2021
|
+
reason: "MALFORMED_FUNCTION_CALL maps to stop \u2014 escalate to next target"
|
|
2022
|
+
}
|
|
2023
|
+
],
|
|
2024
|
+
strengths: ["agentic_loops", "coding", "1m_context", "parallel_tools", "thinking_mode", "sustained_frontier"],
|
|
2025
|
+
weaknesses: ["cost_vs_2_5_flash", "no_brain_evidence_yet"],
|
|
2026
|
+
notes: "Hand-onboarded s37 (2026-05-21) verified against Google docs. Stable status; positioned as Flash-family upgrade for agentic loops and coding. 5\xD7/3.6\xD7 more expensive than 2.5-flash but Google claims step-change on sustained frontier work. archetypePerf adjustments are judgment-grounded starter hypotheses \u2014 brain evidence will validate within ~50 rows per archetype.",
|
|
2027
|
+
// Starter hypothesis: anchored to 2.5-flash archetypePerf with +1
|
|
2028
|
+
// adjustments where Google's positioning explicitly supports
|
|
2029
|
+
// (agentic/coding/sustained). Hunt held at 9 inherited from L-040 family
|
|
2030
|
+
// parallel-tool tier. Brain evidence will replace.
|
|
2031
|
+
archetypePerf: {
|
|
2032
|
+
hunt: 9,
|
|
2033
|
+
// Inherited from 2.5-flash L-040 parallel-tool tier; Google positions 3.5 as agentic-loop champion
|
|
2034
|
+
classify: 7,
|
|
2035
|
+
// Inherited from 2.5-flash brain-validated tier (218 rows on 2.5)
|
|
2036
|
+
summarize: 7,
|
|
2037
|
+
// Inherited from 2.5-flash; cliff strips tools when present
|
|
2038
|
+
transform: 7,
|
|
2039
|
+
ask: 8,
|
|
2040
|
+
// +1 vs 2.5-flash — sustained-frontier positioning
|
|
2041
|
+
generate: 8,
|
|
2042
|
+
// +1 vs 2.5-flash (6→8) — Google: "complex coding cycles and iterations"
|
|
2043
|
+
plan: 7,
|
|
2044
|
+
// +1 vs 2.5-flash (5→7) — "complex iterations" positioning
|
|
2045
|
+
extract: 7,
|
|
2046
|
+
// +1 vs 2.5-flash — sustained-frontier on structured tasks
|
|
2047
|
+
critique: 5
|
|
2048
|
+
// +1 vs 2.5-flash — but still below Sonnet/Opus reasoning floor
|
|
2049
|
+
}
|
|
2050
|
+
}
|
|
2051
|
+
];
|
|
2052
|
+
/**
 * Bundled alias table: alternate model id -> canonical profile id.
 * Consulted by `canonicalId` when the brain hook does not resolve the id.
 */
var ALIASES = {
  // DeepSeek's own model routing: both public names are served by V4-Flash.
  "deepseek-chat": "deepseek-v4-flash",
  "deepseek-reasoner": "deepseek-v4-flash",
  // Legacy kgauto typo: alpha.1 used the dot form, the actual API alias is dash-form.
  "claude-haiku-4.5": "claude-haiku-4-5"
};
|
|
2059
|
+
// Optional overrides consulted before the bundled tables. The lookups in this
// file read two optional members from it: `resolveAlias(id)` and `getProfile(id)`.
var brainHook = {};
/**
 * Installs (or clears) the hook consulted by the profile lookups.
 *
 * @param {object|null|undefined} hook - Hook object. `null`/`undefined` resets
 *   to an empty hook, so later `brainHook.resolveAlias?.()` style reads cannot
 *   throw on a missing receiver.
 */
function _setProfileBrainHook(hook) {
  brainHook = hook ?? {};
}
|
|
2063
|
+
/**
 * Resolves a model id to its canonical form.
 *
 * Resolution order: the brain hook's `resolveAlias` (when present and it
 * returns a non-nullish value), then the bundled `ALIASES` table, then the id
 * itself.
 *
 * @param {string} id - Model id as supplied by the caller.
 * @returns {string} Canonical id, or `id` unchanged when no alias applies.
 */
function canonicalId(id) {
  const fromBrain = brainHook.resolveAlias?.(id);
  if (fromBrain !== null && fromBrain !== undefined) return fromBrain;
  // Own-property check: a bare `ALIASES[id]` would resolve inherited keys such
  // as "constructor" or "toString" to Object.prototype members.
  if (Object.prototype.hasOwnProperty.call(ALIASES, id)) {
    return ALIASES[id] ?? id;
  }
  return id;
}
|
|
2066
|
+
// Bundled profiles keyed by their `id`, built once from PROFILES_RAW.
var PROFILE_INDEX = new Map(
  Array.from(PROFILES_RAW, (profile) => [profile.id, profile])
);
|
|
2069
|
+
/**
 * Looks up the profile for a model id, throwing when it is unknown.
 *
 * The id is canonicalised first. A truthy result from the brain hook's
 * `getProfile` takes precedence over the bundled index.
 *
 * @param {string} id - Model id or alias.
 * @returns {object} The matching profile.
 * @throws {Error} When neither the brain hook nor the bundled index has it;
 *   the message lists the bundled ids and aliases.
 */
function getProfile(id) {
  const resolvedId = canonicalId(id);
  const brainProfile = brainHook.getProfile?.(resolvedId);
  if (brainProfile) {
    return brainProfile;
  }
  const bundled = PROFILE_INDEX.get(resolvedId);
  if (bundled) {
    return bundled;
  }
  const knownIds = Array.from(PROFILE_INDEX.keys()).concat(Object.keys(ALIASES));
  throw new Error(`Unknown model id: "${id}". Known: ${knownIds.join(", ")}`);
}
|
|
2080
|
+
/**
 * Non-throwing profile lookup.
 *
 * @param {string} id - Model id or alias.
 * @returns {object|undefined} The brain hook's profile when it returns a
 *   non-nullish value, otherwise the bundled profile, otherwise `undefined`.
 */
function tryGetProfile(id) {
  const resolvedId = canonicalId(id);
  const brainProfile = brainHook.getProfile?.(resolvedId);
  if (brainProfile !== null && brainProfile !== undefined) {
    return brainProfile;
  }
  return PROFILE_INDEX.get(resolvedId);
}
|
|
2084
|
+
/**
 * @returns {object[]} The bundled profile array itself (not a copy).
 */
function allProfiles() {
  return PROFILES_RAW;
}
|
|
2087
|
+
/**
 * @returns {object[]} The bundled profile array itself (not a copy); in this
 *   build it is the same value `allProfiles()` returns.
 */
function allProfilesRaw() {
  return PROFILES_RAW;
}
|
|
2090
|
+
/**
 * Selects the bundled profiles belonging to one provider.
 *
 * @param {string} provider - Provider name compared with `profile.provider`.
 * @returns {object[]} New array of matching profiles, in bundled order.
 */
function profilesByProvider(provider) {
  const matches = [];
  for (const profile of PROFILES_RAW) {
    if (profile.provider === provider) {
      matches.push(profile);
    }
  }
  return matches;
}
|
|
2093
|
+
|
|
2094
|
+
// src/brain-query.ts
|
|
2095
|
+
// Pristine cache state; `snapshot` starts out as a shallow copy of it.
var FRESH_SNAPSHOT = {
  data: null,
  expiresAt: 0,
  refreshing: false,
  warned: false
};
// Current cache state shared by every reader created in this module.
var snapshot = Object.assign({}, FRESH_SNAPSHOT);
// Active runtime configuration; undefined until configureBrainQuery() is called.
var runtime;
|
|
2103
|
+
/**
 * Installs the runtime used by brain queries and discards any cached snapshot.
 *
 * @param {object|undefined} rt - Runtime configuration to activate.
 */
function configureBrainQuery(rt) {
  runtime = rt;
  snapshot = Object.assign({}, FRESH_SNAPSHOT);
}
|
|
2107
|
+
/**
 * Builds a synchronous reader for one table of the shared snapshot.
 *
 * Each call of the returned function:
 *  - returns `opts.bundledFallback()` when no runtime is configured or the
 *    table is not in `runtime.enabledTables`;
 *  - kicks off a background refresh when the snapshot has expired and no
 *    refresh is already flagged as running (the call itself never waits);
 *  - returns `opts.mapRows(rows)` when the snapshot holds a non-empty array
 *    for the table, falling back when `mapRows` throws or no rows exist.
 *
 * @param {{table: string, mapRows: Function, bundledFallback: Function}} opts
 * @returns {Function} Zero-argument reader.
 */
function createBrainQueryCache(opts) {
  return () => {
    const activeRuntime = runtime;
    const tableEnabled = activeRuntime ? activeRuntime.enabledTables.has(opts.table) : false;
    if (!tableEnabled) {
      return opts.bundledFallback();
    }
    const expired = snapshot.expiresAt <= Date.now();
    if (expired && !snapshot.refreshing) {
      // Flag first so concurrent readers do not start a second refresh.
      snapshot.refreshing = true;
      void asyncRefresh(activeRuntime);
    }
    const rows = snapshot.data ? snapshot.data[opts.table] : undefined;
    if (!Array.isArray(rows) || rows.length === 0) {
      return opts.bundledFallback();
    }
    try {
      return opts.mapRows(rows);
    } catch {
      // Best effort: a mapper failure degrades to the bundled data.
      return opts.bundledFallback();
    }
  };
}
|
|
2132
|
+
// Promise of the refresh currently in flight, or undefined when idle.
var pendingRefresh;
/**
 * Runs one refresh and tracks it in `pendingRefresh` while it is in flight.
 *
 * @param {object} rt - Runtime passed through to `doRefresh`.
 * @returns {Promise<void>} Settles with the refresh.
 */
async function asyncRefresh(rt) {
  const inFlight = doRefresh(rt);
  pendingRefresh = inFlight;
  try {
    await inFlight;
  } finally {
    // Only clear the marker if a newer refresh has not replaced it meanwhile.
    if (pendingRefresh === inFlight) {
      pendingRefresh = void 0;
    }
  }
}
|
|
2142
|
+
// Endpoint used when the runtime does not supply `configEndpoint`.
var DEFAULT_CONFIG_URL = "https://kgauto-dashboard.vercel.app/api/kgauto-v2/config";
/**
 * Fetches the config document and installs it as the new snapshot.
 *
 * On failure the snapshot's expiry is still pushed out by `rt.ttlMs`, so a
 * failing endpoint is retried once per TTL rather than on every read, and the
 * error handler is invoked only for the first failure of a snapshot.
 * Results are discarded if the active runtime changed while the request was
 * in flight.
 *
 * This function never rejects: callers fire it without awaiting, so a throwing
 * `rt.onError` is contained here instead of surfacing as an unhandled
 * rejection.
 *
 * @param {object} rt - Runtime providing `fetchImpl`, `ttlMs`, and optionally
 *   `configEndpoint` and `onError`.
 * @returns {Promise<void>}
 */
async function doRefresh(rt) {
  const url = rt.configEndpoint ?? DEFAULT_CONFIG_URL;
  try {
    const res = await rt.fetchImpl(url, { method: "GET" });
    if (!res.ok) {
      throw new Error(`brain-query ${res.status}: ${res.statusText}`);
    }
    const body = await res.json();
    // The runtime was swapped mid-flight; this response belongs to the old one.
    if (runtime !== rt) return;
    snapshot = {
      data: body,
      expiresAt: Date.now() + rt.ttlMs,
      refreshing: false,
      warned: snapshot.warned
    };
  } catch (err) {
    if (runtime !== rt) return;
    snapshot.refreshing = false;
    snapshot.expiresAt = Date.now() + rt.ttlMs;
    if (!snapshot.warned) {
      snapshot.warned = true;
      try {
        (rt.onError ?? defaultOnError)(err);
      } catch (handlerErr) {
        console.warn("[kgauto] brain-query onError handler threw:", handlerErr);
      }
    }
  }
}
|
|
2168
|
+
/**
 * Fallback error reporter: logs the failure as a console warning.
 *
 * @param {unknown} err - The error raised by the refresh.
 */
function defaultOnError(err) {
  console.warn("[kgauto] brain-query failed (using bundled fallback):", err);
}
|
|
2171
|
+
/**
 * @param {string} table - Table name.
 * @returns {boolean} True when a runtime is configured and lists the table in
 *   `enabledTables`.
 */
function isBrainQueryActiveFor(table) {
  if (runtime === void 0) {
    return false;
  }
  return runtime.enabledTables.has(table);
}
|
|
2174
|
+
/**
 * Calls the `get_per_axis_metrics` RPC and maps the response.
 *
 * Best effort by design: a missing endpoint, a non-OK response, or any thrown
 * error all yield `null` rather than an exception.
 *
 * @param {object} opts
 * @param {string} opts.appId
 * @param {string} opts.archetype
 * @param {string} opts.model
 * @param {string} [opts.endpoint] - Base URL; defaults to `runtime.endpoint`.
 *   Trailing slashes are ignored.
 * @param {string} [opts.apiKey] - Sent as a Bearer token when provided.
 * @param {number} [opts.windowDays=30]
 * @param {number|null} [opts.qualityFloor]
 * @param {Function} [opts.fetch] - Fetch implementation; defaults to global `fetch`.
 * @returns {Promise<object|null>} Mapped metrics, or `null`.
 */
async function getPerAxisMetrics(opts) {
  const fetchFn = opts.fetch ?? fetch;
  const endpoint = opts.endpoint ?? runtime?.endpoint;
  if (!endpoint) return null;
  // Drop trailing slashes so "https://host/rest/v1/" does not produce "//rpc/...".
  let baseUrl = String(endpoint);
  while (baseUrl.endsWith("/")) {
    baseUrl = baseUrl.slice(0, -1);
  }
  const windowDays = opts.windowDays ?? 30;
  const body = {
    p_app_id: opts.appId,
    p_archetype: opts.archetype,
    p_model: opts.model,
    p_window_days: windowDays,
    p_quality_floor: opts.qualityFloor ?? null
  };
  const headers = {
    Accept: "application/json",
    "Content-Type": "application/json",
    ...opts.apiKey ? { Authorization: `Bearer ${opts.apiKey}` } : {}
  };
  try {
    const res = await fetchFn(`${baseUrl}/rpc/get_per_axis_metrics`, {
      method: "POST",
      headers,
      body: JSON.stringify(body)
    });
    if (!res.ok) return null;
    const raw = await res.json();
    return mapPerAxisMetrics(raw, opts.appId, opts.archetype, opts.model, windowDays);
  } catch {
    return null;
  }
}
|
|
2204
|
+
/**
 * Converts a raw `get_per_axis_metrics` payload (snake_case) into the
 * camelCase metrics object used by callers.
 *
 * - `null`/`undefined`/non-object input yields `null`.
 * - An array is unwrapped to its first element; an empty array yields `null`.
 * - Numeric fields accept numbers or numeric strings; anything else,
 *   including blank strings and non-finite values, maps to `null`.
 * - Count fields (`nRows`, `nRowsClean`, `nQualityOutcomes`) default to 0 and
 *   are truncated to integers.
 * - Identity fields fall back to the supplied arguments when the payload does
 *   not carry a string / usable number.
 *
 * @param {unknown} raw - Decoded JSON response.
 * @param {string} fallbackAppId
 * @param {string} fallbackArchetype
 * @param {string} fallbackModel
 * @param {number} fallbackWindowDays
 * @returns {object|null}
 */
function mapPerAxisMetrics(raw, fallbackAppId, fallbackArchetype, fallbackModel, fallbackWindowDays) {
  if (raw === null || raw === void 0 || typeof raw !== "object") return null;
  if (Array.isArray(raw)) {
    if (raw.length === 0) return null;
    return mapPerAxisMetrics(raw[0], fallbackAppId, fallbackArchetype, fallbackModel, fallbackWindowDays);
  }
  const num = (v) => {
    if (typeof v === "number") return Number.isFinite(v) ? v : null;
    if (typeof v === "string") {
      // Number("") and Number("  ") are 0; a blank field is "no data", not zero.
      if (v.trim() === "") return null;
      const n = Number(v);
      return Number.isFinite(n) ? n : null;
    }
    return null;
  };
  const int = (v) => {
    const n = num(v);
    return n === null ? 0 : Math.trunc(n);
  };
  const bool = (v) => (typeof v === "boolean" ? v : null);
  const str = (v, fallback) => (typeof v === "string" ? v : fallback);
  // Nested groups may be absent or malformed; read them as empty objects then.
  const section = (v) => (v !== null && typeof v === "object" ? v : {});
  const cost = section(raw.cost_efficiency);
  const time = section(raw.time_efficiency);
  const rel = section(raw.reliability);
  return {
    appId: str(raw.app_id, fallbackAppId),
    archetype: str(raw.archetype, fallbackArchetype),
    model: str(raw.model, fallbackModel),
    windowDays: num(raw.window_days) ?? fallbackWindowDays,
    nRows: int(raw.n_rows),
    nRowsClean: int(raw.n_rows_clean),
    nQualityOutcomes: int(raw.n_quality_outcomes),
    magicRate: num(raw.magic_rate),
    qualityFloorMet: bool(raw.quality_floor_met),
    costEfficiency: {
      avgCostUsd: num(cost.avg_cost_usd),
      avgCostUsdClean: num(cost.avg_cost_usd_clean),
      avgInputTokens: num(cost.avg_input_tokens),
      avgOutputTokens: num(cost.avg_output_tokens),
      inputTokenRatio: num(cost.input_token_ratio)
    },
    timeEfficiency: {
      avgLatencyMs: num(time.avg_latency_ms),
      avgTtftMs: num(time.avg_ttft_ms)
    },
    reliability: {
      successRate: num(rel.success_rate),
      successRateClean: num(rel.success_rate_clean),
      emptyRate: num(rel.empty_rate),
      emptyRateClean: num(rel.empty_rate_clean)
    },
    evidenceFreshnessDays: num(raw.evidence_freshness_days)
  };
}
|
|
2264
|
+
|
|
2265
|
+
// src/archetype-perf-brain.ts
|
|
2266
|
+
/**
 * Shape check for one archetype-perf row.
 *
 * @param {unknown} x - Candidate row.
 * @returns {boolean} True when `x` is an object with string `model_id`,
 *   string `archetype`, and number `perf_score`.
 */
function isPerfRow(x) {
  if (x === null || typeof x !== "object") {
    return false;
  }
  const { model_id: modelId, archetype, perf_score: perfScore } = x;
  return typeof modelId === "string" && typeof archetype === "string" && typeof perfScore === "number";
}
|
|
2271
|
+
/**
 * Groups perf rows into `Map<model_id, { [archetype]: perf_score }>`.
 * Rows failing `isPerfRow` are ignored; a later row for the same
 * (model, archetype) overwrites an earlier one.
 *
 * @param {Iterable<unknown>} rows
 * @returns {Map<string, Object<string, number>>}
 */
function mapRowsToPerfMap(rows) {
  const byModel = /* @__PURE__ */ new Map();
  for (const candidate of rows) {
    if (isPerfRow(candidate)) {
      let scores = byModel.get(candidate.model_id);
      if (scores === null || scores === void 0) {
        scores = {};
      }
      scores[candidate.archetype] = candidate.perf_score;
      byModel.set(candidate.model_id, scores);
    }
  }
  return byModel;
}
|
|
2281
|
+
/**
 * Groups perf rows into `Map<model_id, { [archetype]: n }>`.
 * Rows failing `isPerfRow`, or whose `n` is not a number, are ignored.
 *
 * @param {Iterable<unknown>} rows
 * @returns {Map<string, Object<string, number>>}
 */
function mapRowsToNMap(rows) {
  const byModel = /* @__PURE__ */ new Map();
  for (const candidate of rows) {
    const usable = isPerfRow(candidate) && typeof candidate.n === "number";
    if (usable) {
      let counts = byModel.get(candidate.model_id);
      if (counts === null || counts === void 0) {
        counts = {};
      }
      counts[candidate.archetype] = candidate.n;
      byModel.set(candidate.model_id, counts);
    }
  }
  return byModel;
}
|
|
2292
|
+
/**
 * Builds the fallback perf map from the bundled profiles.
 *
 * @returns {Map<string, object>} `profile.id -> profile.archetypePerf` for
 *   every profile that has a truthy `archetypePerf`.
 */
function bundledArchetypePerf() {
  const withPerf = allProfiles().filter((profile) => profile.archetypePerf);
  return new Map(withPerf.map((profile) => [profile.id, profile.archetypePerf]));
}
|
|
2299
|
+
/**
 * Fallback sample-count map: no counts are bundled, so this is always empty.
 *
 * @returns {Map} A fresh empty Map on every call.
 */
function bundledArchetypePerfN() {
  const noCounts = /* @__PURE__ */ new Map();
  return noCounts;
}
|
|
2302
|
+
// Reader for perf scores: brain rows from "kgauto_archetype_perf" when
// available, bundled profile scores otherwise.
var loadArchetypePerfFromBrain = createBrainQueryCache({
  table: "kgauto_archetype_perf",
  mapRows: mapRowsToPerfMap,
  bundledFallback: bundledArchetypePerf
});
// Reader for per-(model, archetype) sample counts from the same table; the
// bundled fallback carries no counts.
var loadArchetypePerfNFromBrain = createBrainQueryCache({
  table: "kgauto_archetype_perf",
  mapRows: mapRowsToNMap,
  bundledFallback: bundledArchetypePerfN
});
|
|
2314
|
+
// Minimum sample count for a score to be labelled "measured".
var MEASURED_GROUNDING_MIN_N = 10;
/**
 * Reads the perf score and its evidence level for a (model, archetype) pair.
 *
 * @param {string} modelId
 * @param {string} archetype
 * @returns {{score: number, n: number, grounding: "measured"|"judgment"}}
 *   `score` defaults to 5 and `n` to 0 when no entry exists; `grounding` is
 *   "measured" once `n` reaches MEASURED_GROUNDING_MIN_N.
 */
function getArchetypePerfScore(modelId, archetype) {
  const scoresForModel = loadArchetypePerfFromBrain().get(modelId);
  const countsForModel = loadArchetypePerfNFromBrain().get(modelId);
  const score = scoresForModel?.[archetype] ?? 5;
  const n = countsForModel?.[archetype] ?? 0;
  let grounding = "judgment";
  if (n >= MEASURED_GROUNDING_MIN_N) {
    grounding = "measured";
  }
  return { score, n, grounding };
}
|
|
2321
|
+
|
|
2322
|
+
// src/compatibility.ts
|
|
2323
|
+
// Scores at or above this count as suited for an archetype (see getModelCompatibility).
var ARCHETYPE_FLOOR_DEFAULT = 6;
// Exported lower bound; not referenced elsewhere in this part of the file.
var ABSOLUTE_FLOOR = 4;
|
|
2325
|
+
/**
 * Reads a profile's own archetype score.
 *
 * @param {object} profile
 * @param {string} archetype
 * @returns {number} `profile.archetypePerf[archetype]`, or 5 when absent.
 */
function rawArchetypePerf(profile, archetype) {
  const table = profile.archetypePerf;
  const declared = table === null || table === void 0 ? void 0 : table[archetype];
  return declared ?? 5;
}
|
|
2328
|
+
/**
 * Detects the "no parallel tools + weak hunt score" combination.
 *
 * @param {object} profile
 * @returns {boolean} True only when `parallelToolCalls` is exactly `false`
 *   and the hunt score (default 5) is below ARCHETYPE_FLOOR_DEFAULT.
 */
function hasSequentialToolCliffForHunt(profile) {
  const lacksParallelTools = profile.parallelToolCalls === false;
  if (!lacksParallelTools) {
    return false;
  }
  const declaredHunt = profile.archetypePerf?.hunt;
  const huntScore = declaredHunt ?? 5;
  return huntScore < ARCHETYPE_FLOOR_DEFAULT;
}
|
|
2333
|
+
/**
 * Proposes an adapter that works around a known cliff, if one applies.
 *
 * Only one case is handled: archetype "hunt" on a profile flagged by
 * `hasSequentialToolCliffForHunt`. The estimated post-adapter score is the
 * upper-middle value of the profile's other numeric archetype scores, but
 * never less than ARCHETYPE_FLOOR_DEFAULT + 1.
 *
 * @param {object} profile
 * @param {string} archetype
 * @returns {{adapter: object, estimatedScoreWithAdapter: number}|undefined}
 */
function adapterForCliff(profile, archetype) {
  if (archetype !== "hunt" || !hasSequentialToolCliffForHunt(profile)) {
    return void 0;
  }
  const minimumEstimate = ARCHETYPE_FLOOR_DEFAULT + 1;
  const ranked = Object.entries(profile.archetypePerf || {})
    .filter(([name, value]) => name !== "hunt" && typeof value === "number")
    .map(([, value]) => value)
    .sort((a, b) => a - b);
  let middle = minimumEstimate;
  if (ranked.length > 0) {
    middle = ranked[Math.floor(ranked.length / 2)] ?? minimumEstimate;
  }
  return {
    adapter: {
      parameter: "toolOrchestration",
      value: "sequential",
      consequence: "Tool calls run one at a time instead of in parallel \u2014 slower per step but reliable for this model."
    },
    estimatedScoreWithAdapter: Math.max(minimumEstimate, middle)
  };
}
|
|
2356
|
+
/**
 * Text used for an archetype inside compatibility reasons; currently the
 * archetype name itself.
 *
 * @param {string} archetype
 * @returns {string}
 */
function archetypeDescriptor(archetype) {
  const label = archetype;
  return label;
}
|
|
2359
|
+
/**
 * Classifies how well a model fits an intent.
 *
 * Decision order:
 *  1. unknown model -> "reject" with score 0;
 *  2. caller already asked for sequential tool calls and that is exactly the
 *     adapter this model needs -> "compatible";
 *  3. profile score meets ARCHETYPE_FLOOR_DEFAULT -> "compatible";
 *  4. an adapter exists -> "requires-adapter" (with the estimated lifted score);
 *  5. otherwise -> "reject".
 *
 * @param {string} modelId
 * @param {{archetype: string, toolOrchestration?: string}} intent
 * @returns {object} `{ status, reason, archetypePerf }`, plus
 *   `archetypePerfWithAdapter` and `adapter` for "requires-adapter".
 */
function getModelCompatibility(modelId, intent) {
  const profile = tryGetProfile(modelId);
  if (!profile) {
    return {
      status: "reject",
      reason: `Model "${modelId}" is not registered with kgauto \u2014 no compatibility data available.`,
      archetypePerf: 0
    };
  }
  const archetype = intent.archetype;
  const requestedOrchestration = intent.toolOrchestration;
  const profileScore = rawArchetypePerf(profile, archetype);
  const label = archetypeDescriptor(archetype);
  const match = adapterForCliff(profile, archetype);

  const sequentialAdapterSatisfied =
    requestedOrchestration === "sequential" &&
    Boolean(match) &&
    match.adapter.parameter === "toolOrchestration" &&
    match.adapter.value === "sequential";
  if (sequentialAdapterSatisfied) {
    return {
      status: "compatible",
      reason: `Suited for ${label} with sequential tool calls.`,
      archetypePerf: profileScore
    };
  }

  if (profileScore >= ARCHETYPE_FLOOR_DEFAULT) {
    return {
      status: "compatible",
      reason: `Suited for ${label}.`,
      archetypePerf: profileScore
    };
  }

  if (!match) {
    return {
      status: "reject",
      reason: `Not suited for ${label} \u2014 would underperform significantly.`,
      archetypePerf: profileScore
    };
  }

  const { adapter } = match;
  const adapterNoun = adapter.parameter === "toolOrchestration" ? "tool calls" : adapter.parameter;
  return {
    status: "requires-adapter",
    reason: `Best with ${adapter.value} ${adapterNoun} for ${label} \u2014 slower but works.`,
    archetypePerf: profileScore,
    archetypePerfWithAdapter: match.estimatedScoreWithAdapter,
    adapter
  };
}
|
|
2401
|
+
|
|
2402
|
+
// src/advisor.ts
|
|
2403
|
+
// An alternative must score at least this to be recommended.
var QUALITY_FLOOR_FOR_RECOMMENDATION = 6;
// Tier-down requires the alternative's input cost to be at most this fraction of the chosen model's.
var TIER_DOWN_COST_RATIO = 0.5;
// A measured chosen score at or above this suppresses the cost-mismatch advisory.
var COST_MISMATCHED_CHOSEN_SCORE_CEILING = 7;
|
|
2406
|
+
/**
 * Runs every advisory detector and concatenates their findings in a fixed
 * order.
 *
 * The three model-recommendation detectors are skipped when
 * `policy.posture` is "locked". The floor-breach detector is skipped when the
 * translator already applied a tool-call-contract rewrite.
 *
 * @param {object} ir - Request IR.
 * @param {object} result - Compile result (read by the tool-bloat detector).
 * @param {object} profile - Profile of the chosen model.
 * @param {object} [policy]
 * @param {object} [phase2]
 * @returns {object[]} Advisories.
 */
function runAdvisor(ir, result, profile, policy, phase2) {
  const advisories = [
    ...detectCachingOff(ir, profile),
    ...detectSingleChunkSystem(ir, profile),
    ...detectToolBloat(ir, result),
    ...detectHistoryUncached(ir, profile),
    ...detectSingleModelArray(ir, policy)
  ];
  const modelIsLocked = policy?.posture === "locked";
  if (!modelIsLocked) {
    advisories.push(...detectCostMismatchedArchetype(ir, profile, phase2));
    advisories.push(...detectModelStaleEvidence(ir, profile));
    advisories.push(...detectTierDown(ir, profile, phase2));
  }
  const cliffAlreadyCleared = translatorClearedToolCallCliff(phase2);
  if (!cliffAlreadyCleared) {
    advisories.push(...detectArchetypePerfFloorBreach(ir, profile));
  }
  return advisories;
}
|
|
2423
|
+
/**
 * @param {object} [phase2]
 * @returns {boolean} True when `phase2.sectionRewritesApplied` contains a
 *   rewrite whose `kind` is "tool_call_contract".
 */
function translatorClearedToolCallCliff(phase2) {
  const applied = phase2?.sectionRewritesApplied || [];
  if (applied.length === 0) {
    return false;
  }
  for (const { kind } of applied) {
    if (kind === "tool_call_contract") {
      return true;
    }
  }
  return false;
}
|
|
2431
|
+
/**
 * Warns when a sizeable Anthropic system prompt has no cacheable section.
 *
 * Fires only for provider "anthropic", when the section texts total at least
 * 2000 characters and no section has `cacheable === true`.
 *
 * @param {object} ir - IR with `sections` (each having `text`).
 * @param {object} profile - Chosen model profile.
 * @returns {object[]} Zero or one advisory.
 */
function detectCachingOff(ir, profile) {
  if (profile.provider !== "anthropic") {
    return [];
  }
  let totalChars = 0;
  for (const section of ir.sections) {
    totalChars += section.text.length;
  }
  if (totalChars < 2e3) {
    return [];
  }
  for (const section of ir.sections) {
    if (section.cacheable === true) {
      return [];
    }
  }
  const advisory = {
    level: "warn",
    code: "caching-off-on-claude",
    message: `System prompt is ${totalChars} chars on Anthropic but no PromptSection has cacheable=true. Anthropic prompt caching cuts cached-prefix input cost by ~90% on subsequent calls; without it, every turn re-pays full price for the static system context.`,
    suggestion: "Mark stable system sections (role, persona, tool policy) with `cacheable: true`. The lowering pass concatenates cacheable sections into a single cache-controlled block before the dynamic ones.",
    docsUrl: "https://github.com/stue/command-center/blob/main/interfaces/kgauto.md#best-practice-advisories"
  };
  return [advisory];
}
|
|
2447
|
+
/**
 * Suggests splitting a large single-section Anthropic system prompt.
 *
 * Fires only for provider "anthropic" when there is exactly one section and
 * its text is longer than 1000 characters.
 *
 * @param {object} ir - IR with `sections`.
 * @param {object} profile - Chosen model profile.
 * @returns {object[]} Zero or one advisory.
 */
function detectSingleChunkSystem(ir, profile) {
  const onAnthropic = profile.provider === "anthropic";
  if (!onAnthropic || ir.sections.length !== 1) {
    return [];
  }
  const [soleSection] = ir.sections;
  if (!soleSection || soleSection.text.length <= 1e3) {
    return [];
  }
  const advisory = {
    level: "info",
    code: "single-chunk-system",
    message: `System prompt is a single ${soleSection.text.length}-char chunk. Splitting into NamedChunks (static role/persona vs dynamic context) gives the lowering pass a finer cache-marker boundary \u2014 only the static portion needs to be byte-stable for the cache to hit.`,
    suggestion: "Refactor the system builder to return an array of `PromptSection` shaped { id, text, cacheable?: boolean }. Static chunks (role, persona, tool policy) get `cacheable: true`; dynamic ones (current context, today's date) don't.",
    docsUrl: "https://github.com/stue/command-center/blob/main/interfaces/kgauto.md#best-practice-advisories"
  };
  return [advisory];
}
|
|
2462
|
+
/**
 * Warns when a short-output archetype still carries more than 10 tools after
 * the relevance pass.
 *
 * @param {object} ir - IR with `tools` and `intent.archetype`.
 * @param {object} result - Compile result; `diagnostics.toolsKept` is read.
 * @returns {object[]} Zero or one advisory.
 */
function detectToolBloat(ir, result) {
  const declaredTools = ir.tools;
  if (!declaredTools || declaredTools.length === 0) {
    return [];
  }
  const keptCount = result.diagnostics.toolsKept;
  if (keptCount <= 10) {
    return [];
  }
  const shortOutputArchetypes = ["classify", "extract", "summarize", "transform", "critique"];
  const archetype = ir.intent.archetype;
  if (!shortOutputArchetypes.includes(archetype)) {
    return [];
  }
  const advisory = {
    level: "warn",
    code: "tool-bloat",
    message: `${keptCount} tools kept after the relevance pass for archetype="${archetype}" (consumer declared ${declaredTools.length}). This archetype is short-output and rarely needs more than 3 tools; each tool definition eats ~350 tokens of context budget.`,
    suggestion: "Tighten `relevanceByIntent: { [archetype]: 0..1 }` per ToolDefinition. Tools below `toolRelevanceThreshold` (default 0.2) get dropped. Without `relevanceByIntent`, every tool defaults to neutral (0.5) and stays.",
    docsUrl: "https://github.com/stue/kgauto/blob/main/v2/README.md#tools"
  };
  return [advisory];
}
|
|
2484
|
+
/**
 * Warns when an Anthropic call carries conversation history without a
 * history cache policy.
 *
 * Fires only for provider "anthropic", with at least 2 history messages, and
 * when `historyCachePolicy` is missing or its strategy is "none".
 *
 * @param {object} ir - IR with optional `history` and `historyCachePolicy`.
 * @param {object} profile - Chosen model profile.
 * @returns {object[]} Zero or one advisory.
 */
function detectHistoryUncached(ir, profile) {
  if (profile.provider !== "anthropic") {
    return [];
  }
  const history = ir.history;
  if (!history || history.length < 2) {
    return [];
  }
  const policy = ir.historyCachePolicy;
  const cachingConfigured = Boolean(policy) && policy.strategy !== "none";
  if (cachingConfigured) {
    return [];
  }
  const advisory = {
    level: "warn",
    code: "history-uncached-on-claude",
    message: `${history.length} history messages on Anthropic with no historyCachePolicy. Every turn re-pays for the full conversation context; with caching, subsequent turns hit the cache at ~10% the input cost.`,
    suggestion: "Set `historyCachePolicy: { strategy: 'all-but-latest' }` on this IR. The lowering pass marks the message immediately preceding currentTurn with cache_control; subsequent turns whose history prefix matches byte-for-byte hit the cache.",
    docsUrl: "https://github.com/stue/command-center/blob/main/interfaces/kgauto.md#best-practice-advisories"
  };
  return [advisory];
}
|
|
2500
|
+
/**
 * Warns when the model chain has exactly one entry and the posture is not
 * "locked".
 *
 * @param {object} ir - IR with `models`.
 * @param {object} [policy]
 * @returns {object[]} Zero or one advisory.
 */
function detectSingleModelArray(ir, policy) {
  const hasSingleModel = ir.models.length === 1;
  const lockedOnPurpose = policy?.posture === "locked";
  if (!hasSingleModel || lockedOnPurpose) {
    return [];
  }
  const [only] = ir.models;
  const suggestionBeforeId = "Use `getDefaultFallbackChain({ archetype: ir.intent.archetype, primary: '";
  const suggestionAfterId = "', posture: 'preferred' })` for a user-anchored chain, or `getDefaultFallbackChain({ archetype, posture: 'open' })` for library-picked. If single-model is intentional (compliance/brand promise), set `policy.posture = 'locked'` to silence this rule.";
  const advisory = {
    level: "warn",
    code: "single-model-array",
    message: `\`ir.models\` has length 1 (only "${only}") and posture is not 'locked'. A single-model chain has no safety net \u2014 the first 429 / 5xx / cliff hits the user as a failure. Master plan \xA71.2 closes the reliability gap with a 2-step minimum.`,
    suggestion: suggestionBeforeId + only + suggestionAfterId,
    docsUrl: "https://github.com/stue/command-center/blob/main/interfaces/kgauto.md#single-model-array"
  };
  return [advisory];
}
|
|
2514
|
+
/**
 * Warns when the fallback chain holds a cheaper model of equal-or-better
 * archetype score than the chosen one.
 *
 * Skipped when there is no chain or resolver, when the chosen score is
 * measured and already at the ceiling, or when the best alternative would
 * also satisfy the tier-down rule (measured, at most half the input cost).
 * Among qualifying alternatives the highest score wins; ties go to the lower
 * input cost.
 *
 * @param {object} ir - IR with `intent.archetype`.
 * @param {object} profile - Chosen model profile.
 * @param {object} [phase2] - Provides `fallbackChain` and `profileResolver`.
 * @returns {object[]} Zero or one advisory.
 */
function detectCostMismatchedArchetype(ir, profile, phase2) {
  if (!phase2 || phase2.fallbackChain.length === 0 || !phase2.profileResolver) {
    return [];
  }
  const archetype = ir.intent.archetype;
  const chosen = getArchetypePerfScore(profile.id, archetype);
  const chosenIsSettled = chosen.grounding !== "judgment" && !(chosen.score < COST_MISMATCHED_CHOSEN_SCORE_CEILING);
  if (chosenIsSettled) {
    return [];
  }

  // Higher score wins; on equal score the cheaper input price wins.
  const outranks = (candidate, incumbent) =>
    candidate.score.score > incumbent.score.score ||
    (candidate.score.score === incumbent.score.score &&
      candidate.profile.costInputPer1m < incumbent.profile.costInputPer1m);

  let winner = null;
  for (const candidateId of phase2.fallbackChain) {
    const candidateProfile = phase2.profileResolver(candidateId);
    if (!candidateProfile || candidateProfile.id === profile.id) continue;
    const candidateScore = getArchetypePerfScore(candidateProfile.id, archetype);
    if (candidateScore.score < QUALITY_FLOOR_FOR_RECOMMENDATION) continue;
    if (candidateScore.score < chosen.score) continue;
    if (candidateProfile.costInputPer1m >= profile.costInputPer1m) continue;
    const candidate = { id: candidateId, profile: candidateProfile, score: candidateScore };
    if (!winner || outranks(candidate, winner)) {
      winner = candidate;
    }
  }
  if (!winner) {
    return [];
  }

  // Leave this case to the tier-down rule so the two do not both report it.
  const coveredByTierDown =
    winner.score.grounding === "measured" &&
    winner.profile.costInputPer1m <= profile.costInputPer1m * TIER_DOWN_COST_RATIO;
  if (coveredByTierDown) {
    return [];
  }

  let chosenGrounding;
  if (chosen.grounding === "judgment") {
    chosenGrounding = `archetypePerf.${archetype}=judgment`;
  } else {
    chosenGrounding = `archetypePerf.${archetype}=${chosen.score}`;
  }
  let altGrounding;
  if (winner.score.grounding === "measured") {
    altGrounding = `archetypePerf.${archetype}=${winner.score.score}, measured, n=${winner.score.n}`;
  } else {
    altGrounding = `archetypePerf.${archetype}=${winner.score.score}, judgment`;
  }
  const sameProvider = profile.provider === winner.profile.provider;
  return [
    {
      level: "warn",
      code: "cost-mismatched-archetype",
      message: `Cost-mismatched-archetype: target=${profile.id} (${chosenGrounding}) selected for ${archetype}. Alternative ${winner.id} (${altGrounding}) is cheaper ($${winner.profile.costInputPer1m}/$${winner.profile.costOutputPer1m} vs $${profile.costInputPer1m}/$${profile.costOutputPer1m} per 1M) at equal-or-better quality.`,
      suggestion: `Consider declaring \`${winner.id}\` as the primary model for this archetype, or relax to posture='open' to let kgauto select among the chain. If the chosen model is required for compliance/brand reasons, set \`policy.posture = 'locked'\` to silence this rule.`,
      recommendationType: sameProvider ? "tier-down" : "model-swap",
      docsUrl: "https://github.com/stue/command-center/blob/main/interfaces/kgauto.md#best-practice-advisories"
    }
  ];
}
|
|
2550
|
+
/**
 * Emits an info advisory when brain-query mode is active for the perf table
 * but the chosen (model, archetype) score is still judgment-grounded.
 *
 * @param {object} ir - IR with `intent.archetype`.
 * @param {object} profile - Chosen model profile.
 * @returns {object[]} Zero or one advisory.
 */
function detectModelStaleEvidence(ir, profile) {
  const brainActive = isBrainQueryActiveFor("kgauto_archetype_perf");
  if (!brainActive) {
    return [];
  }
  const archetype = ir.intent.archetype;
  const evidence = getArchetypePerfScore(profile.id, archetype);
  if (evidence.grounding !== "judgment") {
    return [];
  }
  const advisory = {
    level: "info",
    code: "model-stale-evidence",
    message: `Model-stale-evidence: target=${profile.id} archetype=${archetype} is judgment-grounded (n=${evidence.n}) despite brain-query mode being active. Measurement substrate is wired but the brain hasn't accumulated >=10 outcomes for this (model, archetype) tuple yet \u2014 routing decisions remain pre-measured for this slot.`,
    suggestion: "Verify that `record()` is being called on every call() outcome with the appropriate `actualModel` and `mutationsApplied` fields. Once the brain accumulates n>=10 rows on this tuple, the score promotes from judgment to measured automatically (5-min SWR cache). No code change required from your side \u2014 this is the substrate signaling the gap.",
    recommendationType: "prompt-fix",
    docsUrl: "https://github.com/stue/command-center/blob/main/interfaces/kgauto.md#best-practice-advisories"
  };
  return [advisory];
}
|
|
2566
|
+
/**
 * Warns when a measured alternative in the fallback chain matches or beats
 * the chosen model's score at no more than TIER_DOWN_COST_RATIO of its input
 * cost.
 *
 * Among qualifying alternatives the cheapest input price wins; ties go to the
 * higher score.
 *
 * @param {object} ir - IR with `intent.archetype`.
 * @param {object} profile - Chosen model profile.
 * @param {object} [phase2] - Provides `fallbackChain` and `profileResolver`.
 * @returns {object[]} Zero or one advisory.
 */
function detectTierDown(ir, profile, phase2) {
  if (!phase2 || phase2.fallbackChain.length === 0 || !phase2.profileResolver) {
    return [];
  }
  const archetype = ir.intent.archetype;
  const chosen = getArchetypePerfScore(profile.id, archetype);
  const chosenInputCost = profile.costInputPer1m;

  // Cheaper input price wins; on equal price the higher score wins.
  const outranks = (candidate, incumbent) =>
    candidate.profile.costInputPer1m < incumbent.profile.costInputPer1m ||
    (candidate.profile.costInputPer1m === incumbent.profile.costInputPer1m &&
      candidate.score.score > incumbent.score.score);

  let winner = null;
  for (const candidateId of phase2.fallbackChain) {
    const candidateProfile = phase2.profileResolver(candidateId);
    if (!candidateProfile || candidateProfile.id === profile.id) continue;
    const candidateScore = getArchetypePerfScore(candidateProfile.id, archetype);
    if (candidateScore.grounding !== "measured") continue;
    if (candidateScore.score < QUALITY_FLOOR_FOR_RECOMMENDATION) continue;
    if (candidateScore.score < chosen.score) continue;
    if (candidateProfile.costInputPer1m > chosenInputCost * TIER_DOWN_COST_RATIO) continue;
    const candidate = { id: candidateId, profile: candidateProfile, score: candidateScore };
    if (!winner || outranks(candidate, winner)) {
      winner = candidate;
    }
  }
  if (!winner) {
    return [];
  }

  let chosenDesc;
  if (chosen.grounding === "measured") {
    chosenDesc = `archetypePerf.${archetype}=${chosen.score} (measured, n=${chosen.n})`;
  } else {
    chosenDesc = `archetypePerf.${archetype}=${chosen.score} (${chosen.grounding})`;
  }
  return [
    {
      level: "warn",
      code: "tier-down",
      message: `Tier-down: target=${profile.id} (${chosenDesc}) selected for ${archetype}. Brain shows ${winner.id} delivers equal-or-better quality (archetypePerf.${archetype}=${winner.score.score}, measured, n=${winner.score.n}) at $${winner.profile.costInputPer1m}/$${winner.profile.costOutputPer1m} per 1M vs $${profile.costInputPer1m}/$${profile.costOutputPer1m} \u2014 a measured tier-down opportunity.`,
      suggestion: `Move \`${winner.id}\` to primary for this archetype. The brain has n=${winner.score.n} measured outcomes backing the recommendation; this is data, not opinion. If posture='locked' is required (compliance/brand promise), set it explicitly to silence this rule.`,
      recommendationType: "tier-down",
      docsUrl: "https://github.com/stue/command-center/blob/main/interfaces/kgauto.md#best-practice-advisories"
    }
  ];
}
|
|
2599
|
+
/**
 * Advisor rule: report when the chosen model is not "compatible" with the
 * request's archetype according to `getModelCompatibility`.
 *
 * @param ir      The request IR; `ir.intent.archetype` and the optional
 *                `ir.constraints.toolOrchestration` are forwarded to the
 *                compatibility lookup.
 * @param profile Profile of the selected model.
 * @returns An empty array when the model is compatible; otherwise a
 *          one-element array holding a "warn" advisory (an adapter is
 *          known) or a "critical" advisory (any other status).
 */
function detectArchetypePerfFloorBreach(ir, profile) {
  const archetype = ir.intent.archetype;
  const compat = getModelCompatibility(profile.id, {
    archetype,
    toolOrchestration: ir.constraints?.toolOrchestration
  });
  const docsUrl = "https://github.com/stue/command-center/blob/main/interfaces/kgauto.md#best-practice-advisories";

  switch (compat.status) {
    case "compatible":
      return [];

    case "requires-adapter": {
      // A known adapter exists: downgrade to a warning and attach it.
      const adapter = compat.adapter;
      const advisory = {
        level: "warn",
        code: "archetype-perf-floor-breach",
        message: `${profile.id} sits below the archetype floor for ${archetype} (score ${compat.archetypePerf}/10, floor ${6}). A known adapter would lift it: ${adapter.parameter}=${adapter.value}. ${adapter.consequence}`,
        suggestion: `Pass \`ir.constraints.${adapter.parameter} = '${adapter.value}'\` for this call, OR pick a model whose archetypePerf for ${archetype} already clears the floor (call \`getModelCompatibility(modelId, { archetype: '${archetype}' })\` to check). Estimated post-adapter score: ${compat.archetypePerfWithAdapter}/10.`,
        recommendationType: "prompt-fix",
        suggestedAdaptation: adapter,
        docsUrl
      };
      return [advisory];
    }

    default: {
      // Any other status: recommend swapping the model.
      const advisory = {
        level: "critical",
        code: "archetype-perf-floor-breach",
        message: `${profile.id} sits below the archetype floor for ${archetype} (score ${compat.archetypePerf}/10, floor ${6}) and no known adapter would lift it. ${compat.reason}`,
        suggestion: `Swap to a model whose archetypePerf for ${archetype} clears the floor. Use \`getModelCompatibility(candidateId, { archetype: '${archetype}' })\` to vet candidates, or \`getDefaultFallbackChain({ archetype: '${archetype}', posture: 'open' })\` for a library-picked chain that respects the floor by construction.`,
        recommendationType: "model-swap",
        docsUrl
      };
      return [advisory];
    }
  }
}
|
|
2629
|
+
|
|
2630
|
+
// src/translator.ts
|
|
2631
|
+
// Archetype score threshold: a numeric score strictly below it triggers the
// section rewrite in applySectionRewrites.
var TRANSLATOR_FLOOR = ARCHETYPE_FLOOR_DEFAULT;
// Rule identifier recorded on every rewrite produced below.
var RULE_SEQUENTIAL_TOOL_CLIFF = "sequential-tool-cliff-below-floor";
// Text prepended to each `tool_call_contract` section when the rule fires.
var SEQUENTIAL_TOOL_PREAMBLE = "IMPORTANT: Use one tool call per response. Wait for the tool result before deciding the next tool. Do NOT batch tool calls in parallel.";

/**
 * Prefix every `tool_call_contract` section with the sequential-tool preamble
 * when the profile's score for `archetype` is a number below TRANSLATOR_FLOOR.
 *
 * @param args `{ ir, profile, archetype }`.
 * @returns `{ rewrittenIR, rewrites }`. When nothing is rewritten the input
 *          `ir` is returned as-is with an empty `rewrites` list; otherwise
 *          `rewrittenIR` is a shallow copy of `ir` with a new `sections` array
 *          and each rewrite carries `wireOverrides: { parallelToolCalls: false }`.
 */
function applySectionRewrites(args) {
  const { ir, profile, archetype } = args;
  const untouched = () => ({ rewrittenIR: ir, rewrites: [] });

  const hasSections = Array.isArray(ir.sections) && ir.sections.length > 0;
  if (!hasSections || !profile.archetypePerf) {
    return untouched();
  }

  // `!(score < floor)` rather than `score >= floor` so a NaN score is a no-op.
  const score = profile.archetypePerf[archetype];
  if (typeof score !== "number" || !(score < TRANSLATOR_FLOOR)) {
    return untouched();
  }

  const rewrites = [];
  const sections = ir.sections.map((section) => {
    if (section.kind !== "tool_call_contract") {
      return section;
    }
    const originalText = section.text;
    const transformedText = `${SEQUENTIAL_TOOL_PREAMBLE}\n\n${originalText}`;
    rewrites.push({
      sectionId: section.id,
      kind: "tool_call_contract",
      rule: RULE_SEQUENTIAL_TOOL_CLIFF,
      originalText,
      transformedText,
      wireOverrides: { parallelToolCalls: false }
    });
    return { ...section, text: transformedText };
  });

  if (rewrites.length === 0) {
    return untouched();
  }
  return { rewrittenIR: { ...ir, sections }, rewrites };
}
|
|
2670
|
+
|
|
2671
|
+
// src/compile.ts
|
|
2672
|
+
// Per-module sequence number; wraps back to 0 after 999999.
var counter = 0;

/**
 * Build a compile handle of the form `c<time>-<sequence>-<random>`, every
 * part rendered in base 36. The random part is at most four characters.
 */
function makeHandle() {
  counter = (counter + 1) % 1e6;
  const stamp = Date.now().toString(36);
  const sequence = counter.toString(36);
  const salt = Math.random().toString(36).slice(2, 6);
  return `c${stamp}-${sequence}-${salt}`;
}
|
|
2677
|
+
/**
 * Compile an IR into a provider request.
 *
 * Visible pipeline: validate -> slice -> dedupe -> tool relevance -> history
 * compression -> score targets -> pick target -> apply cliffs -> translator
 * section rewrites -> lower -> final fit validation -> advisories.
 *
 * @param ir   The request IR (validated by `validateIR`).
 * @param opts Optional knobs read here: `profileResolver`,
 *             `toolRelevanceThreshold`, `compressHistoryAfter`,
 *             `compressHistoryAboveTokens`, `policy`.
 * @returns The compile result: handle, target/provider, lowered request,
 *          token and cost estimates, mutations, fallback chain, advisories,
 *          diagnostics, translator rewrites and wire overrides.
 * @throws Error when no allowed model fits the request (plus whatever the
 *         validation helpers throw).
 */
function compile(ir, opts = {}) {
  const resolver = opts.profileResolver ?? getProfile;
  validateIR(ir);

  // --- Reduction passes -----------------------------------------------------
  const sliced = passSlice(ir);
  const deduped = passDedupe(sliced.value);
  const toolFiltered = passToolRelevance(deduped.value, {
    threshold: opts.toolRelevanceThreshold
  });
  const compressed = passCompressHistory(toolFiltered.value, {
    summarizeOlderThan: opts.compressHistoryAfter,
    summarizeAboveTokens: opts.compressHistoryAboveTokens
  });

  let workingIR = compressed.value;
  const accumulatedMutations = [];
  for (const pass of [sliced, deduped, toolFiltered, compressed]) {
    accumulatedMutations.push(...pass.mutations);
  }

  // --- Target selection -----------------------------------------------------
  const inputTokens = estimateInputTokens(workingIR);
  const scores = passScoreTargets(workingIR, {
    estimatedInputTokens: inputTokens,
    profilesById: resolver,
    policy: opts.policy
  });
  accumulatedMutations.push(...scores.mutations);

  const target = pickTarget(workingIR, scores.value);
  if (!target) {
    throw new Error(
      `compile(): no allowed model fits the request. Scores: ${JSON.stringify(scores.value, null, 2)}`
    );
  }
  const profile = resolver(target.modelId);

  // Every other fitting model, highest rank first.
  const fallbackCandidates = scores.value.filter((s) => s.modelId !== target.modelId && s.fits);
  fallbackCandidates.sort((a, b) => b.rank - a.rank);
  const fallbackChain = fallbackCandidates.map((s) => s.modelId);

  // --- Model-specific adjustments -------------------------------------------
  const cliffs = passApplyCliffs(workingIR, profile, inputTokens);
  workingIR = cliffs.value.ir;
  accumulatedMutations.push(...cliffs.mutations);

  const translated = applySectionRewrites({
    ir: workingIR,
    profile,
    archetype: ir.intent.archetype
  });
  workingIR = translated.rewrittenIR;
  const sectionRewritesApplied = translated.rewrites;

  // One pass over the rewrites: fold their wire overrides together (later
  // rewrites win) and log each rewrite as a mutation.
  let wireOverrides;
  for (const rw of sectionRewritesApplied) {
    if (rw.wireOverrides) {
      if (!wireOverrides) wireOverrides = {};
      if (rw.wireOverrides.parallelToolCalls !== void 0) {
        wireOverrides.parallelToolCalls = rw.wireOverrides.parallelToolCalls;
      }
    }
    accumulatedMutations.push({
      id: `translator:${rw.rule}:${rw.sectionId}`,
      source: "translator",
      passName: "translator",
      description: `Rewrote section "${rw.sectionId}" (kind=${rw.kind}) via rule "${rw.rule}".`
    });
  }

  // --- Lowering and final checks --------------------------------------------
  const { forceThinkingZero, forceTerseOutput } = cliffs.value.loweringHints;
  const lowered = lower(workingIR, profile, {
    forceThinkingZero,
    forceTerseOutput,
    wireOverrides
  });
  validateFinalFit(workingIR, profile, inputTokens);

  const handle = makeHandle();
  const finalShape = computeShape(workingIR, inputTokens);
  // The learning key is computed but not read again in this function.
  const _learningKey = learningKey(ir.intent.archetype, profile.id, finalShape);

  const toolsKept = workingIR.tools?.length ?? 0;
  const historyKept = workingIR.history?.length ?? 0;
  const diagnostics = {
    sectionsKept: workingIR.sections.length,
    sectionsDropped: ir.sections.length - workingIR.sections.length,
    toolsKept,
    toolsDropped: (ir.tools?.length ?? 0) - (workingIR.tools?.length ?? 0),
    historyKept,
    historyDropped: (ir.history?.length ?? 0) - (workingIR.history?.length ?? 0),
    cacheableTokens: lowered.diagnostics.cacheableTokens,
    estimatedCacheSavingsUsd: lowered.diagnostics.estimatedCacheSavingsUsd,
    historyCacheableTokens: lowered.diagnostics.historyCacheableTokens,
    historyTokensTotal: compressed.historyTokensTotal,
    // alpha.20 E3: echo the consumer's declared mode for Glass-Box + brain
    // observability. Undefined when the consumer did not declare one.
    toolOrchestration: ir.constraints?.toolOrchestration
  };

  const isSequentialHunt = ir.intent.archetype === "hunt" && ir.constraints?.toolOrchestration === "sequential";
  if (isSequentialHunt) {
    accumulatedMutations.push({
      id: "sequential-mode-chain-selected",
      source: "tool_orchestration",
      passName: "compile",
      description: "ir.constraints.toolOrchestration='sequential' selected the DeepSeek-tier-0 hunt chain overlay (L-040 parallel-tool cliff doesn't apply at single-step granularity)."
    });
  }

  // --- Advisories -----------------------------------------------------------
  // A caller-supplied resolver may throw for unknown ids; the advisor gets a
  // variant that returns undefined instead.
  let phase2ProfileResolver = tryGetProfile;
  if (opts.profileResolver) {
    phase2ProfileResolver = (id) => {
      try {
        return opts.profileResolver(id);
      } catch {
        return void 0;
      }
    };
  }

  const compileSummary = {
    target: profile.id,
    provider: profile.provider,
    tokensIn: inputTokens,
    diagnostics
  };
  const advisorContext = {
    fallbackChain,
    profileResolver: phase2ProfileResolver,
    // alpha.29: hand the translator rewrites to the advisor so the
    // `archetype-perf-floor-breach` rule can stay quiet when the translator
    // already addressed the cliff for this archetype.
    sectionRewritesApplied
  };
  const advisories = runAdvisor(ir, compileSummary, profile, opts.policy, advisorContext);

  return {
    handle,
    target: profile.id,
    provider: profile.provider,
    request: lowered.request,
    tokensIn: inputTokens,
    estimatedCostUsd: target.estimatedCostUsd,
    mutationsApplied: accumulatedMutations,
    fallbackChain,
    advisories,
    diagnostics,
    sectionRewritesApplied,
    wireOverrides
  };
}
|
|
2812
|
+
/**
 * Reject an IR that lacks the fields `compile()` needs. Checks run in a fixed
 * order (appId, intent.archetype, models, sections) and the first failure
 * throws.
 *
 * @throws Error naming the first missing or malformed field.
 */
function validateIR(ir) {
  const reject = (detail) => {
    throw new Error(`compile(): ${detail}`);
  };

  if (!ir.appId) {
    reject("ir.appId is required");
  }
  if (!ir.intent?.archetype) {
    reject("ir.intent.archetype is required (use a dialect-v1 archetype)");
  }
  const hasModels = Array.isArray(ir.models) && ir.models.length > 0;
  if (!hasModels) {
    reject("ir.models must be a non-empty array");
  }
  if (!Array.isArray(ir.sections)) {
    reject("ir.sections must be an array");
  }
}
|
|
2824
|
+
/**
 * Choose the model to compile for.
 *
 * When `ir.constraints.forceModel` names a scored model, that model is
 * returned if it fits and an error is thrown if it does not. A forced id that
 * is absent from `scores` falls through to the normal selection: the fitting
 * score with the highest `rank`.
 *
 * @returns The chosen score entry, or undefined when nothing fits.
 * @throws Error when the forced model is scored but does not fit.
 */
function pickTarget(ir, scores) {
  const forcedId = ir.constraints?.forceModel;
  if (forcedId) {
    const forced = scores.find((s) => s.modelId === forcedId);
    if (forced) {
      if (forced.fits) {
        return forced;
      }
      throw new Error(
        `compile(): forceModel="${forcedId}" does not fit: ${forced.rejectReasons.join("; ")}`
      );
    }
  }

  const [best] = scores.filter((s) => s.fits).sort((a, b) => b.rank - a.rank);
  return best;
}
|
|
2837
|
+
function validateFinalFit(ir, profile, tokens) {
|
|
2838
|
+
if (tokens > profile.maxContextTokens) {
|
|
2839
|
+
throw new Error(
|
|
2840
|
+
`compile(): final IR is ${tokens} tokens, exceeds ${profile.id} context (${profile.maxContextTokens})`
|
|
1217
2841
|
);
|
|
1218
2842
|
}
|
|
1219
2843
|
if ((ir.tools?.length ?? 0) > profile.maxTools) {
|
|
@@ -1223,14 +2847,92 @@ function validateFinalFit(ir, profile, tokens) {
|
|
|
1223
2847
|
}
|
|
1224
2848
|
}
|
|
1225
2849
|
|
|
2850
|
+
// src/pricing-brain.ts
|
|
2851
|
+
/**
 * Type guard for a raw pricing row: `model_id` and `valid_from` must be
 * strings and both per-1M costs must be finite numbers (NaN and Infinity are
 * rejected).
 */
function isPricingRow(x) {
  if (!x || typeof x !== "object") return false;
  const r = x;
  return typeof r.model_id === "string" && Number.isFinite(r.cost_input_per_1m) && Number.isFinite(r.cost_output_per_1m) && typeof r.valid_from === "string";
}

/**
 * Convert raw snake_case pricing rows into camelCase pricing records with
 * epoch-millisecond validity bounds.
 *
 * Rows are skipped when they fail `isPricingRow` or when `valid_from` or a
 * non-null `valid_until` cannot be parsed as a date. Keeping such a row would
 * store NaN as a bound, and NaN fails every `<` / `>` / `<=` comparison, so a
 * time-window filter could never exclude the row.
 *
 * @param rows Raw rows; anything that is not an array yields an empty result.
 * @returns Array of `{ modelId, costInputPer1m, costOutputPer1m,
 *          cacheInputPer1m, cacheCreationPer1m, validFrom, validUntil, source }`.
 */
function mapRowsToPricing(rows) {
  const out = [];
  if (!Array.isArray(rows)) return out;
  for (const row of rows) {
    if (!isPricingRow(row)) continue;
    const validFrom = Date.parse(row.valid_from);
    if (Number.isNaN(validFrom)) continue;
    // null/undefined means "no expiry"; anything else must parse.
    const validUntil = row.valid_until == null ? void 0 : Date.parse(row.valid_until);
    if (validUntil !== void 0 && Number.isNaN(validUntil)) continue;
    out.push({
      modelId: row.model_id,
      costInputPer1m: row.cost_input_per_1m,
      costOutputPer1m: row.cost_output_per_1m,
      cacheInputPer1m: row.cache_input_per_1m ?? void 0,
      cacheCreationPer1m: row.cache_creation_per_1m ?? void 0,
      validFrom,
      validUntil,
      source: row.source ?? void 0
    });
  }
  return out;
}
|
|
2873
|
+
/**
 * Derive pricing records from the bundled model profiles.
 *
 * Each record is valid from epoch 0 with no expiry and is tagged
 * `source: "profile_seed"`. `cacheInputPer1m` is the input cost multiplied by
 * the profile's `lowering.cache.discount` when that discount is defined and
 * positive, and undefined otherwise.
 */
function bundledPricing() {
  return [...allProfiles()].map((profile) => {
    const discount = profile.lowering.cache.discount;
    const hasDiscount = discount !== void 0 && discount > 0;
    return {
      modelId: profile.id,
      costInputPer1m: profile.costInputPer1m,
      costOutputPer1m: profile.costOutputPer1m,
      cacheInputPer1m: hasDiscount ? profile.costInputPer1m * discount : void 0,
      validFrom: 0,
      validUntil: void 0,
      source: "profile_seed"
    };
  });
}
|
|
2888
|
+
// Loader for the `kgauto_pricing` table: rows go through mapRowsToPricing and
// bundledPricing is registered as the fallback.
var loadPricingFromBrain = createBrainQueryCache({
  table: "kgauto_pricing",
  mapRows: mapRowsToPricing,
  bundledFallback: bundledPricing
});

/**
 * Find the pricing record for `modelId` in effect at instant `at`.
 *
 * A record is in effect when `validFrom <= at` and it either has no
 * `validUntil` or `validUntil > at`. Among those, the one with the greatest
 * `validFrom` wins; on a tie the first one encountered is kept.
 *
 * @param modelId Model identifier to look up.
 * @param at      Date to evaluate at; defaults to now.
 * @returns The matching record, or undefined when none is in effect.
 */
function resolvePricingAt(modelId, at = /* @__PURE__ */ new Date()) {
  const ts = at.getTime();
  let winner;
  for (const candidate of loadPricingFromBrain()) {
    const notYetStarted = candidate.validFrom > ts;
    const alreadyEnded = candidate.validUntil !== void 0 && candidate.validUntil <= ts;
    if (candidate.modelId !== modelId || notYetStarted || alreadyEnded) {
      continue;
    }
    if (!winner || candidate.validFrom > winner.validFrom) {
      winner = candidate;
    }
  }
  return winner;
}
|
|
2905
|
+
|
|
1226
2906
|
// src/brain.ts
|
|
1227
2907
|
var activeConfig;
|
|
1228
2908
|
/**
 * Install the brain configuration and (re)configure the brain-query layer.
 *
 * A trailing `/outcomes` (with optional slash) is stripped from
 * `config.endpoint` before it is stored. Each `config.brainQuery` flag
 * (`chains`, `perf`, `pricing`, `models`) enables its table(s) unless it is
 * exactly `false`; if every flag is `false`, brain-query is disabled.
 *
 * @param config Brain config; reads `endpoint`, `brainQuery`, `fetchImpl`
 *               and `onError`.
 */
function configureBrain(config) {
  const endpoint = config.endpoint.replace(/\/outcomes\/?$/, "");
  activeConfig = { ...config, endpoint };

  const bq = config.brainQuery ?? {};
  // Flag -> tables it controls, in the order they are registered.
  const tableGroups = [
    [bq.chains, ["kgauto_chains"]],
    [bq.perf, ["kgauto_archetype_perf"]],
    [bq.pricing, ["kgauto_pricing"]],
    [bq.models, ["kgauto_models", "kgauto_aliases"]]
  ];
  const enabledTables = new Set();
  for (const [flag, tables] of tableGroups) {
    if (flag === false) continue;
    for (const table of tables) {
      enabledTables.add(table);
    }
  }

  if (enabledTables.size === 0) {
    configureBrainQuery(void 0);
    return;
  }

  configureBrainQuery({
    endpoint,
    configEndpoint: bq.configEndpoint,
    ttlMs: bq.cacheTtlMs ?? 3e5,
    fetchImpl: config.fetchImpl ?? fetch,
    enabledTables,
    onError: config.onError
  });
}
|
|
1232
2933
|
/**
 * Undo `configureBrain`: disable the brain-query layer and forget the active
 * brain configuration.
 */
function clearBrain() {
  configureBrainQuery(void 0);
  activeConfig = void 0;
}
|
|
1235
2937
|
var compileRegistry = /* @__PURE__ */ new Map();
|
|
1236
2938
|
var REGISTRY_MAX_ENTRIES = 1e4;
|
|
@@ -1257,6 +2959,9 @@ function registerCompile(appId, archetype, ir, result) {
|
|
|
1257
2959
|
tokens
|
|
1258
2960
|
);
|
|
1259
2961
|
const shapeKey = `${shape.contextBucket}-${shape.toolCountBucket}-${shape.historyDepth}-${shape.outputMode}`;
|
|
2962
|
+
const toolsCount = result.diagnostics.toolsKept;
|
|
2963
|
+
const historyDepth = Array.isArray(ir.history) ? ir.history.length : 0;
|
|
2964
|
+
const systemPromptChars = estimateSystemPromptChars(ir.sections);
|
|
1260
2965
|
compileRegistry.set(result.handle, {
|
|
1261
2966
|
appId,
|
|
1262
2967
|
archetype,
|
|
@@ -1266,9 +2971,35 @@ function registerCompile(appId, archetype, ir, result) {
|
|
|
1266
2971
|
learningKey: learningKey(archetype, result.target, shape),
|
|
1267
2972
|
estimatedTokensIn: tokens,
|
|
1268
2973
|
mutationsApplied: result.mutationsApplied.map((m) => m.id),
|
|
1269
|
-
|
|
2974
|
+
// alpha.30: cache the in-memory advisories so record() can auto-persist
|
|
2975
|
+
// to `compile_outcome_advisories` without consumer-side threading.
|
|
2976
|
+
advisoriesFromCompile: result.advisories ?? [],
|
|
2977
|
+
startedAt: Date.now(),
|
|
2978
|
+
historyCacheableTokens: result.diagnostics.historyCacheableTokens,
|
|
2979
|
+
historyTokensTotal: result.diagnostics.historyTokensTotal,
|
|
2980
|
+
// alpha.20 E3: capture consumer's declared mode for the brain payload.
|
|
2981
|
+
toolOrchestration: result.diagnostics.toolOrchestration,
|
|
2982
|
+
// alpha.28: shape fields for Glass-Box renderer.
|
|
2983
|
+
toolsCount,
|
|
2984
|
+
historyDepth,
|
|
2985
|
+
systemPromptChars,
|
|
2986
|
+
// alpha.29: translator activity — persisted on the brain row so
|
|
2987
|
+
// cross-app aggregates can answer "Sonnet narration rule fired N times,
|
|
2988
|
+
// outcome quality lifted to M."
|
|
2989
|
+
sectionRewritesApplied: result.sectionRewritesApplied
|
|
1270
2990
|
});
|
|
1271
2991
|
}
|
|
2992
|
+
/**
 * Total character count of the section bodies.
 *
 * The body is read from `section.text`, which is the field the translator
 * reads and rewrites on IR sections. `section.content` is still honoured as a
 * fallback for callers that used that key. Non-object entries and entries
 * without a string body contribute nothing.
 *
 * @param sections Array of section objects.
 * @returns The summed length, or undefined when `sections` is not a non-empty
 *          array or the total is zero.
 */
function estimateSystemPromptChars(sections) {
  if (!Array.isArray(sections) || sections.length === 0) return void 0;
  let total = 0;
  for (const section of sections) {
    if (!section || typeof section !== "object") continue;
    const body = typeof section.text === "string" ? section.text : section.content;
    if (typeof body === "string") total += body.length;
  }
  return total > 0 ? total : void 0;
}
|
|
1272
3003
|
async function record(input) {
|
|
1273
3004
|
const reg = compileRegistry.get(input.handle);
|
|
1274
3005
|
if (reg) compileRegistry.delete(input.handle);
|
|
@@ -1279,11 +3010,22 @@ async function record(input) {
|
|
|
1279
3010
|
const config = activeConfig;
|
|
1280
3011
|
const fetchFn = config.fetchImpl ?? fetch;
|
|
1281
3012
|
const send = async () => {
|
|
3013
|
+
let outcomeId;
|
|
1282
3014
|
try {
|
|
1283
3015
|
const res = await fetchFn(`${config.endpoint}/outcomes`, {
|
|
1284
3016
|
method: "POST",
|
|
1285
3017
|
headers: {
|
|
1286
3018
|
"Content-Type": "application/json",
|
|
3019
|
+
// alpha.20: request the inserted row back so we can JOIN advisories
|
|
3020
|
+
// to it via outcome_id. PostgREST returns the row when
|
|
3021
|
+
// `Prefer: return=representation` is set; proxies that pass the
|
|
3022
|
+
// header through (the recommended `const row = { ...body }` shape
|
|
3023
|
+
// from OutcomePayload's forward-compat rule) will surface
|
|
3024
|
+
// the row id. Proxies that don't (legacy / hand-rolled shapes)
|
|
3025
|
+
// simply produce no parseable id → secondary advisory POST is
|
|
3026
|
+
// skipped silently. Best-effort — primary outcome row is the
|
|
3027
|
+
// load-bearing write.
|
|
3028
|
+
Prefer: "return=representation",
|
|
1287
3029
|
...config.apiKey ? { Authorization: `Bearer ${config.apiKey}` } : {}
|
|
1288
3030
|
},
|
|
1289
3031
|
body: JSON.stringify(payload)
|
|
@@ -1292,8 +3034,31 @@ async function record(input) {
|
|
|
1292
3034
|
const text = await res.text().catch(() => "<no body>");
|
|
1293
3035
|
throw new Error(`brain ${res.status}: ${text}`);
|
|
1294
3036
|
}
|
|
3037
|
+
outcomeId = await tryExtractOutcomeId(res);
|
|
3038
|
+
} catch (err) {
|
|
3039
|
+
(config.onError ?? defaultOnError2)(err);
|
|
3040
|
+
return;
|
|
3041
|
+
}
|
|
3042
|
+
const advisories = input.advisories ?? reg?.advisoriesFromCompile;
|
|
3043
|
+
if (!advisories || advisories.length === 0) return;
|
|
3044
|
+
if (outcomeId === void 0) return;
|
|
3045
|
+
try {
|
|
3046
|
+
const advisoryPayload = advisories.map((a) => buildAdvisoryRow(outcomeId, a));
|
|
3047
|
+
const res = await fetchFn(`${config.endpoint}/compile_outcome_advisories`, {
|
|
3048
|
+
method: "POST",
|
|
3049
|
+
headers: {
|
|
3050
|
+
"Content-Type": "application/json",
|
|
3051
|
+
Prefer: "return=minimal",
|
|
3052
|
+
...config.apiKey ? { Authorization: `Bearer ${config.apiKey}` } : {}
|
|
3053
|
+
},
|
|
3054
|
+
body: JSON.stringify(advisoryPayload)
|
|
3055
|
+
});
|
|
3056
|
+
if (!res.ok) {
|
|
3057
|
+
const text = await res.text().catch(() => "<no body>");
|
|
3058
|
+
throw new Error(`brain advisories ${res.status}: ${text}`);
|
|
3059
|
+
}
|
|
1295
3060
|
} catch (err) {
|
|
1296
|
-
(config.onError ??
|
|
3061
|
+
(config.onError ?? defaultOnError2)(err);
|
|
1297
3062
|
}
|
|
1298
3063
|
};
|
|
1299
3064
|
if (config.sync) {
|
|
@@ -1302,13 +3067,18 @@ async function record(input) {
|
|
|
1302
3067
|
void send();
|
|
1303
3068
|
}
|
|
1304
3069
|
}
|
|
1305
|
-
function
|
|
3070
|
+
/**
 * Fallback error handler used when the brain config supplies no `onError`:
 * logs the error as a console warning.
 */
function defaultOnError2(err) {
  const prefix = "[kgauto] brain record failed:";
  console.warn(prefix, err);
}
|
|
1308
3073
|
function buildPayload(input, reg) {
|
|
1309
3074
|
const compileTarget = reg?.model;
|
|
1310
3075
|
const actual = input.actualModel ?? compileTarget;
|
|
1311
3076
|
const requested = input.actualModel && compileTarget && input.actualModel !== compileTarget ? compileTarget : void 0;
|
|
3077
|
+
const mutationsApplied = input.mutationsApplied ?? reg?.mutationsApplied ?? [];
|
|
3078
|
+
const costModel = actual;
|
|
3079
|
+
const costUsdActual = costModel ? computeCostUsd(costModel, input.tokensIn, input.tokensOut) : void 0;
|
|
3080
|
+
const fellOverFrom = input.fellOverFrom ?? requested;
|
|
3081
|
+
const fallbackReason = fellOverFrom ? input.fallbackReason : void 0;
|
|
1312
3082
|
return {
|
|
1313
3083
|
handle: input.handle,
|
|
1314
3084
|
app_id: reg?.appId,
|
|
@@ -1318,7 +3088,7 @@ function buildPayload(input, reg) {
|
|
|
1318
3088
|
provider: reg?.provider,
|
|
1319
3089
|
shape_key: reg?.shapeKey,
|
|
1320
3090
|
learning_key: reg?.learningKey,
|
|
1321
|
-
mutations_applied:
|
|
3091
|
+
mutations_applied: mutationsApplied,
|
|
1322
3092
|
tokens_in: input.tokensIn,
|
|
1323
3093
|
tokens_out: input.tokensOut,
|
|
1324
3094
|
estimated_tokens_in: reg?.estimatedTokensIn,
|
|
@@ -1332,8 +3102,117 @@ function buildPayload(input, reg) {
|
|
|
1332
3102
|
oracle_rationale: input.oracleScore?.rationale,
|
|
1333
3103
|
prompt_preview: input.promptPreview,
|
|
1334
3104
|
response_preview: input.responsePreview,
|
|
1335
|
-
dialect_version: "v1"
|
|
3105
|
+
dialect_version: "v1",
|
|
3106
|
+
cache_read_input_tokens: input.cacheReadInputTokens,
|
|
3107
|
+
cache_creation_input_tokens: input.cacheCreationInputTokens,
|
|
3108
|
+
cost_usd_actual: costUsdActual,
|
|
3109
|
+
ttft_ms: input.ttftMs,
|
|
3110
|
+
history_cacheable_tokens: reg?.historyCacheableTokens,
|
|
3111
|
+
history_tokens_at_compile: reg?.historyTokensTotal,
|
|
3112
|
+
// alpha.20 E3: mirror consumer's declared tool-orchestration mode so
|
|
3113
|
+
// the brain can measure per-mode model perf separately (DeepSeek in
|
|
3114
|
+
// sequential vs parallel mode is two different stories — L-040).
|
|
3115
|
+
// Null when consumer hadn't adopted the constraint yet.
|
|
3116
|
+
tool_orchestration: reg?.toolOrchestration ?? null,
|
|
3117
|
+
// alpha.28 — Glass-Box renderer substrate (migration 018). All optional;
|
|
3118
|
+
// omitted-undefined PostgREST inserts store NULL → renderer renders "—".
|
|
3119
|
+
finish_reason: input.finishReason,
|
|
3120
|
+
total_ms: input.totalMs ?? input.latencyMs,
|
|
3121
|
+
tools_count: input.toolsCount ?? reg?.toolsCount,
|
|
3122
|
+
history_depth: input.historyDepth ?? reg?.historyDepth,
|
|
3123
|
+
system_prompt_chars: input.systemPromptChars ?? reg?.systemPromptChars,
|
|
3124
|
+
fell_over_from: fellOverFrom,
|
|
3125
|
+
fallback_reason: fallbackReason,
|
|
3126
|
+
// alpha.29 — translator activity (migration 019). Send NULL when no
|
|
3127
|
+
// rewrites fired so the brain's "did the translator do anything?"
|
|
3128
|
+
// queries can use `IS NOT NULL` cleanly.
|
|
3129
|
+
section_rewrites_applied: reg?.sectionRewritesApplied && reg.sectionRewritesApplied.length > 0 ? reg.sectionRewritesApplied : null
|
|
3130
|
+
};
|
|
3131
|
+
}
|
|
3132
|
+
/**
 * Actual USD cost of a call, rounded to six decimal places.
 *
 * Rates come from the pricing row in effect now (`resolvePricingAt`) when it
 * carries a positive input or output rate, otherwise from the model profile.
 *
 * @param modelId   Model that served the call.
 * @param tokensIn  Input token count.
 * @param tokensOut Output token count.
 * @returns The cost, or undefined when both counts are zero, when no rates
 *          are known for the model, or when the result is not a finite
 *          number (e.g. a token count was missing). Returning undefined
 *          instead of NaN keeps a non-number out of the outcome payload.
 */
function computeCostUsd(modelId, tokensIn, tokensOut) {
  if (tokensIn === 0 && tokensOut === 0) return void 0;
  const brainRow = resolvePricingAt(modelId);
  const brainHasRates = brainRow && (brainRow.costInputPer1m > 0 || brainRow.costOutputPer1m > 0);
  const rates = brainHasRates ? brainRow : tryGetProfile(modelId);
  if (!rates) return void 0;
  const inUsd = tokensIn / 1e6 * rates.costInputPer1m;
  const outUsd = tokensOut / 1e6 * rates.costOutputPer1m;
  const total = inUsd + outUsd;
  return Number.isFinite(total) ? Math.round(total * 1e6) / 1e6 : void 0;
}
|
|
3146
|
+
/**
 * Best-effort extraction of the inserted row id from an outcome POST response.
 *
 * Accepts either an array of rows (the first row is used) or a single row
 * object. The content-type check is case-insensitive and accepts any JSON
 * media type, including structured-suffix types such as
 * `application/vnd.pgrst.object+json`. A missing content-type header is not
 * treated as a mismatch.
 *
 * @param res Fetch-style response (`headers.get`, `json`).
 * @returns The numeric `id`, or undefined when the body is not JSON, has no
 *          finite numeric `id`, or cannot be read. Never throws.
 */
async function tryExtractOutcomeId(res) {
  try {
    const contentType = String(res.headers?.get?.("content-type") ?? "").toLowerCase();
    if (contentType && !contentType.includes("json")) return void 0;
    if (typeof res.json !== "function") return void 0;
    const body = await res.json();
    const row = Array.isArray(body) ? body[0] : body;
    const id = row?.id;
    return Number.isFinite(id) ? id : void 0;
  } catch {
    // Unreadable or unparseable body: the caller skips the advisory write.
    return void 0;
  }
}
|
|
3165
|
+
/**
 * Shape one advisory as a `compile_outcome_advisories` row.
 *
 * `recommendation_type`, `suggestion` and `docs_url` are only included when
 * the advisory carries a truthy value for them.
 *
 * @param outcomeId Id of the outcome row the advisory belongs to.
 * @param a         Advisory produced at compile time.
 */
function buildAdvisoryRow(outcomeId, a) {
  const row = {
    outcome_id: outcomeId,
    code: a.code,
    level: a.level,
    message: a.message
  };
  if (a.recommendationType) row.recommendation_type = a.recommendationType;
  if (a.suggestion) row.suggestion = a.suggestion;
  if (a.docsUrl) row.docs_url = a.docsUrl;
  return row;
}
|
|
3176
|
+
/**
 * POST a quality verdict for a previously recorded outcome to
 * `<endpoint>/compile_outcome_quality`.
 *
 * In sync mode (`config.sync`) the returned promise reflects the actual
 * write. Otherwise the write is started in the background and `{ ok: true }`
 * is returned immediately; failures then only reach the error handler.
 *
 * @param input `{ outcomeId, outcome, rating?, reason?, observedConfidence? }`;
 *              the optional fields are sent as null when absent.
 * @returns `{ ok: true }`, or `{ ok: false, reason }` with reason
 *          "brain_not_configured" or "persistence_failed".
 */
async function recordOutcome(input) {
  if (!activeConfig) {
    return { ok: false, reason: "brain_not_configured" };
  }
  const config = activeConfig;
  const fetchFn = config.fetchImpl ?? fetch;
  const payload = {
    outcome_id: input.outcomeId,
    outcome: input.outcome,
    rating: input.rating ?? null,
    reason: input.reason ?? null,
    observed_confidence: input.observedConfidence ?? null
  };

  // Report through the configured handler and produce the failure result.
  const reportFailure = (err) => {
    (config.onError ?? defaultOnError2)(err);
    return { ok: false, reason: "persistence_failed" };
  };

  const send = async () => {
    try {
      const res = await fetchFn(`${config.endpoint}/compile_outcome_quality`, {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
          ...config.apiKey ? { Authorization: `Bearer ${config.apiKey}` } : {}
        },
        body: JSON.stringify(payload)
      });
      if (res.ok) {
        return { ok: true };
      }
      const text = await res.text().catch(() => "<no body>");
      return reportFailure(new Error(`brain ${res.status}: ${text}`));
    } catch (err) {
      return reportFailure(err);
    }
  };

  if (!config.sync) {
    // Fire-and-forget: send() handles its own errors.
    void send();
    return { ok: true };
  }
  return send();
}
|
|
1338
3217
|
|
|
1339
3218
|
// src/ir.ts
|
|
@@ -1350,6 +3229,67 @@ var CallError = class extends Error {
|
|
|
1350
3229
|
}
|
|
1351
3230
|
};
|
|
1352
3231
|
|
|
3232
|
+
// src/env.ts
|
|
3233
|
+
// Provider identifiers this module can resolve API keys for (frozen).
var SUPPORTED_PROVIDERS = Object.freeze(["anthropic", "google", "openai", "deepseek"]);

/** True when `p` is one of SUPPORTED_PROVIDERS. */
function isSupportedProvider(p) {
  return SUPPORTED_PROVIDERS.some((known) => known === p);
}
|
|
3242
|
+
// Environment variable names checked for each provider, in lookup order.
// Both the map and every name list are frozen.
var PROVIDER_ENV_KEYS = Object.freeze(
  Object.fromEntries(
    [
      ["anthropic", ["ANTHROPIC_API_KEY"]],
      ["google", ["GOOGLE_API_KEY", "GEMINI_API_KEY", "GOOGLE_GENERATIVE_AI_API_KEY"]],
      ["openai", ["OPENAI_API_KEY"]],
      ["deepseek", ["DEEPSEEK_API_KEY"]]
    ].map(([provider, names]) => [provider, Object.freeze(names)])
  )
);
|
|
3252
|
+
/**
 * Environment to read keys from when the caller supplies none:
 * `process.env` when it exists, otherwise an empty object.
 */
function defaultEnv() {
  if (typeof process === "undefined" || !process.env) {
    return {};
  }
  return process.env;
}
|
|
3255
|
+
/**
 * Normalise a candidate API-key value.
 *
 * @param raw Value taken from an `apiKeys` map or an environment source.
 * @returns The trimmed string, or undefined when `raw` is not a string or is
 *          empty/whitespace-only. Non-string values (null, numbers, ...) are
 *          treated as "no key" rather than throwing on `.trim()`.
 */
function readKeyValue(raw) {
  if (typeof raw !== "string") return void 0;
  const trimmed = raw.trim();
  return trimmed.length > 0 ? trimmed : void 0;
}
|
|
3260
|
+
/**
 * Resolve the API key for a provider.
 *
 * Lookup order: an explicit `opts.apiKeys[provider]`, then each environment
 * variable listed in PROVIDER_ENV_KEYS for that provider, read from
 * `opts.envSource` or the default environment. Blank values are ignored.
 *
 * @returns The key, or undefined for an unsupported provider or when no
 *          non-blank key is found.
 */
function resolveProviderKey(provider, opts = {}) {
  if (!isSupportedProvider(provider)) {
    return void 0;
  }

  const fromCaller = readKeyValue(opts.apiKeys?.[provider]);
  if (fromCaller) {
    return fromCaller;
  }

  const env = opts.envSource ?? defaultEnv();
  const candidates = PROVIDER_ENV_KEYS[provider];
  for (let i = 0; i < candidates.length; i += 1) {
    const fromEnv = readKeyValue(env[candidates[i]]);
    if (fromEnv) {
      return fromEnv;
    }
  }
  return void 0;
}
|
|
3271
|
+
/** True when `resolveProviderKey` finds a key for `provider`. */
function isProviderReachable(provider, opts = {}) {
  const key = resolveProviderKey(provider, opts);
  return key !== void 0;
}
|
|
3274
|
+
/**
 * True when `modelId` has a known profile and that profile's provider is
 * reachable; false for an unknown model.
 */
function isModelReachable(modelId, opts = {}) {
  const profile = tryGetProfile(modelId);
  return profile ? isProviderReachable(profile.provider, opts) : false;
}
|
|
3279
|
+
/**
 * Report, for every supported provider, whether a key is available and where
 * it came from.
 *
 * @returns Map of provider -> `{ reachable: true, via: "apiKeys" }`,
 *          `{ reachable: true, via: "env", envKeyFound }` (the first
 *          environment variable holding a non-blank value), or
 *          `{ reachable: false, via: null }`.
 */
function getReachabilityDiagnostic(opts = {}) {
  const env = opts.envSource ?? defaultEnv();

  const describe = (provider) => {
    // An explicit key wins over anything in the environment.
    if (readKeyValue(opts.apiKeys?.[provider])) {
      return { reachable: true, via: "apiKeys" };
    }
    const envKeyFound = PROVIDER_ENV_KEYS[provider].find((name) => readKeyValue(env[name]));
    if (envKeyFound) {
      return { reachable: true, via: "env", envKeyFound };
    }
    return { reachable: false, via: null };
  };

  const report = {};
  for (const provider of SUPPORTED_PROVIDERS) {
    report[provider] = describe(provider);
  }
  return report;
}
|
|
3292
|
+
|
|
1353
3293
|
// src/execute.ts
|
|
1354
3294
|
var ANTHROPIC_URL = "https://api.anthropic.com/v1/messages";
|
|
1355
3295
|
var OPENAI_URL = "https://api.openai.com/v1/chat/completions";
|
|
@@ -1372,7 +3312,7 @@ async function execute(request, opts = {}) {
|
|
|
1372
3312
|
}
|
|
1373
3313
|
}
|
|
1374
3314
|
async function executeAnthropic(request, opts) {
|
|
1375
|
-
const apiKey = opts.apiKeys
|
|
3315
|
+
const apiKey = resolveProviderKey("anthropic", { apiKeys: opts.apiKeys });
|
|
1376
3316
|
if (!apiKey) {
|
|
1377
3317
|
return terminalError(401, "auth", "ANTHROPIC_API_KEY missing");
|
|
1378
3318
|
}
|
|
@@ -1411,7 +3351,7 @@ function normalizeAnthropic(raw) {
|
|
|
1411
3351
|
return { text, structuredOutput: null, toolCalls, tokens, finishReason: r.stop_reason, raw };
|
|
1412
3352
|
}
|
|
1413
3353
|
async function executeGoogle(request, opts) {
|
|
1414
|
-
const apiKey =
|
|
3354
|
+
const apiKey = resolveProviderKey("google", { apiKeys: opts.apiKeys });
|
|
1415
3355
|
if (!apiKey) {
|
|
1416
3356
|
return terminalError(401, "auth", "GOOGLE_API_KEY/GEMINI_API_KEY missing");
|
|
1417
3357
|
}
|
|
@@ -1453,7 +3393,7 @@ function normalizeGoogle(raw) {
|
|
|
1453
3393
|
return { text, structuredOutput: null, toolCalls, tokens, finishReason: candidate?.finishReason, raw };
|
|
1454
3394
|
}
|
|
1455
3395
|
async function executeOpenAI(request, opts) {
|
|
1456
|
-
const apiKey = opts.apiKeys
|
|
3396
|
+
const apiKey = resolveProviderKey("openai", { apiKeys: opts.apiKeys });
|
|
1457
3397
|
if (!apiKey) {
|
|
1458
3398
|
return terminalError(401, "auth", "OPENAI_API_KEY missing");
|
|
1459
3399
|
}
|
|
@@ -1475,7 +3415,7 @@ async function executeOpenAI(request, opts) {
|
|
|
1475
3415
|
return { ok: true, status: res.status, response: normalizeOpenAILike(json) };
|
|
1476
3416
|
}
|
|
1477
3417
|
async function executeDeepSeek(request, opts) {
|
|
1478
|
-
const apiKey = opts.apiKeys
|
|
3418
|
+
const apiKey = resolveProviderKey("deepseek", { apiKeys: opts.apiKeys });
|
|
1479
3419
|
if (!apiKey) {
|
|
1480
3420
|
return terminalError(401, "auth", "DEEPSEEK_API_KEY missing");
|
|
1481
3421
|
}
|
|
@@ -1525,60 +3465,826 @@ function classifyHttpError(status, body) {
|
|
|
1525
3465
|
if (status === 429) {
|
|
1526
3466
|
return { ok: false, status, errorType: "retryable", errorCode: "rate_limit", message, raw: body };
|
|
1527
3467
|
}
|
|
1528
|
-
if (status === 408) {
|
|
1529
|
-
return { ok: false, status, errorType: "retryable", errorCode: "timeout", message, raw: body };
|
|
3468
|
+
if (status === 408) {
|
|
3469
|
+
return { ok: false, status, errorType: "retryable", errorCode: "timeout", message, raw: body };
|
|
3470
|
+
}
|
|
3471
|
+
if (status >= 500) {
|
|
3472
|
+
return { ok: false, status, errorType: "retryable", errorCode: "server_error", message, raw: body };
|
|
3473
|
+
}
|
|
3474
|
+
if (status === 404) {
|
|
3475
|
+
return { ok: false, status, errorType: "retryable", errorCode: "model_not_found", message, raw: body };
|
|
3476
|
+
}
|
|
3477
|
+
if (status === 401 || status === 403) {
|
|
3478
|
+
return { ok: false, status, errorType: "terminal", errorCode: "auth", message, raw: body };
|
|
3479
|
+
}
|
|
3480
|
+
if (status === 400) {
|
|
3481
|
+
return { ok: false, status, errorType: "terminal", errorCode: "invalid_request", message, raw: body };
|
|
3482
|
+
}
|
|
3483
|
+
return { ok: false, status, errorType: "terminal", errorCode: "unknown", message, raw: body };
|
|
3484
|
+
}
|
|
3485
|
+
/**
 * Pull a human-readable message out of a parsed error body.
 *
 * Prefers `body.error.message` (when `error` is an object), then falls back
 * to a top-level `body.message`. Returns `undefined` when neither is a string
 * or when `body` is not an object.
 */
function extractErrorMessage(body) {
  if (!body || typeof body !== "object") return void 0;
  const nested = body.error;
  if (nested && typeof nested === "object" && typeof nested.message === "string") {
    return nested.message;
  }
  return typeof body.message === "string" ? body.message : void 0;
}
|
|
3495
|
+
/**
 * Build a non-retryable failure result. `raw` is always `null` because these
 * errors are produced locally rather than from a provider response body.
 */
function terminalError(status, code, message) {
  const failure = {
    ok: false,
    status,
    errorType: "terminal",
    errorCode: code,
    message,
    raw: null
  };
  return failure;
}
|
|
3498
|
+
/**
 * Build a retryable failure result carrying the provider's raw payload.
 */
function retryableError(status, code, message, raw) {
  const failure = {
    ok: false,
    status,
    errorType: "retryable",
    errorCode: code,
    message,
    raw
  };
  return failure;
}
|
|
3501
|
+
/**
 * Parse `s` as JSON without throwing.
 *
 * @returns The parsed value when `s` is a non-empty string that parses to a
 *   non-null object (arrays included); `undefined` in every other case,
 *   including JSON primitives and malformed input.
 */
function tryParseJson(s) {
  if (typeof s !== "string" || s.length === 0) return void 0;
  let value;
  try {
    value = JSON.parse(s);
  } catch {
    return void 0;
  }
  if (value === null || typeof value !== "object") return void 0;
  return value;
}
|
|
3510
|
+
|
|
3511
|
+
// src/chains-brain.ts
|
|
3512
|
+
/**
 * Type guard for a chains row: an object with a string `archetype`, a
 * numeric `tier` and a string `model_id`.
 */
function isChainsRow(x) {
  if (!x || typeof x !== "object") {
    return false;
  }
  const { archetype, tier, model_id: modelId } = x;
  return typeof archetype === "string" && typeof tier === "number" && typeof modelId === "string";
}
|
|
3517
|
+
/**
 * Convert raw chain rows into an `archetype -> [modelId, ...]` map.
 *
 * Rows failing `isChainsRow` are ignored. Within each archetype the models
 * are ordered by ascending `tier`. Any archetype present in the bundled
 * starter chains but absent from the rows is filled in from the bundle.
 */
function mapRowsToChains(rows) {
  const buckets = /* @__PURE__ */ new Map();
  for (const candidate of rows) {
    if (isChainsRow(candidate)) {
      if (!buckets.has(candidate.archetype)) {
        buckets.set(candidate.archetype, []);
      }
      buckets.get(candidate.archetype).push(candidate);
    }
  }
  const chains = {};
  buckets.forEach((members, archetype) => {
    members.sort((left, right) => left.tier - right.tier);
    chains[archetype] = members.map((member) => member.model_id);
  });
  // Backfill archetypes the rows did not cover.
  const defaults = getAllStarterChains();
  for (const [archetype, chain] of Object.entries(defaults)) {
    if (!chains[archetype]) chains[archetype] = chain;
  }
  return chains;
}
|
|
3536
|
+
// Loader for fallback chains backed by the "kgauto_chains" table. Rows are
// shaped by `mapRowsToChains`; `getAllStarterChains` is supplied as the
// bundled fallback. Caching behaviour is defined by `createBrainQueryCache`.
var loadChainsFromBrain = createBrainQueryCache({
  table: "kgauto_chains",
  mapRows: mapRowsToChains,
  bundledFallback: getAllStarterChains
});
|
|
3541
|
+
|
|
3542
|
+
// src/fallback.ts
|
|
3543
|
+
/**
 * Bundled starter fallback chains, keyed by archetype. Every entry records
 * the model id, how the placement is grounded ("judgment", "capability-fact"
 * or "measured"), a free-text reason, and — for measured entries — the sample
 * count `n`.
 */
var STARTER_CHAINS_GROUNDED = (() => {
  // Small entry factories keep the table below to one line per model.
  const judgment = (id, reason) => ({ id, grounding: "judgment", reason });
  const capabilityFact = (id, reason) => ({ id, grounding: "capability-fact", reason });
  const measured = (id, reason, n) => ({ id, grounding: "measured", reason, n });
  return {
    // Highest reasoning bar: Opus leads, then same-provider Sonnet, then cross-provider.
    critique: [
      judgment("claude-opus-4-7", "Highest reasoning bar, no degradation tier \u2014 engineer pick, awaiting measured backing"),
      judgment("claude-sonnet-4-6", "Same-provider walk-down from Opus on 429"),
      judgment("gemini-2.5-pro", "Cross-provider anchor in similar quality bracket"),
      judgment("gpt-5.5", "alpha.16: third-provider frontier-tier floor (archetypePerf=9)")
    ],
    // Reasoning matters: Sonnet primary, walking UP to Opus on 429.
    plan: [
      judgment("claude-sonnet-4-6", "Reasoning + cost balance \u2014 engineer pick"),
      judgment("claude-opus-4-7", 'Same-provider walk-UP on 429 (rare exception to "always cheaper")'),
      judgment("gemini-2.5-pro", "Cross-provider anchor"),
      judgment("deepseek-v4-pro", "Tier 3 cost floor \u2014 no brain evidence yet")
    ],
    // Quality + cost match.
    generate: [
      judgment("claude-sonnet-4-6", "Quality + cost match \u2014 engineer pick"),
      judgment("claude-haiku-4-5", "Same-provider step-down"),
      judgment("gemini-2.5-pro", "Cross-provider anchor"),
      judgment("gpt-5.4-mini", "alpha.16: third-provider tail (archetypePerf=7) \u2014 closes mono-Anthropic gap")
    ],
    // Sonnet placement is labeled 'judgment': tt-intel s78 prod data showed a
    // 27% empty rate, so it awaits evidence that validates or refutes it.
    ask: [
      judgment("claude-sonnet-4-6", "Quality + cost match \u2014 engineer pick. NOTE: tt-intel s78 prod showed 27% empty rate; placement awaits measurement validation"),
      judgment("claude-haiku-4-5", "Same-provider step-down"),
      judgment("gemini-2.5-pro", "Cross-provider anchor"),
      judgment("gpt-5.4-mini", "alpha.16: third-provider tail (archetypePerf=7)")
    ],
    // Structured-output archetype. Flash is skipped (alpha.8 MAX_TOKENS cliff,
    // capability-fact); DeepSeek is skipped (no brain evidence).
    extract: [
      judgment("claude-sonnet-4-6", "Reliable structured-output anchor \u2014 engineer pick"),
      judgment("claude-haiku-4-5", "Same-provider step-down with native structured output"),
      judgment("gemini-2.5-pro", "Cross-provider anchor with structured-output support"),
      capabilityFact("gpt-5.4", "alpha.16: third-provider floor \u2014 native structured-output capability (archetypePerf=8)")
    ],
    // Forgiving archetype: Sonnet primary, Flash as the cost floor.
    transform: [
      judgment("claude-sonnet-4-6", "Quality anchor \u2014 engineer pick"),
      judgment("claude-haiku-4-5", "Same-provider step-down"),
      judgment("gemini-2.5-pro", "Cross-provider anchor"),
      judgment("gemini-2.5-flash", "Cost floor \u2014 forgiving archetype tolerates Flash")
    ],
    // Parallel-tool throughput: Flash leads on the L-040 cliff
    // (capability-fact: Flash 15-75 parallel calls/step vs DeepSeek 7-8).
    hunt: [
      capabilityFact("gemini-2.5-flash", "L-040 parallel-tool throughput champion (15-75 calls/step)"),
      capabilityFact("gemini-2.5-pro", "Cross-provider tier 1 with strong parallel-tool support"),
      judgment("claude-sonnet-4-6", "Quality safety net for blocked-Flash case"),
      judgment("claude-haiku-4-5", "Reduced tool budget \u2014 cliff at 16 fires")
    ],
    // Cost-sensitive and tolerant; DeepSeek Flash carries measured backing.
    summarize: [
      judgment("gemini-2.5-flash", "Cost-sensitive primary \u2014 engineer pick"),
      measured("deepseek-v4-flash", "Brain-validated tier 1 for cost-sensitive summarize workloads", 169),
      judgment("claude-haiku-4-5", "Quality safety net"),
      judgment("gemini-2.5-flash-lite", "Emergency floor \u2014 onboarded s22, no brain evidence yet")
    ],
    // DeepSeek Flash is brain-validated here (169 rows, 0% empty rate).
    classify: [
      judgment("gemini-2.5-flash", "Cost-sensitive primary \u2014 engineer pick"),
      measured("deepseek-v4-flash", "Brain-validated tier 1 (169 rows, 0% empty rate)", 169),
      judgment("claude-haiku-4-5", "Quality safety net"),
      judgment("gemini-2.5-flash-lite", "Cache-discount 10\xD7 floor for repeat-prompt workloads")
    ]
  };
})();
|
|
3612
|
+
// Id-only projection of STARTER_CHAINS_GROUNDED: archetype -> [modelId, ...].
var STARTER_CHAINS = Object.fromEntries(
  Object.entries(STARTER_CHAINS_GROUNDED).map(([archetype, entries]) => [
    archetype,
    entries.map((entry) => entry.id)
  ])
);
|
|
3619
|
+
/**
 * Per-orchestration-mode overlays on top of the default starter chains.
 * Only `hunt.sequential` is defined here; all four entries are grounded as
 * "judgment".
 */
var STARTER_CHAINS_BY_MODE_GROUNDED = (() => {
  const judgment = (id, reason) => ({ id, grounding: "judgment", reason });
  const huntSequential = [
    judgment("deepseek-v4-pro", "alpha.20 E3: cheap + good reasoning at single-step granularity; L-040 cliff silenced when sequential \u2014 hypothesis not yet measured"),
    judgment("deepseek-v4-flash", "Cheapest viable; sibling-provider fallback"),
    judgment("claude-sonnet-4-6", "Cross-provider safety net \u2014 Sonnet handles sequential agentic loops cleanly"),
    judgment("gemini-2.5-pro", "Third-provider tail when no DeepSeek key reachable")
  ];
  return { hunt: { sequential: huntSequential } };
})();
|
|
3645
|
+
// Id-only projection of STARTER_CHAINS_BY_MODE_GROUNDED. Archetypes without a
// `sequential` overlay are left out entirely.
var STARTER_CHAINS_BY_MODE = (() => {
  const projected = {};
  Object.entries(STARTER_CHAINS_BY_MODE_GROUNDED).forEach(([archetype, modes]) => {
    const sequentialEntries = modes?.sequential;
    if (!sequentialEntries) return;
    projected[archetype] = { sequential: sequentialEntries.map((entry) => entry.id) };
  });
  return projected;
})();
|
|
3656
|
+
/**
 * Pick the starter chain for an archetype, honouring the orchestration mode.
 *
 * For `"sequential"` a copy of the archetype's sequential overlay is returned
 * when one exists; otherwise (and for every other mode) the entry from
 * `allChains` is returned as-is, which may be `undefined`.
 */
function resolveStarterForMode(archetype, toolOrchestration, allChains) {
  const sequentialOverlay = toolOrchestration === "sequential" ? STARTER_CHAINS_BY_MODE[archetype]?.sequential : void 0;
  return sequentialOverlay ? [...sequentialOverlay] : allChains[archetype];
}
|
|
3663
|
+
/**
 * Build the ordered list of model ids to try for an archetype.
 *
 * Steps: resolve the starter chain (mode overlay or brain/bundled chain),
 * put `primary` first when given, drop `policy.blockedModels`, de-duplicate,
 * optionally keep only reachable models, then truncate to `maxDepth`.
 *
 * @param {object} opts
 * @param {string} opts.archetype - Archetype whose chain is wanted.
 * @param {string} [opts.primary] - Model id to force to the head of the chain.
 * @param {number} [opts.maxDepth=3] - Maximum chain length; must be >= 1.
 * @param {{ blockedModels?: string[] }} [opts.policy] - Models to exclude.
 * @param {object} [opts.reachability] - When set, passed to `isModelReachable`
 *   to filter out models that cannot be called.
 * @param {string} [opts.toolOrchestration] - `"sequential"` selects the overlay.
 * @returns {string[]} Up to `maxDepth` model ids (possibly empty after filtering).
 * @throws {Error} When `maxDepth` is not a number >= 1 or the archetype is unknown.
 */
function getDefaultFallbackChain(opts) {
  const { archetype, primary, maxDepth = 3, policy, reachability, toolOrchestration } = opts;
  // `!(x >= 1)` instead of `x < 1` so that NaN is rejected as well; NaN used
  // to slip through and silently produce an empty chain via `slice(0, NaN)`.
  if (!(maxDepth >= 1)) {
    throw new Error(
      `getDefaultFallbackChain: maxDepth must be >= 1, got ${maxDepth}`
    );
  }
  const allChains = loadChainsFromBrain();
  const starter = resolveStarterForMode(archetype, toolOrchestration, allChains);
  if (!starter) {
    throw new Error(
      `getDefaultFallbackChain: unknown archetype "${archetype}". Known: ${Object.keys(allChains).join(", ")}`
    );
  }
  // A Set keeps first-seen order, so `primary` stays at the head and any
  // later duplicate of it (or of any other id) is dropped.
  let chain = [...new Set(primary ? [primary, ...starter] : starter)];
  if (policy?.blockedModels && policy.blockedModels.length > 0) {
    const blocked = new Set(policy.blockedModels);
    chain = chain.filter((id) => !blocked.has(id));
  }
  if (reachability) {
    chain = chain.filter((id) => isModelReachable(id, reachability));
  }
  return chain.slice(0, maxDepth);
}
|
|
3701
|
+
/**
 * Return a fresh copy of the bundled starter chain for `archetype`.
 *
 * @throws {Error} When the archetype has no bundled chain.
 */
function getStarterChain(archetype) {
  const bundled = STARTER_CHAINS[archetype];
  if (bundled) {
    return [...bundled];
  }
  throw new Error(
    `getStarterChain: unknown archetype "${archetype}"`
  );
}
|
|
3710
|
+
/**
 * Return every bundled starter chain as `archetype -> [modelId, ...]`.
 * Each array is a copy, so callers may mutate the result freely.
 */
function getAllStarterChains() {
  return Object.fromEntries(
    Object.entries(STARTER_CHAINS).map(([archetype, chain]) => [archetype, [...chain]])
  );
}
|
|
3717
|
+
/**
 * Return a copy of the sequential-mode overlay chain for `archetype`, or
 * `undefined` when the archetype has no such overlay.
 */
function getSequentialStarterChain(archetype) {
  const sequentialIds = STARTER_CHAINS_BY_MODE[archetype]?.sequential;
  if (!sequentialIds) {
    return void 0;
  }
  return [...sequentialIds];
}
|
|
3721
|
+
/**
 * Shallow-copy a grounded chain entry. `id` and `grounding` are always
 * copied; `reason` and `n` are copied only when they are not `undefined`.
 */
function copyEntry(e) {
  const { id, grounding, reason, n } = e;
  const clone = { id, grounding };
  if (reason !== void 0) clone.reason = reason;
  if (n !== void 0) clone.n = n;
  return clone;
}
|
|
3727
|
+
/**
 * Find the bundled grounded entry for model `id` under `archetype`.
 *
 * The default chain is searched first, then the sequential overlay. Returns
 * the stored entry object itself (not a copy), or `undefined` when neither
 * table lists the model.
 */
function lookupStaticEntry(id, archetype) {
  const tables = [
    STARTER_CHAINS_GROUNDED[archetype],
    STARTER_CHAINS_BY_MODE_GROUNDED[archetype]?.sequential
  ];
  for (const entries of tables) {
    if (!entries) continue;
    const match = entries.find((entry) => entry.id === id);
    if (match) return match;
  }
  return void 0;
}
|
|
3740
|
+
/**
 * Resolve the grounded starter chain for an archetype.
 *
 * In `"sequential"` mode a copy of the sequential overlay is returned when
 * the archetype has one. Otherwise the id chain from `loadChainsFromBrain()`
 * is annotated: ids known to the bundled tables get a copy of their bundled
 * entry, unknown ids are labeled `"judgment"`. Returns `undefined` when the
 * archetype has no chain.
 */
function resolveGroundedChainForArchetype(archetype, toolOrchestration) {
  const sequentialOverlay = toolOrchestration === "sequential" ? STARTER_CHAINS_BY_MODE_GROUNDED[archetype]?.sequential : void 0;
  if (sequentialOverlay) {
    return sequentialOverlay.map(copyEntry);
  }
  const modelIds = loadChainsFromBrain()[archetype];
  if (!modelIds) {
    return void 0;
  }
  const annotate = (id) => {
    const bundledEntry = lookupStaticEntry(id, archetype);
    return bundledEntry ? copyEntry(bundledEntry) : { id, grounding: "judgment" };
  };
  return modelIds.map(annotate);
}
|
|
3754
|
+
/**
 * Grounded counterpart of `getDefaultFallbackChain`: returns entries of the
 * form `{ id, grounding, reason?, n? }` instead of bare model ids.
 *
 * Steps: resolve the grounded starter chain, put `primary` first when given
 * (reusing its known grounding, else labeling it "judgment"), drop
 * `policy.blockedModels`, de-duplicate by id, optionally keep only reachable
 * models, then truncate to `maxDepth`.
 *
 * @param {object} opts
 * @param {string} opts.archetype - Archetype whose chain is wanted.
 * @param {string} [opts.primary] - Model id to force to the head of the chain.
 * @param {number} [opts.maxDepth=3] - Maximum chain length; must be >= 1.
 * @param {{ blockedModels?: string[] }} [opts.policy] - Models to exclude.
 * @param {object} [opts.reachability] - When set, passed to `isModelReachable`.
 * @param {string} [opts.toolOrchestration] - `"sequential"` selects the overlay.
 * @returns {object[]} Up to `maxDepth` grounded entries.
 * @throws {Error} When `maxDepth` is not a number >= 1 or the archetype is unknown.
 */
function getDefaultFallbackChainWithGrounding(opts) {
  const {
    archetype,
    primary,
    maxDepth = 3,
    policy,
    reachability,
    toolOrchestration
  } = opts;
  // `!(x >= 1)` instead of `x < 1` so that NaN is rejected as well; NaN used
  // to slip through and silently produce an empty chain via `slice(0, NaN)`.
  if (!(maxDepth >= 1)) {
    throw new Error(
      `getDefaultFallbackChainWithGrounding: maxDepth must be >= 1, got ${maxDepth}`
    );
  }
  const starter = resolveGroundedChainForArchetype(archetype, toolOrchestration);
  if (!starter) {
    throw new Error(
      `getDefaultFallbackChainWithGrounding: unknown archetype "${archetype}". Known: ${Object.keys(STARTER_CHAINS_GROUNDED).join(", ")}`
    );
  }
  let chain;
  if (primary) {
    // Prefer the grounding already attached to the primary: first from the
    // resolved chain, then from the bundled tables, else default to judgment.
    const known = starter.find((e) => e.id === primary) ?? lookupStaticEntry(primary, archetype);
    const primaryEntry = known ? copyEntry(known) : { id: primary, grounding: "judgment" };
    chain = [primaryEntry, ...starter.filter((e) => e.id !== primary)];
  } else {
    chain = [...starter];
  }
  if (policy?.blockedModels && policy.blockedModels.length > 0) {
    const blocked = new Set(policy.blockedModels);
    chain = chain.filter((e) => !blocked.has(e.id));
  }
  // Keep the first entry seen for each id.
  const seen = /* @__PURE__ */ new Set();
  chain = chain.filter((e) => {
    if (seen.has(e.id)) return false;
    seen.add(e.id);
    return true;
  });
  if (reachability) {
    chain = chain.filter((e) => isModelReachable(e.id, reachability));
  }
  return chain.slice(0, maxDepth);
}
|
|
3805
|
+
/**
 * Return copies of the bundled grounded entries for `archetype`.
 *
 * @throws {Error} When the archetype has no bundled chain.
 */
function getStarterChainWithGrounding(archetype) {
  const bundled = STARTER_CHAINS_GROUNDED[archetype];
  if (bundled) {
    return bundled.map(copyEntry);
  }
  throw new Error(
    `getStarterChainWithGrounding: unknown archetype "${archetype}"`
  );
}
|
|
3814
|
+
/**
 * Return every bundled grounded chain as `archetype -> [entry, ...]`, with
 * each entry copied so callers cannot mutate the bundled table.
 */
function getAllStarterChainsWithGrounding() {
  return Object.fromEntries(
    Object.entries(STARTER_CHAINS_GROUNDED).map(([archetype, entries]) => [archetype, entries.map(copyEntry)])
  );
}
|
|
3821
|
+
/**
 * Return copies of the grounded sequential-overlay entries for `archetype`,
 * or `undefined` when the archetype has no sequential overlay.
 */
function getSequentialStarterChainWithGrounding(archetype) {
  const sequentialEntries = STARTER_CHAINS_BY_MODE_GROUNDED[archetype]?.sequential;
  if (!sequentialEntries) {
    return void 0;
  }
  return sequentialEntries.map(copyEntry);
}
|
|
3825
|
+
/**
 * Make sure a chain spans at least two providers.
 *
 * When every profiled model in `chain` belongs to one provider, the
 * archetype's full chain (from `loadChainsFromBrain()`) is scanned in order
 * for the first model that is not already in `chain`, belongs to a different
 * provider, and is reachable with the given `apiKeys` / `envSource`. That
 * model is appended and reported as `appended`.
 *
 * The input chain is returned unchanged (same array) when it is empty,
 * already spans two or more providers, has no profiled model, the archetype
 * has no full chain, or no suitable candidate exists.
 */
function ensureCrossProviderTail(opts) {
  const { chain, archetype, apiKeys, envSource } = opts;
  const unchanged = { chain };
  if (chain.length < 1) return unchanged;
  const seenProviders = /* @__PURE__ */ new Set();
  for (const modelId of chain) {
    const profile = tryGetProfile(modelId);
    if (profile) seenProviders.add(profile.provider);
  }
  if (seenProviders.size >= 2) return unchanged;
  const [soleProvider] = seenProviders;
  if (!soleProvider) return unchanged;
  const fullChain = loadChainsFromBrain()[archetype];
  if (!fullChain) return unchanged;
  for (const candidate of fullChain) {
    if (chain.includes(candidate)) continue;
    const candidateProfile = tryGetProfile(candidate);
    const isOtherProvider = Boolean(candidateProfile) && candidateProfile.provider !== soleProvider;
    if (isOtherProvider && isModelReachable(candidate, { apiKeys, envSource })) {
      return { chain: [...chain, candidate], appended: candidate };
    }
  }
  return unchanged;
}
|
|
3848
|
+
|
|
3849
|
+
// src/glassbox/types.ts
|
|
3850
|
+
// Rolling time-to-live for glassbox streams, in milliseconds (60 seconds).
var GLASSBOX_STREAM_TTL_MS = 60 * 1e3;
|
|
3851
|
+
|
|
3852
|
+
// src/glassbox/pubsub-memory.ts
|
|
3853
|
+
/**
 * In-process pub/sub that fans events out to `ReadableStream` subscribers.
 *
 * Each subscriber is tracked as `{ controller, ttlTimer, closed }` in a
 * per-channel Set. A subscriber's stream is closed once
 * `GLASSBOX_STREAM_TTL_MS` elapses without an event being delivered to it.
 */
var MemoryPubSub = class {
  /** channelKey -> Set of subscriber records for that channel. */
  subscribers = /* @__PURE__ */ new Map();
  /**
   * Deliver `event` to every live subscriber of `channelKey` and refresh
   * each subscriber's TTL. A no-op when the channel has no subscribers.
   */
  async publish(channelKey, event) {
    const subs = this.subscribers.get(channelKey);
    if (!subs || subs.size === 0) return;
    // Deleting from a Set while iterating it with for...of is safe in JS.
    for (const sub of subs) {
      if (sub.closed) continue;
      try {
        sub.controller.enqueue(event);
      } catch {
        // The stream was closed or errored behind our back. Drop the
        // subscriber right away: once `closed` is set, closeSubscriber()
        // returns early, so nothing else would ever remove this record (or
        // its channel entry) from the map.
        sub.closed = true;
        this.removeSubscriber(channelKey, sub);
        continue;
      }
      this.refreshTtl(channelKey, sub);
    }
  }
  /**
   * Open a stream of events published to `channelKey`.
   *
   * @returns {ReadableStream} Stream that yields each published event and is
   *   closed when the rolling TTL expires. Cancelling the stream
   *   unregisters the subscriber.
   */
  subscribe(channelKey) {
    const self = this;
    let sub;
    return new ReadableStream({
      start(controller) {
        sub = {
          controller,
          ttlTimer: setTimeout(() => {
            self.closeSubscriber(channelKey, sub);
          }, GLASSBOX_STREAM_TTL_MS),
          closed: false
        };
        let set = self.subscribers.get(channelKey);
        if (!set) {
          set = /* @__PURE__ */ new Set();
          self.subscribers.set(channelKey, set);
        }
        set.add(sub);
      },
      cancel() {
        if (sub) self.removeSubscriber(channelKey, sub);
      }
    });
  }
  /**
   * Refresh the rolling TTL for a subscriber after an event lands. Replaces
   * the existing timer with a fresh one of `GLASSBOX_STREAM_TTL_MS`.
   */
  refreshTtl(channelKey, sub) {
    clearTimeout(sub.ttlTimer);
    sub.ttlTimer = setTimeout(() => {
      this.closeSubscriber(channelKey, sub);
    }, GLASSBOX_STREAM_TTL_MS);
  }
  /**
   * Close the subscriber's stream cleanly and remove it from the fan-out set.
   * Idempotent — safe to call multiple times.
   */
  closeSubscriber(channelKey, sub) {
    if (sub.closed) return;
    sub.closed = true;
    clearTimeout(sub.ttlTimer);
    try {
      sub.controller.close();
    } catch {
      // Already closed or errored by the consumer; nothing left to do.
    }
    this.removeSubscriber(channelKey, sub);
  }
  /**
   * Stop the subscriber's TTL timer and drop it from its channel's set,
   * deleting the channel entry once the set is empty.
   */
  removeSubscriber(channelKey, sub) {
    clearTimeout(sub.ttlTimer);
    const set = this.subscribers.get(channelKey);
    if (!set) return;
    set.delete(sub);
    if (set.size === 0) this.subscribers.delete(channelKey);
  }
  /**
   * Test-only reset. Tears down all subscribers, clears all state. Calling
   * outside of tests is harmless but cancels every active stream.
   */
  _reset() {
    // Snapshot both levels: closeSubscriber() mutates the map and the sets.
    for (const [channelKey, set] of [...this.subscribers]) {
      for (const sub of [...set]) {
        if (sub.closed) {
          // Already closed elsewhere; just make sure its timer is stopped.
          this.removeSubscriber(channelKey, sub);
        } else {
          this.closeSubscriber(channelKey, sub);
        }
      }
    }
    this.subscribers.clear();
  }
};
|
|
3937
|
+
|
|
3938
|
+
// src/glassbox/pubsub-upstash.ts
|
|
3939
|
+
/**
 * Pub/sub backed by Redis streams, driven through an HTTP endpoint that
 * accepts a JSON-encoded command array with a Bearer token.
 *
 * `publish` appends to a capped stream and sets its expiry; `subscribe`
 * polls the stream with XREAD and surfaces decoded events on a
 * `ReadableStream`.
 */
var UpstashPubSub = class {
  url;
  token;
  fetchImpl;
  blockMs;
  maxLen;
  /**
   * @param {object} cfg
   * @param {string} cfg.url - Endpoint URL; one trailing slash is stripped.
   * @param {string} cfg.token - Bearer token sent with every command.
   * @param {Function} [cfg.fetchImpl] - Fetch implementation; defaults to the global `fetch`.
   * @param {number} [cfg.blockMs=100] - BLOCK argument for each XREAD.
   * @param {number} [cfg.maxLen=100] - Approximate MAXLEN cap applied on XADD.
   */
  constructor(cfg) {
    this.url = cfg.url.replace(/\/$/, "");
    this.token = cfg.token;
    this.fetchImpl = cfg.fetchImpl ?? globalThis.fetch.bind(globalThis);
    this.blockMs = cfg.blockMs ?? 100;
    this.maxLen = cfg.maxLen ?? 100;
  }
  /**
   * Append `event` (JSON-serialized under the "event" field) to the stream
   * `channelKey`, then set the key's expiry to the glassbox TTL in seconds.
   */
  async publish(channelKey, event) {
    const serialized = JSON.stringify(event);
    const ttlSeconds = Math.ceil(GLASSBOX_STREAM_TTL_MS / 1e3);
    await this.cmd(["XADD", channelKey, "MAXLEN", "~", String(this.maxLen), "*", "event", serialized]);
    await this.cmd(["EXPIRE", channelKey, String(ttlSeconds)]);
  }
  /**
   * Stream events appended to `channelKey` from now on ("$" cursor).
   *
   * Polling stops when the stream is cancelled, when enqueueing fails, or
   * when `GLASSBOX_STREAM_TTL_MS` passes without any entry arriving; the
   * stream is then closed. A failing command errors the stream instead.
   */
  subscribe(channelKey) {
    let lastSeenId = "$";
    let stopped = false;
    let deadline = Date.now() + GLASSBOX_STREAM_TTL_MS;
    const poll = async (controller) => {
      while (!stopped && Date.now() < deadline) {
        const reply = await this.cmd([
          "XREAD",
          "BLOCK",
          String(this.blockMs),
          "STREAMS",
          channelKey,
          lastSeenId
        ]);
        if (stopped) break;
        const { entries } = parseXReadResult(reply.result);
        if (entries.length === 0) {
          continue;
        }
        for (const { id, fields } of entries) {
          const decoded = decodeEvent(fields);
          if (decoded) {
            try {
              controller.enqueue(decoded);
            } catch {
              // Consumer went away; stop polling.
              stopped = true;
              break;
            }
          }
          lastSeenId = id;
        }
        // Any batch of entries extends the rolling deadline.
        deadline = Date.now() + GLASSBOX_STREAM_TTL_MS;
      }
    };
    return new ReadableStream({
      start: async (controller) => {
        try {
          await poll(controller);
        } catch (err) {
          if (!stopped) {
            try {
              controller.error(err);
            } catch {
            }
            return;
          }
        }
        try {
          controller.close();
        } catch {
        }
      },
      cancel: () => {
        stopped = true;
      }
    });
  }
  /**
   * POST one command array to the endpoint and return the parsed JSON reply.
   *
   * @throws {Error} On a non-OK HTTP status or when the reply has an `error` field.
   */
  async cmd(args) {
    const [commandName] = args;
    const response = await this.fetchImpl(this.url, {
      method: "POST",
      headers: {
        Authorization: `Bearer ${this.token}`,
        "Content-Type": "application/json"
      },
      body: JSON.stringify(args)
    });
    if (!response.ok) {
      throw new Error(`Upstash ${commandName} failed: HTTP ${response.status}`);
    }
    const payload = await response.json();
    if (payload.error) {
      throw new Error(`Upstash ${commandName} failed: ${payload.error}`);
    }
    return payload;
  }
};
|
|
4042
|
+
/** Channel key carrying the events of a single trace. */
function traceChannel(traceId) {
  const prefix = "glassbox:trace:";
  return `${prefix}${traceId}`;
}
|
|
1545
|
-
function
|
|
1546
|
-
|
|
1547
|
-
|
|
1548
|
-
|
|
1549
|
-
|
|
1550
|
-
|
|
4045
|
+
/** Channel key carrying every event emitted for one app. */
function appChannel(appId) {
  const prefix = "glassbox:app:";
  return `${prefix}${appId}`;
}
|
|
4048
|
+
/**
 * Decode the JSON payload stored under a stream entry's "event" field.
 *
 * @returns The parsed event when it has a string `kind` and a numeric `at`;
 *   `undefined` when the field is missing/empty, is not valid JSON, or does
 *   not have that shape.
 */
function decodeEvent(fields) {
  const { event: serialized } = fields;
  if (!serialized) return void 0;
  try {
    const candidate = JSON.parse(serialized);
    // Reading `.kind` on a parsed `null` throws; the catch below turns that
    // into `undefined` as well.
    const wellFormed = typeof candidate.kind === "string" && typeof candidate.at === "number";
    return wellFormed ? candidate : void 0;
  } catch {
    return void 0;
  }
}
|
|
1555
|
-
function
|
|
1556
|
-
return {
|
|
4061
|
+
/**
 * Flatten an XREAD reply into `{ entries: [{ id, fields }] }`.
 *
 * The expected shape is `[[streamKey, [[entryId, [k1, v1, k2, v2, ...]], ...]], ...]`.
 * Anything that does not match is skipped rather than rejected: non-array
 * levels, tuples shorter than two items, and non-string field names.
 * Field values are stringified, with null/undefined becoming "".
 */
function parseXReadResult(raw) {
  const entries = [];
  if (!Array.isArray(raw)) return { entries };
  for (const stream of raw) {
    const rows = Array.isArray(stream) && stream.length >= 2 ? stream[1] : void 0;
    if (!Array.isArray(rows)) continue;
    for (const row of rows) {
      if (!Array.isArray(row) || row.length < 2) continue;
      const id = String(row[0]);
      const pairs = row[1];
      if (!Array.isArray(pairs)) continue;
      const fields = {};
      // `pairs` alternates name, value, name, value, ...
      for (let index = 0; index < pairs.length; index += 2) {
        const name = pairs[index];
        if (typeof name === "string") {
          fields[name] = String(pairs[index + 1] ?? "");
        }
      }
      entries.push({ id, fields });
    }
  }
  return { entries };
}
|
|
1558
|
-
|
|
1559
|
-
|
|
4084
|
+
|
|
4085
|
+
// src/glassbox/emit.ts
|
|
4086
|
+
// Lazily created pub/sub backend shared by every emitter.
var activePubSub;
/**
 * Return the shared pub/sub backend, creating it on first use.
 *
 * Picks `UpstashPubSub` when both UPSTASH_REDIS_URL and UPSTASH_REDIS_TOKEN
 * are set (per `readEnv`), otherwise `MemoryPubSub`. The choice is made once
 * and then reused.
 */
function getPubSub() {
  if (!activePubSub) {
    const url = readEnv("UPSTASH_REDIS_URL");
    const token = readEnv("UPSTASH_REDIS_TOKEN");
    activePubSub = url && token ? new UpstashPubSub({ url, token }) : new MemoryPubSub();
  }
  return activePubSub;
}
|
|
1561
|
-
function
|
|
1562
|
-
if (typeof s !== "string" || s.length === 0) return void 0;
|
|
4098
|
+
/**
 * Read an environment variable without ever throwing.
 *
 * @param {string} key - Variable name to look up in `process.env`.
 * @returns {string | undefined} The trimmed value, or `undefined` when
 *   `process.env` is unavailable or the variable is unset or blank.
 */
function readEnv(key) {
  try {
    if (typeof process !== "undefined" && process.env) {
      const value = process.env[key];
      if (typeof value !== "string") return void 0;
      // Return the trimmed value, not just validate it: stray whitespace or
      // a trailing newline would otherwise end up in the URL / Bearer header.
      const trimmed = value.trim();
      return trimmed !== "" ? trimmed : void 0;
    }
  } catch {
    // Some runtimes throw on `process.env` access; treat that as "unset".
  }
  return void 0;
}
|
|
4108
|
+
/**
 * Best-effort, fire-and-forget emitter: publish one glassbox event on the
 * trace channel and, when `appId` is set, on the app channel too.
 *
 * Never throws and never awaits. Telemetry must not break the caller, so
 * every failure (backend construction, synchronous publish errors, rejected
 * publish promises) is deliberately swallowed.
 *
 * @param {string} traceId - Trace to publish under; nothing is emitted when falsy.
 * @param {string} [appId] - App whose channel should also receive the event.
 * @param {string} kind - Event kind, e.g. "compile.start".
 * @param {*} data - Event payload.
 */
function emitGlassboxEvent(traceId, appId, kind, data) {
  if (!traceId) return;
  const event = { kind, at: Date.now(), data };
  let ps;
  try {
    // Creating the backend can throw (e.g. no global fetch to bind).
    ps = getPubSub();
  } catch {
    return;
  }
  const channelResolvers = [() => traceChannel(traceId)];
  if (appId) channelResolvers.push(() => appChannel(appId));
  for (const resolveChannel of channelResolvers) {
    try {
      const pending = ps.publish(resolveChannel(), event);
      if (pending && typeof pending.then === "function") {
        // Silence rejections so they never surface as unhandled.
        pending.catch(() => {
        });
      }
    } catch {
      // A failure on one channel must not prevent the other publish.
    }
  }
}
|
|
4131
|
+
/** Emit a "compile.start" glassbox event. */
function emitCompileStart(traceId, appId, data) {
  const kind = "compile.start";
  emitGlassboxEvent(traceId, appId, kind, data);
}
|
|
4134
|
+
/** Emit a "compile.done" glassbox event. */
function emitCompileDone(traceId, appId, data) {
  const kind = "compile.done";
  emitGlassboxEvent(traceId, appId, kind, data);
}
|
|
4137
|
+
function emitExecuteAttempt(traceId, appId, data) {
|
|
4138
|
+
emitGlassboxEvent(traceId, appId, "execute.attempt", data);
|
|
4139
|
+
}
|
|
4140
|
+
function emitExecuteSuccess(traceId, appId, data) {
|
|
4141
|
+
emitGlassboxEvent(traceId, appId, "execute.success", data);
|
|
4142
|
+
}
|
|
4143
|
+
function emitAdvisoryFired(traceId, appId, data) {
|
|
4144
|
+
emitGlassboxEvent(traceId, appId, "advisory.fired", data);
|
|
4145
|
+
}
|
|
4146
|
+
function emitFallbackWalked(traceId, appId, data) {
|
|
4147
|
+
emitGlassboxEvent(traceId, appId, "fallback.walked", data);
|
|
1569
4148
|
}
|
|
1570
4149
|
|
|
1571
4150
|
// src/call.ts
|
|
1572
4151
|
async function call(ir, opts = {}) {
|
|
4152
|
+
const traceId = generateTraceId();
|
|
4153
|
+
safeEmit(
|
|
4154
|
+
() => emitCompileStart(traceId, ir.appId, {
|
|
4155
|
+
appId: ir.appId,
|
|
4156
|
+
archetype: ir.intent.archetype,
|
|
4157
|
+
models: ir.models
|
|
4158
|
+
})
|
|
4159
|
+
);
|
|
1573
4160
|
const initial = compileAndRegister(ir, opts);
|
|
4161
|
+
safeEmit(
|
|
4162
|
+
() => emitCompileDone(traceId, ir.appId, {
|
|
4163
|
+
target: initial.target,
|
|
4164
|
+
provider: initial.provider,
|
|
4165
|
+
fallbackChain: initial.fallbackChain,
|
|
4166
|
+
tokensIn: initial.tokensIn,
|
|
4167
|
+
estimatedCostUsd: initial.estimatedCostUsd,
|
|
4168
|
+
mutationsApplied: initial.mutationsApplied,
|
|
4169
|
+
advisories: initial.advisories
|
|
4170
|
+
})
|
|
4171
|
+
);
|
|
4172
|
+
for (const adv of initial.advisories) {
|
|
4173
|
+
safeEmit(
|
|
4174
|
+
() => emitAdvisoryFired(traceId, ir.appId, { code: adv.code, message: adv.message })
|
|
4175
|
+
);
|
|
4176
|
+
}
|
|
1574
4177
|
const start = Date.now();
|
|
1575
4178
|
const attempts = [];
|
|
1576
|
-
const
|
|
4179
|
+
const rawTargets = [initial.target, ...initial.fallbackChain];
|
|
4180
|
+
let unreachableFiltered;
|
|
4181
|
+
let targetsToTry;
|
|
4182
|
+
if (opts.noAutoFilter) {
|
|
4183
|
+
targetsToTry = rawTargets;
|
|
4184
|
+
} else {
|
|
4185
|
+
const dropped = [];
|
|
4186
|
+
targetsToTry = [];
|
|
4187
|
+
for (const t of rawTargets) {
|
|
4188
|
+
if (isModelReachable(t, { apiKeys: opts.apiKeys })) {
|
|
4189
|
+
targetsToTry.push(t);
|
|
4190
|
+
} else {
|
|
4191
|
+
dropped.push(t);
|
|
4192
|
+
}
|
|
4193
|
+
}
|
|
4194
|
+
unreachableFiltered = dropped;
|
|
4195
|
+
if (targetsToTry.length === 0) {
|
|
4196
|
+
const latencyMs2 = Date.now() - start;
|
|
4197
|
+
await record({
|
|
4198
|
+
handle: initial.handle,
|
|
4199
|
+
tokensIn: 0,
|
|
4200
|
+
tokensOut: 0,
|
|
4201
|
+
latencyMs: latencyMs2,
|
|
4202
|
+
success: false,
|
|
4203
|
+
errorType: "no_reachable_models",
|
|
4204
|
+
promptPreview: extractPromptPreview(ir)
|
|
4205
|
+
});
|
|
4206
|
+
const noReachableAttempts = dropped.map((m) => ({
|
|
4207
|
+
model: m,
|
|
4208
|
+
status: "terminal",
|
|
4209
|
+
errorCode: "unreachable_provider",
|
|
4210
|
+
message: `No API key for ${m}'s provider \u2014 set one of PROVIDER_ENV_KEYS or pass apiKeys`
|
|
4211
|
+
}));
|
|
4212
|
+
throw new CallError(
|
|
4213
|
+
`call(): no reachable models in chain. Filtered: [${dropped.join(", ")}]. Add a key for one provider, or pass apiKeys.`,
|
|
4214
|
+
noReachableAttempts,
|
|
4215
|
+
void 0,
|
|
4216
|
+
"no_reachable_models"
|
|
4217
|
+
);
|
|
4218
|
+
}
|
|
4219
|
+
const archetypeName = ir.intent?.archetype;
|
|
4220
|
+
if (archetypeName) {
|
|
4221
|
+
const ensured = ensureCrossProviderTail({
|
|
4222
|
+
chain: targetsToTry,
|
|
4223
|
+
archetype: archetypeName,
|
|
4224
|
+
apiKeys: opts.apiKeys
|
|
4225
|
+
});
|
|
4226
|
+
if (ensured.appended) {
|
|
4227
|
+
targetsToTry = ensured.chain;
|
|
4228
|
+
}
|
|
4229
|
+
}
|
|
4230
|
+
}
|
|
4231
|
+
let policyBlockedFiltered;
|
|
4232
|
+
if (opts.policy?.blockedModels && opts.policy.blockedModels.length > 0) {
|
|
4233
|
+
const blocked = new Set(opts.policy.blockedModels);
|
|
4234
|
+
const filtered = [];
|
|
4235
|
+
const dropped = [];
|
|
4236
|
+
for (const t of targetsToTry) {
|
|
4237
|
+
if (blocked.has(t)) {
|
|
4238
|
+
dropped.push(t);
|
|
4239
|
+
} else {
|
|
4240
|
+
filtered.push(t);
|
|
4241
|
+
}
|
|
4242
|
+
}
|
|
4243
|
+
if (dropped.length > 0) {
|
|
4244
|
+
policyBlockedFiltered = dropped;
|
|
4245
|
+
targetsToTry = filtered;
|
|
4246
|
+
}
|
|
4247
|
+
if (targetsToTry.length === 0) {
|
|
4248
|
+
const latencyMs2 = Date.now() - start;
|
|
4249
|
+
await record({
|
|
4250
|
+
handle: initial.handle,
|
|
4251
|
+
tokensIn: 0,
|
|
4252
|
+
tokensOut: 0,
|
|
4253
|
+
latencyMs: latencyMs2,
|
|
4254
|
+
success: false,
|
|
4255
|
+
errorType: "all_blocked_by_policy",
|
|
4256
|
+
promptPreview: extractPromptPreview(ir)
|
|
4257
|
+
});
|
|
4258
|
+
const blockedAttempts = dropped.map((m) => ({
|
|
4259
|
+
model: m,
|
|
4260
|
+
status: "terminal",
|
|
4261
|
+
errorCode: "blocked_by_policy",
|
|
4262
|
+
message: `Skipped \u2014 model ${m} is in CompilePolicy.blockedModels`
|
|
4263
|
+
}));
|
|
4264
|
+
throw new CallError(
|
|
4265
|
+
`call(): all chain targets blocked by CompilePolicy.blockedModels: [${dropped.join(", ")}]`,
|
|
4266
|
+
blockedAttempts,
|
|
4267
|
+
void 0,
|
|
4268
|
+
"all_blocked_by_policy"
|
|
4269
|
+
);
|
|
4270
|
+
}
|
|
4271
|
+
}
|
|
1577
4272
|
let activeCompile = initial;
|
|
1578
4273
|
let lastErr;
|
|
4274
|
+
const failedProviders = /* @__PURE__ */ new Set();
|
|
1579
4275
|
for (let i = 0; i < targetsToTry.length; i++) {
|
|
1580
4276
|
const targetModel = targetsToTry[i];
|
|
1581
|
-
|
|
4277
|
+
const targetProfile = tryGetProfile(targetModel);
|
|
4278
|
+
if (targetProfile && failedProviders.has(targetProfile.provider) && !opts.noFallback) {
|
|
4279
|
+
attempts.push({
|
|
4280
|
+
model: targetModel,
|
|
4281
|
+
status: "terminal",
|
|
4282
|
+
errorCode: "auth_inferred",
|
|
4283
|
+
message: `Skipped \u2014 provider ${targetProfile.provider} returned 401/403 earlier in this call; same key inferred to fail`
|
|
4284
|
+
});
|
|
4285
|
+
continue;
|
|
4286
|
+
}
|
|
4287
|
+
if (targetModel !== initial.target) {
|
|
1582
4288
|
try {
|
|
1583
4289
|
activeCompile = compileAndRegister(
|
|
1584
4290
|
{
|
|
@@ -1598,59 +4304,112 @@ async function call(ir, opts = {}) {
|
|
|
1598
4304
|
continue;
|
|
1599
4305
|
}
|
|
1600
4306
|
}
|
|
4307
|
+
safeEmit(
|
|
4308
|
+
() => emitExecuteAttempt(traceId, ir.appId, { model: targetModel, attemptIndex: i })
|
|
4309
|
+
);
|
|
1601
4310
|
const exec = await execute(activeCompile.request, {
|
|
1602
4311
|
apiKeys: opts.apiKeys,
|
|
1603
4312
|
fetchImpl: opts.fetchImpl,
|
|
1604
4313
|
providerOverrides: opts.providerOverrides
|
|
1605
4314
|
});
|
|
1606
|
-
|
|
4315
|
+
const validated = exec.ok ? validateStructuredContract(exec, ir) : exec;
|
|
4316
|
+
if (validated.ok) {
|
|
1607
4317
|
attempts.push({ model: targetModel, status: "success" });
|
|
1608
4318
|
const latencyMs2 = Date.now() - start;
|
|
1609
|
-
|
|
1610
|
-
|
|
4319
|
+
safeEmit(
|
|
4320
|
+
() => emitExecuteSuccess(traceId, ir.appId, {
|
|
4321
|
+
model: targetModel,
|
|
4322
|
+
tokensIn: validated.response.tokens.input,
|
|
4323
|
+
tokensOut: validated.response.tokens.output,
|
|
4324
|
+
latencyMs: latencyMs2
|
|
4325
|
+
})
|
|
4326
|
+
);
|
|
4327
|
+
const fellOver = targetModel !== initial.target;
|
|
4328
|
+
const fallbackReason = fellOver ? normalizeFallbackReason(attempts) : void 0;
|
|
4329
|
+
await record({
|
|
1611
4330
|
handle: initial.handle,
|
|
1612
|
-
tokensIn:
|
|
1613
|
-
tokensOut:
|
|
4331
|
+
tokensIn: validated.response.tokens.input,
|
|
4332
|
+
tokensOut: validated.response.tokens.output,
|
|
1614
4333
|
latencyMs: latencyMs2,
|
|
1615
4334
|
success: true,
|
|
1616
|
-
emptyResponse:
|
|
1617
|
-
toolsCalled:
|
|
4335
|
+
emptyResponse: validated.response.tokens.output === 0,
|
|
4336
|
+
toolsCalled: validated.response.toolCalls.map((tc) => tc.name),
|
|
1618
4337
|
actualModel: targetModel !== initial.target ? targetModel : void 0,
|
|
1619
|
-
|
|
4338
|
+
mutationsApplied: targetModel !== initial.target ? activeCompile.mutationsApplied.map((m) => m.id) : void 0,
|
|
4339
|
+
promptPreview: extractPromptPreview(ir),
|
|
4340
|
+
responsePreview: validated.response.text.slice(0, 200),
|
|
4341
|
+
cacheReadInputTokens: validated.response.tokens.cached,
|
|
4342
|
+
cacheCreationInputTokens: validated.response.tokens.cacheCreated,
|
|
4343
|
+
// alpha.28 — Glass-Box renderer substrate (migration 018). call()
|
|
4344
|
+
// owns the lifecycle so it has direct visibility into finishReason
|
|
4345
|
+
// (from the normalized provider response), totalMs (mirrors latencyMs
|
|
4346
|
+
// for non-streaming; future streaming variant may diverge), and the
|
|
4347
|
+
// fell-over-from / fallback-reason pair (already computed above for
|
|
4348
|
+
// the CallResult return shape).
|
|
4349
|
+
finishReason: validated.response.finishReason,
|
|
4350
|
+
totalMs: latencyMs2,
|
|
4351
|
+
fellOverFrom: fellOver ? initial.target : void 0,
|
|
4352
|
+
fallbackReason
|
|
1620
4353
|
});
|
|
4354
|
+
if (fellOver) {
|
|
4355
|
+
const firstFailed = attempts.find((a) => a.status !== "success");
|
|
4356
|
+
if (firstFailed) {
|
|
4357
|
+
safeEmit(
|
|
4358
|
+
() => emitFallbackWalked(traceId, ir.appId, {
|
|
4359
|
+
from: initial.target,
|
|
4360
|
+
to: targetModel,
|
|
4361
|
+
reason: fallbackReason ?? "unknown",
|
|
4362
|
+
attempt: firstFailed
|
|
4363
|
+
})
|
|
4364
|
+
);
|
|
4365
|
+
}
|
|
4366
|
+
}
|
|
1621
4367
|
return {
|
|
1622
4368
|
handle: initial.handle,
|
|
1623
4369
|
actualModel: targetModel,
|
|
1624
4370
|
requestedModel: initial.target,
|
|
1625
4371
|
provider: activeCompile.provider,
|
|
1626
|
-
response:
|
|
4372
|
+
response: validated.response,
|
|
1627
4373
|
latencyMs: latencyMs2,
|
|
1628
4374
|
mutationsApplied: activeCompile.mutationsApplied,
|
|
1629
|
-
attempts
|
|
4375
|
+
attempts,
|
|
4376
|
+
servedBy: targetModel,
|
|
4377
|
+
fellOverFrom: fellOver ? initial.target : void 0,
|
|
4378
|
+
fallbackReason,
|
|
4379
|
+
unreachableFiltered,
|
|
4380
|
+
policyBlockedFiltered,
|
|
4381
|
+
traceId
|
|
1630
4382
|
};
|
|
1631
4383
|
}
|
|
1632
4384
|
attempts.push({
|
|
1633
4385
|
model: targetModel,
|
|
1634
|
-
status:
|
|
1635
|
-
errorCode:
|
|
1636
|
-
message:
|
|
4386
|
+
status: validated.errorType,
|
|
4387
|
+
errorCode: validated.errorCode,
|
|
4388
|
+
message: validated.message
|
|
1637
4389
|
});
|
|
1638
|
-
lastErr =
|
|
1639
|
-
if (
|
|
4390
|
+
lastErr = validated;
|
|
4391
|
+
if (validated.errorType === "terminal" || opts.noFallback) {
|
|
4392
|
+
if (validated.errorCode === "auth" && !opts.noFallback && activeCompile.provider) {
|
|
4393
|
+
failedProviders.add(activeCompile.provider);
|
|
4394
|
+
continue;
|
|
4395
|
+
}
|
|
1640
4396
|
break;
|
|
1641
4397
|
}
|
|
1642
4398
|
}
|
|
1643
4399
|
const latencyMs = Date.now() - start;
|
|
1644
|
-
|
|
4400
|
+
await record({
|
|
1645
4401
|
handle: initial.handle,
|
|
1646
4402
|
tokensIn: 0,
|
|
1647
4403
|
tokensOut: 0,
|
|
1648
4404
|
latencyMs,
|
|
1649
4405
|
success: false,
|
|
1650
|
-
errorType: lastErr?.errorCode
|
|
4406
|
+
errorType: lastErr?.errorCode,
|
|
4407
|
+
promptPreview: extractPromptPreview(ir)
|
|
1651
4408
|
});
|
|
4409
|
+
const filteredNote = unreachableFiltered && unreachableFiltered.length > 0 ? ` (also auto-filtered: [${unreachableFiltered.join(", ")}] \u2014 no API key)` : "";
|
|
4410
|
+
const blockedNote = policyBlockedFiltered && policyBlockedFiltered.length > 0 ? ` (also policy-blocked: [${policyBlockedFiltered.join(", ")}])` : "";
|
|
1652
4411
|
throw new CallError(
|
|
1653
|
-
`call(): all attempts failed${lastErr ? ` \u2014 ${lastErr.errorCode}: ${lastErr.message}` : ""}`,
|
|
4412
|
+
`call(): all attempts failed${lastErr ? ` \u2014 ${lastErr.errorCode}: ${lastErr.message}` : ""}${filteredNote}${blockedNote}`,
|
|
1654
4413
|
attempts,
|
|
1655
4414
|
lastErr?.status,
|
|
1656
4415
|
lastErr?.errorCode
|
|
@@ -1665,20 +4424,74 @@ function compileAndRegister(ir, opts) {
|
|
|
1665
4424
|
registerCompile(ir.appId, ir.intent.archetype, ir, result);
|
|
1666
4425
|
return result;
|
|
1667
4426
|
}
|
|
1668
|
-
function
|
|
1669
|
-
|
|
1670
|
-
if (
|
|
4427
|
+
function extractPromptPreview(ir) {
|
|
4428
|
+
const turn = ir.currentTurn?.content;
|
|
4429
|
+
if (turn) return turn.slice(0, 200);
|
|
4430
|
+
const lastHist = ir.history?.[ir.history.length - 1]?.content;
|
|
4431
|
+
if (lastHist) return lastHist.slice(0, 200);
|
|
4432
|
+
return void 0;
|
|
4433
|
+
}
|
|
4434
|
+
function validateStructuredContract(exec, ir) {
|
|
4435
|
+
if (!ir.constraints?.structuredOutput) {
|
|
4436
|
+
return { ok: true, response: exec.response };
|
|
4437
|
+
}
|
|
4438
|
+
const finish = (exec.response.finishReason ?? "").toLowerCase();
|
|
4439
|
+
if (finish === "max_tokens" || finish === "length") {
|
|
4440
|
+
return {
|
|
4441
|
+
ok: false,
|
|
4442
|
+
status: exec.status,
|
|
4443
|
+
errorType: "retryable",
|
|
4444
|
+
errorCode: "max_tokens_on_structured_output",
|
|
4445
|
+
message: `Provider returned finishReason="${exec.response.finishReason}" on a structured-output call \u2014 output truncated mid-token, JSON cannot be valid`,
|
|
4446
|
+
raw: exec.response.raw
|
|
4447
|
+
};
|
|
4448
|
+
}
|
|
4449
|
+
if (!exec.response.text) {
|
|
4450
|
+
return { ok: true, response: exec.response };
|
|
4451
|
+
}
|
|
1671
4452
|
try {
|
|
1672
|
-
const parsed = JSON.parse(response.text);
|
|
1673
|
-
return { ...response, structuredOutput: parsed };
|
|
4453
|
+
const parsed = JSON.parse(exec.response.text);
|
|
4454
|
+
return { ok: true, response: { ...exec.response, structuredOutput: parsed } };
|
|
1674
4455
|
} catch (err) {
|
|
1675
4456
|
return {
|
|
1676
|
-
|
|
1677
|
-
|
|
1678
|
-
|
|
4457
|
+
ok: false,
|
|
4458
|
+
status: exec.status,
|
|
4459
|
+
errorType: "retryable",
|
|
4460
|
+
errorCode: "structured_output_parse_failed",
|
|
4461
|
+
message: err instanceof Error ? err.message : String(err),
|
|
4462
|
+
raw: exec.response.raw
|
|
1679
4463
|
};
|
|
1680
4464
|
}
|
|
1681
4465
|
}
|
|
4466
|
+
function normalizeFallbackReason(attempts) {
|
|
4467
|
+
const first = attempts.find((a) => a.status !== "success");
|
|
4468
|
+
if (!first) return void 0;
|
|
4469
|
+
const code = first.errorCode ?? "";
|
|
4470
|
+
if (code === "rate_limit_429" || code === "rate_limit") return "rate_limit";
|
|
4471
|
+
if (code === "max_tokens_on_structured_output" || code === "structured_output_parse_failed") {
|
|
4472
|
+
return "cliff";
|
|
4473
|
+
}
|
|
4474
|
+
if (code === "cost_cap_exceeded") return "cost_cap";
|
|
4475
|
+
if (code === "auth" || code === "auth_inferred") return "provider_auth_failed";
|
|
4476
|
+
return "provider_error";
|
|
4477
|
+
}
|
|
4478
|
+
function generateTraceId() {
|
|
4479
|
+
try {
|
|
4480
|
+
const g = globalThis;
|
|
4481
|
+
if (g.crypto && typeof g.crypto.randomUUID === "function") {
|
|
4482
|
+
return g.crypto.randomUUID();
|
|
4483
|
+
}
|
|
4484
|
+
} catch {
|
|
4485
|
+
}
|
|
4486
|
+
const hex = (n) => Math.floor(Math.random() * Math.pow(16, n)).toString(16).padStart(n, "0");
|
|
4487
|
+
return `${hex(8)}-${hex(4)}-${hex(4)}-${hex(4)}-${hex(12)}`;
|
|
4488
|
+
}
|
|
4489
|
+
function safeEmit(fn) {
|
|
4490
|
+
try {
|
|
4491
|
+
fn();
|
|
4492
|
+
} catch {
|
|
4493
|
+
}
|
|
4494
|
+
}
|
|
1682
4495
|
|
|
1683
4496
|
// src/oracle.ts
|
|
1684
4497
|
var DEFAULT_DIMENSIONS = ["correctness", "completeness", "conciseness", "format"];
|
|
@@ -1768,6 +4581,397 @@ function clamp(n) {
|
|
|
1768
4581
|
return Math.max(0, Math.min(1, n));
|
|
1769
4582
|
}
|
|
1770
4583
|
|
|
4584
|
+
// src/advisories-api.ts
|
|
4585
|
+
var SEVERITY_SET = /* @__PURE__ */ new Set(["info", "warn", "critical"]);
|
|
4586
|
+
var STATUS_SET = /* @__PURE__ */ new Set(["open", "snoozed", "resolved"]);
|
|
4587
|
+
var RESOLUTION_SOURCE_SET = /* @__PURE__ */ new Set([
|
|
4588
|
+
"auto",
|
|
4589
|
+
"consumer-marked",
|
|
4590
|
+
"declined"
|
|
4591
|
+
]);
|
|
4592
|
+
function asString(v) {
|
|
4593
|
+
return typeof v === "string" && v.length > 0 ? v : void 0;
|
|
4594
|
+
}
|
|
4595
|
+
function asSeverity(v) {
|
|
4596
|
+
if (typeof v === "string" && SEVERITY_SET.has(v)) {
|
|
4597
|
+
return v;
|
|
4598
|
+
}
|
|
4599
|
+
return "info";
|
|
4600
|
+
}
|
|
4601
|
+
function asStatus(v) {
|
|
4602
|
+
if (typeof v === "string" && STATUS_SET.has(v)) {
|
|
4603
|
+
return v;
|
|
4604
|
+
}
|
|
4605
|
+
return "open";
|
|
4606
|
+
}
|
|
4607
|
+
function asResolutionSource(v) {
|
|
4608
|
+
if (typeof v === "string" && RESOLUTION_SOURCE_SET.has(v)) {
|
|
4609
|
+
return v;
|
|
4610
|
+
}
|
|
4611
|
+
return void 0;
|
|
4612
|
+
}
|
|
4613
|
+
function rowToAdvisory(row) {
|
|
4614
|
+
const archetype = asString(row.applies_to_archetype);
|
|
4615
|
+
const model = asString(row.applies_to_model);
|
|
4616
|
+
const docsLink = asString(row.docs_url);
|
|
4617
|
+
const suggestion = asString(row.suggestion);
|
|
4618
|
+
let suggestedFix = null;
|
|
4619
|
+
if (docsLink || suggestion) {
|
|
4620
|
+
suggestedFix = { type: "manual" };
|
|
4621
|
+
if (docsLink) suggestedFix.docsLink = docsLink;
|
|
4622
|
+
if (suggestion) suggestedFix.before = suggestion;
|
|
4623
|
+
}
|
|
4624
|
+
const out = {
|
|
4625
|
+
id: typeof row.id === "string" ? row.id : "",
|
|
4626
|
+
rule: typeof row.rule === "string" ? row.rule : "",
|
|
4627
|
+
severity: asSeverity(row.severity),
|
|
4628
|
+
openedAt: typeof row.opened_at === "string" ? row.opened_at : "",
|
|
4629
|
+
lastObservedAt: typeof row.last_observed_at === "string" ? row.last_observed_at : "",
|
|
4630
|
+
observationCount: typeof row.observation_count === "number" ? row.observation_count : 0,
|
|
4631
|
+
appliesTo: {
|
|
4632
|
+
...archetype ? { archetype } : {},
|
|
4633
|
+
...model ? { model } : {}
|
|
4634
|
+
},
|
|
4635
|
+
message: typeof row.message === "string" ? row.message : "",
|
|
4636
|
+
suggestedFix,
|
|
4637
|
+
autoApplicable: false,
|
|
4638
|
+
// reserved — alpha.30+
|
|
4639
|
+
status: asStatus(row.status)
|
|
4640
|
+
};
|
|
4641
|
+
const resolvedAt = asString(row.resolved_at);
|
|
4642
|
+
if (resolvedAt) out.resolvedAt = resolvedAt;
|
|
4643
|
+
const resolutionSource = asResolutionSource(row.resolution_source);
|
|
4644
|
+
if (resolutionSource) out.resolutionSource = resolutionSource;
|
|
4645
|
+
const resolutionNote = asString(row.resolution_note);
|
|
4646
|
+
if (resolutionNote) out.resolutionNote = resolutionNote;
|
|
4647
|
+
return out;
|
|
4648
|
+
}
|
|
4649
|
+
function resolveFetch(injected) {
|
|
4650
|
+
return injected ?? ((...args) => globalThis.fetch(...args));
|
|
4651
|
+
}
|
|
4652
|
+
function normalizeEndpoint(endpoint) {
|
|
4653
|
+
return endpoint.replace(/\/+$/, "");
|
|
4654
|
+
}
|
|
4655
|
+
async function getActionableAdvisories(opts) {
|
|
4656
|
+
const {
|
|
4657
|
+
appId,
|
|
4658
|
+
severity,
|
|
4659
|
+
status,
|
|
4660
|
+
brainEndpoint,
|
|
4661
|
+
brainJwt,
|
|
4662
|
+
brainAnonKey,
|
|
4663
|
+
fetch: injectedFetch
|
|
4664
|
+
} = opts;
|
|
4665
|
+
if (!appId) {
|
|
4666
|
+
throw new Error("getActionableAdvisories: appId is required");
|
|
4667
|
+
}
|
|
4668
|
+
const doFetch = resolveFetch(injectedFetch);
|
|
4669
|
+
const base = normalizeEndpoint(brainEndpoint);
|
|
4670
|
+
const qs = new URLSearchParams();
|
|
4671
|
+
qs.set("app_id", `eq.${appId}`);
|
|
4672
|
+
if (severity) qs.set("severity", `eq.${severity}`);
|
|
4673
|
+
const effectiveStatus = status ?? "open";
|
|
4674
|
+
if (effectiveStatus !== "all") {
|
|
4675
|
+
qs.set("status", `eq.${effectiveStatus}`);
|
|
4676
|
+
}
|
|
4677
|
+
qs.set("order", "last_observed_at.desc");
|
|
4678
|
+
const url = `${base}/rest/v1/actionable_advisories_v?${qs.toString()}`;
|
|
4679
|
+
let res;
|
|
4680
|
+
try {
|
|
4681
|
+
res = await doFetch(url, {
|
|
4682
|
+
method: "GET",
|
|
4683
|
+
headers: {
|
|
4684
|
+
Authorization: `Bearer ${brainJwt}`,
|
|
4685
|
+
apikey: brainAnonKey,
|
|
4686
|
+
Accept: "application/json"
|
|
4687
|
+
}
|
|
4688
|
+
});
|
|
4689
|
+
} catch (err) {
|
|
4690
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
4691
|
+
throw new Error(`getActionableAdvisories: network error: ${msg}`);
|
|
4692
|
+
}
|
|
4693
|
+
if (res.status === 401 || res.status === 403) {
|
|
4694
|
+
throw new Error("getActionableAdvisories: brain auth misconfig");
|
|
4695
|
+
}
|
|
4696
|
+
if (res.status >= 500) {
|
|
4697
|
+
throw new Error(`getActionableAdvisories: brain unavailable (${res.status})`);
|
|
4698
|
+
}
|
|
4699
|
+
if (!res.ok) {
|
|
4700
|
+
throw new Error(`getActionableAdvisories: bad request (${res.status})`);
|
|
4701
|
+
}
|
|
4702
|
+
let rows;
|
|
4703
|
+
try {
|
|
4704
|
+
rows = await res.json();
|
|
4705
|
+
} catch {
|
|
4706
|
+
throw new Error("getActionableAdvisories: malformed brain response");
|
|
4707
|
+
}
|
|
4708
|
+
if (!Array.isArray(rows)) {
|
|
4709
|
+
throw new Error("getActionableAdvisories: expected array from brain");
|
|
4710
|
+
}
|
|
4711
|
+
const out = [];
|
|
4712
|
+
for (const raw of rows) {
|
|
4713
|
+
if (raw && typeof raw === "object") {
|
|
4714
|
+
out.push(rowToAdvisory(raw));
|
|
4715
|
+
}
|
|
4716
|
+
}
|
|
4717
|
+
return out;
|
|
4718
|
+
}
|
|
4719
|
+
async function markAdvisoryResolved(opts) {
|
|
4720
|
+
const {
|
|
4721
|
+
id,
|
|
4722
|
+
resolutionNote,
|
|
4723
|
+
brainEndpoint,
|
|
4724
|
+
brainJwt,
|
|
4725
|
+
brainAnonKey,
|
|
4726
|
+
fetch: injectedFetch
|
|
4727
|
+
} = opts;
|
|
4728
|
+
if (!id) {
|
|
4729
|
+
return { ok: false, reason: "id_required" };
|
|
4730
|
+
}
|
|
4731
|
+
const doFetch = resolveFetch(injectedFetch);
|
|
4732
|
+
const base = normalizeEndpoint(brainEndpoint);
|
|
4733
|
+
const lookupUrl = `${base}/rest/v1/actionable_advisories_v?id=eq.${encodeURIComponent(id)}&select=app_id,rule`;
|
|
4734
|
+
let lookupRes;
|
|
4735
|
+
try {
|
|
4736
|
+
lookupRes = await doFetch(lookupUrl, {
|
|
4737
|
+
method: "GET",
|
|
4738
|
+
headers: {
|
|
4739
|
+
Authorization: `Bearer ${brainJwt}`,
|
|
4740
|
+
apikey: brainAnonKey,
|
|
4741
|
+
Accept: "application/json"
|
|
4742
|
+
}
|
|
4743
|
+
});
|
|
4744
|
+
} catch (err) {
|
|
4745
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
4746
|
+
return { ok: false, reason: `network_error:${msg}` };
|
|
4747
|
+
}
|
|
4748
|
+
if (lookupRes.status === 401 || lookupRes.status === 403) {
|
|
4749
|
+
return { ok: false, reason: "brain_auth_misconfig" };
|
|
4750
|
+
}
|
|
4751
|
+
if (lookupRes.status >= 500) {
|
|
4752
|
+
return { ok: false, reason: "brain_unavailable" };
|
|
4753
|
+
}
|
|
4754
|
+
if (!lookupRes.ok) {
|
|
4755
|
+
return { ok: false, reason: `brain_lookup_failed:${lookupRes.status}` };
|
|
4756
|
+
}
|
|
4757
|
+
let lookupRows;
|
|
4758
|
+
try {
|
|
4759
|
+
lookupRows = await lookupRes.json();
|
|
4760
|
+
} catch {
|
|
4761
|
+
return { ok: false, reason: "brain_lookup_malformed" };
|
|
4762
|
+
}
|
|
4763
|
+
if (!Array.isArray(lookupRows) || lookupRows.length === 0) {
|
|
4764
|
+
return { ok: false, reason: "advisory_not_found" };
|
|
4765
|
+
}
|
|
4766
|
+
const tuple = lookupRows[0];
|
|
4767
|
+
const appId = typeof tuple.app_id === "string" ? tuple.app_id : "";
|
|
4768
|
+
const code = typeof tuple.rule === "string" ? tuple.rule : "";
|
|
4769
|
+
if (!appId || !code) {
|
|
4770
|
+
return { ok: false, reason: "advisory_tuple_invalid" };
|
|
4771
|
+
}
|
|
4772
|
+
const outcomesUrl = `${base}/rest/v1/compile_outcomes?app_id=eq.${encodeURIComponent(appId)}&select=id`;
|
|
4773
|
+
let outcomesRes;
|
|
4774
|
+
try {
|
|
4775
|
+
outcomesRes = await doFetch(outcomesUrl, {
|
|
4776
|
+
method: "GET",
|
|
4777
|
+
headers: {
|
|
4778
|
+
Authorization: `Bearer ${brainJwt}`,
|
|
4779
|
+
apikey: brainAnonKey,
|
|
4780
|
+
Accept: "application/json"
|
|
4781
|
+
}
|
|
4782
|
+
});
|
|
4783
|
+
} catch (err) {
|
|
4784
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
4785
|
+
return { ok: false, reason: `network_error:${msg}` };
|
|
4786
|
+
}
|
|
4787
|
+
if (outcomesRes.status === 401 || outcomesRes.status === 403) {
|
|
4788
|
+
return { ok: false, reason: "brain_auth_misconfig" };
|
|
4789
|
+
}
|
|
4790
|
+
if (outcomesRes.status >= 500) {
|
|
4791
|
+
return { ok: false, reason: "brain_unavailable" };
|
|
4792
|
+
}
|
|
4793
|
+
if (!outcomesRes.ok) {
|
|
4794
|
+
return { ok: false, reason: `brain_lookup_failed:${outcomesRes.status}` };
|
|
4795
|
+
}
|
|
4796
|
+
let outcomeRows;
|
|
4797
|
+
try {
|
|
4798
|
+
outcomeRows = await outcomesRes.json();
|
|
4799
|
+
} catch {
|
|
4800
|
+
return { ok: false, reason: "brain_lookup_malformed" };
|
|
4801
|
+
}
|
|
4802
|
+
if (!Array.isArray(outcomeRows)) {
|
|
4803
|
+
return { ok: false, reason: "brain_lookup_malformed" };
|
|
4804
|
+
}
|
|
4805
|
+
const outcomeIds = [];
|
|
4806
|
+
for (const row of outcomeRows) {
|
|
4807
|
+
if (row && typeof row === "object") {
|
|
4808
|
+
const idVal = row.id;
|
|
4809
|
+
if (typeof idVal === "number" && Number.isFinite(idVal)) {
|
|
4810
|
+
outcomeIds.push(idVal);
|
|
4811
|
+
}
|
|
4812
|
+
}
|
|
4813
|
+
}
|
|
4814
|
+
if (outcomeIds.length === 0) {
|
|
4815
|
+
return { ok: true };
|
|
4816
|
+
}
|
|
4817
|
+
const inList = outcomeIds.join(",");
|
|
4818
|
+
const patchUrl = `${base}/rest/v1/compile_outcome_advisories?outcome_id=in.(${inList})&code=eq.${encodeURIComponent(code)}&resolved_at=is.null`;
|
|
4819
|
+
const patchBody = {
|
|
4820
|
+
resolved_at: (/* @__PURE__ */ new Date()).toISOString(),
|
|
4821
|
+
resolution_source: "consumer-marked"
|
|
4822
|
+
};
|
|
4823
|
+
if (resolutionNote !== void 0) {
|
|
4824
|
+
patchBody.resolution_note = resolutionNote;
|
|
4825
|
+
}
|
|
4826
|
+
let patchRes;
|
|
4827
|
+
try {
|
|
4828
|
+
patchRes = await doFetch(patchUrl, {
|
|
4829
|
+
method: "PATCH",
|
|
4830
|
+
headers: {
|
|
4831
|
+
Authorization: `Bearer ${brainJwt}`,
|
|
4832
|
+
apikey: brainAnonKey,
|
|
4833
|
+
"Content-Type": "application/json",
|
|
4834
|
+
Accept: "application/json",
|
|
4835
|
+
// PostgREST default is no return; we don't need the row back.
|
|
4836
|
+
Prefer: "return=minimal"
|
|
4837
|
+
},
|
|
4838
|
+
body: JSON.stringify(patchBody)
|
|
4839
|
+
});
|
|
4840
|
+
} catch (err) {
|
|
4841
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
4842
|
+
return { ok: false, reason: `network_error:${msg}` };
|
|
4843
|
+
}
|
|
4844
|
+
if (patchRes.status === 401 || patchRes.status === 403) {
|
|
4845
|
+
return { ok: false, reason: "brain_auth_misconfig" };
|
|
4846
|
+
}
|
|
4847
|
+
if (patchRes.status >= 500) {
|
|
4848
|
+
return { ok: false, reason: "brain_unavailable" };
|
|
4849
|
+
}
|
|
4850
|
+
if (!patchRes.ok) {
|
|
4851
|
+
return { ok: false, reason: `patch_failed:${patchRes.status}` };
|
|
4852
|
+
}
|
|
4853
|
+
return { ok: true };
|
|
4854
|
+
}
|
|
4855
|
+
|
|
4856
|
+
// src/models-brain.ts
|
|
4857
|
+
function isModelRow(x) {
|
|
4858
|
+
if (!x || typeof x !== "object") return false;
|
|
4859
|
+
const r = x;
|
|
4860
|
+
return typeof r.model_id === "string" && typeof r.provider === "string";
|
|
4861
|
+
}
|
|
4862
|
+
function isAliasRow(x) {
|
|
4863
|
+
if (!x || typeof x !== "object") return false;
|
|
4864
|
+
const r = x;
|
|
4865
|
+
return typeof r.alias_id === "string" && typeof r.canonical_id === "string";
|
|
4866
|
+
}
|
|
4867
|
+
function rowToProfile(row) {
|
|
4868
|
+
try {
|
|
4869
|
+
if (row.cliffs !== void 0 && row.cliffs !== null && !Array.isArray(row.cliffs)) {
|
|
4870
|
+
return null;
|
|
4871
|
+
}
|
|
4872
|
+
if (row.recovery !== void 0 && row.recovery !== null && !Array.isArray(row.recovery)) {
|
|
4873
|
+
return null;
|
|
4874
|
+
}
|
|
4875
|
+
if (row.lowering !== void 0 && row.lowering !== null && (typeof row.lowering !== "object" || Array.isArray(row.lowering))) {
|
|
4876
|
+
return null;
|
|
4877
|
+
}
|
|
4878
|
+
return {
|
|
4879
|
+
id: row.model_id,
|
|
4880
|
+
provider: row.provider,
|
|
4881
|
+
status: row.status ?? "current",
|
|
4882
|
+
maxContextTokens: row.max_context_tokens ?? 0,
|
|
4883
|
+
maxOutputTokens: row.max_output_tokens ?? 0,
|
|
4884
|
+
maxTools: row.max_tools ?? 0,
|
|
4885
|
+
parallelToolCalls: row.parallel_tool_calls ?? false,
|
|
4886
|
+
structuredOutput: row.structured_output ?? "none",
|
|
4887
|
+
systemPromptMode: row.system_prompt_mode ?? "inline",
|
|
4888
|
+
streaming: row.streaming ?? true,
|
|
4889
|
+
cliffs: row.cliffs ?? [],
|
|
4890
|
+
costInputPer1m: row.cost_input_per_1m ?? 0,
|
|
4891
|
+
costOutputPer1m: row.cost_output_per_1m ?? 0,
|
|
4892
|
+
lowering: row.lowering ?? { system: { mode: "inline" }, cache: { strategy: "unsupported" } },
|
|
4893
|
+
recovery: row.recovery ?? [],
|
|
4894
|
+
strengths: row.strengths ?? [],
|
|
4895
|
+
weaknesses: row.weaknesses ?? [],
|
|
4896
|
+
notes: row.notes ?? void 0,
|
|
4897
|
+
verifiedAgainstDocs: row.verified_against_docs ?? void 0,
|
|
4898
|
+
archetypePerf: row.archetype_perf ?? void 0
|
|
4899
|
+
};
|
|
4900
|
+
} catch {
|
|
4901
|
+
return null;
|
|
4902
|
+
}
|
|
4903
|
+
}
|
|
4904
|
+
function profileToRow(profile, opts = {}) {
|
|
4905
|
+
const row = {
|
|
4906
|
+
model_id: profile.id,
|
|
4907
|
+
provider: profile.provider,
|
|
4908
|
+
status: profile.status,
|
|
4909
|
+
max_context_tokens: profile.maxContextTokens,
|
|
4910
|
+
max_output_tokens: profile.maxOutputTokens,
|
|
4911
|
+
max_tools: profile.maxTools,
|
|
4912
|
+
parallel_tool_calls: profile.parallelToolCalls,
|
|
4913
|
+
structured_output: profile.structuredOutput,
|
|
4914
|
+
system_prompt_mode: profile.systemPromptMode,
|
|
4915
|
+
streaming: profile.streaming,
|
|
4916
|
+
cliffs: profile.cliffs,
|
|
4917
|
+
cost_input_per_1m: profile.costInputPer1m,
|
|
4918
|
+
cost_output_per_1m: profile.costOutputPer1m,
|
|
4919
|
+
lowering: profile.lowering,
|
|
4920
|
+
recovery: profile.recovery,
|
|
4921
|
+
strengths: profile.strengths,
|
|
4922
|
+
weaknesses: profile.weaknesses,
|
|
4923
|
+
notes: profile.notes ?? null,
|
|
4924
|
+
archetype_perf: profile.archetypePerf ?? null,
|
|
4925
|
+
active: opts.active ?? true
|
|
4926
|
+
};
|
|
4927
|
+
if (opts.verifiedAgainstDocs !== void 0) {
|
|
4928
|
+
row.verified_against_docs = opts.verifiedAgainstDocs;
|
|
4929
|
+
} else if (profile.verifiedAgainstDocs !== void 0) {
|
|
4930
|
+
const v = profile.verifiedAgainstDocs;
|
|
4931
|
+
row.verified_against_docs = /^\d{4}-\d{2}-\d{2}/.test(v) ? v : null;
|
|
4932
|
+
}
|
|
4933
|
+
if (opts.versionAdded !== void 0) row.version_added = opts.versionAdded;
|
|
4934
|
+
if (opts.versionRemoved !== void 0) row.version_removed = opts.versionRemoved;
|
|
4935
|
+
return row;
|
|
4936
|
+
}
|
|
4937
|
+
function mapRowsToModels(rows) {
|
|
4938
|
+
const out = /* @__PURE__ */ new Map();
|
|
4939
|
+
for (const row of rows) {
|
|
4940
|
+
if (!isModelRow(row)) continue;
|
|
4941
|
+
const profile = rowToProfile(row);
|
|
4942
|
+
if (profile) out.set(profile.id, profile);
|
|
4943
|
+
}
|
|
4944
|
+
return out;
|
|
4945
|
+
}
|
|
4946
|
+
function mapRowsToAliases(rows) {
|
|
4947
|
+
const out = {};
|
|
4948
|
+
for (const row of rows) {
|
|
4949
|
+
if (!isAliasRow(row)) continue;
|
|
4950
|
+
out[row.alias_id] = row.canonical_id;
|
|
4951
|
+
}
|
|
4952
|
+
return out;
|
|
4953
|
+
}
|
|
4954
|
+
function bundledModels() {
|
|
4955
|
+
return new Map(allProfilesRaw().map((p) => [p.id, p]));
|
|
4956
|
+
}
|
|
4957
|
+
function bundledAliases() {
|
|
4958
|
+
return { ...ALIASES };
|
|
4959
|
+
}
|
|
4960
|
+
var loadModelsFromBrain = createBrainQueryCache({
|
|
4961
|
+
table: "kgauto_models",
|
|
4962
|
+
mapRows: mapRowsToModels,
|
|
4963
|
+
bundledFallback: bundledModels
|
|
4964
|
+
});
|
|
4965
|
+
var loadAliasesFromBrain = createBrainQueryCache({
|
|
4966
|
+
table: "kgauto_aliases",
|
|
4967
|
+
mapRows: mapRowsToAliases,
|
|
4968
|
+
bundledFallback: bundledAliases
|
|
4969
|
+
});
|
|
4970
|
+
_setProfileBrainHook({
|
|
4971
|
+
getProfile: (canonical) => loadModelsFromBrain().get(canonical),
|
|
4972
|
+
resolveAlias: (id) => loadAliasesFromBrain()[id]
|
|
4973
|
+
});
|
|
4974
|
+
|
|
1771
4975
|
// src/index.ts
|
|
1772
4976
|
function compile2(ir, opts) {
|
|
1773
4977
|
const result = compile(ir, opts);
|
|
@@ -1776,12 +4980,19 @@ function compile2(ir, opts) {
|
|
|
1776
4980
|
}
|
|
1777
4981
|
// Annotate the CommonJS export names for ESM import in node:
|
|
1778
4982
|
0 && (module.exports = {
|
|
4983
|
+
ABSOLUTE_FLOOR,
|
|
1779
4984
|
ALIASES,
|
|
1780
4985
|
ALL_ARCHETYPES,
|
|
4986
|
+
ARCHETYPE_FLOOR_DEFAULT,
|
|
1781
4987
|
CallError,
|
|
1782
4988
|
DIALECT_VERSION,
|
|
1783
4989
|
INTENT_ARCHETYPES,
|
|
4990
|
+
MEASURED_GROUNDING_MIN_N,
|
|
4991
|
+
PROVIDER_ENV_KEYS,
|
|
4992
|
+
RULE_SEQUENTIAL_TOOL_CLIFF,
|
|
4993
|
+
TRANSLATOR_FLOOR,
|
|
1784
4994
|
allProfiles,
|
|
4995
|
+
applySectionRewrites,
|
|
1785
4996
|
bucketContext,
|
|
1786
4997
|
bucketHistory,
|
|
1787
4998
|
bucketToolCount,
|
|
@@ -1792,13 +5003,41 @@ function compile2(ir, opts) {
|
|
|
1792
5003
|
configureBrain,
|
|
1793
5004
|
countTokens,
|
|
1794
5005
|
execute,
|
|
5006
|
+
getActionableAdvisories,
|
|
5007
|
+
getAllStarterChains,
|
|
5008
|
+
getAllStarterChainsWithGrounding,
|
|
5009
|
+
getArchetypePerfScore,
|
|
5010
|
+
getDefaultFallbackChain,
|
|
5011
|
+
getDefaultFallbackChainWithGrounding,
|
|
5012
|
+
getModelCompatibility,
|
|
5013
|
+
getPerAxisMetrics,
|
|
1795
5014
|
getProfile,
|
|
5015
|
+
getReachabilityDiagnostic,
|
|
5016
|
+
getSequentialStarterChain,
|
|
5017
|
+
getSequentialStarterChainWithGrounding,
|
|
5018
|
+
getStarterChain,
|
|
5019
|
+
getStarterChainWithGrounding,
|
|
1796
5020
|
hashShape,
|
|
1797
5021
|
isArchetype,
|
|
5022
|
+
isBrainQueryActiveFor,
|
|
5023
|
+
isModelReachable,
|
|
5024
|
+
isProviderReachable,
|
|
1798
5025
|
learningKey,
|
|
5026
|
+
loadAliasesFromBrain,
|
|
5027
|
+
loadArchetypePerfFromBrain,
|
|
5028
|
+
loadArchetypePerfNFromBrain,
|
|
5029
|
+
loadChainsFromBrain,
|
|
5030
|
+
loadModelsFromBrain,
|
|
5031
|
+
loadPricingFromBrain,
|
|
5032
|
+
markAdvisoryResolved,
|
|
5033
|
+
profileToRow,
|
|
1799
5034
|
profilesByProvider,
|
|
1800
5035
|
record,
|
|
5036
|
+
recordOutcome,
|
|
1801
5037
|
resetTokenizer,
|
|
5038
|
+
resolvePricingAt,
|
|
5039
|
+
resolveProviderKey,
|
|
5040
|
+
runAdvisor,
|
|
1802
5041
|
setTokenizer,
|
|
1803
5042
|
tryGetProfile
|
|
1804
5043
|
});
|