@warmdrift/kgauto-compiler 2.0.0-alpha.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +240 -0
- package/dist/chunk-3KVKELZN.mjs +657 -0
- package/dist/chunk-5TI6PNSK.mjs +95 -0
- package/dist/dialect.d.mts +99 -0
- package/dist/dialect.d.ts +99 -0
- package/dist/dialect.js +127 -0
- package/dist/dialect.mjs +22 -0
- package/dist/index.d.mts +509 -0
- package/dist/index.d.ts +509 -0
- package/dist/index.js +2559 -0
- package/dist/index.mjs +1784 -0
- package/dist/profiles-Bgri1pe7.d.ts +728 -0
- package/dist/profiles-DO6R9moS.d.mts +728 -0
- package/dist/profiles.d.mts +2 -0
- package/dist/profiles.d.ts +2 -0
- package/dist/profiles.js +685 -0
- package/dist/profiles.mjs +14 -0
- package/package.json +59 -0
package/dist/index.js
ADDED
|
@@ -0,0 +1,2559 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// Bundler interop helpers: short aliases for the Object reflection primitives
// plus the utilities that expose an ES-style export table as a CommonJS module.
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __hasOwnProp = Object.prototype.hasOwnProperty;

/**
 * Define every entry of `all` on `target` as an enumerable getter, so the
 * exported value is resolved lazily at access time.
 */
var __export = (target, all) => {
  for (var name in all) {
    __defProp(target, name, { get: all[name], enumerable: true });
  }
};

/**
 * Copy the own properties of `from` onto `to` as live getters, skipping keys
 * that `to` already owns and the single key named by `except`.
 * Returns `to` in every case.
 */
var __copyProps = (to, from, except, desc) => {
  const isCopyable =
    (typeof from === "object" && from !== null) || typeof from === "function";
  if (!isCopyable) {
    return to;
  }
  for (const key of __getOwnPropNames(from)) {
    if (__hasOwnProp.call(to, key) || key === except) {
      continue;
    }
    desc = __getOwnPropDesc(from, key);
    // Keep the source property's enumerability; default to enumerable when
    // no descriptor is available.
    __defProp(to, key, { get: () => from[key], enumerable: !desc || desc.enumerable });
  }
  return to;
};

/** Wrap an export table in a fresh object flagged with `__esModule: true`. */
var __toCommonJS = (mod) => {
  const flagged = __defProp({}, "__esModule", { value: true });
  return __copyProps(flagged, mod);
};
|
|
19
|
+
|
|
20
|
+
// src/index.ts
|
|
21
|
+
var index_exports = {};
|
|
22
|
+
__export(index_exports, {
|
|
23
|
+
ALIASES: () => ALIASES,
|
|
24
|
+
ALL_ARCHETYPES: () => ALL_ARCHETYPES,
|
|
25
|
+
CallError: () => CallError,
|
|
26
|
+
DIALECT_VERSION: () => DIALECT_VERSION,
|
|
27
|
+
INTENT_ARCHETYPES: () => INTENT_ARCHETYPES,
|
|
28
|
+
PROVIDER_ENV_KEYS: () => PROVIDER_ENV_KEYS,
|
|
29
|
+
allProfiles: () => allProfiles,
|
|
30
|
+
bucketContext: () => bucketContext,
|
|
31
|
+
bucketHistory: () => bucketHistory,
|
|
32
|
+
bucketToolCount: () => bucketToolCount,
|
|
33
|
+
buildLLMJudge: () => buildLLMJudge,
|
|
34
|
+
call: () => call,
|
|
35
|
+
clearBrain: () => clearBrain,
|
|
36
|
+
compile: () => compile2,
|
|
37
|
+
configureBrain: () => configureBrain,
|
|
38
|
+
countTokens: () => countTokens,
|
|
39
|
+
execute: () => execute,
|
|
40
|
+
getAllStarterChains: () => getAllStarterChains,
|
|
41
|
+
getDefaultFallbackChain: () => getDefaultFallbackChain,
|
|
42
|
+
getProfile: () => getProfile,
|
|
43
|
+
getReachabilityDiagnostic: () => getReachabilityDiagnostic,
|
|
44
|
+
getStarterChain: () => getStarterChain,
|
|
45
|
+
hashShape: () => hashShape,
|
|
46
|
+
isArchetype: () => isArchetype,
|
|
47
|
+
isModelReachable: () => isModelReachable,
|
|
48
|
+
isProviderReachable: () => isProviderReachable,
|
|
49
|
+
learningKey: () => learningKey,
|
|
50
|
+
profilesByProvider: () => profilesByProvider,
|
|
51
|
+
record: () => record,
|
|
52
|
+
resetTokenizer: () => resetTokenizer,
|
|
53
|
+
resolveProviderKey: () => resolveProviderKey,
|
|
54
|
+
runAdvisor: () => runAdvisor,
|
|
55
|
+
setTokenizer: () => setTokenizer,
|
|
56
|
+
tryGetProfile: () => tryGetProfile
|
|
57
|
+
});
|
|
58
|
+
module.exports = __toCommonJS(index_exports);
|
|
59
|
+
|
|
60
|
+
// src/tokenizer.ts
|
|
61
|
+
// Active tokenizer; starts out as the character-based approximation below.
var tokenizerImpl = defaultCharBasedCounter;

/**
 * Approximate a token count as one token per four characters (rounded up),
 * never less than 1 for non-empty input.
 *
 * @param {string} text - Text to measure.
 * @returns {number} 0 for falsy input, otherwise the approximated count.
 */
function defaultCharBasedCounter(text) {
  if (!text) {
    return 0;
  }
  const approx = Math.ceil(text.length / 4);
  return approx < 1 ? 1 : approx;
}
|
|
66
|
+
/**
 * Install a custom tokenizer used by subsequent token counts.
 *
 * @param {(text: string) => number} impl - Replacement counting function.
 */
function setTokenizer(impl) {
  // No validation here: countTokens falls back to the default counter when
  // the installed implementation throws or returns an unusable value.
  tokenizerImpl = impl;
}
|
|
69
|
+
/** Restore the built-in character-based tokenizer. */
function resetTokenizer() {
  tokenizerImpl = defaultCharBasedCounter;
}
|
|
72
|
+
/**
 * Count tokens in `text` with the active tokenizer.
 *
 * Falls back to the default character-based counter when the active
 * tokenizer throws or returns something that is not a finite, non-negative
 * number.
 *
 * @param {string} text - Text to measure.
 * @returns {number} Token count; 0 for falsy input.
 */
function countTokens(text) {
  if (!text) return 0;
  let counted;
  try {
    counted = tokenizerImpl(text);
  } catch {
    return defaultCharBasedCounter(text);
  }
  const usable = Number.isFinite(counted) && counted >= 0;
  return usable ? counted : defaultCharBasedCounter(text);
}
|
|
81
|
+
/**
 * Estimate the tokens a tool definition contributes: name, optional
 * description, optional JSON-serialized parameters, plus a fixed overhead
 * of 8.
 *
 * @param {{name: string, description?: string, parameters?: object}} tool
 * @returns {number} Estimated token count.
 */
function countToolTokens(tool) {
  const { name, description, parameters } = tool;
  let subtotal = countTokens(name);
  subtotal += description ? countTokens(description) : 0;
  subtotal += parameters ? countTokens(JSON.stringify(parameters)) : 0;
  return subtotal + 8;
}
|
|
87
|
+
/**
 * Estimate the tokens of a message list: each message's `content` count
 * plus a fixed overhead of 4 per message.
 *
 * @param {Array<{content: string}>} messages
 * @returns {number} Estimated token total.
 */
function countMessagesTokens(messages) {
  let running = 0;
  for (const { content } of messages) {
    running += countTokens(content) + 4;
  }
  return running;
}
|
|
94
|
+
|
|
95
|
+
// src/dialect.ts
|
|
96
|
+
// Version tag embedded in learning keys.
var DIALECT_VERSION = "v1";

// Intent archetype catalogue, keyed by archetype name. Each row below is
// [name, description, examples]; rows are expanded into
// { name, description, examples } records in declaration order.
var INTENT_ARCHETYPES = Object.fromEntries(
  [
    [
      "ask",
      "Filter, search, or interrogate existing data",
      ["filter creators by criteria", "find docs matching query", "lookup a record"],
    ],
    [
      "hunt",
      "Discover new entities not in the current dataset",
      ["find new prospects", "crawl for unindexed sources", "expand a seed list"],
    ],
    [
      "classify",
      "Assign a category from a finite set",
      ["intent detection", "sentiment", "route-to-team"],
    ],
    [
      "summarize",
      "Compress text or data while preserving meaning",
      ["dashboard insight", "meeting notes", "briefing"],
    ],
    [
      "generate",
      "Produce new content from a prompt or template",
      ["draft email", "create marketing copy", "co-founder conversation"],
    ],
    [
      "extract",
      "Pull structured data from unstructured input",
      ["parse invoice", "extract entities", "transcript \u2192 action items"],
    ],
    [
      "plan",
      "Multi-step decomposition of a goal",
      ["build a roadmap", "sequence tasks", "break a feature into steps"],
    ],
    [
      "critique",
      "Quality assessment, review, or scoring",
      ["code review", "design feedback", "oracle judgment"],
    ],
    [
      "transform",
      "Change format or style while preserving content",
      ["markdown \u2192 html", "formal \u2192 casual", "translate"],
    ],
  ].map(([name, description, examples]) => [name, { name, description, examples }])
);

// Archetype names in declaration order.
var ALL_ARCHETYPES = Object.keys(INTENT_ARCHETYPES);
|
|
145
|
+
/**
 * Report whether `name` is one of the declared intent archetypes.
 *
 * Uses an own-property check rather than the `in` operator: `in` also
 * matches names inherited from Object.prototype, so "constructor",
 * "toString" or "__proto__" would wrongly be accepted as archetypes.
 *
 * @param {string} name - Candidate archetype name.
 * @returns {boolean} True only for keys declared on INTENT_ARCHETYPES.
 */
function isArchetype(name) {
  return Object.prototype.hasOwnProperty.call(INTENT_ARCHETYPES, name);
}
|
|
148
|
+
/**
 * Map an input-token count to a coarse context-size label.
 *
 * @param {number} tokens - Estimated input tokens.
 * @returns {"tiny"|"small"|"medium"|"large"|"huge"} First bucket whose
 *   exclusive upper bound exceeds `tokens`; "huge" otherwise.
 */
function bucketContext(tokens) {
  // [exclusive upper bound, label], ascending.
  const ceilings = [
    [1e3, "tiny"],
    [4e3, "small"],
    [16e3, "medium"],
    [64e3, "large"],
  ];
  for (const [limit, label] of ceilings) {
    if (tokens < limit) return label;
  }
  return "huge";
}
|
|
155
|
+
/**
 * Map a tool count to a coarse label.
 *
 * @param {number} count - Number of tools offered.
 * @returns {"none"|"few"|"many"|"massive"} "none" for exactly 0, "few" up
 *   to 5, "many" up to 20, "massive" above that.
 */
function bucketToolCount(count) {
  if (count === 0) {
    return "none";
  }
  return count <= 5 ? "few" : count <= 20 ? "many" : "massive";
}
|
|
161
|
+
/**
 * Map a conversation turn count to a history-depth label.
 *
 * @param {number} turnCount - Number of history turns.
 * @returns {"single_turn"|"short"|"long"} "single_turn" up to 1, "short"
 *   up to 6, "long" above that.
 */
function bucketHistory(turnCount) {
  return turnCount <= 1 ? "single_turn" : turnCount <= 6 ? "short" : "long";
}
|
|
166
|
+
/**
 * Serialize a shape descriptor into a dash-joined key.
 *
 * @param {object} s - Shape with contextBucket, toolCountBucket,
 *   historyDepth, outputMode and hasExamples.
 * @returns {string} The five fields joined by "-"; hasExamples is rendered
 *   as "ex" or "no_ex".
 */
function hashShape(s) {
  const { contextBucket, toolCountBucket, historyDepth, outputMode, hasExamples } = s;
  const exampleTag = hasExamples ? "ex" : "no_ex";
  const fields = [contextBucket, toolCountBucket, historyDepth, outputMode, exampleTag];
  return fields.join("-");
}
|
|
175
|
+
/**
 * Build the "::"-separated learning key: dialect version, archetype, model
 * and the serialized shape.
 *
 * @param {string} archetype - Intent archetype name.
 * @param {string} model - Model identifier.
 * @param {object} shape - Shape descriptor passed to hashShape.
 * @returns {string} `<version>::<archetype>::<model>::<shape>`.
 */
function learningKey(archetype, model, shape) {
  const shapeKey = hashShape(shape);
  const prefix = `${DIALECT_VERSION}::${archetype}`;
  return `${prefix}::${model}::${shapeKey}`;
}
|
|
178
|
+
|
|
179
|
+
// src/passes.ts
|
|
180
|
+
/**
 * Static pass: keep only the sections relevant to the IR's intent.
 *
 * A section survives when it has no `intents` tags (missing or empty) or
 * when its tags include the current archetype.
 *
 * @param {object} ir - Prompt IR; reads `intent.archetype` and `sections`.
 * @returns {{value: object, mutations: object[]}} The same IR object when
 *   nothing was dropped, otherwise a shallow copy with filtered sections
 *   and one mutation record.
 */
function passSlice(ir) {
  const { archetype } = ir.intent;
  const totalSections = ir.sections.length;
  const appliesToIntent = (section) => {
    const tags = section.intents;
    return !tags || tags.length === 0 || tags.includes(archetype);
  };
  const survivors = ir.sections.filter(appliesToIntent);
  const removed = totalSections - survivors.length;
  if (removed === 0) {
    return { value: ir, mutations: [] };
  }
  const mutation = {
    id: `slice-${removed}`,
    source: "static_pass",
    passName: "slice",
    description: `Dropped ${removed} of ${totalSections} sections not tagged for intent=${archetype}`,
  };
  return { value: { ...ir, sections: survivors }, mutations: [mutation] };
}
|
|
198
|
+
/**
 * Static pass: remove sections whose trimmed text duplicates an earlier
 * section, keeping the first occurrence and the original order.
 *
 * Sections are compared on the exact trimmed text rather than on a 32-bit
 * hash of it. A hash key can collide for distinct texts (for example "Ez"
 * and "FY" under a djb2-style hash), which would silently drop a
 * non-duplicate section.
 *
 * @param {object} ir - Prompt IR; reads `sections[].text`.
 * @returns {{value: object, mutations: object[]}} The same IR object when
 *   nothing was removed, otherwise a shallow copy with de-duplicated
 *   sections and one mutation record.
 */
function passDedupe(ir) {
  const seenTexts = new Set();
  const unique = [];
  for (const section of ir.sections) {
    const key = section.text.trim();
    if (seenTexts.has(key)) continue;
    seenTexts.add(key);
    unique.push(section);
  }
  const dropped = ir.sections.length - unique.length;
  if (dropped === 0) return { value: ir, mutations: [] };
  return {
    value: { ...ir, sections: unique },
    mutations: [
      {
        id: `dedupe-${dropped}`,
        source: "static_pass",
        passName: "dedupe",
        // Wording kept unchanged so consumers matching on the description
        // are unaffected.
        description: `Removed ${dropped} duplicate section(s) (by text hash)`,
      },
    ],
  };
}
|
|
223
|
+
/**
 * Static pass: drop tools whose relevance for the current intent is below
 * a threshold, optionally capping how many are kept.
 *
 * A tool's score is `relevanceByIntent[intent]`, defaulting to 0.5 when
 * absent. Survivors are ordered by descending score before the optional
 * `maxKeep` cap is applied.
 *
 * @param {object} ir - Prompt IR; reads `tools` and `intent.archetype`.
 * @param {{threshold?: number, maxKeep?: number}} [opts] - `threshold`
 *   defaults to 0.2; a falsy `maxKeep` means no cap.
 * @returns {{value: object, mutations: object[]}} The same IR object when
 *   no tool was dropped, otherwise a shallow copy with the reduced tool
 *   list and one mutation record.
 */
function passToolRelevance(ir, opts = {}) {
  const allTools = ir.tools;
  if (!allTools || allTools.length === 0) {
    return { value: ir, mutations: [] };
  }
  const threshold = opts.threshold ?? 0.2;
  const intent = ir.intent.archetype;
  const byScoreDescending = (left, right) => right.score - left.score;
  const ranked = allTools
    .map((tool) => ({ tool, score: tool.relevanceByIntent?.[intent] ?? 0.5 }))
    .filter((entry) => entry.score >= threshold);
  ranked.sort(byScoreDescending);
  let survivors = ranked.map((entry) => entry.tool);
  if (opts.maxKeep) {
    survivors = survivors.slice(0, opts.maxKeep);
  }
  const removed = allTools.length - survivors.length;
  if (removed === 0) {
    return { value: ir, mutations: [] };
  }
  const mutation = {
    id: `tool-relevance-${removed}`,
    source: "static_pass",
    passName: "tool_relevance",
    description: `Dropped ${removed} of ${allTools.length} tools below relevance ${threshold} for intent=${intent}`,
  };
  return { value: { ...ir, tools: survivors }, mutations: [mutation] };
}
|
|
247
|
+
/**
 * Sum the token counts of all history messages whose `content` is a
 * string; messages with any other content type contribute nothing.
 *
 * @param {Iterable<{content?: unknown}>} history
 * @returns {number} Token total.
 */
function totalHistoryTokens(history) {
  let sum = 0;
  for (const { content } of history) {
    if (typeof content !== "string") continue;
    sum += countTokens(content);
  }
  return sum;
}
|
|
254
|
+
/**
 * Static pass: replace older conversation turns with a single placeholder
 * message once the history is long enough.
 *
 * Compression triggers when the history has more than `summarizeOlderThan`
 * turns, or when `summarizeAboveTokens` is set, the history's token total
 * exceeds it, and there are more than `keepRecent` turns. The last
 * `keepRecent` turns are kept verbatim; everything before them becomes one
 * system message quoting the first line (max 200 chars) of the first old
 * user turn.
 *
 * Fixes over the previous version:
 *  - a first old user turn whose `content` is not a string (e.g. a
 *    parts-only message) no longer throws; the quoted line is left empty;
 *  - when `keepRecent` is at least the history length there is nothing to
 *    fold, so the IR is returned untouched instead of slicing with a zero
 *    or negative index (which dropped recent turns or emitted a "0 turns
 *    omitted" summary).
 *
 * NOTE(review): the summary uses role "system", and the provider lowerers
 * visible in this file skip history entries with role "system" — confirm
 * the placeholder is actually expected to reach the provider.
 *
 * @param {object} ir - Prompt IR; reads `history`.
 * @param {{keepRecent?: number, summarizeOlderThan?: number,
 *   summarizeAboveTokens?: number}} [opts] - Defaults: keepRecent 4,
 *   summarizeOlderThan 8, no token threshold.
 * @returns {{value: object, mutations: object[], historyTokensTotal: number}}
 *   `historyTokensTotal` is the pre-compression token total.
 */
function passCompressHistory(ir, opts = {}) {
  const history = ir.history;
  if (!history || history.length === 0) {
    return { value: ir, mutations: [], historyTokensTotal: 0 };
  }
  const keepRecent = opts.keepRecent ?? 4;
  const summarizeOlderThan = opts.summarizeOlderThan ?? 8;
  const summarizeAboveTokens = opts.summarizeAboveTokens;
  const historyTokensTotal = totalHistoryTokens(history);
  const countThresholdHit = history.length > summarizeOlderThan;
  const tokenThresholdHit =
    summarizeAboveTokens !== void 0 &&
    historyTokensTotal > summarizeAboveTokens &&
    history.length > keepRecent;
  const cutIndex = history.length - keepRecent;
  // Nothing to do when no threshold fired or there are no turns older than
  // the ones we must keep.
  if ((!countThresholdHit && !tokenThresholdHit) || cutIndex <= 0) {
    return { value: ir, mutations: [], historyTokensTotal };
  }
  const old = history.slice(0, cutIndex);
  const recent = history.slice(cutIndex);
  const firstUserTurn = old.find((m) => m.role === "user");
  const firstUserLine =
    typeof firstUserTurn?.content === "string"
      ? firstUserTurn.content.split("\n")[0].slice(0, 200)
      : "";
  const oldTokens = totalHistoryTokens(old);
  // Report "tokens" only when the token threshold alone caused compression.
  const trigger = tokenThresholdHit && !countThresholdHit ? "tokens" : "count";
  const summary = {
    role: "system",
    content: `[Earlier conversation: ${old.length} turns omitted (~${oldTokens} tokens). First user message: "${firstUserLine}"]`,
  };
  const description =
    trigger === "tokens"
      ? `Compressed ${old.length} old turns (~${oldTokens} tokens) into 1 summary \u2014 token threshold ${summarizeAboveTokens} exceeded (kept ${keepRecent} recent)`
      : `Compressed ${old.length} old turns into 1 summary (kept ${keepRecent} recent)`;
  return {
    value: { ...ir, history: [summary, ...recent] },
    mutations: [
      {
        id: `compress-history-${old.length}`,
        source: "static_pass",
        passName: "compress_history",
        description,
      },
    ],
    historyTokensTotal,
  };
}
|
|
292
|
+
/**
 * Apply a model profile's "cliffs" to the IR: for each cliff whose metric
 * is tripped (and whose optional `whenIntent` matches the IR's archetype),
 * perform its action and record a mutation.
 *
 * Metrics: "input_tokens", "tool_count", "history_turns" compare against
 * `cliff.threshold`; "thinking_with_short_output" trips when
 * `constraints.expectedShortOutput` is truthy. Unknown metrics never trip.
 * Cliffs are evaluated in order against the IR as modified by earlier
 * cliffs.
 *
 * @param {object} ir - Prompt IR.
 * @param {{id: string, cliffs: object[]}} profile - Model profile.
 * @param {number} estimatedInputTokens - Token estimate for "input_tokens".
 * @returns {{value: {ir: object, loweringHints: object}, mutations: object[]}}
 */
function passApplyCliffs(ir, profile, estimatedInputTokens) {
  const mutations = [];
  const hints = { qualityWarning: [] };
  let current = ir;

  const recordGuard = (id, description) => {
    mutations.push({ id, source: "cliff_guard", passName: "apply_cliffs", description });
  };

  const metricTripped = ({ metric, threshold }) => {
    if (metric === "input_tokens") return estimatedInputTokens >= threshold;
    if (metric === "tool_count") return (current.tools?.length ?? 0) >= threshold;
    if (metric === "history_turns") return (current.history?.length ?? 0) >= threshold;
    if (metric === "thinking_with_short_output") return !!current.constraints?.expectedShortOutput;
    return false;
  };

  for (const cliff of profile.cliffs) {
    if (!metricTripped(cliff)) continue;
    // An intent-scoped cliff only applies to its own archetype.
    if (cliff.whenIntent && current.intent.archetype !== cliff.whenIntent) continue;

    const { action } = cliff;
    if (action === "drop_to_top_relevant") {
      const toolCount = current.tools?.length ?? 0;
      // Keep at most 75% of the threshold, and at most half the current
      // tools (but aim for at least one).
      const targetCount = Math.min(
        Math.floor(cliff.threshold * 0.75),
        Math.max(1, Math.floor(toolCount / 2))
      );
      if (current.tools && current.tools.length > targetCount) {
        const intent = current.intent.archetype;
        const ranked = current.tools.map((tool) => ({
          tool,
          score: tool.relevanceByIntent?.[intent] ?? 0.5,
        }));
        ranked.sort((left, right) => right.score - left.score);
        const trimmed = ranked.slice(0, targetCount).map((entry) => entry.tool);
        current = { ...current, tools: trimmed };
        recordGuard(
          `cliff-${cliff.metric}`,
          `${profile.id}: ${cliff.reason}; trimmed tools to ${targetCount}`
        );
      }
    } else if (action === "force_thinking_budget_zero") {
      hints.forceThinkingZero = true;
      recordGuard(`cliff-thinking-zero`, `${profile.id}: ${cliff.reason}`);
    } else if (action === "force_terse_output") {
      hints.forceTerseOutput = true;
      recordGuard(`cliff-terse`, `${profile.id}: ${cliff.reason}`);
    } else if (action === "downgrade_quality_warning") {
      hints.qualityWarning.push(cliff.reason);
      recordGuard(`cliff-quality-warning`, `${profile.id}: ${cliff.reason}`);
    } else if (action === "escalate_target") {
      hints.escalateRequested = true;
      recordGuard(`cliff-escalate`, `${profile.id}: ${cliff.reason}`);
    } else if (action === "strip_tools") {
      const strippedCount = current.tools?.length ?? 0;
      if (strippedCount > 0) {
        current = { ...current, tools: [] };
        const intentSuffix = cliff.whenIntent ? `-${cliff.whenIntent}` : "";
        recordGuard(
          `cliff-strip-tools${intentSuffix}`,
          `${profile.id}: ${cliff.reason} \u2014 stripped ${strippedCount} tools`
        );
      }
    }
  }

  return { value: { ir: current, loweringHints: hints }, mutations };
}
|
|
388
|
+
/**
 * Score every candidate model in `ir.models` against the request and the
 * compile policy.
 *
 * For each model the profile is looked up via `opts.profilesById`; a lookup
 * that throws yields an "unknown_model_id" entry with rank -Infinity.
 * Otherwise reject reasons are collected (policy block, context budget at
 * 90% of the profile maximum, tool limit, missing structured output, policy
 * cost ceiling) and a rank is computed from quality, caller order, cost,
 * the number of reject reasons and the preferred-model boost.
 *
 * @param {object} ir - Prompt IR; reads `models`, `tools`, `constraints`.
 * @param {{profilesById: Function, estimatedInputTokens: number,
 *   policy?: object}} opts
 * @returns {{value: object[], mutations: object[]}} One score per model in
 *   input order, plus policy-related mutation records.
 */
function passScoreTargets(ir, opts) {
  const constraints = ir.constraints ?? {};
  const policy = opts.policy ?? {};
  const blocked = new Set(policy.blockedModels ?? []);
  const preferred = new Set(policy.preferredModels ?? []);
  const results = [];
  const policyNotes = [];

  const notePolicy = (id, description) => {
    policyNotes.push({ id, source: "compile_policy", passName: "score_targets", description });
  };

  for (const modelId of ir.models) {
    let profile;
    try {
      profile = opts.profilesById(modelId);
    } catch {
      results.push({
        modelId,
        estimatedCostUsd: 0,
        fits: false,
        rejectReasons: ["unknown_model_id"],
        qualityScore: 0,
        rank: -Infinity,
      });
      continue;
    }

    const inputTokens = opts.estimatedInputTokens;
    const toolCount = ir.tools?.length ?? 0;
    const isBlocked = blocked.has(modelId);
    const isPreferred = preferred.has(modelId);

    const rejectReasons = [];
    if (isBlocked) {
      rejectReasons.push(`blocked_by_policy (consumer gated this model \u2014 see CompilePolicy.blockedModels)`);
    }
    if (inputTokens > profile.maxContextTokens * 0.9) {
      rejectReasons.push(`exceeds context budget (${inputTokens} > 0.9*${profile.maxContextTokens})`);
    }
    if (toolCount > profile.maxTools) {
      rejectReasons.push(`exceeds maxTools (${ir.tools?.length} > ${profile.maxTools})`);
    }
    if (constraints.structuredOutput && profile.structuredOutput === "none") {
      rejectReasons.push(`structuredOutput requested but model has none`);
    }

    // Each tripped quality-warning cliff costs 0.3 of quality; only the
    // input_tokens and tool_count metrics are evaluated here.
    let qualityPenalty = 0;
    for (const cliff of profile.cliffs) {
      if (cliff.action !== "downgrade_quality_warning") continue;
      const tripped =
        cliff.metric === "input_tokens"
          ? inputTokens >= cliff.threshold
          : cliff.metric === "tool_count" && toolCount >= cliff.threshold;
      if (tripped) qualityPenalty += 0.3;
    }

    const estimatedCostUsd = inputTokens / 1e6 * profile.costInputPer1m;
    const costCeiling = policy.maxCostPerCallUsd;
    const overCost = costCeiling !== void 0 && estimatedCostUsd > costCeiling;
    if (overCost) {
      rejectReasons.push(
        `exceeds_max_cost_per_call (estimated $${estimatedCostUsd.toFixed(4)} > policy ceiling $${costCeiling.toFixed(4)})`
      );
    }

    let baseQuality = 0.6;
    if (profile.strengths.includes("reasoning")) {
      baseQuality = 0.85;
    } else if (profile.strengths.includes("quality")) {
      baseQuality = 0.8;
    }
    const qualityScore = Math.max(0, baseQuality - qualityPenalty);
    // Earlier positions in the caller's list get a larger boost.
    const callerOrderBoost = (ir.models.length - ir.models.indexOf(modelId)) * 0.1;
    const costPenalty = estimatedCostUsd * 5;
    const preferredBoost = isPreferred ? 0.5 : 0;
    const rank = qualityScore + callerOrderBoost - costPenalty - rejectReasons.length * 10 + preferredBoost;
    const fits = rejectReasons.length === 0;

    results.push({ modelId, estimatedCostUsd, fits, rejectReasons, qualityScore, rank });

    if (isBlocked) {
      notePolicy(`policy-blocked-${modelId}`, `Model ${modelId} excluded by CompilePolicy.blockedModels`);
    } else if (overCost) {
      notePolicy(
        `policy-over-cost-${modelId}`,
        `Model ${modelId} excluded \u2014 estimated cost $${estimatedCostUsd.toFixed(4)} exceeds policy ceiling $${costCeiling.toFixed(4)}`
      );
    }
    if (isPreferred && fits) {
      notePolicy(`policy-preferred-${modelId}`, `Model ${modelId} rank boosted by CompilePolicy.preferredModels`);
    }
  }

  return { value: results, mutations: policyNotes };
}
|
|
478
|
+
/**
 * Derive the coarse "shape" descriptor of a request.
 *
 * Output mode is "json" when structured output is requested, otherwise
 * "tool_call" when any tools are present, otherwise "text". `hasExamples`
 * is true when any section id contains the whole word "example"
 * (case-insensitive).
 *
 * @param {object} ir - Prompt IR.
 * @param {number} estimatedInputTokens - Token estimate for the request.
 * @returns {{contextBucket: string, toolCountBucket: string,
 *   historyDepth: string, outputMode: string, hasExamples: boolean}}
 */
function computeShape(ir, estimatedInputTokens) {
  const toolCount = ir.tools?.length ?? 0;
  const turnCount = ir.history?.length ?? 0;

  let outputMode = "text";
  if (ir.constraints?.structuredOutput) {
    outputMode = "json";
  } else if (ir.tools?.length) {
    outputMode = "tool_call";
  }

  const mentionsExample = (section) => /\bexample\b/i.test(section.id);

  return {
    contextBucket: bucketContext(estimatedInputTokens),
    toolCountBucket: bucketToolCount(toolCount),
    historyDepth: bucketHistory(turnCount),
    outputMode,
    hasExamples: ir.sections.some(mentionsExample),
  };
}
|
|
487
|
+
/**
 * Estimate the total input tokens of a request: section texts, tool
 * definitions, history messages, the current turn (content plus 4), and a
 * fixed overhead of 6.
 *
 * @param {object} ir - Prompt IR.
 * @returns {number} Estimated input tokens.
 */
function estimateInputTokens(ir) {
  let sectionTokens = 0;
  for (const section of ir.sections) {
    sectionTokens += countTokens(section.text);
  }
  let toolTokens = 0;
  for (const tool of ir.tools ?? []) {
    toolTokens += countToolTokens(tool);
  }
  const historyTokens = countMessagesTokens(ir.history ?? []);
  const turn = ir.currentTurn;
  const turnTokens = turn ? countTokens(turn.content) + 4 : 0;
  return sectionTokens + toolTokens + historyTokens + turnTokens + 6;
}
|
|
494
|
+
/**
 * Hash a string with the multiply-by-33 rolling scheme (seed 5381),
 * wrapping to 32 bits at each step, and render the unsigned result in
 * base 36.
 *
 * @param {string} s - Input text.
 * @returns {string} Base-36 digest.
 */
function simpleHash(s) {
  let acc = 5381;
  for (let pos = 0; pos < s.length; pos += 1) {
    // acc * 33 is the same as (acc << 5) + acc before the 32-bit wrap.
    acc = (acc * 33 + s.charCodeAt(pos)) | 0;
  }
  const unsigned = acc >>> 0;
  return unsigned.toString(36);
}
|
|
501
|
+
|
|
502
|
+
// src/lower.ts
|
|
503
|
+
/**
 * Lower the IR into a provider-specific request by dispatching on
 * `profile.provider`.
 *
 * @param {object} ir - Prompt IR.
 * @param {object} profile - Model profile; `provider` selects the lowerer.
 * @param {object} [hints] - Lowering hints; not forwarded to the DeepSeek
 *   lowerer.
 * @returns {object} Whatever the selected provider lowerer returns.
 * @throws {Error} When no lowerer exists for the provider.
 */
function lower(ir, profile, hints = {}) {
  const { provider } = profile;
  if (provider === "anthropic") {
    return lowerAnthropic(ir, profile, hints);
  }
  if (provider === "google") {
    return lowerGoogle(ir, profile, hints);
  }
  if (provider === "openai") {
    return lowerOpenAI(ir, profile, hints);
  }
  if (provider === "deepseek") {
    return lowerDeepSeek(ir, profile);
  }
  throw new Error(`No lowering implementation for provider: ${profile.provider}`);
}
|
|
517
|
+
/**
 * Lower the IR into an Anthropic request plus cache diagnostics.
 *
 * System-role history entries are excluded from the message list; the
 * history cache marker index is resolved against that filtered list.
 *
 * Change from the previous version: an empty tool list now leaves `tools`
 * undefined instead of sending `tools: []`, matching the Google and OpenAI
 * lowerers in this file (relevant when an earlier pass has emptied the
 * tool list).
 *
 * @param {object} ir - Prompt IR.
 * @param {object} profile - Model profile; reads `id`, `maxOutputTokens`,
 *   `costInputPer1m` and `lowering.cache`.
 * @param {object} hints - Lowering hints; `forceTerseOutput` caps
 *   `max_tokens` at 200.
 * @returns {{request: object, diagnostics: object}}
 */
function lowerAnthropic(ir, profile, hints) {
  const systemBlocks = buildAnthropicSystemBlocks(ir.sections, profile);
  const history = (ir.history ?? []).filter((m) => m.role !== "system");
  const policy = ir.historyCachePolicy;
  const markIndex = resolveHistoryMarkIndex(history.length, policy);
  const messages = buildAnthropicMessages(history, ir.currentTurn, markIndex);
  const tools = ir.tools && ir.tools.length > 0 ? toAnthropicTools(ir.tools) : void 0;
  const cacheableTokens = computeCacheableTokens(systemBlocks);
  const historyCacheableTokens = markIndex >= 0 ? sumHistoryTokens(history, markIndex) : 0;
  const totalCacheableTokens = cacheableTokens + historyCacheableTokens;
  // `discount` is applied as the fraction of the input price still paid for
  // cached tokens (default 0.1), so the saving is the remaining share.
  const cacheSavings = totalCacheableTokens / 1e6 * profile.costInputPer1m * (1 - (profile.lowering.cache.discount ?? 0.1));
  return {
    request: {
      provider: "anthropic",
      model: profile.id,
      system: systemBlocks,
      messages,
      tools,
      // alpha.8: trust profile.maxOutputTokens (the former 4096 clamp was
      // removed). The profile is the single source of truth; consumers
      // wanting a tighter budget can pass
      // providerOverrides.anthropic.max_tokens explicitly.
      max_tokens: hints.forceTerseOutput ? 200 : profile.maxOutputTokens,
    },
    diagnostics: {
      cacheableTokens,
      historyCacheableTokens,
      estimatedCacheSavingsUsd: cacheSavings,
    },
  };
}
|
|
548
|
+
/**
 * Build Anthropic system blocks from prompt sections.
 *
 * All cacheable sections are merged (joined by a blank line) into one
 * leading text block, which carries an ephemeral `cache_control` marker
 * only when its token count reaches the profile's cache minimum (default
 * 1024). Each non-cacheable section follows as its own text block.
 *
 * @param {object[]} sections - Sections with `text` and optional `cacheable`.
 * @param {object} profile - Reads `lowering.cache.minTokens`.
 * @returns {object[]} System blocks; empty when there are no sections.
 */
function buildAnthropicSystemBlocks(sections, profile) {
  if (sections.length === 0) {
    return [];
  }
  const ordered = sortSections(sections);
  const minTokens = profile.lowering.cache.minTokens ?? 1024;
  const stable = ordered.filter((section) => section.cacheable);
  const volatile = ordered.filter((section) => !section.cacheable);

  const blocks = [];
  if (stable.length > 0) {
    const text = stable.map((section) => section.text).join("\n\n");
    const meetsMinimum = countTokens(text) >= minTokens;
    blocks.push(
      meetsMinimum
        ? { type: "text", text, cache_control: { type: "ephemeral" } }
        : { type: "text", text }
    );
  }
  for (const section of volatile) {
    blocks.push({ type: "text", text: section.text });
  }
  return blocks;
}
|
|
576
|
+
/**
 * Convert history plus the current turn into Anthropic messages.
 *
 * System-role entries are skipped. A message's `parts` take precedence
 * over its `content`. The history entry at `markIndex` gets its content
 * wrapped with a cache marker.
 *
 * @param {object[]} history - Prior messages.
 * @param {object} [currentTurn] - Message being sent now.
 * @param {number} markIndex - Index into `history` to mark, or -1 for none.
 * @returns {Array<{role: string, content: unknown}>}
 */
function buildAnthropicMessages(history, currentTurn, markIndex) {
  const messages = [];
  for (const [position, message] of history.entries()) {
    if (message.role === "system") continue;
    const content =
      position === markIndex
        ? attachAnthropicCacheControl(message)
        : message.parts ?? message.content;
    messages.push({ role: message.role, content });
  }
  if (currentTurn && currentTurn.role !== "system") {
    const { role, parts, content } = currentTurn;
    messages.push({ role, content: parts ?? content });
  }
  return messages;
}
|
|
592
|
+
/**
 * Produce message content carrying an ephemeral cache marker.
 *
 * With a non-empty `parts` array the marker goes on a copy of the last
 * part (earlier parts are reused as-is); otherwise the string `content` is
 * wrapped in a single marked text block.
 *
 * @param {{parts?: object[], content?: string}} m - Source message.
 * @returns {object[]} Content blocks, the last one marked.
 */
function attachAnthropicCacheControl(m) {
  const { parts } = m;
  if (!Array.isArray(parts) || parts.length === 0) {
    return [{ type: "text", text: m.content, cache_control: { type: "ephemeral" } }];
  }
  const leading = parts.slice(0, -1);
  const markedLast = { ...parts[parts.length - 1], cache_control: { type: "ephemeral" } };
  return [...leading, markedLast];
}
|
|
610
|
+
/**
 * Decide which history index should carry the cache marker.
 *
 * @param {number} historyLen - Number of history messages.
 * @param {{strategy: string, suffix?: number}} [policy] - Cache policy.
 *   "none" (or no policy) disables marking; "all-but-latest" marks the last
 *   history entry; any other strategy marks the entry `suffix` positions
 *   before the last one.
 * @returns {number} Index to mark, or -1 when nothing should be marked.
 */
function resolveHistoryMarkIndex(historyLen, policy) {
  const disabled = !policy || policy.strategy === "none";
  if (disabled || historyLen === 0) {
    return -1;
  }
  if (policy.strategy === "all-but-latest") {
    return historyLen - 1;
  }
  const candidate = historyLen - 1 - policy.suffix;
  return candidate >= 0 ? candidate : -1;
}
|
|
619
|
+
/**
 * Sum the tokens of history messages from index 0 through `throughIndex`
 * (inclusive), skipping system-role entries.
 *
 * For a message with a `parts` array only parts with a string `text` are
 * counted; otherwise a string `content` is counted.
 *
 * @param {object[]} history - Messages to scan.
 * @param {number} throughIndex - Last index to include.
 * @returns {number} Token total.
 */
function sumHistoryTokens(history, throughIndex) {
  let sum = 0;
  for (let pos = 0; pos <= throughIndex && pos < history.length; pos += 1) {
    const message = history[pos];
    if (message.role === "system") continue;
    if (Array.isArray(message.parts)) {
      for (const part of message.parts) {
        if (typeof part.text === "string") sum += countTokens(part.text);
      }
      continue;
    }
    if (typeof message.content === "string") {
      sum += countTokens(message.content);
    }
  }
  return sum;
}
|
|
634
|
+
/**
 * Convert tool definitions to Anthropic's shape.
 *
 * A missing description becomes an empty string and missing parameters
 * become an empty object schema.
 *
 * @param {Array<{name: string, description?: string, parameters?: object}>} tools
 * @returns {Array<{name: string, description: string, input_schema: object}>}
 */
function toAnthropicTools(tools) {
  return tools.map(({ name, description, parameters }) => {
    const input_schema = parameters ?? { type: "object", properties: {} };
    return { name, description: description ?? "", input_schema };
  });
}
|
|
641
|
+
/**
 * Sum the token counts of the blocks that carry a `cache_control` marker.
 *
 * @param {Iterable<{text: string, cache_control?: object}>} blocks
 * @returns {number} Tokens in marked blocks.
 */
function computeCacheableTokens(blocks) {
  let sum = 0;
  for (const block of blocks) {
    if (!block.cache_control) continue;
    sum += countTokens(block.text);
  }
  return sum;
}
|
|
648
|
+
/**
 * Lower the IR into a Google request plus cache diagnostics.
 *
 * All sections are joined into one system instruction. `generationConfig`
 * is only attached when at least one field was set: a zero thinking budget
 * (when hinted and the profile declares a thinking field), a 200-token
 * output cap (terse hint), or a JSON response MIME type (structured output
 * on a profile with native support). Cacheable section tokens are reported
 * only when they reach the profile's cache minimum (default 4096).
 *
 * @param {object} ir - Prompt IR.
 * @param {object} profile - Model profile.
 * @param {object} hints - Lowering hints.
 * @returns {{request: object, diagnostics: object}}
 */
function lowerGoogle(ir, profile, hints) {
  const ordered = sortSections(ir.sections);
  const systemText = ordered.map((section) => section.text).join("\n\n");

  const generationConfig = {};
  const thinking = profile.lowering.thinking;
  if (hints.forceThinkingZero && thinking) {
    // The profile's field path is rooted at the request; strip the prefix
    // because we write into generationConfig directly.
    const fieldPath = thinking.field.replace(/^generationConfig\./, "");
    setNestedField(generationConfig, fieldPath, 0);
  }
  if (hints.forceTerseOutput) {
    generationConfig.maxOutputTokens = 200;
  }
  if (ir.constraints?.structuredOutput && profile.structuredOutput === "native") {
    generationConfig.responseMimeType = "application/json";
  }
  const hasGenerationConfig = Object.keys(generationConfig).length > 0;

  const contents = buildGoogleContents(ir.history ?? [], ir.currentTurn);
  const hasTools = ir.tools && ir.tools.length > 0;
  const tools = hasTools ? toGoogleTools(ir.tools) : void 0;

  let cacheableTokens = 0;
  for (const section of ordered.filter((s) => s.cacheable)) {
    cacheableTokens += countTokens(section.text);
  }
  const cacheSettings = profile.lowering.cache;
  const minTokens = cacheSettings.minTokens ?? 4096;
  const meetsMin = cacheableTokens >= minTokens;
  const cacheSavings = meetsMin ? cacheableTokens / 1e6 * profile.costInputPer1m * (1 - (cacheSettings.discount ?? 0.25)) : 0;

  const history = (ir.history ?? []).filter((m) => m.role !== "system");
  const histMarkIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
  const historyCacheableTokens = histMarkIndex >= 0 ? sumHistoryTokens(history, histMarkIndex) : 0;

  const systemInstruction = systemText
    ? { role: "system", parts: [{ text: systemText }] }
    : void 0;

  return {
    request: {
      provider: "google",
      model: profile.id,
      systemInstruction,
      contents,
      tools,
      generationConfig: hasGenerationConfig ? generationConfig : void 0,
    },
    diagnostics: {
      cacheableTokens: meetsMin ? cacheableTokens : 0,
      historyCacheableTokens,
      estimatedCacheSavingsUsd: cacheSavings,
    },
  };
}
|
|
687
|
+
/**
 * Convert history plus the current turn into Google `contents` entries.
 *
 * System-role entries are skipped, the "assistant" role is renamed to
 * "model", and a message without `parts` gets a single text part built
 * from its `content`.
 *
 * @param {object[]} history - Prior messages.
 * @param {object} [currentTurn] - Message being sent now.
 * @returns {Array<{role: string, parts: object[]}>}
 */
function buildGoogleContents(history, currentTurn) {
  const toContent = (message) => ({
    role: message.role === "assistant" ? "model" : message.role,
    parts: message.parts ?? [{ text: message.content }],
  });
  const contents = history.filter((message) => message.role !== "system").map(toContent);
  if (currentTurn && currentTurn.role !== "system") {
    contents.push(toContent(currentTurn));
  }
  return contents;
}
|
|
704
|
+
/**
 * Converts tool definitions into the Gemini `tools` payload: a one-element
 * array whose single entry carries every function declaration.
 *
 * @param {Array<object>} tools - Tool definitions (`name`, optional `description`, optional `parameters`).
 * @returns {Array<{functionDeclarations: Array<object>}>} Gemini tools payload.
 */
function toGoogleTools(tools) {
  const functionDeclarations = tools.map(({ name, description, parameters }) => ({
    name,
    description: description ?? "",
    // Tools declared without a schema get an empty object schema.
    parameters: parameters ?? { type: "object", properties: {} }
  }));
  return [{ functionDeclarations }];
}
|
|
715
|
+
/**
 * Lowers the intermediate representation into an OpenAI chat request.
 *
 * The weight-sorted sections are joined into one leading message whose role
 * is "developer" when `profile.systemPromptMode` is "as_developer" and
 * "system" otherwise. Non-system history and the current turn follow.
 *
 * @param {object} ir - Intermediate representation (`sections`, optional `history`,
 *   `currentTurn`, `tools`, `constraints`, `historyCachePolicy`).
 * @param {object} profile - Model profile; `id` and `systemPromptMode` are read.
 * @param {object} hints - Lowering hints; `forceTerseOutput` selects `reasoning_effort: "low"`.
 * @returns {{request: object, diagnostics: object}} Request payload plus cache diagnostics.
 */
function lowerOpenAI(ir, profile, hints) {
  const systemText = sortSections(ir.sections).map((section) => section.text).join("\n\n");
  const systemRole = profile.systemPromptMode === "as_developer" ? "developer" : "system";
  const history = (ir.history ?? []).filter((msg) => msg.role !== "system");
  const asMessage = (msg) => ({ role: msg.role, content: msg.parts ?? msg.content });

  const messages = [];
  if (systemText) {
    messages.push({ role: systemRole, content: systemText });
  }
  messages.push(...history.map(asMessage));
  const turn = ir.currentTurn;
  if (turn && turn.role !== "system") {
    messages.push(asMessage(turn));
  }

  const markIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
  const historyCacheableTokens = markIndex >= 0 ? sumHistoryTokens(history, markIndex) : 0;

  const request = {
    provider: "openai",
    model: profile.id,
    messages,
    tools: ir.tools && ir.tools.length > 0 ? toOpenAITools(ir.tools) : void 0,
    response_format: ir.constraints?.structuredOutput ? { type: "json_object" } : void 0,
    reasoning_effort: hints.forceTerseOutput ? "low" : void 0
  };
  // Section-level cache figures are always reported as zero by this lowering.
  const diagnostics = {
    cacheableTokens: 0,
    historyCacheableTokens,
    estimatedCacheSavingsUsd: 0
  };
  return { request, diagnostics };
}
|
|
749
|
+
/**
 * Converts tool definitions into OpenAI function-tool entries.
 *
 * @param {Array<object>} tools - Tool definitions (`name`, optional `description`, optional `parameters`).
 * @returns {Array<{type: string, function: object}>} One `{ type: "function" }` entry per tool.
 */
function toOpenAITools(tools) {
  const describe = ({ name, description, parameters }) => ({
    name,
    description: description ?? "",
    // Tools declared without a schema get an empty object schema.
    parameters: parameters ?? { type: "object", properties: {} }
  });
  return tools.map((tool) => ({ type: "function", function: describe(tool) }));
}
|
|
759
|
+
/**
 * Lowers the intermediate representation into a DeepSeek chat request.
 *
 * The weight-sorted sections are joined into a single leading "system"
 * message, followed by the non-system history and the current turn.
 *
 * @param {object} ir - Intermediate representation (`sections`, optional `history`,
 *   `currentTurn`, `tools`, `historyCachePolicy`).
 * @param {object} profile - Model profile; only `id` is read.
 * @returns {{request: object, diagnostics: object}} Request payload plus cache diagnostics.
 */
function lowerDeepSeek(ir, profile) {
  const systemText = sortSections(ir.sections).map((section) => section.text).join("\n\n");
  const history = (ir.history ?? []).filter((msg) => msg.role !== "system");
  const asMessage = (msg) => ({ role: msg.role, content: msg.parts ?? msg.content });

  const messages = systemText ? [{ role: "system", content: systemText }] : [];
  for (const msg of history) {
    messages.push(asMessage(msg));
  }
  const turn = ir.currentTurn;
  if (turn && turn.role !== "system") {
    messages.push(asMessage(turn));
  }

  const markIndex = resolveHistoryMarkIndex(history.length, ir.historyCachePolicy);
  const historyCacheableTokens = markIndex >= 0 ? sumHistoryTokens(history, markIndex) : 0;

  const asFunctionTool = (tool) => ({
    type: "function",
    function: {
      name: tool.name,
      description: tool.description ?? "",
      parameters: tool.parameters ?? { type: "object", properties: {} }
    }
  });
  // NOTE(review): only the FIRST tool is forwarded; any others are dropped
  // here without a diagnostic. Confirm this truncation is intended.
  const tools = ir.tools && ir.tools.length > 0 ? ir.tools.slice(0, 1).map(asFunctionTool) : void 0;

  return {
    request: {
      provider: "deepseek",
      model: profile.id,
      messages,
      tools
    },
    // Section-level cache figures are always reported as zero by this lowering.
    diagnostics: {
      cacheableTokens: 0,
      historyCacheableTokens,
      estimatedCacheSavingsUsd: 0
    }
  };
}
|
|
797
|
+
/**
 * Returns a copy of `sections` ordered by ascending `weight`.
 *
 * Sections without a weight are treated as weight 100. The input is not
 * mutated, and ties keep their original relative order because
 * `Array.prototype.sort` is stable.
 *
 * @param {Iterable<object>} sections - Prompt sections, each with an optional `weight`.
 * @returns {Array<object>} New array of the same section objects, sorted.
 */
function sortSections(sections) {
  const weightOf = (section) => section.weight ?? 100;
  const sorted = [...sections];
  sorted.sort((left, right) => weightOf(left) - weightOf(right));
  return sorted;
}
|
|
804
|
+
/**
 * Assigns `value` at a dot-separated `path` inside `obj`, creating
 * intermediate objects along the way.
 *
 * Any intermediate segment that is missing, `null`, or not an object is
 * replaced with a fresh `{}` before descending. `obj` is mutated in place.
 *
 * @param {object} obj - Object to mutate.
 * @param {string} path - Dot-separated key path, e.g. "generationConfig.thinkingConfig.thinkingBudget".
 * @param {*} value - Value stored at the final path segment.
 * @returns {void}
 * @throws {Error} When any path segment is "__proto__".
 */
function setNestedField(obj, path, value) {
  const keys = path.split(".");
  // "__proto__" is always `in` an ordinary object and resolves to an object,
  // so walking through it would land on Object.prototype and the final write
  // would leak onto every object. Refuse the path instead.
  if (keys.includes("__proto__")) {
    throw new Error(`Refusing to set unsafe path "${path}": "__proto__" segment is not allowed`);
  }
  const lastKey = keys.pop();
  let cursor = obj;
  for (const key of keys) {
    const next = cursor[key];
    if (!(key in cursor) || typeof next !== "object" || next === null) {
      cursor[key] = {};
    }
    cursor = cursor[key];
  }
  cursor[lastKey] = value;
}
|
|
816
|
+
|
|
817
|
+
// src/profiles.ts
|
|
818
|
+
// Lowering settings shared by the Anthropic profiles, which reference this
// object directly through their `lowering` field.
var ANTHROPIC_LOWERING_BASE = {
  system: { mode: "inline" },
  cache: {
    strategy: "cache_control",
    minTokens: 1024,
    // NOTE(review): presumably the fraction of the normal input price paid
    // for cached tokens — confirm against the Anthropic lowering pass.
    discount: 0.1,
    ttlSeconds: 5 * 60
  },
  tools: { format: "anthropic" }
};
|
|
828
|
+
// Lowering settings shared by the Google profiles, which spread this object
// into their own `lowering` field and may override `cache` or add `thinking`.
var GOOGLE_LOWERING_BASE = {
  system: { mode: "separate", field: "systemInstruction" },
  cache: {
    strategy: "cachedContent",
    // Cacheable sections below this token count are reported as not cached.
    minTokens: 4096,
    // Fraction of the input price still paid for cached tokens; the Google
    // lowering estimates savings as (1 - discount) of the input cost.
    discount: 0.25,
    ttlSeconds: 60 * 60
  },
  tools: { format: "google" }
};
|
|
838
|
+
// Registry of model profiles, one object per canonical model id. Each entry
// records hard limits (context, output, tools), pricing per 1M tokens,
// `cliffs` (metric/threshold/action guards), provider `lowering` settings,
// `recovery` rules keyed by failure signal, and a per-archetype score table
// (`archetypePerf`). Entries are grouped by provider.
var PROFILES_RAW = [
  // ── Anthropic ──
  {
    id: "claude-opus-4-7",
    verifiedAgainstDocs: "2026-05-08",
    provider: "anthropic",
    status: "current",
    maxContextTokens: 1000000,
    maxOutputTokens: 128000,
    maxTools: 64,
    parallelToolCalls: true,
    structuredOutput: "grammar",
    systemPromptMode: "inline",
    streaming: true,
    cliffs: [],
    costInputPer1m: 5,
    costOutputPer1m: 25,
    lowering: ANTHROPIC_LOWERING_BASE,
    recovery: [
      {
        signal: "rate_limit",
        action: "escalate",
        reason: "429 from Anthropic \u2014 escalate to fallback chain"
      },
      {
        signal: "model_not_found",
        action: "escalate",
        reason: "Model deprecated/renamed \u2014 escalate (L-061)"
      }
    ],
    strengths: ["reasoning", "agentic_coding", "long_context", "reliable_tool_use", "structured_output"],
    weaknesses: ["cost", "latency"],
    notes: "Frontier (2026-05). Step-change improvement over 4.6 in agentic coding. Adaptive thinking only \u2014 no extended-thinking toggle. 1M context, 128k max output.",
    // Frontier perf. Drops on archetypes where parallel-tool throughput
    // (hunt) or low-budget cost-sensitivity (classify/summarize) matters
    // more than reasoning depth.
    archetypePerf: {
      critique: 10,
      plan: 10,
      generate: 9,
      ask: 9,
      extract: 9,
      transform: 9,
      hunt: 8, // strong but Flash dominates parallel tool throughput
      summarize: 8, // overkill for tolerant archetype; cost-out of frontier
      classify: 8 // overkill; brain-validated cheaper models cover this
    }
  },
  {
    id: "claude-opus-4-6",
    verifiedAgainstDocs: "2026-05-08",
    provider: "anthropic",
    status: "legacy",
    maxContextTokens: 1000000,
    maxOutputTokens: 128000,
    maxTools: 64,
    parallelToolCalls: true,
    structuredOutput: "grammar",
    systemPromptMode: "inline",
    streaming: true,
    cliffs: [],
    costInputPer1m: 5,
    costOutputPer1m: 25,
    lowering: ANTHROPIC_LOWERING_BASE,
    recovery: [
      {
        signal: "rate_limit",
        action: "escalate",
        reason: "429 from Anthropic \u2014 escalate to fallback chain"
      },
      {
        signal: "model_not_found",
        action: "escalate",
        reason: "Model deprecated/renamed \u2014 escalate (L-061)"
      }
    ],
    strengths: ["reasoning", "long_context", "reliable_tool_use", "structured_output", "extended_thinking"],
    weaknesses: ["cost", "latency"],
    notes: "Predecessor to 4.7. Still current in Anthropic legacy table. Same pricing as 4.7 \u2014 choose 4.7 unless you need extended-thinking budget control (4.7 is adaptive-only).",
    // One notch below 4.7 across the board — extended-thinking edge does
    // not flip any archetype ranking. Legacy: chains should prefer 4.7.
    archetypePerf: {
      critique: 9,
      plan: 9,
      generate: 9,
      ask: 9,
      extract: 9,
      transform: 9,
      hunt: 7,
      summarize: 8,
      classify: 8
    }
  },
  {
    id: "claude-sonnet-4-6",
    verifiedAgainstDocs: "2026-05-08",
    provider: "anthropic",
    status: "current",
    maxContextTokens: 1000000,
    maxOutputTokens: 64000,
    maxTools: 64,
    parallelToolCalls: true,
    structuredOutput: "grammar",
    systemPromptMode: "inline",
    streaming: true,
    cliffs: [],
    costInputPer1m: 3,
    costOutputPer1m: 15,
    lowering: ANTHROPIC_LOWERING_BASE,
    recovery: [
      { signal: "rate_limit", action: "escalate", reason: "429 \u2014 escalate" },
      { signal: "model_not_found", action: "escalate", reason: "Deprecated \u2014 escalate (L-061)" }
    ],
    strengths: ["quality", "tool_use", "long_context", "cache_friendly", "extended_thinking"],
    weaknesses: [],
    notes: "Workhorse. Best price/quality for most multi-turn agentic work. 1M context, 64k max output.",
    // Master plan §6.2 anchor. Tier 0 for plan/generate/ask/extract/transform
    // in starter chains; tier 1 cross-provider for hunt/summarize/classify.
    archetypePerf: {
      ask: 9,
      generate: 9,
      plan: 9,
      critique: 9,
      extract: 9,
      transform: 9,
      hunt: 7, // strong but Flash beats on parallel tool throughput
      summarize: 8, // overkill for tolerant archetype
      classify: 8 // overkill
    }
  },
  {
    id: "claude-haiku-4-5",
    verifiedAgainstDocs: "2026-05-08",
    provider: "anthropic",
    status: "current",
    maxContextTokens: 200000,
    maxOutputTokens: 64000,
    maxTools: 32,
    parallelToolCalls: true,
    structuredOutput: "grammar",
    systemPromptMode: "inline",
    streaming: true,
    cliffs: [
      {
        metric: "tool_count",
        threshold: 16,
        action: "drop_to_top_relevant",
        reason: "Haiku reliability degrades above ~16 tools"
      }
    ],
    costInputPer1m: 1,
    costOutputPer1m: 5,
    lowering: ANTHROPIC_LOWERING_BASE,
    recovery: [
      { signal: "rate_limit", action: "escalate", reason: "429 \u2014 escalate to Sonnet" }
    ],
    strengths: ["speed", "cost", "classification", "cache_friendly", "extended_thinking"],
    weaknesses: ["complex_reasoning", "large_tool_sets"],
    notes: "Cheapest Anthropic. Great for classify, summarize, ask shapes. 200k context, 64k max output. API alias `claude-haiku-4-5` resolves to dated snapshot `claude-haiku-4-5-20251001`.",
    // Tier 1 cross-provider anchor for short-output chains (classify/
    // summarize/extract/transform). Falls off on plan/critique where
    // reasoning depth matters; competes with Pro on cost+latency.
    archetypePerf: {
      classify: 8,
      summarize: 8,
      ask: 7,
      transform: 7,
      extract: 7,
      hunt: 6, // tool reliability drops at 16 — cliff guard fires
      generate: 6,
      plan: 5,
      critique: 4 // reasoning depth gap vs Sonnet/Opus
    }
  },
  // ── Google ──
  {
    id: "gemini-2.5-flash",
    verifiedAgainstDocs: "2026-05-08",
    provider: "google",
    status: "current",
    maxContextTokens: 1048576,
    maxOutputTokens: 65535,
    maxTools: 128,
    parallelToolCalls: true,
    structuredOutput: "native",
    systemPromptMode: "separate",
    streaming: true,
    cliffs: [
      {
        metric: "input_tokens",
        threshold: 8000,
        action: "downgrade_quality_warning",
        reason: "Quality degrades significantly above ~8K context tokens"
      },
      {
        metric: "tool_count",
        threshold: 20,
        action: "drop_to_top_relevant",
        reason: "Tool reliability drops above ~20 tools (despite 128 hard limit)"
      },
      {
        metric: "thinking_with_short_output",
        threshold: 1,
        action: "force_thinking_budget_zero",
        reason: "Thinking tokens consume maxOutputTokens \u2014 empty response if drained"
      },
      {
        // s11 trust artifact (2026-05-02): brain showed 5/5 empty rate on
        // tt-intelligence/summarize/gemini-2.5-flash with tools offered.
        // v1's disable_thinking_for_short_output already fired and didn't
        // help — disabling thinking is necessary but not sufficient. Tools
        // present + summarize intent confuses Flash into a no-output state
        // (likely tool-decision purgatory). Strip tools entirely for this
        // archetype on this model.
        metric: "tool_count",
        threshold: 1,
        whenIntent: "summarize",
        action: "strip_tools",
        reason: "Gemini Flash returns empty when summarize intent has tools offered (5/5 empty rate observed in v1 prod 2026-04-19, replayed into v2 brain 2026-04-29)"
      }
    ],
    costInputPer1m: 0.3,
    costOutputPer1m: 2.5,
    lowering: {
      ...GOOGLE_LOWERING_BASE,
      thinking: { field: "generationConfig.thinkingConfig.thinkingBudget", default: "auto" }
    },
    recovery: [
      {
        signal: "empty_response_after_tool",
        action: "retry_with_params",
        retryParams: { "generationConfig.thinkingConfig.thinkingBudget": 0 },
        maxRetries: 1,
        reason: "Known: empty after tool result \u2014 retry with thinking off"
      },
      {
        signal: "empty_response",
        action: "retry_with_params",
        retryParams: { "generationConfig.thinkingConfig.thinkingBudget": 0 },
        maxRetries: 1,
        reason: "Empty response \u2014 try with thinking off"
      },
      {
        signal: "malformed_function_call",
        action: "escalate",
        reason: "MALFORMED_FUNCTION_CALL maps to stop \u2014 escalate to next target"
      }
    ],
    strengths: ["speed", "volume", "classification", "1m_context", "cost"],
    weaknesses: ["complex_schemas", "large_tool_sets", "high_context_quality"],
    notes: "Fast and cheap with 1M context. Quality cliffs at 8K context and 20 tools \u2014 guard with cliffs.",
    // Master plan §6.2 anchor. Tier 0 for hunt (parallel tool throughput
    // 15-75 calls/step beats Sonnet — L-040), summarize, classify.
    archetypePerf: {
      hunt: 9, // L-040: parallel tool throughput 15-75/step
      classify: 7, // brain-validated, 218 rows
      summarize: 7, // brain-validated; cliff strips tools when present
      transform: 7,
      ask: 7,
      generate: 6,
      plan: 5,
      extract: 6, // alpha.8 MAX_TOKENS history on structured output
      critique: 4 // reasoning shallower than Sonnet/Opus
    }
  },
  {
    // ── Gemini 2.5 Flash-Lite ──
    // Onboarded 2026-05-13 (s22) after the model-release watcher surfaced
    // it as a UNREGISTERED + NEW candidate. Released by Google July 2025,
    // stable. Positioned BELOW Flash on the cost/perf frontier:
    //   input  $0.10/M (Flash $0.30/M) — 3× cheaper
    //   output $0.40/M (Flash $2.50/M) — 6× cheaper
    //   cache  $0.01/M — 1/10 of input (vs Flash 0.25 discount)
    // Cliffs are HYPOTHESIZED from Flash's known failure modes — Flash-Lite
    // is a smaller sibling, so we inherit Flash's cliffs at equal-or-tighter
    // thresholds. The brain will validate/relax these as evidence accumulates
    // per (archetype, model) tuple. Currently ZERO brain rows for this model.
    id: "gemini-2.5-flash-lite",
    verifiedAgainstDocs: "2026-05-13",
    provider: "google",
    status: "current",
    maxContextTokens: 1048576,
    maxOutputTokens: 65536,
    maxTools: 128,
    parallelToolCalls: true,
    structuredOutput: "native",
    systemPromptMode: "separate",
    streaming: true,
    cliffs: [
      {
        metric: "input_tokens",
        threshold: 8000,
        action: "downgrade_quality_warning",
        reason: "Inherited from Flash: quality degrades above ~8K. Smaller model \u2014 likely degrades faster. Re-tune from brain after n\u226520."
      },
      {
        metric: "tool_count",
        threshold: 10,
        action: "drop_to_top_relevant",
        reason: "Conservative: Flash drops at 20, Flash-Lite is smaller \u2014 assume tighter ceiling until brain proves otherwise."
      },
      {
        metric: "thinking_with_short_output",
        threshold: 1,
        action: "force_thinking_budget_zero",
        reason: "Thinking enabled per Google API (thinking: true). Same drain risk as Flash \u2014 thinking tokens consume maxOutputTokens."
      },
      {
        // Strong prior: Flash hit 5/5 empty rate on summarize+tools (s11
        // trust artifact, kgauto commit 3872832). Flash-Lite shares the
        // same architectural family — almost certainly inherits this cliff.
        // Ship the guard preemptively; brain telemetry confirms or relaxes.
        metric: "tool_count",
        threshold: 1,
        whenIntent: "summarize",
        action: "strip_tools",
        reason: "Inherited from Flash s11 cliff: summarize+tools \u2192 empty response. Preemptive guard until brain evidence on Flash-Lite specifically."
      }
    ],
    costInputPer1m: 0.1,
    costOutputPer1m: 0.4,
    lowering: {
      ...GOOGLE_LOWERING_BASE,
      // Cache discount 10× (vs Flash 4×) — Google's spec is $0.01/M cache vs
      // $0.10/M input. Material for repeat-prompt workloads (classify shape).
      cache: { ...GOOGLE_LOWERING_BASE.cache, discount: 0.1 },
      thinking: { field: "generationConfig.thinkingConfig.thinkingBudget", default: "auto" }
    },
    recovery: [
      {
        signal: "empty_response_after_tool",
        action: "retry_with_params",
        retryParams: { "generationConfig.thinkingConfig.thinkingBudget": 0 },
        maxRetries: 1,
        reason: "Known on Flash family: empty after tool result \u2014 retry with thinking off."
      },
      {
        signal: "empty_response",
        action: "retry_with_params",
        retryParams: { "generationConfig.thinkingConfig.thinkingBudget": 0 },
        maxRetries: 1,
        reason: "Empty response \u2014 try with thinking off."
      },
      {
        signal: "malformed_function_call",
        action: "escalate",
        reason: "MALFORMED_FUNCTION_CALL maps to stop \u2014 escalate to next target."
      }
    ],
    strengths: ["lowest_cost", "speed", "volume", "classification", "summarize", "1m_context", "cache_friendly"],
    weaknesses: ["complex_reasoning", "large_tool_sets", "complex_schemas", "structured_output_unproven", "long_context_quality"],
    notes: "Bottom-frontier anchor on cost: $0.10/$0.40 per 1M tokens, 1M context, 65K max output. Released July 2025 (stable). Positioned for classify / summarize / transform archetypes where quality bar is forgiving. Cliffs inherited from Flash at equal-or-tighter thresholds \u2014 re-tune per (archetype) once brain has n\u226520 rows. Alpha.8 contract layer handles MAX_TOKENS-on-structured-output via fallback chain, so structuredOutput=native is safe to declare even though Flash had alpha.8 history. Cache discount in spec: $0.01/M = 1/10 of input (richer than Flash 25%) \u2014 meaningful for repeat-prompt workloads.",
    // Tier 3 emergency floor for summarize/classify chains. ZERO brain
    // rows — all values are starter hypotheses anchored to "smaller
    // sibling of Flash, at-or-below Flash perf on every archetype." The
    // first 50 brain rows per archetype will validate or relax these.
    archetypePerf: {
      classify: 6, // starter hypothesis — verify (Flash is 7, lite likely ≤)
      summarize: 6, // starter hypothesis — verify; cliff strips tools
      transform: 6, // starter hypothesis — verify
      ask: 5,
      hunt: 5,
      generate: 4,
      extract: 4,
      plan: 3,
      critique: 3
    }
  },
  {
    id: "gemini-2.5-pro",
    verifiedAgainstDocs: "2026-05-08",
    provider: "google",
    status: "current",
    maxContextTokens: 1048576,
    maxOutputTokens: 65535,
    maxTools: 128,
    parallelToolCalls: true,
    structuredOutput: "native",
    systemPromptMode: "separate",
    streaming: true,
    cliffs: [
      {
        metric: "input_tokens",
        threshold: 200000,
        action: "downgrade_quality_warning",
        reason: "Pricing doubles above 200K: input $1.25\u2192$2.50/M, output $10\u2192$15/M"
      }
    ],
    costInputPer1m: 1.25,
    costOutputPer1m: 10,
    lowering: {
      ...GOOGLE_LOWERING_BASE,
      thinking: { field: "generationConfig.thinkingConfig.thinkingBudget", default: "auto" }
    },
    recovery: [
      {
        signal: "malformed_function_call",
        action: "escalate",
        reason: "MALFORMED_FUNCTION_CALL \u2014 escalate"
      }
    ],
    strengths: ["reasoning", "1m_context", "structured_output", "tool_use"],
    weaknesses: ["pricing_above_200k"],
    // Master plan §3.3 anchor: tier-2 cross-provider in almost every chain.
    // Sits on the frontier at perf-9 — close to Sonnet but cheaper input.
    archetypePerf: {
      critique: 9,
      plan: 9,
      ask: 8,
      generate: 8,
      extract: 8,
      transform: 8,
      hunt: 8, // tier 1 cross-provider for hunt chain
      summarize: 7,
      classify: 7
    }
  },
  {
    id: "gemini-3.1-pro-preview",
    verifiedAgainstDocs: "2026-05-08",
    provider: "google",
    status: "preview",
    maxContextTokens: 1048576,
    maxOutputTokens: 65535,
    maxTools: 128,
    parallelToolCalls: true,
    structuredOutput: "native",
    systemPromptMode: "separate",
    streaming: true,
    cliffs: [
      {
        metric: "input_tokens",
        threshold: 200000,
        action: "downgrade_quality_warning",
        reason: "Pricing doubles above 200K: input $2\u2192$4/M, output $12\u2192$18/M"
      }
    ],
    costInputPer1m: 2,
    costOutputPer1m: 12,
    lowering: {
      ...GOOGLE_LOWERING_BASE,
      cache: { ...GOOGLE_LOWERING_BASE.cache, discount: 0.1 },
      thinking: { field: "generationConfig.thinkingConfig.thinkingBudget", default: "auto" }
    },
    recovery: [
      {
        signal: "malformed_function_call",
        action: "escalate",
        reason: "MALFORMED_FUNCTION_CALL \u2014 escalate"
      }
    ],
    strengths: ["reasoning", "1m_context", "agentic_coding", "structured_output", "tool_use"],
    weaknesses: ["cost", "preview_status", "pricing_above_200k"],
    notes: "Frontier Gemini (preview, 2026-Q2). Step-change agentic coding per Google. Cache discount 10\xD7 (vs 4\xD7 for 2.5 Pro). Use status=preview to flag rollback path until GA.",
    // Frontier-Gemini preview — bumped one notch over 2.5 Pro on agentic
    // coding / reasoning per Google's release notes. Preview status:
    // chains should stay on 2.5 Pro until GA. Starter hypothesis.
    archetypePerf: {
      critique: 10, // Google claims step-change on reasoning
      plan: 10,
      ask: 9,
      generate: 9,
      extract: 9,
      transform: 8,
      hunt: 9, // step-change agentic per Google
      summarize: 8,
      classify: 7
    }
  },
  // ── DeepSeek ──
  // 2026-05-08 audit (L-073): DeepSeek's `deepseek-chat` was silently aliased
  // to `deepseek-v4-flash` non-thinking mode. Old kgauto profile claimed 64k
  // context + $0.27/$1.10 — actual is 1M context + $0.14/$0.28. Now modeled
  // as: V4-Flash + V4-Pro as canonical profiles; deepseek-chat and
  // deepseek-reasoner registered as aliases (see ALIASES below).
  {
    id: "deepseek-v4-flash",
    verifiedAgainstDocs: "2026-05-08",
    provider: "deepseek",
    status: "current",
    maxContextTokens: 1000000,
    maxOutputTokens: 384000,
    maxTools: 16,
    parallelToolCalls: false,
    structuredOutput: "native",
    systemPromptMode: "inline",
    streaming: true,
    cliffs: [
      {
        metric: "tool_count",
        threshold: 1,
        action: "drop_to_top_relevant",
        reason: "Sequential tool calls only \u2014 L-040"
      }
    ],
    costInputPer1m: 0.14,
    costOutputPer1m: 0.28,
    lowering: {
      system: { mode: "inline" },
      cache: { strategy: "unsupported" },
      tools: { format: "deepseek" }
    },
    recovery: [
      { signal: "rate_limit", action: "escalate", reason: "429 \u2014 escalate" }
    ],
    strengths: ["cost", "1m_context", "json_output", "code", "reasoning"],
    weaknesses: ["parallel_tools", "large_tool_sets"],
    notes: "Cheap workhorse. 1M context, 384k max output. Cache-hit input $0.0028/M (1/50\xD7 of miss). Aliased as `deepseek-chat` (non-thinking) and `deepseek-reasoner` (thinking) \u2014 see ALIASES.",
    // Master plan §6.2 anchor. Brain-validated tier 1 cross-provider for
    // classify (169 rows, 0% empty). Tier 0 for summarize-with-no-tools.
    // Falls off on hunt (sequential tools — L-040) and reasoning depth.
    archetypePerf: {
      classify: 7, // brain-validated, 169 rows
      summarize: 7, // archetype-tolerant, no brain evidence yet
      ask: 6,
      transform: 6,
      generate: 5,
      plan: 5,
      extract: 5,
      critique: 4,
      hunt: 4 // sequential tool calls only — L-040
    }
  },
  {
    id: "deepseek-v4-pro",
    verifiedAgainstDocs: "2026-05-08",
    provider: "deepseek",
    status: "current",
    maxContextTokens: 1000000,
    maxOutputTokens: 384000,
    maxTools: 16,
    parallelToolCalls: false,
    structuredOutput: "native",
    systemPromptMode: "inline",
    streaming: true,
    cliffs: [
      {
        metric: "tool_count",
        threshold: 1,
        action: "drop_to_top_relevant",
        reason: "Sequential tool calls only \u2014 L-040"
      }
    ],
    // Profile carries REGULAR pricing, not the 75%-off promo (ends 2026-05-31).
    // Under-estimating cost is worse than over-estimating for budget caps.
    costInputPer1m: 1.74,
    costOutputPer1m: 3.48,
    lowering: {
      system: { mode: "inline" },
      cache: { strategy: "unsupported" },
      tools: { format: "deepseek" }
    },
    recovery: [
      { signal: "rate_limit", action: "escalate", reason: "429 \u2014 escalate" }
    ],
    strengths: ["quality", "reasoning", "1m_context", "json_output", "code", "extended_thinking"],
    weaknesses: ["parallel_tools", "large_tool_sets"],
    notes: "Pro tier. 1M context, 384k max output. Regular pricing $1.74/$3.48; 75% promo through 2026-05-31 ($0.435/$0.87). Default mode = thinking.",
    // Master plan §3.3: tier 3 cross-provider for plan chain. Reasoning
    // bumped one notch over V4-Flash; same parallel-tool ceiling.
    archetypePerf: {
      plan: 7, // §3.3 tier 3 for plan
      critique: 6,
      ask: 7,
      generate: 6,
      classify: 7,
      summarize: 7,
      extract: 6,
      transform: 6,
      hunt: 4 // sequential tools — same as V4-Flash
    }
  }
];
|
|
1435
|
+
// Maps alternate model ids to the canonical profile id they resolve to.
var ALIASES = {
  // DeepSeek routes both of its public names to V4-Flash.
  "deepseek-chat": "deepseek-v4-flash",
  "deepseek-reasoner": "deepseek-v4-flash",
  // Legacy kgauto typo: alpha.1 used the dot form, the real API alias uses dashes.
  "claude-haiku-4.5": "claude-haiku-4-5"
};
|
|
1442
|
+
/**
 * Resolves a model id to its canonical profile id.
 *
 * Only aliases declared directly on `ALIASES` are honoured. A plain
 * `ALIASES[id]` lookup would also resolve inherited Object.prototype members
 * (e.g. "constructor", "toString", "__proto__") and hand back a function or
 * object instead of an id, so the lookup is restricted to own properties.
 *
 * @param {string} id - Model id or alias.
 * @returns {string} The alias target when `id` is a registered alias, otherwise `id` unchanged.
 */
function canonicalId(id) {
  const isAlias = Object.prototype.hasOwnProperty.call(ALIASES, id);
  return isAlias ? ALIASES[id] ?? id : id;
}
|
|
1445
|
+
// Lookup table from profile id to profile object, filled once at module load
// from PROFILES_RAW. A later entry with a duplicate id replaces the earlier one.
var PROFILE_INDEX = new Map();
for (const profile of PROFILES_RAW) {
  PROFILE_INDEX.set(profile.id, profile);
}
|
|
1448
|
+
/**
 * Returns the profile registered for a model id or alias.
 *
 * @param {string} id - Model id or alias.
 * @returns {object} The matching profile object.
 * @throws {Error} When no profile matches; the message lists every known id and alias.
 */
function getProfile(id) {
  const profile = PROFILE_INDEX.get(canonicalId(id));
  if (profile) {
    return profile;
  }
  const knownIds = [...PROFILE_INDEX.keys()].concat(Object.keys(ALIASES));
  throw new Error(`Unknown model id: "${id}". Known: ${knownIds.join(", ")}`);
}
|
|
1457
|
+
/**
 * Non-throwing variant of the profile lookup.
 *
 * @param {string} id - Model id or alias.
 * @returns {object|undefined} The profile, or `undefined` when the id is unknown.
 */
function tryGetProfile(id) {
  const resolvedId = canonicalId(id);
  return PROFILE_INDEX.get(resolvedId);
}
|
|
1460
|
+
/**
 * Return every registered profile.
 *
 * @returns {object[]} The PROFILES_RAW array itself (not a copy).
 */
function allProfiles() {
  const everyProfile = PROFILES_RAW;
  return everyProfile;
}
|
|
1463
|
+
/**
 * Collect the profiles whose `provider` field equals the given provider.
 *
 * @param {string} provider - Provider name to match with strict equality.
 * @returns {object[]} A new array of matching profiles, in PROFILES_RAW order.
 */
function profilesByProvider(provider) {
  const matches = [];
  PROFILES_RAW.forEach((profile) => {
    if (profile.provider === provider) {
      matches.push(profile);
    }
  });
  return matches;
}
|
|
1466
|
+
|
|
1467
|
+
// src/advisor.ts
|
|
1468
|
+
/**
 * Run every advisory detector and concatenate their findings.
 *
 * Detectors run in a fixed order: caching-off, single-chunk system prompt,
 * tool bloat, uncached history, single-model array.
 *
 * @param {object} ir - The prompt IR being compiled.
 * @param {object} result - Compile result fields (only passed to the tool-bloat detector).
 * @param {object} profile - Profile of the chosen target model.
 * @param {object} [policy] - Optional policy (only passed to the single-model detector).
 * @returns {object[]} All advisories, in detector order.
 */
function runAdvisor(ir, result, profile, policy) {
  return [
    ...detectCachingOff(ir, profile),
    ...detectSingleChunkSystem(ir, profile),
    ...detectToolBloat(ir, result),
    ...detectHistoryUncached(ir, profile),
    ...detectSingleModelArray(ir, policy)
  ];
}
|
|
1477
|
+
/**
 * Warn when an Anthropic-bound prompt of 2000+ characters has no cacheable section.
 *
 * @param {object} ir - IR whose `sections` each carry `text` and optionally `cacheable`.
 * @param {object} profile - Target profile; only `provider` is read.
 * @returns {object[]} Empty, or a single "caching-off-on-claude" warning.
 */
function detectCachingOff(ir, profile) {
  if (profile.provider !== "anthropic") {
    return [];
  }
  let charCount = 0;
  ir.sections.forEach((section) => {
    charCount += section.text.length;
  });
  if (charCount < 2e3) {
    return [];
  }
  // Only an explicit `cacheable: true` counts as opting in.
  const nothingCacheable = !ir.sections.some((section) => section.cacheable === true);
  if (!nothingCacheable) {
    return [];
  }
  const advisory = {
    level: "warn",
    code: "caching-off-on-claude",
    message: `System prompt is ${charCount} chars on Anthropic but no PromptSection has cacheable=true. Anthropic prompt caching cuts cached-prefix input cost by ~90% on subsequent calls; without it, every turn re-pays full price for the static system context.`,
    suggestion: "Mark stable system sections (role, persona, tool policy) with `cacheable: true`. The lowering pass concatenates cacheable sections into a single cache-controlled block before the dynamic ones.",
    docsUrl: "https://github.com/stue/command-center/blob/main/interfaces/kgauto.md#best-practice-advisories"
  };
  return [advisory];
}
|
|
1493
|
+
/**
 * Suggest splitting the system prompt when an Anthropic-bound IR has exactly
 * one section longer than 1000 characters.
 *
 * @param {object} ir - IR whose `sections` each carry `text`.
 * @param {object} profile - Target profile; only `provider` is read.
 * @returns {object[]} Empty, or a single "single-chunk-system" info advisory.
 */
function detectSingleChunkSystem(ir, profile) {
  const appliesToProvider = profile.provider === "anthropic";
  if (!appliesToProvider || ir.sections.length !== 1) {
    return [];
  }
  const chunk = ir.sections[0];
  if (!chunk) {
    return [];
  }
  if (chunk.text.length <= 1e3) {
    return [];
  }
  const advisory = {
    level: "info",
    code: "single-chunk-system",
    message: `System prompt is a single ${chunk.text.length}-char chunk. Splitting into NamedChunks (static role/persona vs dynamic context) gives the lowering pass a finer cache-marker boundary \u2014 only the static portion needs to be byte-stable for the cache to hit.`,
    suggestion: "Refactor the system builder to return an array of `PromptSection` shaped { id, text, cacheable?: boolean }. Static chunks (role, persona, tool policy) get `cacheable: true`; dynamic ones (current context, today's date) don't.",
    docsUrl: "https://github.com/stue/command-center/blob/main/interfaces/kgauto.md#best-practice-advisories"
  };
  return [advisory];
}
|
|
1508
|
+
/**
 * Warn when more than 10 tools survive the relevance pass for a short-output archetype.
 *
 * @param {object} ir - IR; reads `tools` and `intent.archetype`.
 * @param {object} result - Compile result; reads `diagnostics.toolsKept`.
 * @returns {object[]} Empty, or a single "tool-bloat" warning.
 */
function detectToolBloat(ir, result) {
  // Archetypes this rule treats as short-output.
  const shortOutputArchetypes = ["classify", "extract", "summarize", "transform", "critique"];
  const declaredNone = !ir.tools || ir.tools.length === 0;
  if (declaredNone) {
    return [];
  }
  const keptCount = result.diagnostics.toolsKept;
  if (keptCount <= 10) {
    return [];
  }
  const archetype = ir.intent.archetype;
  if (!shortOutputArchetypes.includes(archetype)) {
    return [];
  }
  const advisory = {
    level: "warn",
    code: "tool-bloat",
    message: `${keptCount} tools kept after the relevance pass for archetype="${archetype}" (consumer declared ${ir.tools.length}). This archetype is short-output and rarely needs more than 3 tools; each tool definition eats ~350 tokens of context budget.`,
    suggestion: "Tighten `relevanceByIntent: { [archetype]: 0..1 }` per ToolDefinition. Tools below `toolRelevanceThreshold` (default 0.2) get dropped. Without `relevanceByIntent`, every tool defaults to neutral (0.5) and stays.",
    docsUrl: "https://github.com/stue/kgauto/blob/main/v2/README.md#tools"
  };
  return [advisory];
}
|
|
1530
|
+
/**
 * Warn when an Anthropic-bound IR carries 2+ history messages but no active
 * history cache policy (missing, or strategy "none").
 *
 * @param {object} ir - IR; reads `history` and `historyCachePolicy`.
 * @param {object} profile - Target profile; only `provider` is read.
 * @returns {object[]} Empty, or a single "history-uncached-on-claude" warning.
 */
function detectHistoryUncached(ir, profile) {
  if (profile.provider !== "anthropic") {
    return [];
  }
  const hasEnoughHistory = Boolean(ir.history) && ir.history.length >= 2;
  if (!hasEnoughHistory) {
    return [];
  }
  const cachingActive = Boolean(ir.historyCachePolicy) && ir.historyCachePolicy.strategy !== "none";
  if (cachingActive) {
    return [];
  }
  const advisory = {
    level: "warn",
    code: "history-uncached-on-claude",
    message: `${ir.history.length} history messages on Anthropic with no historyCachePolicy. Every turn re-pays for the full conversation context; with caching, subsequent turns hit the cache at ~10% the input cost.`,
    suggestion: "Set `historyCachePolicy: { strategy: 'all-but-latest' }` on this IR. The lowering pass marks the message immediately preceding currentTurn with cache_control; subsequent turns whose history prefix matches byte-for-byte hit the cache.",
    docsUrl: "https://github.com/stue/command-center/blob/main/interfaces/kgauto.md#best-practice-advisories"
  };
  return [advisory];
}
|
|
1546
|
+
/**
 * Warn when `ir.models` has exactly one entry and the policy posture is not "locked".
 *
 * @param {object} ir - IR; reads `models`.
 * @param {object} [policy] - Optional policy; `posture === "locked"` silences the rule.
 * @returns {object[]} Empty, or a single "single-model-array" warning.
 */
function detectSingleModelArray(ir, policy) {
  const isSingleModel = ir.models.length === 1;
  if (!isSingleModel) {
    return [];
  }
  const isLocked = policy?.posture === "locked";
  if (isLocked) {
    return [];
  }
  const soleModel = ir.models[0];
  // The suggestion embeds the model id between two fixed fragments.
  const suggestionHead = "Use `getDefaultFallbackChain({ archetype: ir.intent.archetype, primary: '";
  const suggestionTail = "', posture: 'preferred' })` for a user-anchored chain, or `getDefaultFallbackChain({ archetype, posture: 'open' })` for library-picked. If single-model is intentional (compliance/brand promise), set `policy.posture = 'locked'` to silence this rule.";
  const advisory = {
    level: "warn",
    code: "single-model-array",
    message: `\`ir.models\` has length 1 (only "${soleModel}") and posture is not 'locked'. A single-model chain has no safety net \u2014 the first 429 / 5xx / cliff hits the user as a failure. Master plan \xA71.2 closes the reliability gap with a 2-step minimum.`,
    suggestion: suggestionHead + soleModel + suggestionTail,
    docsUrl: "https://github.com/stue/command-center/blob/main/interfaces/kgauto.md#single-model-array"
  };
  return [advisory];
}
|
|
1560
|
+
|
|
1561
|
+
// src/compile.ts
|
|
1562
|
+
// Per-module sequence number folded into every handle; wraps at one million.
var counter = 0;
/**
 * Build a compile handle of the form `c<time>-<sequence>-<random>`, each part in base 36.
 *
 * @returns {string} A new handle string.
 */
function makeHandle() {
  counter = (counter + 1) % 1e6;
  const stamp = Date.now().toString(36);
  const sequence = counter.toString(36);
  const noise = Math.random().toString(36).slice(2, 6);
  return ["c" + stamp, sequence, noise].join("-");
}
|
|
1567
|
+
/**
 * Compile a prompt IR into a provider-ready request for the best-fitting model.
 *
 * Pipeline, in order: validate the IR; run the slice, dedupe, tool-relevance and
 * history-compression passes; estimate input tokens; score candidate models and
 * pick a target; apply the target's cliffs; lower to the provider request;
 * re-validate the fit; then assemble diagnostics and advisories.
 *
 * @param {object} ir - The prompt IR.
 * @param {object} [opts] - Options read here: `profileResolver`, `toolRelevanceThreshold`,
 *   `compressHistoryAfter`, `compressHistoryAboveTokens`, `policy`.
 * @returns {object} `{ handle, target, provider, request, tokensIn, estimatedCostUsd,
 *   mutationsApplied, fallbackChain, advisories, diagnostics }`.
 * @throws {Error} When validation fails or no scored model fits the request.
 */
function compile(ir, opts = {}) {
  const resolveProfile = opts.profileResolver ?? getProfile;
  validateIR(ir);

  // Reduction passes: each one consumes the previous pass's `value`.
  const slicePass = passSlice(ir);
  const dedupePass = passDedupe(slicePass.value);
  const toolPass = passToolRelevance(dedupePass.value, {
    threshold: opts.toolRelevanceThreshold
  });
  const historyPass = passCompressHistory(toolPass.value, {
    summarizeOlderThan: opts.compressHistoryAfter,
    summarizeAboveTokens: opts.compressHistoryAboveTokens
  });
  const mutationLog = [];
  for (const pass of [slicePass, dedupePass, toolPass, historyPass]) {
    mutationLog.push(...pass.mutations);
  }

  // Scoring works on the reduced IR, before any model-specific cliffs apply.
  const reducedIR = historyPass.value;
  const inputTokens = estimateInputTokens(reducedIR);
  const scoring = passScoreTargets(reducedIR, {
    estimatedInputTokens: inputTokens,
    profilesById: resolveProfile,
    policy: opts.policy
  });
  mutationLog.push(...scoring.mutations);

  const winner = pickTarget(reducedIR, scoring.value);
  if (!winner) {
    throw new Error(
      `compile(): no allowed model fits the request. Scores: ${JSON.stringify(scoring.value, null, 2)}`
    );
  }
  const profile = resolveProfile(winner.modelId);

  // Every other fitting model, best rank first, becomes the fallback chain.
  const runnersUp = scoring.value.filter((score) => score.fits && score.modelId !== winner.modelId);
  runnersUp.sort((a, b) => b.rank - a.rank);
  const fallbackChain = runnersUp.map((score) => score.modelId);

  const cliffPass = passApplyCliffs(reducedIR, profile, inputTokens);
  const finalIR = cliffPass.value.ir;
  mutationLog.push(...cliffPass.mutations);
  const { forceThinkingZero, forceTerseOutput } = cliffPass.value.loweringHints;
  const lowered = lower(finalIR, profile, { forceThinkingZero, forceTerseOutput });
  validateFinalFit(finalIR, profile, inputTokens);

  const handle = makeHandle();
  const finalShape = computeShape(finalIR, inputTokens);
  // The result is not used here; the call is kept so compile() behaves exactly as before.
  learningKey(ir.intent.archetype, profile.id, finalShape);

  const keptTools = finalIR.tools?.length ?? 0;
  const keptHistory = finalIR.history?.length ?? 0;
  const diagnostics = {
    sectionsKept: finalIR.sections.length,
    sectionsDropped: ir.sections.length - finalIR.sections.length,
    toolsKept: keptTools,
    toolsDropped: (ir.tools?.length ?? 0) - keptTools,
    historyKept: keptHistory,
    historyDropped: (ir.history?.length ?? 0) - keptHistory,
    cacheableTokens: lowered.diagnostics.cacheableTokens,
    estimatedCacheSavingsUsd: lowered.diagnostics.estimatedCacheSavingsUsd,
    historyCacheableTokens: lowered.diagnostics.historyCacheableTokens,
    historyTokensTotal: historyPass.historyTokensTotal
  };

  // Advisories inspect the caller's original IR, not the reduced one.
  const advisorInput = {
    target: profile.id,
    provider: profile.provider,
    tokensIn: inputTokens,
    diagnostics
  };
  const advisories = runAdvisor(ir, advisorInput, profile, opts.policy);

  return {
    handle,
    target: profile.id,
    provider: profile.provider,
    request: lowered.request,
    tokensIn: inputTokens,
    estimatedCostUsd: winner.estimatedCostUsd,
    mutationsApplied: mutationLog,
    fallbackChain,
    advisories,
    diagnostics
  };
}
|
|
1648
|
+
/**
 * Check the minimum fields compile() needs on an IR.
 *
 * @param {object} ir - The IR to check.
 * @returns {void}
 * @throws {Error} On the first failing rule: missing `appId`, missing
 *   `intent.archetype`, `models` not a non-empty array, or `sections` not an array.
 */
function validateIR(ir) {
  // Rules are evaluated lazily, in order; the first failure throws.
  const rules = [
    [() => Boolean(ir.appId), "compile(): ir.appId is required"],
    [
      () => Boolean(ir.intent && ir.intent.archetype),
      "compile(): ir.intent.archetype is required (use a dialect-v1 archetype)"
    ],
    [
      () => Array.isArray(ir.models) && ir.models.length > 0,
      "compile(): ir.models must be a non-empty array"
    ],
    [() => Array.isArray(ir.sections), "compile(): ir.sections must be an array"]
  ];
  for (const [holds, complaint] of rules) {
    if (!holds()) {
      throw new Error(complaint);
    }
  }
}
|
|
1660
|
+
/**
 * Choose the target model from a list of scores.
 *
 * A `constraints.forceModel` that appears in `scores` wins if it fits and throws
 * if it does not; a forced id that is absent from `scores` is ignored. Otherwise
 * the fitting score with the highest `rank` is returned.
 *
 * @param {object} ir - IR; reads `constraints.forceModel`.
 * @param {object[]} scores - Entries with `modelId`, `fits`, `rank`, `rejectReasons`.
 * @returns {object|undefined} The chosen score, or `undefined` when nothing fits.
 * @throws {Error} When the forced model is scored but does not fit.
 */
function pickTarget(ir, scores) {
  const forcedId = ir.constraints?.forceModel;
  if (forcedId) {
    const forcedScore = scores.find((score) => score.modelId === ir.constraints.forceModel);
    if (forcedScore) {
      if (forcedScore.fits) {
        return forcedScore;
      }
      throw new Error(
        `compile(): forceModel="${ir.constraints.forceModel}" does not fit: ${forcedScore.rejectReasons.join("; ")}`
      );
    }
  }
  const candidates = scores.filter((score) => score.fits);
  candidates.sort((a, b) => b.rank - a.rank);
  return candidates[0];
}
|
|
1673
|
+
/**
 * Verify that the final IR fits the chosen profile's context and tool limits.
 *
 * @param {object} ir - Final IR; reads `tools`.
 * @param {object} profile - Reads `id`, `maxContextTokens`, `maxTools`.
 * @param {number} tokens - Estimated input tokens.
 * @returns {void}
 * @throws {Error} When `tokens` exceeds the context limit or the tool count exceeds `maxTools`.
 */
function validateFinalFit(ir, profile, tokens) {
  const overContext = tokens > profile.maxContextTokens;
  if (overContext) {
    throw new Error(
      `compile(): final IR is ${tokens} tokens, exceeds ${profile.id} context (${profile.maxContextTokens})`
    );
  }
  const toolCount = ir.tools?.length ?? 0;
  if (toolCount > profile.maxTools) {
    throw new Error(
      `compile(): final IR has ${ir.tools?.length} tools, exceeds ${profile.id} maxTools (${profile.maxTools})`
    );
  }
}
|
|
1685
|
+
|
|
1686
|
+
// src/brain.ts
|
|
1687
|
+
// Currently active brain configuration; `undefined` while no brain is configured.
var activeConfig;
/**
 * Activate brain reporting with the given configuration.
 *
 * The stored endpoint is normalized to a base URL: a trailing `/outcomes`
 * segment and any trailing slashes are removed, because record() appends
 * `/outcomes` itself. Without the slash trimming, an endpoint such as
 * `https://host/api/` produced the request URL `https://host/api//outcomes`.
 *
 * @param {object} config - Brain configuration; `endpoint` must be a string.
 *   All other fields are copied through unchanged.
 * @returns {void}
 */
function configureBrain(config) {
  const endpoint = config.endpoint
    .replace(/\/+$/, "")
    .replace(/\/outcomes$/, "")
    .replace(/\/+$/, "");
  activeConfig = { ...config, endpoint };
}
/**
 * Deactivate brain reporting by dropping the active configuration.
 *
 * @returns {void}
 */
function clearBrain() {
  activeConfig = void 0;
}
|
|
1695
|
+
// Compile records keyed by handle; record() looks an entry up and removes it.
var compileRegistry = /* @__PURE__ */ new Map();
// Size at which registerCompile() evicts entries before inserting a new one.
var REGISTRY_MAX_ENTRIES = 1e4;
/**
 * Store compile-time facts for a handle so a later record() call can attach them.
 *
 * When the registry already holds REGISTRY_MAX_ENTRIES entries, a quarter of
 * that limit is deleted first, walking the Map in its iteration order.
 *
 * @param {string} appId - Application id stored with the entry.
 * @param {string} archetype - Intent archetype stored with the entry.
 * @param {object} ir - IR; `sections`, `tools`, `history`, `models`, `constraints` feed computeShape.
 * @param {object} result - Compile result; reads `handle`, `target`, `provider`,
 *   `tokensIn`, `mutationsApplied`, `diagnostics`.
 * @returns {void}
 */
function registerCompile(appId, archetype, ir, result) {
  if (compileRegistry.size >= REGISTRY_MAX_ENTRIES) {
    const evictTarget = Math.floor(REGISTRY_MAX_ENTRIES * 0.25);
    let removed = 0;
    for (const staleHandle of compileRegistry.keys()) {
      compileRegistry.delete(staleHandle);
      removed += 1;
      if (removed >= evictTarget) {
        break;
      }
    }
  }
  const estimatedTokensIn = result.tokensIn;
  const shapeInput = {
    appId,
    intent: { name: archetype, archetype },
    sections: ir.sections,
    tools: ir.tools,
    history: ir.history,
    models: ir.models,
    constraints: ir.constraints
  };
  const shape = computeShape(shapeInput, estimatedTokensIn);
  const shapeKey = `${shape.contextBucket}-${shape.toolCountBucket}-${shape.historyDepth}-${shape.outputMode}`;
  const entry = {
    appId,
    archetype,
    model: result.target,
    provider: result.provider,
    shapeKey,
    learningKey: learningKey(archetype, result.target, shape),
    estimatedTokensIn,
    // Only mutation ids are kept, not the full mutation objects.
    mutationsApplied: result.mutationsApplied.map((mutation) => mutation.id),
    startedAt: Date.now(),
    historyCacheableTokens: result.diagnostics.historyCacheableTokens,
    historyTokensTotal: result.diagnostics.historyTokensTotal
  };
  compileRegistry.set(result.handle, entry);
}
|
|
1734
|
+
/**
 * Report the outcome of a call to the configured brain endpoint.
 *
 * The registry entry for `input.handle` is always consumed. Nothing is sent
 * when no brain is configured. Failures (network errors or non-2xx responses)
 * go to `config.onError`, or to defaultOnError when none is set. The POST is
 * awaited only when `config.sync` is truthy.
 *
 * @param {object} input - Outcome data; `handle` identifies the compile record.
 * @returns {Promise<void>}
 */
async function record(input) {
  const registration = compileRegistry.get(input.handle);
  if (registration) {
    compileRegistry.delete(input.handle);
  }
  if (!activeConfig) {
    return;
  }
  const payload = buildPayload(input, registration);
  // Snapshot the config so a later configureBrain()/clearBrain() cannot affect this send.
  const config = activeConfig;
  const fetchFn = config.fetchImpl ?? fetch;

  async function postOutcome() {
    try {
      const headers = { "Content-Type": "application/json" };
      if (config.apiKey) {
        headers.Authorization = `Bearer ${config.apiKey}`;
      }
      const response = await fetchFn(`${config.endpoint}/outcomes`, {
        method: "POST",
        headers,
        body: JSON.stringify(payload)
      });
      if (response.ok) {
        return;
      }
      const detail = await response.text().catch(() => "<no body>");
      throw new Error(`brain ${response.status}: ${detail}`);
    } catch (err) {
      (config.onError ?? defaultOnError)(err);
    }
  }

  if (!config.sync) {
    // Fire-and-forget: the send is started but deliberately not awaited.
    void postOutcome();
    return;
  }
  await postOutcome();
}
|
|
1767
|
+
/**
 * Fallback failure handler for brain reporting: logs a warning to the console.
 *
 * @param {unknown} err - The failure to log.
 * @returns {void}
 */
function defaultOnError(err) {
  const prefix = "[kgauto] brain record failed:";
  console.warn(prefix, err);
}
|
|
1770
|
+
/**
 * Assemble the JSON payload posted to the brain for one call outcome.
 *
 * @param {object} input - Outcome data supplied to record().
 * @param {object} [reg] - Registry entry for the handle, if one was found.
 * @returns {object} Snake-cased payload. `model` is the model that actually ran
 *   (`input.actualModel`, else the compile target); `requested_model` is set only
 *   when the actual model differs from the compile target.
 */
function buildPayload(input, reg) {
  const compileTarget = reg?.model;
  const servedModel = input.actualModel ?? compileTarget;
  const wasRerouted =
    Boolean(input.actualModel) && Boolean(compileTarget) && input.actualModel !== compileTarget;
  const requestedModel = wasRerouted ? compileTarget : undefined;
  // Caller-supplied mutation ids win over the ones captured at compile time.
  const mutationIds = input.mutationsApplied ?? reg?.mutationsApplied ?? [];
  // Cost is priced against the model that actually served the call.
  let costUsdActual;
  if (servedModel) {
    costUsdActual = computeCostUsd(servedModel, input.tokensIn, input.tokensOut);
  }
  const oracle = input.oracleScore;
  return {
    handle: input.handle,
    app_id: reg?.appId,
    intent_archetype: reg?.archetype,
    model: servedModel,
    requested_model: requestedModel,
    provider: reg?.provider,
    shape_key: reg?.shapeKey,
    learning_key: reg?.learningKey,
    mutations_applied: mutationIds,
    tokens_in: input.tokensIn,
    tokens_out: input.tokensOut,
    estimated_tokens_in: reg?.estimatedTokensIn,
    latency_ms: input.latencyMs,
    success: input.success,
    // Defaults to "no output tokens" when the caller did not say.
    empty_response: input.emptyResponse ?? input.tokensOut === 0,
    error_type: input.errorType,
    tools_called: input.toolsCalled,
    oracle_score: oracle?.score,
    oracle_dimensions: oracle?.dimensions,
    oracle_rationale: oracle?.rationale,
    prompt_preview: input.promptPreview,
    response_preview: input.responsePreview,
    dialect_version: "v1",
    cache_read_input_tokens: input.cacheReadInputTokens,
    cache_creation_input_tokens: input.cacheCreationInputTokens,
    cost_usd_actual: costUsdActual,
    ttft_ms: input.ttftMs,
    history_cacheable_tokens: reg?.historyCacheableTokens,
    history_tokens_at_compile: reg?.historyTokensTotal
  };
}
|
|
1809
|
+
/**
 * Price a call from its token counts and the model's per-million rates.
 *
 * @param {string} modelId - Model id or alias to price against.
 * @param {number} tokensIn - Input token count.
 * @param {number} tokensOut - Output token count.
 * @returns {number|undefined} USD cost rounded to six decimals, or `undefined`
 *   when both counts are zero or the model has no profile.
 */
function computeCostUsd(modelId, tokensIn, tokensOut) {
  const nothingBilled = tokensIn === 0 && tokensOut === 0;
  if (nothingBilled) {
    return undefined;
  }
  const pricing = tryGetProfile(modelId);
  if (!pricing) {
    return undefined;
  }
  const totalUsd =
    tokensIn / 1e6 * pricing.costInputPer1m + tokensOut / 1e6 * pricing.costOutputPer1m;
  return Math.round(totalUsd * 1e6) / 1e6;
}
|
|
1817
|
+
|
|
1818
|
+
// src/ir.ts
|
|
1819
|
+
/**
 * Error raised when a call cannot be completed.
 *
 * Carries the list of attempts made plus the status and error code of the last failure.
 */
var CallError = class extends Error {
  attempts;
  lastErrorCode;
  lastStatus;
  /**
   * @param {string} message - Human-readable description.
   * @param {object[]} attempts - Attempt records.
   * @param {number|undefined} lastStatus - Status of the last attempt, if any.
   * @param {string|undefined} lastErrorCode - Error code of the last attempt, if any.
   */
  constructor(message, attempts, lastStatus, lastErrorCode) {
    super(message);
    Object.assign(this, {
      name: "CallError",
      attempts,
      lastStatus,
      lastErrorCode
    });
  }
};
|
|
1831
|
+
|
|
1832
|
+
// src/env.ts
|
|
1833
|
+
// Providers this module has key lookups and executors for (frozen list).
var SUPPORTED_PROVIDERS = Object.freeze(["anthropic", "google", "openai", "deepseek"]);
/**
 * Tell whether a value names one of the supported providers.
 *
 * @param {unknown} p - Candidate provider name.
 * @returns {boolean} `true` when `p` is in SUPPORTED_PROVIDERS.
 */
function isSupportedProvider(p) {
  return SUPPORTED_PROVIDERS.some((name) => name === p);
}
|
|
1842
|
+
// Environment variable names consulted for each provider's API key, in the
// order they are tried. The outer object and each inner list are frozen.
var PROVIDER_ENV_KEYS = Object.freeze(
  Object.fromEntries(
    [
      ["anthropic", ["ANTHROPIC_API_KEY"]],
      ["google", ["GOOGLE_API_KEY", "GEMINI_API_KEY", "GOOGLE_GENERATIVE_AI_API_KEY"]],
      ["openai", ["OPENAI_API_KEY"]],
      ["deepseek", ["DEEPSEEK_API_KEY"]]
    ].map(([provider, names]) => [provider, Object.freeze(names)])
  )
);
|
|
1852
|
+
/**
 * Return the environment to read API keys from when the caller gives none.
 *
 * @returns {object} `process.env` when a `process` global with an `env` exists,
 *   otherwise a fresh empty object.
 */
function defaultEnv() {
  const hasProcessEnv = typeof process !== "undefined" && process.env;
  if (hasProcessEnv) {
    return process.env;
  }
  return {};
}
|
|
1855
|
+
/**
 * Find the API key for a provider.
 *
 * An explicit `opts.apiKeys[provider]` wins; otherwise the provider's
 * environment variable names are tried in order and the first non-empty value
 * is used.
 *
 * @param {string} provider - Provider name.
 * @param {object} [opts] - `apiKeys` (explicit keys by provider) and `envSource`
 *   (environment object; defaults to defaultEnv()).
 * @returns {string|undefined} The key, or `undefined` when the provider is
 *   unsupported or no key is available.
 */
function resolveProviderKey(provider, opts = {}) {
  if (!isSupportedProvider(provider)) {
    return undefined;
  }
  const explicitKey = opts.apiKeys?.[provider];
  if (explicitKey) {
    return explicitKey;
  }
  const env = opts.envSource ?? defaultEnv();
  // First truthy value wins; empty strings count as "not set".
  return PROVIDER_ENV_KEYS[provider].map((name) => env[name]).find(Boolean);
}
|
|
1866
|
+
/**
 * Tell whether an API key can be resolved for a provider.
 *
 * @param {string} provider - Provider name.
 * @param {object} [opts] - Passed through to resolveProviderKey.
 * @returns {boolean} `true` when resolveProviderKey returns something other than `undefined`.
 */
function isProviderReachable(provider, opts = {}) {
  const key = resolveProviderKey(provider, opts);
  return key !== undefined;
}
|
|
1869
|
+
/**
 * Tell whether a model can be called: it must have a profile and its provider
 * must have a resolvable API key.
 *
 * @param {string} modelId - Model id or alias.
 * @param {object} [opts] - Passed through to isProviderReachable.
 * @returns {boolean} `false` for unknown models or providers without a key.
 */
function isModelReachable(modelId, opts = {}) {
  const profile = tryGetProfile(modelId);
  return profile ? isProviderReachable(profile.provider, opts) : false;
}
|
|
1874
|
+
/**
 * Report, for every supported provider, whether a key is available and where it came from.
 *
 * @param {object} [opts] - `apiKeys` (explicit keys by provider) and `envSource`
 *   (environment object; defaults to defaultEnv()).
 * @returns {object} Map of provider to `{ reachable: true, via: "apiKeys" }`,
 *   `{ reachable: true, via: "env", envKeyFound }`, or `{ reachable: false, via: null }`.
 */
function getReachabilityDiagnostic(opts = {}) {
  const env = opts.envSource ?? defaultEnv();
  const report = {};
  SUPPORTED_PROVIDERS.forEach((provider) => {
    // Explicit keys take precedence over anything in the environment.
    if (opts.apiKeys?.[provider]) {
      report[provider] = { reachable: true, via: "apiKeys" };
      return;
    }
    const envKeyFound = PROVIDER_ENV_KEYS[provider].find((name) => env[name]);
    if (envKeyFound) {
      report[provider] = { reachable: true, via: "env", envKeyFound };
    } else {
      report[provider] = { reachable: false, via: null };
    }
  });
  return report;
}
|
|
1887
|
+
|
|
1888
|
+
// src/execute.ts
|
|
1889
|
+
// Fixed HTTP endpoints for the providers whose URL does not depend on the model.
var ANTHROPIC_URL = "https://api.anthropic.com/v1/messages";
var OPENAI_URL = "https://api.openai.com/v1/chat/completions";
var DEEPSEEK_URL = "https://api.deepseek.com/chat/completions";
/**
 * Send a lowered request to its provider.
 *
 * Per-provider overrides from `opts.providerOverrides` are merged in first,
 * then the request is handed to the executor matching `provider`.
 *
 * @param {object} request - Lowered request; `provider` selects the executor.
 * @param {object} [opts] - Executor options; `providerOverrides` is read here.
 * @returns {Promise<object>} The executor's result object.
 * @throws {Error} When the provider has no executor.
 */
async function execute(request, opts = {}) {
  const merged = applyOverrides(request, opts.providerOverrides);
  if (merged.provider === "anthropic") {
    return executeAnthropic(merged, opts);
  }
  if (merged.provider === "google") {
    return executeGoogle(merged, opts);
  }
  if (merged.provider === "openai") {
    return executeOpenAI(merged, opts);
  }
  if (merged.provider === "deepseek") {
    return executeDeepSeek(merged, opts);
  }
  throw new Error(`execute(): no executor for provider: ${JSON.stringify(merged)}`);
}
|
|
1909
|
+
/**
 * POST a request to the Anthropic messages endpoint.
 *
 * @param {object} request - Lowered request; `provider` is stripped before sending.
 * @param {object} opts - Reads `apiKeys` and `fetchImpl`.
 * @returns {Promise<object>} `{ ok: true, status, response }` on success; a terminal
 *   auth error when no key resolves; a retryable `network_error` when the fetch
 *   throws; otherwise whatever classifyHttpError returns for the status.
 */
async function executeAnthropic(request, opts) {
  const apiKey = resolveProviderKey("anthropic", { apiKeys: opts.apiKeys });
  if (!apiKey) {
    return terminalError(401, "auth", "ANTHROPIC_API_KEY missing");
  }
  const { provider: _provider, ...wireBody } = request;
  const doFetch = opts.fetchImpl ?? fetch;
  let httpResponse;
  let parsedBody;
  try {
    const init = {
      method: "POST",
      headers: {
        "x-api-key": apiKey,
        "anthropic-version": "2023-06-01",
        "content-type": "application/json"
      },
      body: JSON.stringify(wireBody)
    };
    httpResponse = await doFetch(ANTHROPIC_URL, init);
    // An unparseable body is treated as an empty object rather than a failure.
    parsedBody = await httpResponse.json().catch(() => ({}));
  } catch (err) {
    return retryableError(0, "network_error", String(err), null);
  }
  if (httpResponse.ok) {
    return { ok: true, status: httpResponse.status, response: normalizeAnthropic(parsedBody) };
  }
  return classifyHttpError(httpResponse.status, parsedBody);
}
|
|
1935
|
+
/**
 * Convert a raw Anthropic messages response into the common response shape.
 *
 * Malformed bodies are tolerated: a `null` or non-object body, a non-array
 * `content`, and `null` entries inside `content` yield empty text / tool calls
 * instead of a TypeError. This matters because the executor calls this outside
 * its try/catch, and a 200 response whose JSON body is `null` is valid JSON.
 *
 * @param {unknown} raw - Parsed response body.
 * @returns {object} `{ text, structuredOutput, toolCalls, tokens, finishReason, raw }`.
 *   `text` joins all text blocks; `toolCalls` lists `tool_use` blocks that have
 *   both a name and an id; `raw` is the input, untouched.
 */
function normalizeAnthropic(raw) {
  const r = raw !== null && typeof raw === "object" ? raw : {};
  // Keep only well-formed (object) content blocks.
  const blocks = Array.isArray(r.content) ? r.content.filter((b) => b !== null && typeof b === "object") : [];
  const text = blocks
    .filter((b) => b.type === "text" && typeof b.text === "string")
    .map((b) => b.text)
    .join("");
  const toolCalls = blocks
    .filter((b) => b.type === "tool_use" && b.name && b.id)
    .map((b) => ({ id: b.id, name: b.name, args: b.input ?? {} }));
  const inputTokens = r.usage?.input_tokens ?? 0;
  const outputTokens = r.usage?.output_tokens ?? 0;
  const tokens = {
    input: inputTokens,
    output: outputTokens,
    total: inputTokens + outputTokens,
    cached: r.usage?.cache_read_input_tokens,
    cacheCreated: r.usage?.cache_creation_input_tokens
  };
  return { text, structuredOutput: null, toolCalls, tokens, finishReason: r.stop_reason, raw };
}
|
|
1948
|
+
/**
 * POST a request to the Google Generative Language `generateContent` endpoint.
 *
 * The model name goes into the URL path and the API key into the `key` query
 * parameter; `provider` and `model` are stripped from the JSON body.
 *
 * @param {object} request - Lowered request; reads `model`.
 * @param {object} opts - Reads `apiKeys` and `fetchImpl`.
 * @returns {Promise<object>} `{ ok: true, status, response }` on success; a terminal
 *   auth error when no key resolves; a retryable `network_error` when the fetch
 *   throws; otherwise whatever classifyHttpError returns for the status.
 */
async function executeGoogle(request, opts) {
  const apiKey = resolveProviderKey("google", { apiKeys: opts.apiKeys });
  if (!apiKey) {
    return terminalError(401, "auth", "GOOGLE_API_KEY/GEMINI_API_KEY missing");
  }
  const { provider: _provider, model, ...wireBody } = request;
  const modelSegment = encodeURIComponent(model);
  const keyParam = encodeURIComponent(apiKey);
  const endpoint = `https://generativelanguage.googleapis.com/v1beta/models/${modelSegment}:generateContent?key=${keyParam}`;
  const doFetch = opts.fetchImpl ?? fetch;
  let httpResponse;
  let parsedBody;
  try {
    const init = {
      method: "POST",
      headers: { "content-type": "application/json" },
      body: JSON.stringify(wireBody)
    };
    httpResponse = await doFetch(endpoint, init);
    // An unparseable body is treated as an empty object rather than a failure.
    parsedBody = await httpResponse.json().catch(() => ({}));
  } catch (err) {
    return retryableError(0, "network_error", String(err), null);
  }
  if (httpResponse.ok) {
    return { ok: true, status: httpResponse.status, response: normalizeGoogle(parsedBody) };
  }
  return classifyHttpError(httpResponse.status, parsedBody);
}
|
|
1971
|
+
/**
 * Convert a raw Google `generateContent` response into the common response shape.
 *
 * Only the first candidate is read. Malformed bodies are tolerated: a `null` or
 * non-object body, a non-array `parts`, and `null` entries inside `parts` yield
 * empty text / tool calls instead of a TypeError. This matters because the
 * executor calls this outside its try/catch.
 *
 * @param {unknown} raw - Parsed response body.
 * @returns {object} `{ text, structuredOutput, toolCalls, tokens, finishReason, raw }`.
 *   Tool call ids are synthesized as `gemini-call-<index>`; `raw` is the input, untouched.
 */
function normalizeGoogle(raw) {
  const r = raw !== null && typeof raw === "object" ? raw : {};
  const candidate = r.candidates?.[0];
  const rawParts = candidate?.content?.parts;
  // Keep only well-formed (object) parts.
  const parts = Array.isArray(rawParts) ? rawParts.filter((p) => p !== null && typeof p === "object") : [];
  const text = parts
    .filter((p) => typeof p.text === "string")
    .map((p) => p.text)
    .join("");
  const toolCalls = parts
    .filter((p) => p.functionCall?.name)
    .map((p, i) => ({
      id: `gemini-call-${i}`,
      name: p.functionCall.name,
      args: p.functionCall.args ?? {}
    }));
  const u = r.usageMetadata ?? {};
  const tokens = {
    input: u.promptTokenCount ?? 0,
    output: u.candidatesTokenCount ?? 0,
    // Prefer the provider's own total; otherwise sum prompt and candidate counts.
    total: u.totalTokenCount ?? (u.promptTokenCount ?? 0) + (u.candidatesTokenCount ?? 0),
    cached: u.cachedContentTokenCount
  };
  return { text, structuredOutput: null, toolCalls, tokens, finishReason: candidate?.finishReason, raw };
}
|
|
1990
|
+
/**
 * POST a request to the OpenAI chat completions endpoint.
 *
 * @param {object} request - Lowered request; `provider` is stripped before sending.
 * @param {object} opts - Reads `apiKeys` and `fetchImpl`.
 * @returns {Promise<object>} `{ ok: true, status, response }` on success; a terminal
 *   auth error when no key resolves; a retryable `network_error` when the fetch
 *   throws; otherwise whatever classifyHttpError returns for the status.
 */
async function executeOpenAI(request, opts) {
  const apiKey = resolveProviderKey("openai", { apiKeys: opts.apiKeys });
  if (!apiKey) {
    return terminalError(401, "auth", "OPENAI_API_KEY missing");
  }
  const { provider: _provider, ...wireBody } = request;
  const doFetch = opts.fetchImpl ?? fetch;
  let httpResponse;
  let parsedBody;
  try {
    const init = {
      method: "POST",
      headers: { authorization: `Bearer ${apiKey}`, "content-type": "application/json" },
      body: JSON.stringify(wireBody)
    };
    httpResponse = await doFetch(OPENAI_URL, init);
    // An unparseable body is treated as an empty object rather than a failure.
    parsedBody = await httpResponse.json().catch(() => ({}));
  } catch (err) {
    return retryableError(0, "network_error", String(err), null);
  }
  if (httpResponse.ok) {
    return { ok: true, status: httpResponse.status, response: normalizeOpenAILike(parsedBody) };
  }
  return classifyHttpError(httpResponse.status, parsedBody);
}
|
|
2012
|
+
/**
 * POST a request to the DeepSeek chat completions endpoint.
 *
 * The response is normalized with the same OpenAI-style normalizer used for OpenAI.
 *
 * @param {object} request - Lowered request; `provider` is stripped before sending.
 * @param {object} opts - Reads `apiKeys` and `fetchImpl`.
 * @returns {Promise<object>} `{ ok: true, status, response }` on success; a terminal
 *   auth error when no key resolves; a retryable `network_error` when the fetch
 *   throws; otherwise whatever classifyHttpError returns for the status.
 */
async function executeDeepSeek(request, opts) {
  const apiKey = resolveProviderKey("deepseek", { apiKeys: opts.apiKeys });
  if (!apiKey) {
    return terminalError(401, "auth", "DEEPSEEK_API_KEY missing");
  }
  const { provider: _provider, ...wireBody } = request;
  const doFetch = opts.fetchImpl ?? fetch;
  let httpResponse;
  let parsedBody;
  try {
    const init = {
      method: "POST",
      headers: { authorization: `Bearer ${apiKey}`, "content-type": "application/json" },
      body: JSON.stringify(wireBody)
    };
    httpResponse = await doFetch(DEEPSEEK_URL, init);
    // An unparseable body is treated as an empty object rather than a failure.
    parsedBody = await httpResponse.json().catch(() => ({}));
  } catch (err) {
    return retryableError(0, "network_error", String(err), null);
  }
  if (httpResponse.ok) {
    return { ok: true, status: httpResponse.status, response: normalizeOpenAILike(parsedBody) };
  }
  return classifyHttpError(httpResponse.status, parsedBody);
}
|
|
2034
|
+
/**
 * Convert a raw OpenAI-style chat completion response into the common response shape.
 *
 * Only the first choice is read. Malformed bodies are tolerated: a `null` or
 * non-object body, a non-array `tool_calls`, and `null` entries inside
 * `tool_calls` yield empty results instead of a TypeError. This matters because
 * the executors call this outside their try/catch.
 *
 * @param {unknown} raw - Parsed response body.
 * @returns {object} `{ text, structuredOutput, toolCalls, tokens, finishReason, raw }`.
 *   Tool call arguments are JSON-parsed, falling back to `{}`; a missing tool
 *   call id becomes `tc-<index>`; `raw` is the input, untouched.
 */
function normalizeOpenAILike(raw) {
  const r = raw !== null && typeof raw === "object" ? raw : {};
  const choice = r.choices?.[0];
  const text = choice?.message?.content ?? "";
  const rawCalls = choice?.message?.tool_calls;
  // Keep only well-formed (object) tool call entries.
  const calls = Array.isArray(rawCalls) ? rawCalls.filter((tc) => tc !== null && typeof tc === "object") : [];
  const toolCalls = calls
    .filter((tc) => tc.function?.name)
    .map((tc, i) => ({
      id: tc.id ?? `tc-${i}`,
      name: tc.function.name,
      args: tryParseJson(tc.function?.arguments) ?? {}
    }));
  const u = r.usage ?? {};
  const tokens = {
    input: u.prompt_tokens ?? 0,
    output: u.completion_tokens ?? 0,
    // Prefer the provider's own total; otherwise sum prompt and completion counts.
    total: u.total_tokens ?? (u.prompt_tokens ?? 0) + (u.completion_tokens ?? 0),
    cached: u.prompt_tokens_details?.cached_tokens
  };
  return { text, structuredOutput: null, toolCalls, tokens, finishReason: choice?.finish_reason, raw };
}
|
|
2052
|
+
/**
 * Merge the override layer for the request's provider on top of the request.
 *
 * @param {object} request - Lowered request; `provider` selects the layer.
 * @param {object} [overrides] - Override layers keyed by provider name.
 * @returns {object} The same `request` object when nothing applies, otherwise a
 *   shallow copy in which the layer's keys win.
 */
function applyOverrides(request, overrides) {
  const layer = overrides ? overrides[request.provider] : undefined;
  return layer ? { ...request, ...layer } : request;
}
|
|
2058
|
+
/**
 * Turn a non-OK HTTP status into a structured error result.
 *
 * Retryable: 429 (`rate_limit`), 408 (`timeout`), 5xx (`server_error`),
 * 404 (`model_not_found`). Terminal: 401/403 (`auth`), 400 (`invalid_request`),
 * anything else (`unknown`).
 *
 * @param {number} status - HTTP status code.
 * @param {unknown} body - Parsed response body; used for the message and echoed as `raw`.
 * @returns {object} `{ ok: false, status, errorType, errorCode, message, raw }`.
 */
function classifyHttpError(status, body) {
  const message = extractErrorMessage(body) ?? `HTTP ${status}`;
  let errorType = "terminal";
  let errorCode = "unknown";
  // Branch order matters: the 5xx range is tested before the remaining 4xx codes.
  if (status === 429) {
    errorType = "retryable";
    errorCode = "rate_limit";
  } else if (status === 408) {
    errorType = "retryable";
    errorCode = "timeout";
  } else if (status >= 500) {
    errorType = "retryable";
    errorCode = "server_error";
  } else if (status === 404) {
    errorType = "retryable";
    errorCode = "model_not_found";
  } else if (status === 401 || status === 403) {
    errorCode = "auth";
  } else if (status === 400) {
    errorCode = "invalid_request";
  }
  return { ok: false, status, errorType, errorCode, message, raw: body };
}
|
|
2080
|
+
/**
 * Pull a human-readable message out of an error response body.
 *
 * Looks at `body.error.message` first (when `error` is an object), then `body.message`.
 *
 * @param {unknown} body - Parsed response body.
 * @returns {string|undefined} The first string found, or `undefined`.
 */
function extractErrorMessage(body) {
  if (!body || typeof body !== "object") {
    return undefined;
  }
  const nested = body.error && typeof body.error === "object" ? body.error.message : undefined;
  for (const candidate of [nested, body.message]) {
    if (typeof candidate === "string") {
      return candidate;
    }
  }
  return undefined;
}
|
|
2090
|
+
/**
 * Build a terminal (non-retryable) error result with no raw body.
 *
 * @param {number} status - Status code to report.
 * @param {string} code - Error code.
 * @param {string} message - Human-readable message.
 * @returns {object} `{ ok: false, status, errorType: "terminal", errorCode, message, raw: null }`.
 */
function terminalError(status, code, message) {
  const failure = { ok: false, status, errorType: "terminal", errorCode: code, message, raw: null };
  return failure;
}
|
|
2093
|
+
/**
 * Build a retryable error result.
 *
 * @param {number} status - Status code to report.
 * @param {string} code - Error code.
 * @param {string} message - Human-readable message.
 * @param {unknown} raw - Raw body to attach.
 * @returns {object} `{ ok: false, status, errorType: "retryable", errorCode, message, raw }`.
 */
function retryableError(status, code, message, raw) {
  const failure = { ok: false, status, errorType: "retryable", errorCode: code, message, raw };
  return failure;
}
|
|
2096
|
+
/**
 * Parse a JSON string, accepting only object results (arrays included).
 *
 * @param {unknown} s - Candidate JSON text.
 * @returns {object|undefined} The parsed object, or `undefined` when `s` is not
 *   a non-empty string, is not valid JSON, or parses to a primitive or `null`.
 */
function tryParseJson(s) {
  const isNonEmptyString = typeof s === "string" && s.length > 0;
  if (!isNonEmptyString) {
    return undefined;
  }
  let value;
  try {
    value = JSON.parse(s);
  } catch {
    return undefined;
  }
  if (value === null || typeof value !== "object") {
    return undefined;
  }
  return value;
}
|
|
2105
|
+
|
|
2106
|
+
// src/call.ts
|
|
2107
|
+
/**
 * Compile `ir`, execute it against the selected model, and walk the fallback
 * chain until one target succeeds or the walk is stopped.
 *
 * Flow:
 *  1. Compile once to get the primary target and its fallback chain.
 *  2. Unless `opts.noAutoFilter` is set, drop targets for which
 *     `isModelReachable` returns false. If nothing is left, record the failure
 *     and throw a `CallError` with code "no_reachable_models".
 *  3. Try each remaining target in order. Fallback targets are recompiled with
 *     `forceModel`; a compile failure is logged as an attempt and skipped.
 *  4. A successful (and, for structured output, validated) response is recorded
 *     and returned. A "terminal" error or `opts.noFallback` ends the walk early.
 *  5. If no target succeeds, record the failure and throw a `CallError`.
 *
 * @param {object} ir - Intermediate representation of the request.
 * @param {object} [opts] - Options. Read here: `noAutoFilter`, `noFallback`,
 *   `apiKeys`, `fetchImpl`, `providerOverrides`; the whole object is also
 *   forwarded to `compileAndRegister`.
 * @returns {Promise<object>} Result with `handle`, `actualModel`, `requestedModel`,
 *   `provider`, `response`, `latencyMs`, `mutationsApplied`, `attempts`,
 *   `servedBy`, `fellOverFrom`, `fallbackReason` and `unreachableFiltered`.
 * @throws {CallError} When no target is reachable or every attempt fails.
 */
async function call(ir, opts = {}) {
  const initial = compileAndRegister(ir, opts);
  const start = Date.now();
  const attempts = [];
  const rawTargets = [initial.target, ...initial.fallbackChain];
  let unreachableFiltered;
  let targetsToTry;
  if (opts.noAutoFilter) {
    targetsToTry = rawTargets;
  } else {
    const dropped = [];
    targetsToTry = [];
    for (const t of rawTargets) {
      if (isModelReachable(t, { apiKeys: opts.apiKeys })) {
        targetsToTry.push(t);
      } else {
        dropped.push(t);
      }
    }
    unreachableFiltered = dropped;
    if (targetsToTry.length === 0) {
      const latencyMs2 = Date.now() - start;
      await record({
        handle: initial.handle,
        tokensIn: 0,
        tokensOut: 0,
        latencyMs: latencyMs2,
        success: false,
        errorType: "no_reachable_models",
        promptPreview: extractPromptPreview(ir)
      });
      const noReachableAttempts = dropped.map((m) => ({
        model: m,
        status: "terminal",
        errorCode: "unreachable_provider",
        message: `No API key for ${m}'s provider \u2014 set one of PROVIDER_ENV_KEYS or pass apiKeys`
      }));
      throw new CallError(
        `call(): no reachable models in chain. Filtered: [${dropped.join(", ")}]. Add a key for one provider, or pass apiKeys.`,
        noReachableAttempts,
        void 0,
        "no_reachable_models"
      );
    }
  }
  let activeCompile = initial;
  let lastErr;
  // Remembered so the final error is still informative when the only failures
  // were fallback compile errors (e.g. the primary target was auto-filtered).
  let lastCompileFailure;
  for (const targetModel of targetsToTry) {
    if (targetModel === initial.target) {
      // Always run the primary with its own compile; without this reset a chain
      // that repeats the primary would execute the previous fallback's request.
      activeCompile = initial;
    } else {
      try {
        activeCompile = compileAndRegister(
          {
            ...ir,
            models: ir.models.includes(targetModel) ? ir.models : [targetModel, ...ir.models],
            constraints: { ...ir.constraints ?? {}, forceModel: targetModel }
          },
          opts
        );
      } catch (err) {
        const compileMessage = err instanceof Error ? err.message : String(err);
        attempts.push({
          model: targetModel,
          status: "terminal",
          errorCode: "compile_error",
          message: compileMessage
        });
        lastCompileFailure = { status: void 0, errorCode: "compile_error", message: compileMessage };
        continue;
      }
    }
    const exec = await execute(activeCompile.request, {
      apiKeys: opts.apiKeys,
      fetchImpl: opts.fetchImpl,
      providerOverrides: opts.providerOverrides
    });
    // Only successful executions go through the structured-output contract check.
    const validated = exec.ok ? validateStructuredContract(exec, ir) : exec;
    if (validated.ok) {
      attempts.push({ model: targetModel, status: "success" });
      const latencyMs2 = Date.now() - start;
      const fellOver = targetModel !== initial.target;
      await record({
        handle: initial.handle,
        tokensIn: validated.response.tokens.input,
        tokensOut: validated.response.tokens.output,
        latencyMs: latencyMs2,
        success: true,
        emptyResponse: validated.response.tokens.output === 0,
        toolsCalled: validated.response.toolCalls.map((tc) => tc.name),
        actualModel: fellOver ? targetModel : void 0,
        mutationsApplied: fellOver ? activeCompile.mutationsApplied.map((m) => m.id) : void 0,
        promptPreview: extractPromptPreview(ir),
        responsePreview: validated.response.text.slice(0, 200),
        cacheReadInputTokens: validated.response.tokens.cached,
        cacheCreationInputTokens: validated.response.tokens.cacheCreated
      });
      return {
        handle: initial.handle,
        actualModel: targetModel,
        requestedModel: initial.target,
        provider: activeCompile.provider,
        response: validated.response,
        latencyMs: latencyMs2,
        mutationsApplied: activeCompile.mutationsApplied,
        attempts,
        servedBy: targetModel,
        fellOverFrom: fellOver ? initial.target : void 0,
        fallbackReason: fellOver ? normalizeFallbackReason(attempts) : void 0,
        unreachableFiltered
      };
    }
    attempts.push({
      model: targetModel,
      status: validated.errorType,
      errorCode: validated.errorCode,
      message: validated.message
    });
    lastErr = validated;
    if (validated.errorType === "terminal" || opts.noFallback) {
      break;
    }
  }
  // Prefer the last provider/validation error; fall back to the last compile
  // failure so the record and the thrown error never lose their error code.
  const finalErr = lastErr ?? lastCompileFailure;
  const latencyMs = Date.now() - start;
  await record({
    handle: initial.handle,
    tokensIn: 0,
    tokensOut: 0,
    latencyMs,
    success: false,
    errorType: finalErr?.errorCode,
    promptPreview: extractPromptPreview(ir)
  });
  const filteredNote = unreachableFiltered && unreachableFiltered.length > 0 ? ` (also auto-filtered: [${unreachableFiltered.join(", ")}] \u2014 no API key)` : "";
  throw new CallError(
    `call(): all attempts failed${finalErr ? ` \u2014 ${finalErr.errorCode}: ${finalErr.message}` : ""}${filteredNote}`,
    attempts,
    finalErr?.status,
    finalErr?.errorCode
  );
}
|
|
2245
|
+
/**
 * Compile `ir` with the compile-relevant subset of `opts`, then register the
 * result via `registerCompile` under the IR's app id and intent archetype.
 *
 * @param {object} ir - Request IR; `ir.appId` and `ir.intent.archetype` are read.
 * @param {object} opts - Only `policy`, `toolRelevanceThreshold` and
 *   `compressHistoryAfter` are forwarded to `compile`.
 * @returns {object} The compile result.
 */
function compileAndRegister(ir, opts) {
  const { policy, toolRelevanceThreshold, compressHistoryAfter } = opts;
  const compiled = compile(ir, { policy, toolRelevanceThreshold, compressHistoryAfter });
  registerCompile(ir.appId, ir.intent.archetype, ir, compiled);
  return compiled;
}
|
|
2254
|
+
/**
 * Produce a short preview (first 200 units via `.slice`) of the prompt.
 *
 * Uses the current turn's content when it is truthy; otherwise the content of
 * the last history entry.
 *
 * @param {object} ir - Request IR; reads `currentTurn.content` and `history`.
 * @returns {string | undefined} The preview, or undefined when neither source has content.
 */
function extractPromptPreview(ir) {
  const PREVIEW_LENGTH = 200;
  const current = ir.currentTurn?.content;
  if (current) {
    return current.slice(0, PREVIEW_LENGTH);
  }
  const latest = ir.history?.[ir.history.length - 1]?.content;
  return latest ? latest.slice(0, PREVIEW_LENGTH) : undefined;
}
|
|
2261
|
+
/**
 * Check a successful execution against the structured-output contract.
 *
 * - No `ir.constraints.structuredOutput`: passes through unchanged.
 * - finishReason of "max_tokens" / "length" (case-insensitive): retryable
 *   failure "max_tokens_on_structured_output".
 * - Empty text: passes through (nothing to parse).
 * - Otherwise the text must be valid JSON; the parsed value is attached as
 *   `response.structuredOutput`, or a retryable
 *   "structured_output_parse_failed" failure is returned.
 *
 * @param {object} exec - Successful execution result (`status`, `response`).
 * @param {object} ir - Request IR; only `constraints.structuredOutput` is read.
 * @returns {object} `{ ok: true, response }` or a retryable failure result.
 */
function validateStructuredContract(exec, ir) {
  const wantsStructured = Boolean(ir.constraints?.structuredOutput);
  if (!wantsStructured) {
    return { ok: true, response: exec.response };
  }

  const retryable = (errorCode, message) => ({
    ok: false,
    status: exec.status,
    errorType: "retryable",
    errorCode,
    message,
    raw: exec.response.raw
  });

  const finish = (exec.response.finishReason ?? "").toLowerCase();
  const truncated = finish === "max_tokens" || finish === "length";
  if (truncated) {
    return retryable(
      "max_tokens_on_structured_output",
      `Provider returned finishReason="${exec.response.finishReason}" on a structured-output call \u2014 output truncated mid-token, JSON cannot be valid`
    );
  }

  if (!exec.response.text) {
    return { ok: true, response: exec.response };
  }

  let structuredOutput;
  try {
    structuredOutput = JSON.parse(exec.response.text);
  } catch (err) {
    return retryable(
      "structured_output_parse_failed",
      err instanceof Error ? err.message : String(err)
    );
  }
  return { ok: true, response: { ...exec.response, structuredOutput } };
}
|
|
2293
|
+
/**
 * Map the first non-successful attempt's error code to a coarse fallback reason.
 *
 * @param {Array<{status: string, errorCode?: string}>} attempts - Attempt log in order.
 * @returns {"rate_limit" | "cliff" | "cost_cap" | "provider_error" | undefined}
 *   undefined when no attempt failed.
 */
function normalizeFallbackReason(attempts) {
  const firstFailure = attempts.find((attempt) => attempt.status !== "success");
  if (!firstFailure) {
    return undefined;
  }
  switch (firstFailure.errorCode ?? "") {
    case "rate_limit_429":
    case "rate_limit":
      return "rate_limit";
    case "max_tokens_on_structured_output":
    case "structured_output_parse_failed":
      return "cliff";
    case "cost_cap_exceeded":
      return "cost_cap";
    default:
      return "provider_error";
  }
}
|
|
2304
|
+
|
|
2305
|
+
// src/oracle.ts
|
|
2306
|
+
// Dimensions scored when the caller does not supply its own list.
var DEFAULT_DIMENSIONS = ["correctness", "completeness", "conciseness", "format"];
// Timestamps (ms) of recent judge calls. Module-level, so the rate-limit
// window is shared by every judge built from this module.
var judgeCallTimes = [];
/**
 * Build an async judge function that scores a response via `opts.judgeCall`.
 *
 * The returned function never rejects because of `judgeCall`: a thrown error
 * becomes a neutral 0.5 score with the error text as rationale. When
 * `opts.rateLimitPerMin` is truthy and that many calls were already admitted
 * in the last 60 seconds, it returns a neutral 0.5 without calling the judge.
 *
 * @param {object} opts - `judgeCall(prompt)` (required), optional `dimensions`
 *   and `rateLimitPerMin`.
 * @returns {(ctx: object) => Promise<object>} Judge function.
 */
function buildLLMJudge(opts) {
  const dimensions = opts.dimensions ?? DEFAULT_DIMENSIONS;
  const rateLimit = opts.rateLimitPerMin;

  // Returns true when the call may proceed (and logs it), false when throttled.
  const admit = () => {
    if (!rateLimit) {
      return true;
    }
    const now = Date.now();
    judgeCallTimes = judgeCallTimes.filter((stamp) => now - stamp < 6e4);
    if (judgeCallTimes.length >= rateLimit) {
      return false;
    }
    judgeCallTimes.push(now);
    return true;
  };

  return async (ctx) => {
    if (!admit()) {
      return { score: 0.5, rationale: "judge rate-limited; default neutral" };
    }
    const prompt = buildJudgePrompt(ctx, dimensions);
    let raw;
    try {
      raw = await opts.judgeCall(prompt);
    } catch (err) {
      const detail = err instanceof Error ? err.message : String(err);
      return { score: 0.5, rationale: `judge error: ${detail}` };
    }
    return parseJudgeOutput(raw, dimensions);
  };
}
|
|
2333
|
+
/**
 * Render the scoring prompt sent to the judge model.
 *
 * The response under review is truncated to its first 4000 units, and a missing
 * user turn is shown as "<not provided>". The prompt asks for a JSON object
 * with an overall score, one score per dimension, and a one-sentence rationale.
 *
 * @param {object} ctx - Reads `archetype`, `appId`, `intentName`, `hadTools`,
 *   `userTurn` and `response`.
 * @param {string[]} dimensions - Dimension names listed in the requested JSON shape.
 * @returns {string} The prompt text.
 */
function buildJudgePrompt(ctx, dimensions) {
  const promptLines = [
    "You are an oracle scoring an AI response. Output ONLY a JSON object, no other text.",
    "",
    `Intent archetype: ${ctx.archetype}`,
    `App: ${ctx.appId} / ${ctx.intentName}`,
    `Tools were involved: ${ctx.hadTools}`,
    "",
    "User asked:",
    `${ctx.userTurn ?? "<not provided>"}`,
    "",
    "Response:",
    `${ctx.response.slice(0, 4e3)}`,
    "",
    "Score from 0.0 to 1.0 on each dimension. 1.0 = perfect, 0.5 = mediocre, 0.0 = wrong.",
    "Be honest \u2014 most responses are 0.5-0.8. Reserve 0.9+ for genuinely excellent.",
    "",
    "Respond with this JSON shape:",
    "{",
    '"score": <overall 0..1>,',
    `"dimensions": { ${dimensions.map((name) => `"${name}": <0..1>`).join(", ")} },`,
    '"rationale": "<one sentence>"',
    "}"
  ];
  return promptLines.join("\n");
}
|
|
2356
|
+
/**
 * Turn the judge model's raw reply into a normalized score object.
 *
 * Parsing is best-effort and never throws on bad input:
 *  - a non-string reply (e.g. a `judgeCall` that resolves to undefined or an
 *    object) is treated as unparseable instead of crashing on `.replace`;
 *  - a leading/trailing Markdown code fence is stripped;
 *  - if the whole text is not JSON, the widest `{ ... }` span is tried;
 *  - missing or non-numeric scores default to 0.5 and all scores are clamped.
 *
 * @param {unknown} raw - Reply from the judge model.
 * @param {string[]} dimensions - Dimension names to read from `dimensions`.
 * @returns {{score: number, dimensions?: object, rationale?: string}}
 *   `dimensions` is undefined when the reply has no usable dimensions object
 *   (or no dimension names were requested).
 */
function parseJudgeOutput(raw, dimensions) {
  const unparseable = () => ({ score: 0.5, rationale: "judge output unparseable" });
  if (typeof raw !== "string") {
    return unparseable();
  }
  const cleaned = raw.replace(/^```(?:json)?\s*/i, "").replace(/\s*```\s*$/, "").trim();
  let parsed;
  try {
    parsed = JSON.parse(cleaned);
  } catch {
    // The model may have wrapped the JSON in prose; retry on the outermost braces.
    const match = cleaned.match(/\{[\s\S]*\}/);
    if (!match) {
      return unparseable();
    }
    try {
      parsed = JSON.parse(match[0]);
    } catch {
      return unparseable();
    }
  }
  if (!parsed || typeof parsed !== "object") {
    return { score: 0.5, rationale: "judge output not an object" };
  }
  const score = clamp(typeof parsed.score === "number" ? parsed.score : 0.5);
  const dims = {};
  if (parsed.dimensions && typeof parsed.dimensions === "object") {
    for (const d of dimensions) {
      const v = parsed.dimensions[d];
      dims[d] = clamp(typeof v === "number" ? v : 0.5);
    }
  }
  const rationale = typeof parsed.rationale === "string" ? parsed.rationale : void 0;
  return { score, dimensions: Object.keys(dims).length > 0 ? dims : void 0, rationale };
}
|
|
2388
|
+
/**
 * Clamp a score into [0, 1]; anything that is not a finite number becomes 0.5.
 *
 * @param {unknown} n - Candidate score.
 * @returns {number} Value in [0, 1].
 */
function clamp(n) {
  return Number.isFinite(n) ? Math.min(1, Math.max(0, n)) : 0.5;
}
|
|
2392
|
+
|
|
2393
|
+
// src/fallback.ts
|
|
2394
|
+
// Default model chain per intent archetype: the first entry is the primary,
// later entries are tried in order as fallbacks. Each archetype owns its own
// array so callers that mutate one chain cannot affect another.
var STARTER_CHAINS = (() => {
  const OPUS = "claude-opus-4-7";
  const SONNET = "claude-sonnet-4-6";
  const HAIKU = "claude-haiku-4-5";
  const GEMINI_PRO = "gemini-2.5-pro";
  const GEMINI_FLASH = "gemini-2.5-flash";
  const GEMINI_FLASH_LITE = "gemini-2.5-flash-lite";
  const DEEPSEEK_PRO = "deepseek-v4-pro";
  const DEEPSEEK_FLASH = "deepseek-v4-flash";
  return {
    // Reasoning floor: starts at Opus, then Sonnet, then cross-provider Gemini Pro.
    critique: [OPUS, SONNET, GEMINI_PRO],
    // Reasoning matters: Sonnet primary, walks UP to Opus on failure (a rare
    // exception to "always cheaper"), then Gemini Pro, with DeepSeek Pro last.
    plan: [SONNET, OPUS, GEMINI_PRO, DEEPSEEK_PRO],
    // Quality/cost balance: Sonnet → Haiku within one provider, Gemini Pro as
    // the cross-provider step, Flash as the floor.
    generate: [SONNET, HAIKU, GEMINI_PRO, GEMINI_FLASH],
    // Same chain as `generate`.
    ask: [SONNET, HAIKU, GEMINI_PRO, GEMINI_FLASH],
    // Structured-output archetype: Flash is left out (alpha.8 MAX_TOKENS cliff)
    // and so is DeepSeek (no brain evidence).
    extract: [SONNET, HAIKU, GEMINI_PRO],
    // Forgiving archetype: Sonnet primary, Flash is an acceptable floor.
    transform: [SONNET, HAIKU, GEMINI_PRO, GEMINI_FLASH],
    // Flash leads as the parallel-tool throughput champion (L-040); then Gemini
    // Pro, Sonnet as the quality safety net when Flash is blocked, and Haiku
    // last (reduced tool budget — cliff at 16 fires).
    hunt: [GEMINI_FLASH, GEMINI_PRO, SONNET, HAIKU],
    // Cost-sensitive and tolerant: DeepSeek as first fallback (brain evidence),
    // Haiku for quality safety, Flash-Lite as emergency floor (onboarded s22).
    summarize: [GEMINI_FLASH, DEEPSEEK_FLASH, HAIKU, GEMINI_FLASH_LITE],
    // DeepSeek as first fallback (brain-validated: 169 rows, 0% empty), then
    // Haiku; Flash-Lite floors repeat-prompt workloads (cache-discount 10×).
    classify: [GEMINI_FLASH, DEEPSEEK_FLASH, HAIKU, GEMINI_FLASH_LITE]
  };
})();
|
|
2463
|
+
/**
 * Build the default model chain for an archetype.
 *
 * Steps, in order: start from the archetype's starter chain; if `primary` is
 * given, move/insert it at the front; drop `policy.blockedModels`; remove
 * duplicates (first occurrence wins); optionally keep only models that
 * `isModelReachable` accepts; truncate to `maxDepth`.
 *
 * The archetype is looked up as an OWN property of STARTER_CHAINS, so names
 * inherited from Object.prototype (e.g. "toString", "constructor") are
 * reported as unknown archetypes instead of crashing later with a TypeError.
 *
 * @param {object} opts
 * @param {string} opts.archetype - Intent archetype key.
 * @param {string} [opts.primary] - Model to place first.
 * @param {number} [opts.maxDepth=3] - Maximum chain length (must be >= 1).
 * @param {object} [opts.policy] - `blockedModels` is honoured when non-empty.
 * @param {object} [opts.reachability] - When set, passed to `isModelReachable`.
 * @returns {string[]} A new array of model ids (possibly empty after filtering).
 * @throws {Error} On `maxDepth < 1` or an unknown archetype.
 */
function getDefaultFallbackChain(opts) {
  const { archetype, primary, maxDepth = 3, policy, reachability } = opts;
  if (maxDepth < 1) {
    throw new Error(
      `getDefaultFallbackChain: maxDepth must be >= 1, got ${maxDepth}`
    );
  }
  const starter = Object.prototype.hasOwnProperty.call(STARTER_CHAINS, archetype) ? STARTER_CHAINS[archetype] : void 0;
  if (!starter) {
    throw new Error(
      `getDefaultFallbackChain: unknown archetype "${archetype}". Known: ${Object.keys(STARTER_CHAINS).join(", ")}`
    );
  }
  let chain = primary ? [primary, ...starter.filter((id) => id !== primary)] : [...starter];
  if (policy?.blockedModels && policy.blockedModels.length > 0) {
    const blocked = new Set(policy.blockedModels);
    chain = chain.filter((id) => !blocked.has(id));
  }
  // A Set keeps insertion order, so this keeps the first occurrence of each id.
  const deduped = [...new Set(chain)];
  const filtered = reachability ? deduped.filter((id) => isModelReachable(id, reachability)) : deduped;
  return filtered.slice(0, maxDepth);
}
|
|
2500
|
+
/**
 * Return a copy of the starter chain for one archetype.
 *
 * The archetype must be an OWN key of STARTER_CHAINS; names inherited from
 * Object.prototype (e.g. "toString") get the "unknown archetype" error rather
 * than a TypeError from spreading a non-array.
 *
 * @param {string} archetype - Intent archetype key.
 * @returns {string[]} A new array, safe for the caller to mutate.
 * @throws {Error} When the archetype is unknown.
 */
function getStarterChain(archetype) {
  const chain = Object.prototype.hasOwnProperty.call(STARTER_CHAINS, archetype) ? STARTER_CHAINS[archetype] : void 0;
  if (!chain) {
    throw new Error(
      `getStarterChain: unknown archetype "${archetype}"`
    );
  }
  return [...chain];
}
|
|
2509
|
+
/**
 * Return every starter chain, keyed by archetype.
 *
 * Both the outer object and each chain array are fresh copies, so callers may
 * mutate the result without touching STARTER_CHAINS.
 *
 * @returns {Record<string, string[]>} Copy of all starter chains.
 */
function getAllStarterChains() {
  return Object.fromEntries(
    Object.entries(STARTER_CHAINS).map(([archetype, chain]) => [archetype, [...chain]])
  );
}
|
|
2516
|
+
|
|
2517
|
+
// src/index.ts
|
|
2518
|
+
/**
 * Compile `ir` and register the result via `registerCompile` under the IR's
 * app id and intent archetype.
 *
 * Unlike the internal compile step, `opts` is passed to `compile` untouched.
 *
 * @param {object} ir - Request IR; `ir.appId` and `ir.intent.archetype` are read.
 * @param {object} [opts] - Compile options, forwarded as-is.
 * @returns {object} The compile result.
 */
function compile2(ir, opts) {
  const compiled = compile(ir, opts);
  const { appId, intent } = ir;
  registerCompile(appId, intent.archetype, ir, compiled);
  return compiled;
}
|
|
2523
|
+
// Export-name annotation so Node can expose these CommonJS exports as named
// ESM imports. The `0 &&` guard short-circuits, so the assignment never runs;
// keep it a plain object literal of shorthand identifiers.
0 && (module.exports = {
  ALIASES, ALL_ARCHETYPES, CallError, DIALECT_VERSION,
  INTENT_ARCHETYPES, PROVIDER_ENV_KEYS,
  allProfiles, bucketContext, bucketHistory, bucketToolCount,
  buildLLMJudge, call, clearBrain, compile,
  configureBrain, countTokens, execute,
  getAllStarterChains, getDefaultFallbackChain, getProfile,
  getReachabilityDiagnostic, getStarterChain,
  hashShape, isArchetype, isModelReachable, isProviderReachable,
  learningKey, profilesByProvider, record,
  resetTokenizer, resolveProviderKey, runAdvisor,
  setTokenizer, tryGetProfile
});
|