@warmdrift/kgauto-compiler 2.0.0-alpha.3 → 2.0.0-alpha.30
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +87 -3
- package/dist/chunk-JQGRWJZO.mjs +1216 -0
- package/dist/chunk-NBO4R5PC.mjs +313 -0
- package/dist/chunk-RO22VFIF.mjs +29 -0
- package/dist/chunk-WXCFWUCN.mjs +678 -0
- package/dist/glassbox/index.d.mts +59 -0
- package/dist/glassbox/index.d.ts +59 -0
- package/dist/glassbox/index.js +312 -0
- package/dist/glassbox/index.mjs +12 -0
- package/dist/glassbox-routes/index.d.mts +242 -0
- package/dist/glassbox-routes/index.d.ts +242 -0
- package/dist/glassbox-routes/index.js +2452 -0
- package/dist/glassbox-routes/index.mjs +652 -0
- package/dist/index.d.mts +1179 -11
- package/dist/index.d.ts +1179 -11
- package/dist/index.js +3475 -236
- package/dist/index.mjs +1560 -78
- package/dist/ir-BIAT9gJk.d.ts +1031 -0
- package/dist/ir-De2AQtlr.d.mts +1031 -0
- package/dist/profiles.d.mts +137 -2
- package/dist/profiles.d.ts +137 -2
- package/dist/profiles.js +820 -11
- package/dist/profiles.mjs +5 -1
- package/dist/types-BjrIFPGe.d.mts +131 -0
- package/dist/types-D_JAhCv4.d.ts +131 -0
- package/package.json +12 -2
- package/dist/chunk-MBEI5UOM.mjs +0 -409
- package/dist/profiles-BiyrF36f.d.mts +0 -489
- package/dist/profiles-C5lVqF8_.d.ts +0 -489
|
@@ -0,0 +1,1216 @@
|
|
|
1
|
+
// src/profiles.ts
|
|
2
|
+
/**
 * Baseline "lowering" settings for Anthropic models.
 *
 * The Anthropic entries in PROFILES_RAW assign this object directly
 * (`lowering: ANTHROPIC_LOWERING_BASE`), so every one of them shares the same
 * reference. Treat it as read-only: mutating it would change all of those
 * profiles at once.
 *
 * Fields:
 *  - system.mode       "inline" system-prompt handling.
 *  - cache.strategy    "cache_control" caching strategy.
 *  - cache.minTokens   1024 — presumably the smallest prompt size eligible
 *                      for caching (TODO confirm against the consumer).
 *  - cache.discount    0.1 — other comments in this file describe 0.25 as
 *                      "4x" and 0.1 as "10x", i.e. cached input is billed at
 *                      this fraction of the normal input price.
 *  - cache.ttlSeconds  300 — presumably the cache lifetime in seconds
 *                      (TODO confirm).
 *  - tools.format      "anthropic" tool-definition wire format.
 */
var ANTHROPIC_LOWERING_BASE = {
  system: { mode: "inline" },
  cache: {
    strategy: "cache_control",
    minTokens: 1024,
    discount: 0.1,
    ttlSeconds: 300
  },
  tools: { format: "anthropic" }
};
|
|
12
|
+
/**
 * Baseline "lowering" settings for Google (Gemini) models.
 *
 * The Gemini entries in PROFILES_RAW spread this object into their own
 * `lowering` value (`...GOOGLE_LOWERING_BASE`) and then add a `thinking`
 * entry; some also override `cache.discount`. The spread is shallow, so the
 * nested `system`, `cache` and `tools` objects are still shared by reference
 * unless a profile replaces them — do not mutate them in place.
 *
 * Fields:
 *  - system.mode / system.field  system prompt is sent separately, in the
 *                                "systemInstruction" field.
 *  - cache.strategy              "cachedContent" caching strategy.
 *  - cache.minTokens             4096 — presumably the smallest prompt size
 *                                eligible for caching (TODO confirm).
 *  - cache.discount              0.25 — other comments in this file describe
 *                                this value as a "4x" cache discount, i.e.
 *                                cached input is billed at this fraction of
 *                                the normal input price.
 *  - cache.ttlSeconds            3600 — presumably the cache lifetime in
 *                                seconds (TODO confirm).
 *  - tools.format                "google" tool-definition wire format.
 */
var GOOGLE_LOWERING_BASE = {
  system: { mode: "separate", field: "systemInstruction" },
  cache: {
    strategy: "cachedContent",
    minTokens: 4096,
    discount: 0.25,
    ttlSeconds: 3600
  },
  tools: { format: "google" }
};
|
|
22
|
+
var PROFILES_RAW = [
|
|
23
|
+
// ── Anthropic ──
|
|
24
|
+
{
|
|
25
|
+
id: "claude-opus-4-7",
|
|
26
|
+
verifiedAgainstDocs: "2026-05-08",
|
|
27
|
+
provider: "anthropic",
|
|
28
|
+
status: "current",
|
|
29
|
+
maxContextTokens: 1e6,
|
|
30
|
+
maxOutputTokens: 128e3,
|
|
31
|
+
maxTools: 64,
|
|
32
|
+
parallelToolCalls: true,
|
|
33
|
+
structuredOutput: "grammar",
|
|
34
|
+
systemPromptMode: "inline",
|
|
35
|
+
streaming: true,
|
|
36
|
+
cliffs: [],
|
|
37
|
+
costInputPer1m: 5,
|
|
38
|
+
costOutputPer1m: 25,
|
|
39
|
+
lowering: ANTHROPIC_LOWERING_BASE,
|
|
40
|
+
recovery: [
|
|
41
|
+
{
|
|
42
|
+
signal: "rate_limit",
|
|
43
|
+
action: "escalate",
|
|
44
|
+
reason: "429 from Anthropic \u2014 escalate to fallback chain"
|
|
45
|
+
},
|
|
46
|
+
{
|
|
47
|
+
signal: "model_not_found",
|
|
48
|
+
action: "escalate",
|
|
49
|
+
reason: "Model deprecated/renamed \u2014 escalate (L-061)"
|
|
50
|
+
}
|
|
51
|
+
],
|
|
52
|
+
strengths: ["reasoning", "agentic_coding", "long_context", "reliable_tool_use", "structured_output"],
|
|
53
|
+
weaknesses: ["cost", "latency"],
|
|
54
|
+
notes: "Frontier (2026-05). Step-change improvement over 4.6 in agentic coding. Adaptive thinking only \u2014 no extended-thinking toggle. 1M context, 128k max output.",
|
|
55
|
+
// Frontier perf. Drops on archetypes where parallel-tool throughput
|
|
56
|
+
// (hunt) or low-budget cost-sensitivity (classify/summarize) matters
|
|
57
|
+
// more than reasoning depth.
|
|
58
|
+
archetypePerf: {
|
|
59
|
+
critique: 10,
|
|
60
|
+
plan: 10,
|
|
61
|
+
generate: 9,
|
|
62
|
+
ask: 9,
|
|
63
|
+
extract: 9,
|
|
64
|
+
transform: 9,
|
|
65
|
+
hunt: 8,
|
|
66
|
+
// strong but Flash dominates parallel tool throughput
|
|
67
|
+
summarize: 8,
|
|
68
|
+
// overkill for tolerant archetype; cost-out of frontier
|
|
69
|
+
classify: 8
|
|
70
|
+
// overkill; brain-validated cheaper models cover this
|
|
71
|
+
}
|
|
72
|
+
},
|
|
73
|
+
{
|
|
74
|
+
id: "claude-opus-4-6",
|
|
75
|
+
verifiedAgainstDocs: "2026-05-08",
|
|
76
|
+
provider: "anthropic",
|
|
77
|
+
status: "legacy",
|
|
78
|
+
maxContextTokens: 1e6,
|
|
79
|
+
maxOutputTokens: 128e3,
|
|
80
|
+
maxTools: 64,
|
|
81
|
+
parallelToolCalls: true,
|
|
82
|
+
structuredOutput: "grammar",
|
|
83
|
+
systemPromptMode: "inline",
|
|
84
|
+
streaming: true,
|
|
85
|
+
cliffs: [],
|
|
86
|
+
costInputPer1m: 5,
|
|
87
|
+
costOutputPer1m: 25,
|
|
88
|
+
lowering: ANTHROPIC_LOWERING_BASE,
|
|
89
|
+
recovery: [
|
|
90
|
+
{
|
|
91
|
+
signal: "rate_limit",
|
|
92
|
+
action: "escalate",
|
|
93
|
+
reason: "429 from Anthropic \u2014 escalate to fallback chain"
|
|
94
|
+
},
|
|
95
|
+
{
|
|
96
|
+
signal: "model_not_found",
|
|
97
|
+
action: "escalate",
|
|
98
|
+
reason: "Model deprecated/renamed \u2014 escalate (L-061)"
|
|
99
|
+
}
|
|
100
|
+
],
|
|
101
|
+
strengths: ["reasoning", "long_context", "reliable_tool_use", "structured_output", "extended_thinking"],
|
|
102
|
+
weaknesses: ["cost", "latency"],
|
|
103
|
+
notes: "Predecessor to 4.7. Still current in Anthropic legacy table. Same pricing as 4.7 \u2014 choose 4.7 unless you need extended-thinking budget control (4.7 is adaptive-only).",
|
|
104
|
+
// One notch below 4.7 on critique/plan/hunt, equal elsewhere — extended-thinking edge does
|
|
105
|
+
// not flip any archetype ranking. Legacy: chains should prefer 4.7.
|
|
106
|
+
archetypePerf: {
|
|
107
|
+
critique: 9,
|
|
108
|
+
plan: 9,
|
|
109
|
+
generate: 9,
|
|
110
|
+
ask: 9,
|
|
111
|
+
extract: 9,
|
|
112
|
+
transform: 9,
|
|
113
|
+
hunt: 7,
|
|
114
|
+
summarize: 8,
|
|
115
|
+
classify: 8
|
|
116
|
+
}
|
|
117
|
+
},
|
|
118
|
+
{
|
|
119
|
+
id: "claude-sonnet-4-6",
|
|
120
|
+
verifiedAgainstDocs: "2026-05-08",
|
|
121
|
+
provider: "anthropic",
|
|
122
|
+
status: "current",
|
|
123
|
+
maxContextTokens: 1e6,
|
|
124
|
+
maxOutputTokens: 64e3,
|
|
125
|
+
maxTools: 64,
|
|
126
|
+
parallelToolCalls: true,
|
|
127
|
+
structuredOutput: "grammar",
|
|
128
|
+
systemPromptMode: "inline",
|
|
129
|
+
streaming: true,
|
|
130
|
+
cliffs: [],
|
|
131
|
+
costInputPer1m: 3,
|
|
132
|
+
costOutputPer1m: 15,
|
|
133
|
+
lowering: ANTHROPIC_LOWERING_BASE,
|
|
134
|
+
recovery: [
|
|
135
|
+
{ signal: "rate_limit", action: "escalate", reason: "429 \u2014 escalate" },
|
|
136
|
+
{ signal: "model_not_found", action: "escalate", reason: "Deprecated \u2014 escalate (L-061)" }
|
|
137
|
+
],
|
|
138
|
+
strengths: ["quality", "tool_use", "long_context", "cache_friendly", "extended_thinking"],
|
|
139
|
+
weaknesses: [],
|
|
140
|
+
notes: "Workhorse. Best price/quality for most multi-turn agentic work. 1M context, 64k max output.",
|
|
141
|
+
// Master plan §6.2 anchor. Tier 0 for plan/generate/ask/extract/transform
|
|
142
|
+
// in starter chains; tier 1 cross-provider for hunt/summarize/classify.
|
|
143
|
+
archetypePerf: {
|
|
144
|
+
ask: 9,
|
|
145
|
+
generate: 9,
|
|
146
|
+
plan: 9,
|
|
147
|
+
critique: 9,
|
|
148
|
+
extract: 9,
|
|
149
|
+
transform: 9,
|
|
150
|
+
hunt: 7,
|
|
151
|
+
// strong but Flash beats on parallel tool throughput
|
|
152
|
+
summarize: 8,
|
|
153
|
+
// overkill for tolerant archetype
|
|
154
|
+
classify: 8
|
|
155
|
+
// overkill
|
|
156
|
+
}
|
|
157
|
+
},
|
|
158
|
+
{
|
|
159
|
+
id: "claude-haiku-4-5",
|
|
160
|
+
verifiedAgainstDocs: "2026-05-08",
|
|
161
|
+
provider: "anthropic",
|
|
162
|
+
status: "current",
|
|
163
|
+
maxContextTokens: 2e5,
|
|
164
|
+
maxOutputTokens: 64e3,
|
|
165
|
+
maxTools: 32,
|
|
166
|
+
parallelToolCalls: true,
|
|
167
|
+
structuredOutput: "grammar",
|
|
168
|
+
systemPromptMode: "inline",
|
|
169
|
+
streaming: true,
|
|
170
|
+
cliffs: [
|
|
171
|
+
{
|
|
172
|
+
metric: "tool_count",
|
|
173
|
+
threshold: 16,
|
|
174
|
+
action: "drop_to_top_relevant",
|
|
175
|
+
reason: "Haiku reliability degrades above ~16 tools"
|
|
176
|
+
}
|
|
177
|
+
],
|
|
178
|
+
costInputPer1m: 1,
|
|
179
|
+
costOutputPer1m: 5,
|
|
180
|
+
lowering: ANTHROPIC_LOWERING_BASE,
|
|
181
|
+
recovery: [
|
|
182
|
+
{ signal: "rate_limit", action: "escalate", reason: "429 \u2014 escalate to Sonnet" }
|
|
183
|
+
],
|
|
184
|
+
strengths: ["speed", "cost", "classification", "cache_friendly", "extended_thinking"],
|
|
185
|
+
weaknesses: ["complex_reasoning", "large_tool_sets"],
|
|
186
|
+
notes: "Cheapest Anthropic. Great for classify, summarize, ask shapes. 200k context, 64k max output. API alias `claude-haiku-4-5` resolves to dated snapshot `claude-haiku-4-5-20251001`.",
|
|
187
|
+
// Tier 1 cross-provider anchor for short-output chains (classify/
|
|
188
|
+
// summarize/extract/transform). Falls off on plan/critique where
|
|
189
|
+
// reasoning depth matters; competes with Pro on cost+latency.
|
|
190
|
+
archetypePerf: {
|
|
191
|
+
classify: 8,
|
|
192
|
+
summarize: 8,
|
|
193
|
+
ask: 7,
|
|
194
|
+
transform: 7,
|
|
195
|
+
extract: 7,
|
|
196
|
+
hunt: 6,
|
|
197
|
+
// tool reliability drops at 16 — cliff guard fires
|
|
198
|
+
generate: 6,
|
|
199
|
+
plan: 5,
|
|
200
|
+
critique: 4
|
|
201
|
+
// reasoning depth gap vs Sonnet/Opus
|
|
202
|
+
}
|
|
203
|
+
},
|
|
204
|
+
// ── Google ──
|
|
205
|
+
{
|
|
206
|
+
id: "gemini-2.5-flash",
|
|
207
|
+
verifiedAgainstDocs: "2026-05-08",
|
|
208
|
+
provider: "google",
|
|
209
|
+
status: "current",
|
|
210
|
+
maxContextTokens: 1048576,
|
|
211
|
+
maxOutputTokens: 65535,
|
|
212
|
+
maxTools: 128,
|
|
213
|
+
parallelToolCalls: true,
|
|
214
|
+
structuredOutput: "native",
|
|
215
|
+
systemPromptMode: "separate",
|
|
216
|
+
streaming: true,
|
|
217
|
+
cliffs: [
|
|
218
|
+
{
|
|
219
|
+
metric: "input_tokens",
|
|
220
|
+
threshold: 8e3,
|
|
221
|
+
action: "downgrade_quality_warning",
|
|
222
|
+
reason: "Quality degrades significantly above ~8K context tokens"
|
|
223
|
+
},
|
|
224
|
+
{
|
|
225
|
+
metric: "tool_count",
|
|
226
|
+
threshold: 20,
|
|
227
|
+
action: "drop_to_top_relevant",
|
|
228
|
+
reason: "Tool reliability drops above ~20 tools (despite 128 hard limit)"
|
|
229
|
+
},
|
|
230
|
+
{
|
|
231
|
+
metric: "thinking_with_short_output",
|
|
232
|
+
threshold: 1,
|
|
233
|
+
action: "force_thinking_budget_zero",
|
|
234
|
+
reason: "Thinking tokens consume maxOutputTokens \u2014 empty response if drained"
|
|
235
|
+
},
|
|
236
|
+
{
|
|
237
|
+
// s11 trust artifact (2026-05-02): brain showed 5/5 empty rate on
|
|
238
|
+
// tt-intelligence/summarize/gemini-2.5-flash with tools offered.
|
|
239
|
+
// v1's disable_thinking_for_short_output already fired and didn't
|
|
240
|
+
// help — disabling thinking is necessary but not sufficient. Tools
|
|
241
|
+
// present + summarize intent confuses Flash into a no-output state
|
|
242
|
+
// (likely tool-decision purgatory). Strip tools entirely for this
|
|
243
|
+
// archetype on this model.
|
|
244
|
+
metric: "tool_count",
|
|
245
|
+
threshold: 1,
|
|
246
|
+
whenIntent: "summarize",
|
|
247
|
+
action: "strip_tools",
|
|
248
|
+
reason: "Gemini Flash returns empty when summarize intent has tools offered (5/5 empty rate observed in v1 prod 2026-04-19, replayed into v2 brain 2026-04-29)"
|
|
249
|
+
}
|
|
250
|
+
],
|
|
251
|
+
costInputPer1m: 0.3,
|
|
252
|
+
costOutputPer1m: 2.5,
|
|
253
|
+
lowering: {
|
|
254
|
+
...GOOGLE_LOWERING_BASE,
|
|
255
|
+
thinking: { field: "generationConfig.thinkingConfig.thinkingBudget", default: "auto" }
|
|
256
|
+
},
|
|
257
|
+
recovery: [
|
|
258
|
+
{
|
|
259
|
+
signal: "empty_response_after_tool",
|
|
260
|
+
action: "retry_with_params",
|
|
261
|
+
retryParams: { "generationConfig.thinkingConfig.thinkingBudget": 0 },
|
|
262
|
+
maxRetries: 1,
|
|
263
|
+
reason: "Known: empty after tool result \u2014 retry with thinking off"
|
|
264
|
+
},
|
|
265
|
+
{
|
|
266
|
+
signal: "empty_response",
|
|
267
|
+
action: "retry_with_params",
|
|
268
|
+
retryParams: { "generationConfig.thinkingConfig.thinkingBudget": 0 },
|
|
269
|
+
maxRetries: 1,
|
|
270
|
+
reason: "Empty response \u2014 try with thinking off"
|
|
271
|
+
},
|
|
272
|
+
{
|
|
273
|
+
signal: "malformed_function_call",
|
|
274
|
+
action: "escalate",
|
|
275
|
+
reason: "MALFORMED_FUNCTION_CALL maps to stop \u2014 escalate to next target"
|
|
276
|
+
}
|
|
277
|
+
],
|
|
278
|
+
strengths: ["speed", "volume", "classification", "1m_context", "cost"],
|
|
279
|
+
weaknesses: ["complex_schemas", "large_tool_sets", "high_context_quality"],
|
|
280
|
+
notes: "Fast and cheap with 1M context. Quality cliffs at 8K context and 20 tools \u2014 guard with cliffs.",
|
|
281
|
+
// Master plan §6.2 anchor. Tier 0 for hunt (parallel tool throughput
|
|
282
|
+
// 15-75 calls/step beats Sonnet — L-040), summarize, classify.
|
|
283
|
+
archetypePerf: {
|
|
284
|
+
hunt: 9,
|
|
285
|
+
// L-040: parallel tool throughput 15-75/step
|
|
286
|
+
classify: 7,
|
|
287
|
+
// brain-validated, 218 rows
|
|
288
|
+
summarize: 7,
|
|
289
|
+
// brain-validated; cliff strips tools when present
|
|
290
|
+
transform: 7,
|
|
291
|
+
ask: 7,
|
|
292
|
+
generate: 6,
|
|
293
|
+
plan: 5,
|
|
294
|
+
extract: 6,
|
|
295
|
+
// alpha.8 MAX_TOKENS history on structured output
|
|
296
|
+
critique: 4
|
|
297
|
+
// reasoning shallower than Sonnet/Opus
|
|
298
|
+
}
|
|
299
|
+
},
|
|
300
|
+
{
|
|
301
|
+
// ── Gemini 2.5 Flash-Lite ──
|
|
302
|
+
// Onboarded 2026-05-13 (s22) after the model-release watcher surfaced
|
|
303
|
+
// it as a UNREGISTERED + NEW candidate. Released by Google July 2025,
|
|
304
|
+
// stable. Positioned BELOW Flash on the cost/perf frontier:
|
|
305
|
+
// input $0.10/M (Flash $0.30/M) — 3× cheaper
|
|
306
|
+
// output $0.40/M (Flash $2.50/M) — 6× cheaper
|
|
307
|
+
// cache $0.01/M — 1/10 of input (vs Flash 0.25 discount)
|
|
308
|
+
// Cliffs are HYPOTHESIZED from Flash's known failure modes — Flash-Lite
|
|
309
|
+
// is a smaller sibling, so we inherit Flash's cliffs at equal-or-tighter
|
|
310
|
+
// thresholds. The brain will validate/relax these as evidence accumulates
|
|
311
|
+
// per (archetype, model) tuple. Currently ZERO brain rows for this model.
|
|
312
|
+
id: "gemini-2.5-flash-lite",
|
|
313
|
+
verifiedAgainstDocs: "2026-05-13",
|
|
314
|
+
provider: "google",
|
|
315
|
+
status: "current",
|
|
316
|
+
maxContextTokens: 1048576,
|
|
317
|
+
maxOutputTokens: 65536,
|
|
318
|
+
maxTools: 128,
|
|
319
|
+
parallelToolCalls: true,
|
|
320
|
+
structuredOutput: "native",
|
|
321
|
+
systemPromptMode: "separate",
|
|
322
|
+
streaming: true,
|
|
323
|
+
cliffs: [
|
|
324
|
+
{
|
|
325
|
+
metric: "input_tokens",
|
|
326
|
+
threshold: 8e3,
|
|
327
|
+
action: "downgrade_quality_warning",
|
|
328
|
+
reason: "Inherited from Flash: quality degrades above ~8K. Smaller model \u2014 likely degrades faster. Re-tune from brain after n\u226520."
|
|
329
|
+
},
|
|
330
|
+
{
|
|
331
|
+
metric: "tool_count",
|
|
332
|
+
threshold: 10,
|
|
333
|
+
action: "drop_to_top_relevant",
|
|
334
|
+
reason: "Conservative: Flash drops at 20, Flash-Lite is smaller \u2014 assume tighter ceiling until brain proves otherwise."
|
|
335
|
+
},
|
|
336
|
+
{
|
|
337
|
+
metric: "thinking_with_short_output",
|
|
338
|
+
threshold: 1,
|
|
339
|
+
action: "force_thinking_budget_zero",
|
|
340
|
+
reason: "Thinking enabled per Google API (thinking: true). Same drain risk as Flash \u2014 thinking tokens consume maxOutputTokens."
|
|
341
|
+
},
|
|
342
|
+
{
|
|
343
|
+
// Strong prior: Flash hit 5/5 empty rate on summarize+tools (s11
|
|
344
|
+
// trust artifact, kgauto commit 3872832). Flash-Lite shares the
|
|
345
|
+
// same architectural family — almost certainly inherits this cliff.
|
|
346
|
+
// Ship the guard preemptively; brain telemetry confirms or relaxes.
|
|
347
|
+
metric: "tool_count",
|
|
348
|
+
threshold: 1,
|
|
349
|
+
whenIntent: "summarize",
|
|
350
|
+
action: "strip_tools",
|
|
351
|
+
reason: "Inherited from Flash s11 cliff: summarize+tools \u2192 empty response. Preemptive guard until brain evidence on Flash-Lite specifically."
|
|
352
|
+
}
|
|
353
|
+
],
|
|
354
|
+
costInputPer1m: 0.1,
|
|
355
|
+
costOutputPer1m: 0.4,
|
|
356
|
+
lowering: {
|
|
357
|
+
...GOOGLE_LOWERING_BASE,
|
|
358
|
+
// Cache discount 10× (vs Flash 4×) — Google's spec is $0.01/M cache vs
|
|
359
|
+
// $0.10/M input. Material for repeat-prompt workloads (classify shape).
|
|
360
|
+
cache: { ...GOOGLE_LOWERING_BASE.cache, discount: 0.1 },
|
|
361
|
+
thinking: { field: "generationConfig.thinkingConfig.thinkingBudget", default: "auto" }
|
|
362
|
+
},
|
|
363
|
+
recovery: [
|
|
364
|
+
{
|
|
365
|
+
signal: "empty_response_after_tool",
|
|
366
|
+
action: "retry_with_params",
|
|
367
|
+
retryParams: { "generationConfig.thinkingConfig.thinkingBudget": 0 },
|
|
368
|
+
maxRetries: 1,
|
|
369
|
+
reason: "Known on Flash family: empty after tool result \u2014 retry with thinking off."
|
|
370
|
+
},
|
|
371
|
+
{
|
|
372
|
+
signal: "empty_response",
|
|
373
|
+
action: "retry_with_params",
|
|
374
|
+
retryParams: { "generationConfig.thinkingConfig.thinkingBudget": 0 },
|
|
375
|
+
maxRetries: 1,
|
|
376
|
+
reason: "Empty response \u2014 try with thinking off."
|
|
377
|
+
},
|
|
378
|
+
{
|
|
379
|
+
signal: "malformed_function_call",
|
|
380
|
+
action: "escalate",
|
|
381
|
+
reason: "MALFORMED_FUNCTION_CALL maps to stop \u2014 escalate to next target."
|
|
382
|
+
}
|
|
383
|
+
],
|
|
384
|
+
strengths: ["lowest_cost", "speed", "volume", "classification", "summarize", "1m_context", "cache_friendly"],
|
|
385
|
+
weaknesses: ["complex_reasoning", "large_tool_sets", "complex_schemas", "structured_output_unproven", "long_context_quality"],
|
|
386
|
+
notes: "Bottom-frontier anchor on cost: $0.10/$0.40 per 1M tokens, 1M context, 65K max output. Released July 2025 (stable). Positioned for classify / summarize / transform archetypes where quality bar is forgiving. Cliffs inherited from Flash at equal-or-tighter thresholds \u2014 re-tune per (archetype) once brain has n\u226520 rows. Alpha.8 contract layer handles MAX_TOKENS-on-structured-output via fallback chain, so structuredOutput=native is safe to declare even though Flash had alpha.8 history. Cache discount in spec: $0.01/M = 1/10 of input (richer than Flash 25%) \u2014 meaningful for repeat-prompt workloads.",
|
|
387
|
+
// Tier 3 emergency floor for summarize/classify chains. ZERO brain
|
|
388
|
+
// rows — all values are starter hypotheses anchored to "smaller
|
|
389
|
+
// sibling of Flash, at-or-below Flash perf on every archetype." The
|
|
390
|
+
// first 50 brain rows per archetype will validate or relax these.
|
|
391
|
+
archetypePerf: {
|
|
392
|
+
classify: 6,
|
|
393
|
+
// starter hypothesis — verify (Flash is 7, lite likely ≤)
|
|
394
|
+
summarize: 6,
|
|
395
|
+
// starter hypothesis — verify; cliff strips tools
|
|
396
|
+
transform: 6,
|
|
397
|
+
// starter hypothesis — verify
|
|
398
|
+
ask: 5,
|
|
399
|
+
hunt: 5,
|
|
400
|
+
generate: 4,
|
|
401
|
+
extract: 4,
|
|
402
|
+
plan: 3,
|
|
403
|
+
critique: 3
|
|
404
|
+
}
|
|
405
|
+
},
|
|
406
|
+
{
|
|
407
|
+
id: "gemini-2.5-pro",
|
|
408
|
+
verifiedAgainstDocs: "2026-05-08",
|
|
409
|
+
provider: "google",
|
|
410
|
+
status: "current",
|
|
411
|
+
maxContextTokens: 1048576,
|
|
412
|
+
maxOutputTokens: 65535,
|
|
413
|
+
maxTools: 128,
|
|
414
|
+
parallelToolCalls: true,
|
|
415
|
+
structuredOutput: "native",
|
|
416
|
+
systemPromptMode: "separate",
|
|
417
|
+
streaming: true,
|
|
418
|
+
cliffs: [
|
|
419
|
+
{
|
|
420
|
+
metric: "input_tokens",
|
|
421
|
+
threshold: 2e5,
|
|
422
|
+
action: "downgrade_quality_warning",
|
|
423
|
+
reason: "Pricing doubles above 200K: input $1.25\u2192$2.50/M, output $10\u2192$15/M"
|
|
424
|
+
}
|
|
425
|
+
],
|
|
426
|
+
costInputPer1m: 1.25,
|
|
427
|
+
costOutputPer1m: 10,
|
|
428
|
+
lowering: {
|
|
429
|
+
...GOOGLE_LOWERING_BASE,
|
|
430
|
+
thinking: { field: "generationConfig.thinkingConfig.thinkingBudget", default: "auto" }
|
|
431
|
+
},
|
|
432
|
+
recovery: [
|
|
433
|
+
{
|
|
434
|
+
signal: "malformed_function_call",
|
|
435
|
+
action: "escalate",
|
|
436
|
+
reason: "MALFORMED_FUNCTION_CALL \u2014 escalate"
|
|
437
|
+
}
|
|
438
|
+
],
|
|
439
|
+
strengths: ["reasoning", "1m_context", "structured_output", "tool_use"],
|
|
440
|
+
weaknesses: ["pricing_above_200k"],
|
|
441
|
+
// Master plan §3.3 anchor: tier-2 cross-provider in almost every chain.
|
|
442
|
+
// Sits on the frontier at perf-9 — close to Sonnet but cheaper input.
|
|
443
|
+
archetypePerf: {
|
|
444
|
+
critique: 9,
|
|
445
|
+
plan: 9,
|
|
446
|
+
ask: 8,
|
|
447
|
+
generate: 8,
|
|
448
|
+
extract: 8,
|
|
449
|
+
transform: 8,
|
|
450
|
+
hunt: 8,
|
|
451
|
+
// tier 1 cross-provider for hunt chain
|
|
452
|
+
summarize: 7,
|
|
453
|
+
classify: 7
|
|
454
|
+
}
|
|
455
|
+
},
|
|
456
|
+
{
|
|
457
|
+
id: "gemini-3.1-pro-preview",
|
|
458
|
+
verifiedAgainstDocs: "2026-05-08",
|
|
459
|
+
provider: "google",
|
|
460
|
+
status: "preview",
|
|
461
|
+
maxContextTokens: 1048576,
|
|
462
|
+
maxOutputTokens: 65535,
|
|
463
|
+
maxTools: 128,
|
|
464
|
+
parallelToolCalls: true,
|
|
465
|
+
structuredOutput: "native",
|
|
466
|
+
systemPromptMode: "separate",
|
|
467
|
+
streaming: true,
|
|
468
|
+
cliffs: [
|
|
469
|
+
{
|
|
470
|
+
metric: "input_tokens",
|
|
471
|
+
threshold: 2e5,
|
|
472
|
+
action: "downgrade_quality_warning",
|
|
473
|
+
reason: "Pricing doubles above 200K: input $2\u2192$4/M, output $12\u2192$18/M"
|
|
474
|
+
}
|
|
475
|
+
],
|
|
476
|
+
costInputPer1m: 2,
|
|
477
|
+
costOutputPer1m: 12,
|
|
478
|
+
lowering: {
|
|
479
|
+
...GOOGLE_LOWERING_BASE,
|
|
480
|
+
cache: { ...GOOGLE_LOWERING_BASE.cache, discount: 0.1 },
|
|
481
|
+
thinking: { field: "generationConfig.thinkingConfig.thinkingBudget", default: "auto" }
|
|
482
|
+
},
|
|
483
|
+
recovery: [
|
|
484
|
+
{
|
|
485
|
+
signal: "malformed_function_call",
|
|
486
|
+
action: "escalate",
|
|
487
|
+
reason: "MALFORMED_FUNCTION_CALL \u2014 escalate"
|
|
488
|
+
}
|
|
489
|
+
],
|
|
490
|
+
strengths: ["reasoning", "1m_context", "agentic_coding", "structured_output", "tool_use"],
|
|
491
|
+
weaknesses: ["cost", "preview_status", "pricing_above_200k"],
|
|
492
|
+
notes: "Frontier Gemini (preview, 2026-Q2). Step-change agentic coding per Google. Cache discount 10\xD7 (vs 4\xD7 for 2.5 Pro). Use status=preview to flag rollback path until GA.",
|
|
493
|
+
// Frontier-Gemini preview — bumped one notch over 2.5 Pro on agentic
|
|
494
|
+
// coding / reasoning per Google's release notes. Preview status:
|
|
495
|
+
// chains should stay on 2.5 Pro until GA. Starter hypothesis.
|
|
496
|
+
archetypePerf: {
|
|
497
|
+
critique: 10,
|
|
498
|
+
// Google claims step-change on reasoning
|
|
499
|
+
plan: 10,
|
|
500
|
+
ask: 9,
|
|
501
|
+
generate: 9,
|
|
502
|
+
extract: 9,
|
|
503
|
+
transform: 8,
|
|
504
|
+
hunt: 9,
|
|
505
|
+
// step-change agentic per Google
|
|
506
|
+
summarize: 8,
|
|
507
|
+
classify: 7
|
|
508
|
+
}
|
|
509
|
+
},
|
|
510
|
+
// ── DeepSeek ──
|
|
511
|
+
// 2026-05-08 audit (L-073): DeepSeek's `deepseek-chat` was silently aliased
|
|
512
|
+
// to `deepseek-v4-flash` non-thinking mode. Old kgauto profile claimed 64k
|
|
513
|
+
// context + $0.27/$1.10 — actual is 1M context + $0.14/$0.28. Now modeled
|
|
514
|
+
// as: V4-Flash + V4-Pro as canonical profiles; deepseek-chat and
|
|
515
|
+
// deepseek-reasoner registered as aliases (see ALIASES below).
|
|
516
|
+
{
|
|
517
|
+
id: "deepseek-v4-flash",
|
|
518
|
+
verifiedAgainstDocs: "2026-05-08",
|
|
519
|
+
provider: "deepseek",
|
|
520
|
+
status: "current",
|
|
521
|
+
maxContextTokens: 1e6,
|
|
522
|
+
maxOutputTokens: 384e3,
|
|
523
|
+
maxTools: 16,
|
|
524
|
+
parallelToolCalls: false,
|
|
525
|
+
structuredOutput: "native",
|
|
526
|
+
systemPromptMode: "inline",
|
|
527
|
+
streaming: true,
|
|
528
|
+
cliffs: [
|
|
529
|
+
{
|
|
530
|
+
metric: "tool_count",
|
|
531
|
+
threshold: 1,
|
|
532
|
+
action: "drop_to_top_relevant",
|
|
533
|
+
reason: "Sequential tool calls only \u2014 L-040"
|
|
534
|
+
}
|
|
535
|
+
],
|
|
536
|
+
costInputPer1m: 0.14,
|
|
537
|
+
costOutputPer1m: 0.28,
|
|
538
|
+
lowering: {
|
|
539
|
+
system: { mode: "inline" },
|
|
540
|
+
cache: { strategy: "unsupported" },
|
|
541
|
+
tools: { format: "deepseek" }
|
|
542
|
+
},
|
|
543
|
+
recovery: [
|
|
544
|
+
{ signal: "rate_limit", action: "escalate", reason: "429 \u2014 escalate" }
|
|
545
|
+
],
|
|
546
|
+
strengths: ["cost", "1m_context", "json_output", "code", "reasoning"],
|
|
547
|
+
weaknesses: ["parallel_tools", "large_tool_sets"],
|
|
548
|
+
notes: "Cheap workhorse. 1M context, 384k max output. Cache-hit input $0.0028/M (1/50\xD7 of miss). Aliased as `deepseek-chat` (non-thinking) and `deepseek-reasoner` (thinking) \u2014 see ALIASES.",
|
|
549
|
+
// Master plan §6.2 anchor. Brain-validated tier 1 cross-provider for
|
|
550
|
+
// classify (169 rows, 0% empty). Tier 0 for summarize-with-no-tools.
|
|
551
|
+
// Falls off on hunt (sequential tools — L-040) and reasoning depth.
|
|
552
|
+
archetypePerf: {
|
|
553
|
+
classify: 7,
|
|
554
|
+
// brain-validated, 169 rows
|
|
555
|
+
summarize: 7,
|
|
556
|
+
// archetype-tolerant, no brain evidence yet
|
|
557
|
+
ask: 6,
|
|
558
|
+
transform: 6,
|
|
559
|
+
generate: 5,
|
|
560
|
+
plan: 5,
|
|
561
|
+
extract: 5,
|
|
562
|
+
critique: 4,
|
|
563
|
+
hunt: 4
|
|
564
|
+
// sequential tool calls only — L-040
|
|
565
|
+
}
|
|
566
|
+
},
|
|
567
|
+
{
|
|
568
|
+
id: "deepseek-v4-pro",
|
|
569
|
+
verifiedAgainstDocs: "2026-05-08",
|
|
570
|
+
provider: "deepseek",
|
|
571
|
+
status: "current",
|
|
572
|
+
maxContextTokens: 1e6,
|
|
573
|
+
maxOutputTokens: 384e3,
|
|
574
|
+
maxTools: 16,
|
|
575
|
+
parallelToolCalls: false,
|
|
576
|
+
structuredOutput: "native",
|
|
577
|
+
systemPromptMode: "inline",
|
|
578
|
+
streaming: true,
|
|
579
|
+
cliffs: [
|
|
580
|
+
{
|
|
581
|
+
metric: "tool_count",
|
|
582
|
+
threshold: 1,
|
|
583
|
+
action: "drop_to_top_relevant",
|
|
584
|
+
reason: "Sequential tool calls only \u2014 L-040"
|
|
585
|
+
}
|
|
586
|
+
],
|
|
587
|
+
// Profile carries REGULAR pricing, not the 75%-off promo (ends 2026-05-31).
|
|
588
|
+
// Under-estimating cost is worse than over-estimating for budget caps.
|
|
589
|
+
costInputPer1m: 1.74,
|
|
590
|
+
costOutputPer1m: 3.48,
|
|
591
|
+
lowering: {
|
|
592
|
+
system: { mode: "inline" },
|
|
593
|
+
cache: { strategy: "unsupported" },
|
|
594
|
+
tools: { format: "deepseek" }
|
|
595
|
+
},
|
|
596
|
+
recovery: [
|
|
597
|
+
{ signal: "rate_limit", action: "escalate", reason: "429 \u2014 escalate" }
|
|
598
|
+
],
|
|
599
|
+
strengths: ["quality", "reasoning", "1m_context", "json_output", "code", "extended_thinking"],
|
|
600
|
+
weaknesses: ["parallel_tools", "large_tool_sets"],
|
|
601
|
+
notes: "Pro tier. 1M context, 384k max output. Regular pricing $1.74/$3.48; 75% promo through 2026-05-31 ($0.435/$0.87). Default mode = thinking.",
|
|
602
|
+
// Master plan §3.3: tier 3 cross-provider for plan chain. Reasoning
|
|
603
|
+
// bumped two notches over V4-Flash (plan 7 vs 5, critique 6 vs 4); same parallel-tool ceiling.
|
|
604
|
+
archetypePerf: {
|
|
605
|
+
plan: 7,
|
|
606
|
+
// §3.3 tier 3 for plan
|
|
607
|
+
critique: 6,
|
|
608
|
+
ask: 7,
|
|
609
|
+
generate: 6,
|
|
610
|
+
classify: 7,
|
|
611
|
+
summarize: 7,
|
|
612
|
+
extract: 6,
|
|
613
|
+
transform: 6,
|
|
614
|
+
hunt: 4
|
|
615
|
+
// sequential tools — same as V4-Flash
|
|
616
|
+
}
|
|
617
|
+
},
|
|
618
|
+
// ── OpenAI ──
|
|
619
|
+
// alpha.16 (2026-05-17): close the half-supported provider gap. env.ts
|
|
620
|
+
// already registered OPENAI_API_KEY + executeOpenAI + normalizeOpenAILike
|
|
621
|
+
// + lowerOpenAI all existed; profile entries were missing, so the
|
|
622
|
+
// alpha.10 auto-filter would mark openai-keyed models reachable but
|
|
623
|
+
// there were no profiles to filter IN. Half-supported is now fully
|
|
624
|
+
// supported. PB request `openai-provider-profiles` (2026-05-16).
|
|
625
|
+
//
|
|
626
|
+
// Profile data verified against developers.openai.com/api/docs/pricing
|
|
627
|
+
// + per-model pages 2026-05-17. L-049/L-081 step-zero: no AI-trained
|
|
628
|
+
// numbers — fetched live from OpenAI's docs. As of 2026-05, OpenAI's
|
|
629
|
+
// current flagship is gpt-5.5 (2025-12 cutoff); gpt-5.4-{base,mini,nano}
|
|
630
|
+
// are the workhorse family. gpt-4.1 + gpt-4o are legacy.
|
|
631
|
+
//
|
|
632
|
+
// Both 5.5 and 5.4 carry a 272K input-token pricing cliff (2x input,
|
|
633
|
+
// 1.5x output beyond that). Modeled as a `downgrade_quality_warning`
|
|
634
|
+
// cliff because it ranks the model down at large-context shapes — the
|
|
635
|
+
// semantics of "this model is now 2x more expensive" map onto the
|
|
636
|
+
// existing penalty mechanism. Cost-watcher will catch high-context
|
|
637
|
+
// spikes empirically; the cliff prevents naive routing into the doubled
|
|
638
|
+
// pricing zone.
|
|
639
|
+
{
|
|
640
|
+
id: "gpt-5.5",
|
|
641
|
+
verifiedAgainstDocs: "2026-05-17",
|
|
642
|
+
provider: "openai",
|
|
643
|
+
status: "current",
|
|
644
|
+
maxContextTokens: 105e4,
|
|
645
|
+
maxOutputTokens: 128e3,
|
|
646
|
+
maxTools: 64,
|
|
647
|
+
parallelToolCalls: true,
|
|
648
|
+
structuredOutput: "native",
|
|
649
|
+
systemPromptMode: "inline",
|
|
650
|
+
streaming: true,
|
|
651
|
+
cliffs: [
|
|
652
|
+
{
|
|
653
|
+
metric: "input_tokens",
|
|
654
|
+
threshold: 272e3,
|
|
655
|
+
action: "downgrade_quality_warning",
|
|
656
|
+
reason: "OpenAI pricing tier shift: >272K input tokens billed at 2x input + 1.5x output rates"
|
|
657
|
+
}
|
|
658
|
+
],
|
|
659
|
+
costInputPer1m: 5,
|
|
660
|
+
costOutputPer1m: 30,
|
|
661
|
+
lowering: {
|
|
662
|
+
system: { mode: "inline" },
|
|
663
|
+
// OpenAI caching is implicit (auto-applied to repeated prefixes
|
|
664
|
+
// ≥1024 tokens for prompt_tokens_details.cached_tokens). No
|
|
665
|
+
// wire-format marker. Discount: 10x for cached input ($0.50/$5.00).
|
|
666
|
+
cache: { strategy: "unsupported", minTokens: 1024, discount: 0.1 },
|
|
667
|
+
tools: { format: "openai" }
|
|
668
|
+
},
|
|
669
|
+
recovery: [
|
|
670
|
+
{ signal: "rate_limit", action: "escalate", reason: "429 \u2014 escalate to fallback chain" },
|
|
671
|
+
{ signal: "model_not_found", action: "escalate", reason: "Model deprecated/renamed \u2014 escalate (L-061)" }
|
|
672
|
+
],
|
|
673
|
+
strengths: ["reasoning", "agentic_coding", "long_context", "structured_output", "reliable_tool_use", "reasoning_effort_knob"],
|
|
674
|
+
weaknesses: ["cost", "pricing_cliff_at_272k"],
|
|
675
|
+
notes: "OpenAI frontier (2026-05). 1M context (1.05M total), 128K max output, 2025-12 cutoff. Reasoning effort knob (none/low/medium/high/xhigh). Pricing cliff at 272K input.",
|
|
676
|
+
// Frontier-tier perf hypothesis. Anchored to Opus 4.7 row (similar
|
|
677
|
+
// price/positioning). Brain evidence will refine; no telemetry yet.
|
|
678
|
+
archetypePerf: {
|
|
679
|
+
critique: 9,
|
|
680
|
+
plan: 9,
|
|
681
|
+
generate: 9,
|
|
682
|
+
ask: 9,
|
|
683
|
+
extract: 9,
|
|
684
|
+
transform: 9,
|
|
685
|
+
hunt: 8,
|
|
686
|
+
// parallel tool support good but cliff at 272K hurts deep multi-step
|
|
687
|
+
summarize: 7,
|
|
688
|
+
// overkill for tolerant archetype
|
|
689
|
+
classify: 7
|
|
690
|
+
// overkill; cheaper models cover this
|
|
691
|
+
}
|
|
692
|
+
},
|
|
693
|
+
{
|
|
694
|
+
id: "gpt-5.4",
|
|
695
|
+
verifiedAgainstDocs: "2026-05-17",
|
|
696
|
+
provider: "openai",
|
|
697
|
+
status: "current",
|
|
698
|
+
maxContextTokens: 105e4,
|
|
699
|
+
maxOutputTokens: 128e3,
|
|
700
|
+
maxTools: 64,
|
|
701
|
+
parallelToolCalls: true,
|
|
702
|
+
structuredOutput: "native",
|
|
703
|
+
systemPromptMode: "inline",
|
|
704
|
+
streaming: true,
|
|
705
|
+
cliffs: [
|
|
706
|
+
{
|
|
707
|
+
metric: "input_tokens",
|
|
708
|
+
threshold: 272e3,
|
|
709
|
+
action: "downgrade_quality_warning",
|
|
710
|
+
reason: "OpenAI pricing tier shift: >272K input tokens billed at 2x input + 1.5x output rates"
|
|
711
|
+
}
|
|
712
|
+
],
|
|
713
|
+
costInputPer1m: 2.5,
|
|
714
|
+
costOutputPer1m: 15,
|
|
715
|
+
lowering: {
|
|
716
|
+
system: { mode: "inline" },
|
|
717
|
+
cache: { strategy: "unsupported", minTokens: 1024, discount: 0.1 },
|
|
718
|
+
tools: { format: "openai" }
|
|
719
|
+
},
|
|
720
|
+
recovery: [
|
|
721
|
+
{ signal: "rate_limit", action: "escalate", reason: "429 \u2014 escalate to fallback chain" },
|
|
722
|
+
{ signal: "model_not_found", action: "escalate", reason: "Model deprecated/renamed \u2014 escalate (L-061)" }
|
|
723
|
+
],
|
|
724
|
+
strengths: ["reasoning", "long_context", "structured_output", "reliable_tool_use"],
|
|
725
|
+
weaknesses: ["pricing_cliff_at_272k"],
|
|
726
|
+
notes: "OpenAI workhorse (2026-05). 1M context (1.05M total), 128K max output, 2025-08 cutoff. Pricing cliff at 272K input. Pairs cleanly with Sonnet 4.6 on cost ($2.50/$15.00 vs $3.00/$15.00).",
|
|
727
|
+
// Anchored to Sonnet 4.6 row (similar price/positioning). Slight
|
|
728
|
+
// anthropic-side edge on agentic coding per master plan vibe.
|
|
729
|
+
archetypePerf: {
|
|
730
|
+
critique: 8,
|
|
731
|
+
plan: 8,
|
|
732
|
+
generate: 8,
|
|
733
|
+
ask: 8,
|
|
734
|
+
extract: 8,
|
|
735
|
+
transform: 8,
|
|
736
|
+
hunt: 7,
|
|
737
|
+
summarize: 7,
|
|
738
|
+
classify: 7
|
|
739
|
+
}
|
|
740
|
+
},
|
|
741
|
+
{
|
|
742
|
+
id: "gpt-5.4-mini",
|
|
743
|
+
verifiedAgainstDocs: "2026-05-17",
|
|
744
|
+
provider: "openai",
|
|
745
|
+
status: "current",
|
|
746
|
+
maxContextTokens: 4e5,
|
|
747
|
+
maxOutputTokens: 128e3,
|
|
748
|
+
maxTools: 64,
|
|
749
|
+
parallelToolCalls: true,
|
|
750
|
+
structuredOutput: "native",
|
|
751
|
+
systemPromptMode: "inline",
|
|
752
|
+
streaming: true,
|
|
753
|
+
cliffs: [],
|
|
754
|
+
costInputPer1m: 0.75,
|
|
755
|
+
costOutputPer1m: 4.5,
|
|
756
|
+
lowering: {
|
|
757
|
+
system: { mode: "inline" },
|
|
758
|
+
cache: { strategy: "unsupported", minTokens: 1024, discount: 0.1 },
|
|
759
|
+
tools: { format: "openai" }
|
|
760
|
+
},
|
|
761
|
+
recovery: [
|
|
762
|
+
{ signal: "rate_limit", action: "escalate", reason: "429 \u2014 escalate to fallback chain" },
|
|
763
|
+
{ signal: "model_not_found", action: "escalate", reason: "Model deprecated/renamed \u2014 escalate (L-061)" }
|
|
764
|
+
],
|
|
765
|
+
strengths: ["cost", "speed", "agentic_coding", "structured_output", "reliable_tool_use"],
|
|
766
|
+
weaknesses: ["reasoning_depth"],
|
|
767
|
+
notes: "OpenAI mini-tier (2026-05). 400K context, 128K max output, 2025-08 cutoff. OpenAI describes as 'strongest mini model for coding, computer use, subagents.' Cache discount 10x ($0.075 input).",
|
|
768
|
+
// Mini-tier hypothesis. Anchored to Haiku 4.5 + Flash row pricing.
|
|
769
|
+
// Cost is slightly higher than Haiku ($0.75 vs $0.50 input) but
|
|
770
|
+
// OpenAI claims strong coding/subagent perf.
|
|
771
|
+
archetypePerf: {
|
|
772
|
+
ask: 7,
|
|
773
|
+
generate: 7,
|
|
774
|
+
extract: 7,
|
|
775
|
+
transform: 7,
|
|
776
|
+
classify: 7,
|
|
777
|
+
summarize: 7,
|
|
778
|
+
hunt: 7,
|
|
779
|
+
plan: 6,
|
|
780
|
+
critique: 5
|
|
781
|
+
// reasoning depth gap — frontier models handle this
|
|
782
|
+
}
|
|
783
|
+
},
|
|
784
|
+
{
|
|
785
|
+
id: "gpt-5.4-nano",
|
|
786
|
+
verifiedAgainstDocs: "2026-05-17",
|
|
787
|
+
provider: "openai",
|
|
788
|
+
status: "current",
|
|
789
|
+
maxContextTokens: 4e5,
|
|
790
|
+
maxOutputTokens: 128e3,
|
|
791
|
+
maxTools: 64,
|
|
792
|
+
parallelToolCalls: true,
|
|
793
|
+
structuredOutput: "native",
|
|
794
|
+
systemPromptMode: "inline",
|
|
795
|
+
streaming: true,
|
|
796
|
+
cliffs: [],
|
|
797
|
+
costInputPer1m: 0.2,
|
|
798
|
+
costOutputPer1m: 1.25,
|
|
799
|
+
lowering: {
|
|
800
|
+
system: { mode: "inline" },
|
|
801
|
+
cache: { strategy: "unsupported", minTokens: 1024, discount: 0.1 },
|
|
802
|
+
tools: { format: "openai" }
|
|
803
|
+
},
|
|
804
|
+
recovery: [
|
|
805
|
+
{ signal: "rate_limit", action: "escalate", reason: "429 \u2014 escalate to fallback chain" },
|
|
806
|
+
{ signal: "model_not_found", action: "escalate", reason: "Model deprecated/renamed \u2014 escalate (L-061)" }
|
|
807
|
+
],
|
|
808
|
+
strengths: ["cost", "speed", "volume", "structured_output"],
|
|
809
|
+
weaknesses: ["reasoning_depth", "no_computer_use"],
|
|
810
|
+
notes: "OpenAI nano-tier (2026-05). 400K context, 128K max output, 2025-08 cutoff. 'Cheapest GPT-5.4-class for simple high-volume tasks.' No fine-tuning, no computer-use tools. Cache discount 10x.",
|
|
811
|
+
// Nano-tier. Anchored to Flash-Lite row ($0.10/$0.40 vs nano's
|
|
812
|
+
// $0.20/$1.25). Slightly more expensive than Flash-Lite but with
|
|
813
|
+
// OpenAI brand reliability. Good fit for classify/summarize floor.
|
|
814
|
+
archetypePerf: {
|
|
815
|
+
classify: 7,
|
|
816
|
+
summarize: 6,
|
|
817
|
+
ask: 6,
|
|
818
|
+
transform: 6,
|
|
819
|
+
extract: 6,
|
|
820
|
+
generate: 5,
|
|
821
|
+
hunt: 5,
|
|
822
|
+
plan: 4,
|
|
823
|
+
critique: 3
|
|
824
|
+
// not for reasoning archetypes
|
|
825
|
+
}
|
|
826
|
+
},
|
|
827
|
+
// ── Auto-onboarded (UNVERIFIED) ──
|
|
828
|
+
// Cloned by scripts/auto-onboard-models.mjs from a same-family template.
|
|
829
|
+
// Each entry's pricing/context/cliffs/lowering reflects the template, NOT
|
|
830
|
+
// provider docs. Verify before promoting status to 'current' (L-049/L-081).
|
|
831
|
+
{
|
|
832
|
+
// s37 (2026-05-21): UNVERIFIED-AUTO-ONBOARD → verified against
|
|
833
|
+
// ai.google.dev/gemini-api/docs/models/gemini-3-flash-preview +
|
|
834
|
+
// ai.google.dev/gemini-api/docs/pricing. L-081 catches:
|
|
835
|
+
// maxOutputTokens 65_535 → 65_536 (off-by-one)
|
|
836
|
+
// costInputPer1m 0.30 → 0.50 (template-cloned from 2.5-flash; actual is 1.67× more expensive)
|
|
837
|
+
// costOutputPer1m 2.50 → 3.00 (template-cloned; actual 1.2× more expensive)
|
|
838
|
+
// cache discount default 0.25 → 0.10 (10× discount, $0.05/$0.50 per docs)
|
|
839
|
+
// Cliffs inherited from 2.5-flash conservatively. The 8K-context-quality
|
|
840
|
+
// cliff was a 2.5-Flash observation — Google positions Gemini 3 as
|
|
841
|
+
// sustained-frontier-on-long-context; brain evidence will validate/relax.
|
|
842
|
+
// Kept as guard for now.
|
|
843
|
+
id: "gemini-3-flash-preview",
|
|
844
|
+
verifiedAgainstDocs: "2026-05-21",
|
|
845
|
+
provider: "google",
|
|
846
|
+
status: "preview",
|
|
847
|
+
maxContextTokens: 1048576,
|
|
848
|
+
maxOutputTokens: 65536,
|
|
849
|
+
maxTools: 128,
|
|
850
|
+
parallelToolCalls: true,
|
|
851
|
+
structuredOutput: "native",
|
|
852
|
+
systemPromptMode: "separate",
|
|
853
|
+
streaming: true,
|
|
854
|
+
cliffs: [
|
|
855
|
+
{
|
|
856
|
+
metric: "input_tokens",
|
|
857
|
+
threshold: 8e3,
|
|
858
|
+
action: "downgrade_quality_warning",
|
|
859
|
+
reason: "Inherited from 2.5-flash guard; brain evidence on Gemini 3 long-context quality will validate/relax"
|
|
860
|
+
},
|
|
861
|
+
{
|
|
862
|
+
metric: "tool_count",
|
|
863
|
+
threshold: 20,
|
|
864
|
+
action: "drop_to_top_relevant",
|
|
865
|
+
reason: "Tool reliability drops above ~20 tools (despite 128 hard limit) \u2014 inherited from Flash family"
|
|
866
|
+
},
|
|
867
|
+
{
|
|
868
|
+
metric: "thinking_with_short_output",
|
|
869
|
+
threshold: 1,
|
|
870
|
+
action: "force_thinking_budget_zero",
|
|
871
|
+
reason: "Thinking tokens consume maxOutputTokens \u2014 empty response if drained"
|
|
872
|
+
},
|
|
873
|
+
{
|
|
874
|
+
// Inherited from gemini-2.5-flash s11 trust artifact. Family-likely
|
|
875
|
+
// failure mode for Flash architecture. Keep preemptively until brain
|
|
876
|
+
// evidence on Gemini 3 specifically.
|
|
877
|
+
metric: "tool_count",
|
|
878
|
+
threshold: 1,
|
|
879
|
+
whenIntent: "summarize",
|
|
880
|
+
action: "strip_tools",
|
|
881
|
+
reason: "Inherited from 2.5-flash s11 cliff: summarize+tools \u2192 empty response. Preemptive guard until brain evidence on 3-flash-preview specifically."
|
|
882
|
+
}
|
|
883
|
+
],
|
|
884
|
+
costInputPer1m: 0.5,
|
|
885
|
+
costOutputPer1m: 3,
|
|
886
|
+
lowering: {
|
|
887
|
+
...GOOGLE_LOWERING_BASE,
|
|
888
|
+
// 10× cache discount per Google pricing: $0.05/M cached vs $0.50/M input.
|
|
889
|
+
cache: { ...GOOGLE_LOWERING_BASE.cache, discount: 0.1 },
|
|
890
|
+
thinking: { field: "generationConfig.thinkingConfig.thinkingBudget", default: "auto" }
|
|
891
|
+
},
|
|
892
|
+
recovery: [
|
|
893
|
+
{
|
|
894
|
+
signal: "empty_response_after_tool",
|
|
895
|
+
action: "retry_with_params",
|
|
896
|
+
retryParams: { "generationConfig.thinkingConfig.thinkingBudget": 0 },
|
|
897
|
+
maxRetries: 1,
|
|
898
|
+
reason: "Known: empty after tool result \u2014 retry with thinking off"
|
|
899
|
+
},
|
|
900
|
+
{
|
|
901
|
+
signal: "empty_response",
|
|
902
|
+
action: "retry_with_params",
|
|
903
|
+
retryParams: { "generationConfig.thinkingConfig.thinkingBudget": 0 },
|
|
904
|
+
maxRetries: 1,
|
|
905
|
+
reason: "Empty response \u2014 try with thinking off"
|
|
906
|
+
},
|
|
907
|
+
{
|
|
908
|
+
signal: "malformed_function_call",
|
|
909
|
+
action: "escalate",
|
|
910
|
+
reason: "MALFORMED_FUNCTION_CALL maps to stop \u2014 escalate to next target"
|
|
911
|
+
}
|
|
912
|
+
],
|
|
913
|
+
strengths: ["speed", "volume", "classification", "1m_context", "cost"],
|
|
914
|
+
weaknesses: ["complex_schemas", "large_tool_sets", "high_context_quality"],
|
|
915
|
+
notes: "Verified s37 (2026-05-21) against Google docs. Step-change positioning vs 2.5-flash on agentic loops per Google's release notes (Dec 2025). Pricing 1.67\xD7/1.2\xD7 higher than 2.5-flash; cache discount 10\xD7 (vs 4\xD7 for 2.5). Status=preview until brain evidence accumulates.",
|
|
916
|
+
// Anchored to 2.5-flash archetypePerf as starter, with judgment adjustments
|
|
917
|
+
// for Google's "step-change on agentic" positioning. Brain evidence (zero
|
|
918
|
+
// rows today) will replace these starter values.
|
|
919
|
+
archetypePerf: {
|
|
920
|
+
hunt: 9,
|
|
921
|
+
// Inherits 2.5-flash L-040 parallel-tool tier; Google positions 3 as agentic-loop upgrade
|
|
922
|
+
classify: 7,
|
|
923
|
+
// Inherits 2.5-flash brain-validated tier (218 rows on 2.5)
|
|
924
|
+
summarize: 7,
|
|
925
|
+
// Inherits 2.5-flash; cliff strips tools when present
|
|
926
|
+
transform: 7,
|
|
927
|
+
ask: 8,
|
|
928
|
+
// +1 vs 2.5-flash — sustained-frontier positioning
|
|
929
|
+
generate: 7,
|
|
930
|
+
// +1 vs 2.5-flash — agentic coding upgrade per Google
|
|
931
|
+
plan: 6,
|
|
932
|
+
// +1 vs 2.5-flash — complex iterations per positioning
|
|
933
|
+
extract: 6,
|
|
934
|
+
critique: 5
|
|
935
|
+
// +1 vs 2.5-flash — but still below Sonnet/Opus reasoning floor
|
|
936
|
+
}
|
|
937
|
+
},
|
|
938
|
+
{
|
|
939
|
+
// ── Gemini 3.1 Flash-Lite ──
|
|
940
|
+
// Onboarded 2026-05-16 by auto-onboarder; s37 (2026-05-21) verified
|
|
941
|
+
// against ai.google.dev/gemini-api/docs/pricing.
|
|
942
|
+
//
|
|
943
|
+
// L-081 CATCHES (template clone from 2.5-flash-lite was 2.5-3.75× too cheap):
|
|
944
|
+
// costInputPer1m 0.10 → 0.25 (template clone undervalued by 2.5×)
|
|
945
|
+
// costOutputPer1m 0.40 → 1.50 (template clone undervalued by 3.75×)
|
|
946
|
+
//
|
|
947
|
+
// Real 3.1-flash-lite is NOT a cost-equivalent successor to 2.5-flash-lite —
|
|
948
|
+
// it sits between 2.5-flash-lite ($0.10/$0.40) and 2.5-flash ($0.30/$2.50).
|
|
949
|
+
// Cache discount 10× verified ($0.025/M cached vs $0.25/M input).
|
|
950
|
+
//
|
|
951
|
+
// Cliffs are HYPOTHESIZED from 2.5-flash family; brain evidence pending.
|
|
952
|
+
id: "gemini-3.1-flash-lite",
|
|
953
|
+
verifiedAgainstDocs: "2026-05-21",
|
|
954
|
+
provider: "google",
|
|
955
|
+
status: "preview",
|
|
956
|
+
maxContextTokens: 1048576,
|
|
957
|
+
maxOutputTokens: 65536,
|
|
958
|
+
maxTools: 128,
|
|
959
|
+
parallelToolCalls: true,
|
|
960
|
+
structuredOutput: "native",
|
|
961
|
+
systemPromptMode: "separate",
|
|
962
|
+
streaming: true,
|
|
963
|
+
cliffs: [
|
|
964
|
+
{
|
|
965
|
+
metric: "input_tokens",
|
|
966
|
+
threshold: 8e3,
|
|
967
|
+
action: "downgrade_quality_warning",
|
|
968
|
+
reason: "Inherited from Flash: quality degrades above ~8K. Smaller model \u2014 likely degrades faster. Re-tune from brain after n\u226520."
|
|
969
|
+
},
|
|
970
|
+
{
|
|
971
|
+
metric: "tool_count",
|
|
972
|
+
threshold: 10,
|
|
973
|
+
action: "drop_to_top_relevant",
|
|
974
|
+
reason: "Conservative: Flash drops at 20, Flash-Lite is smaller \u2014 assume tighter ceiling until brain proves otherwise."
|
|
975
|
+
},
|
|
976
|
+
{
|
|
977
|
+
metric: "thinking_with_short_output",
|
|
978
|
+
threshold: 1,
|
|
979
|
+
action: "force_thinking_budget_zero",
|
|
980
|
+
reason: "Thinking enabled per Google API (thinking: true). Same drain risk as Flash \u2014 thinking tokens consume maxOutputTokens."
|
|
981
|
+
},
|
|
982
|
+
{
|
|
983
|
+
// Strong prior: Flash hit 5/5 empty rate on summarize+tools (s11
|
|
984
|
+
// trust artifact, kgauto commit 3872832). Flash-Lite shares the
|
|
985
|
+
// same architectural family — almost certainly inherits this cliff.
|
|
986
|
+
// Ship the guard preemptively; brain telemetry confirms or relaxes.
|
|
987
|
+
metric: "tool_count",
|
|
988
|
+
threshold: 1,
|
|
989
|
+
whenIntent: "summarize",
|
|
990
|
+
action: "strip_tools",
|
|
991
|
+
reason: "Inherited from Flash s11 cliff: summarize+tools \u2192 empty response. Preemptive guard until brain evidence on Flash-Lite specifically."
|
|
992
|
+
}
|
|
993
|
+
],
|
|
994
|
+
costInputPer1m: 0.25,
|
|
995
|
+
costOutputPer1m: 1.5,
|
|
996
|
+
lowering: {
|
|
997
|
+
...GOOGLE_LOWERING_BASE,
|
|
998
|
+
// Cache discount 10× (vs Flash 4×) — Google docs s37: $0.025/M cached vs
|
|
999
|
+
// $0.25/M input. Material for repeat-prompt workloads (classify shape).
|
|
1000
|
+
cache: { ...GOOGLE_LOWERING_BASE.cache, discount: 0.1 },
|
|
1001
|
+
thinking: { field: "generationConfig.thinkingConfig.thinkingBudget", default: "auto" }
|
|
1002
|
+
},
|
|
1003
|
+
recovery: [
|
|
1004
|
+
{
|
|
1005
|
+
signal: "empty_response_after_tool",
|
|
1006
|
+
action: "retry_with_params",
|
|
1007
|
+
retryParams: { "generationConfig.thinkingConfig.thinkingBudget": 0 },
|
|
1008
|
+
maxRetries: 1,
|
|
1009
|
+
reason: "Known on Flash family: empty after tool result \u2014 retry with thinking off."
|
|
1010
|
+
},
|
|
1011
|
+
{
|
|
1012
|
+
signal: "empty_response",
|
|
1013
|
+
action: "retry_with_params",
|
|
1014
|
+
retryParams: { "generationConfig.thinkingConfig.thinkingBudget": 0 },
|
|
1015
|
+
maxRetries: 1,
|
|
1016
|
+
reason: "Empty response \u2014 try with thinking off."
|
|
1017
|
+
},
|
|
1018
|
+
{
|
|
1019
|
+
signal: "malformed_function_call",
|
|
1020
|
+
action: "escalate",
|
|
1021
|
+
reason: "MALFORMED_FUNCTION_CALL maps to stop \u2014 escalate to next target."
|
|
1022
|
+
}
|
|
1023
|
+
],
|
|
1024
|
+
strengths: ["low_cost", "speed", "volume", "classification", "summarize", "1m_context", "cache_friendly"],
|
|
1025
|
+
weaknesses: ["complex_reasoning", "large_tool_sets", "complex_schemas", "structured_output_unproven", "long_context_quality"],
|
|
1026
|
+
notes: "Verified s37 (2026-05-21) against Google docs. Sits between 2.5-flash-lite (cheaper) and 2.5-flash (more expensive) on cost frontier; 2.5\xD7 more expensive than initial template-clone. Cliffs hypothesized from Flash family \u2014 brain evidence pending.",
|
|
1027
|
+
// Tier 2-3 floor for summarize/classify chains at the new (verified) price
|
|
1028
|
+
// point. ZERO brain rows — values are starter hypotheses anchored to
|
|
1029
|
+
// "smaller sibling of Flash at higher cost than 2.5-flash-lite." The first
|
|
1030
|
+
// 50 brain rows per archetype will validate or relax these.
|
|
1031
|
+
archetypePerf: {
|
|
1032
|
+
classify: 6,
|
|
1033
|
+
// starter hypothesis — verify (Flash is 7, lite likely ≤)
|
|
1034
|
+
summarize: 6,
|
|
1035
|
+
// starter hypothesis — verify; cliff strips tools
|
|
1036
|
+
transform: 6,
|
|
1037
|
+
// starter hypothesis — verify
|
|
1038
|
+
ask: 5,
|
|
1039
|
+
hunt: 5,
|
|
1040
|
+
generate: 4,
|
|
1041
|
+
extract: 4,
|
|
1042
|
+
plan: 3,
|
|
1043
|
+
critique: 3
|
|
1044
|
+
}
|
|
1045
|
+
},
|
|
1046
|
+
// ─────────────────────────────────────────────────────────────────────────
|
|
1047
|
+
// Gemini 3.5 Flash — hand-onboarded s37 (2026-05-21)
|
|
1048
|
+
//
|
|
1049
|
+
// Google positioning ("Most intelligent for sustained frontier performance
|
|
1050
|
+
// on agentic and coding tasks" / "particularly effective for rapid agentic
|
|
1051
|
+
// loops involving complex coding cycles and iterations") suggests this is
|
|
1052
|
+
// the Flash-family upgrade specifically aimed at hunt-shape workloads.
|
|
1053
|
+
// Pricing 5× input / 3.6× output vs 2.5-flash — material cost premium.
|
|
1054
|
+
// archetypePerf adjusted +1 vs 2.5-flash on ask/generate/plan/critique
|
|
1055
|
+
// (sustained-frontier positioning); hunt held at 9 inherited from L-040
|
|
1056
|
+
// family parallel-tool tier; brain evidence will validate within 50 rows.
|
|
1057
|
+
//
|
|
1058
|
+
// Cliffs inherited conservatively from 2.5-flash. Google's "sustained
|
|
1059
|
+
// frontier on long-context" positioning suggests the 8K cliff may not
|
|
1060
|
+
// apply to 3.5 — keep as guard until brain evidence shows otherwise.
|
|
1061
|
+
//
|
|
1062
|
+
// Specs verified against:
|
|
1063
|
+
// ai.google.dev/gemini-api/docs/models/gemini-3.5-flash
|
|
1064
|
+
// ai.google.dev/gemini-api/docs/pricing (Standard tier)
|
|
1065
|
+
// ─────────────────────────────────────────────────────────────────────────
|
|
1066
|
+
{
|
|
1067
|
+
id: "gemini-3.5-flash",
|
|
1068
|
+
verifiedAgainstDocs: "2026-05-21",
|
|
1069
|
+
provider: "google",
|
|
1070
|
+
status: "current",
|
|
1071
|
+
maxContextTokens: 1048576,
|
|
1072
|
+
maxOutputTokens: 65536,
|
|
1073
|
+
maxTools: 128,
|
|
1074
|
+
parallelToolCalls: true,
|
|
1075
|
+
structuredOutput: "native",
|
|
1076
|
+
systemPromptMode: "separate",
|
|
1077
|
+
streaming: true,
|
|
1078
|
+
cliffs: [
|
|
1079
|
+
{
|
|
1080
|
+
metric: "input_tokens",
|
|
1081
|
+
threshold: 8e3,
|
|
1082
|
+
action: "downgrade_quality_warning",
|
|
1083
|
+
reason: "Inherited from 2.5-flash guard; Google positions 3.5 as sustained-frontier-on-long-context but brain evidence pending"
|
|
1084
|
+
},
|
|
1085
|
+
{
|
|
1086
|
+
metric: "tool_count",
|
|
1087
|
+
threshold: 20,
|
|
1088
|
+
action: "drop_to_top_relevant",
|
|
1089
|
+
reason: "Inherited from Flash family: tool reliability drops above ~20 (despite 128 hard limit). Validate per (archetype, model) after n\u226520."
|
|
1090
|
+
},
|
|
1091
|
+
{
|
|
1092
|
+
metric: "thinking_with_short_output",
|
|
1093
|
+
threshold: 1,
|
|
1094
|
+
action: "force_thinking_budget_zero",
|
|
1095
|
+
reason: "Thinking mode supported per Google docs; same drain risk as 2.5-flash \u2014 thinking tokens consume maxOutputTokens"
|
|
1096
|
+
},
|
|
1097
|
+
{
|
|
1098
|
+
// Inherited from 2.5-flash s11 trust artifact (5/5 empty rate on
|
|
1099
|
+
// tt-intelligence/summarize/gemini-2.5-flash with tools offered).
|
|
1100
|
+
// Family-likely failure mode for Flash architecture across versions.
|
|
1101
|
+
// Keep preemptively until brain evidence on 3.5-flash specifically.
|
|
1102
|
+
metric: "tool_count",
|
|
1103
|
+
threshold: 1,
|
|
1104
|
+
whenIntent: "summarize",
|
|
1105
|
+
action: "strip_tools",
|
|
1106
|
+
reason: "Inherited from 2.5-flash s11 cliff (kgauto commit 3872832): summarize+tools \u2192 empty response. Preemptive guard until brain evidence on 3.5-flash specifically."
|
|
1107
|
+
}
|
|
1108
|
+
],
|
|
1109
|
+
costInputPer1m: 1.5,
|
|
1110
|
+
costOutputPer1m: 9,
|
|
1111
|
+
lowering: {
|
|
1112
|
+
...GOOGLE_LOWERING_BASE,
|
|
1113
|
+
// 10× cache discount per Google pricing: $0.15/M cached vs $1.50/M input.
|
|
1114
|
+
cache: { ...GOOGLE_LOWERING_BASE.cache, discount: 0.1 },
|
|
1115
|
+
thinking: { field: "generationConfig.thinkingConfig.thinkingBudget", default: "auto" }
|
|
1116
|
+
},
|
|
1117
|
+
recovery: [
|
|
1118
|
+
{
|
|
1119
|
+
signal: "empty_response_after_tool",
|
|
1120
|
+
action: "retry_with_params",
|
|
1121
|
+
retryParams: { "generationConfig.thinkingConfig.thinkingBudget": 0 },
|
|
1122
|
+
maxRetries: 1,
|
|
1123
|
+
reason: "Inherited Flash-family pattern: empty after tool result \u2014 retry with thinking off"
|
|
1124
|
+
},
|
|
1125
|
+
{
|
|
1126
|
+
signal: "empty_response",
|
|
1127
|
+
action: "retry_with_params",
|
|
1128
|
+
retryParams: { "generationConfig.thinkingConfig.thinkingBudget": 0 },
|
|
1129
|
+
maxRetries: 1,
|
|
1130
|
+
reason: "Empty response \u2014 try with thinking off"
|
|
1131
|
+
},
|
|
1132
|
+
{
|
|
1133
|
+
signal: "malformed_function_call",
|
|
1134
|
+
action: "escalate",
|
|
1135
|
+
reason: "MALFORMED_FUNCTION_CALL maps to stop \u2014 escalate to next target"
|
|
1136
|
+
}
|
|
1137
|
+
],
|
|
1138
|
+
strengths: ["agentic_loops", "coding", "1m_context", "parallel_tools", "thinking_mode", "sustained_frontier"],
|
|
1139
|
+
weaknesses: ["cost_vs_2_5_flash", "no_brain_evidence_yet"],
|
|
1140
|
+
notes: "Hand-onboarded s37 (2026-05-21) verified against Google docs. Stable status; positioned as Flash-family upgrade for agentic loops and coding. 5\xD7/3.6\xD7 more expensive than 2.5-flash but Google claims step-change on sustained frontier work. archetypePerf adjustments are judgment-grounded starter hypotheses \u2014 brain evidence will validate within ~50 rows per archetype.",
|
|
1141
|
+
// Starter hypothesis: anchored to 2.5-flash archetypePerf with +1
|
|
1142
|
+
// adjustments where Google's positioning explicitly supports
|
|
1143
|
+
// (agentic/coding/sustained). Hunt held at 9 inherited from L-040 family
|
|
1144
|
+
// parallel-tool tier. Brain evidence will replace.
|
|
1145
|
+
archetypePerf: {
|
|
1146
|
+
hunt: 9,
|
|
1147
|
+
// Inherited from 2.5-flash L-040 parallel-tool tier; Google positions 3.5 as agentic-loop champion
|
|
1148
|
+
classify: 7,
|
|
1149
|
+
// Inherited from 2.5-flash brain-validated tier (218 rows on 2.5)
|
|
1150
|
+
summarize: 7,
|
|
1151
|
+
// Inherited from 2.5-flash; cliff strips tools when present
|
|
1152
|
+
transform: 7,
|
|
1153
|
+
ask: 8,
|
|
1154
|
+
// +1 vs 2.5-flash — sustained-frontier positioning
|
|
1155
|
+
generate: 8,
|
|
1156
|
+
// +2 vs 2.5-flash (6→8) — Google: "complex coding cycles and iterations"
|
|
1157
|
+
plan: 7,
|
|
1158
|
+
// +2 vs 2.5-flash (5→7) — "complex iterations" positioning
|
|
1159
|
+
extract: 7,
|
|
1160
|
+
// +1 vs 2.5-flash — sustained-frontier on structured tasks
|
|
1161
|
+
critique: 5
|
|
1162
|
+
// +1 vs 2.5-flash — but still below Sonnet/Opus reasoning floor
|
|
1163
|
+
}
|
|
1164
|
+
}
|
|
1165
|
+
];
|
|
1166
|
+
/**
 * Alternate model ids, keyed by the alias and mapped to the canonical
 * profile id that `canonicalId` resolves them to.
 */
var ALIASES = {
  // DeepSeek's own model routing — both names are served by V4-Flash.
  "deepseek-chat": "deepseek-v4-flash",
  "deepseek-reasoner": "deepseek-v4-flash",
  // Legacy kgauto typo — the actual API alias is dash-form (alpha.1 had a dot).
  "claude-haiku-4.5": "claude-haiku-4-5"
};

/**
 * Currently installed override hook. Its members (`resolveAlias`,
 * `getProfile`) are invoked with optional calls, so the empty object
 * means "no overrides installed".
 */
var brainHook = {};

/**
 * Installs the override hook consulted by the profile lookups.
 *
 * @param {object} [hook] - Object that may provide `resolveAlias(id)` and/or
 *   `getProfile(id)`. Passing `null`/`undefined` resets to the empty hook so
 *   later lookups keep working instead of throwing on property access.
 */
function _setProfileBrainHook(hook) {
  brainHook = hook ?? {};
}

/**
 * Resolves a model id to its canonical form.
 *
 * Precedence: the hook's `resolveAlias` result (when non-nullish), then the
 * static `ALIASES` table, then the id itself unchanged.
 *
 * @param {string} id - Model id or alias.
 * @returns {string} The canonical id, or `id` when no alias applies.
 */
function canonicalId(id) {
  // Own-property check: a bare `ALIASES[id]` would also resolve inherited
  // Object.prototype members ("constructor", "toString", "__proto__", ...)
  // and hand back a function/object instead of an id.
  const staticAlias = Object.prototype.hasOwnProperty.call(ALIASES, id)
    ? ALIASES[id]
    : undefined;
  return brainHook.resolveAlias?.(id) ?? staticAlias ?? id;
}
|
|
1180
|
+
/**
 * Lookup table from profile id to its entry in PROFILES_RAW, built once
 * when the module is evaluated.
 */
var PROFILE_INDEX = new Map();
for (const profile of PROFILES_RAW) {
  PROFILE_INDEX.set(profile.id, profile);
}
|
|
1183
|
+
/**
 * Looks up the profile for a model id, throwing when none exists.
 *
 * The id is canonicalized first. A truthy result from the hook's
 * `getProfile` wins; otherwise the static index is consulted.
 *
 * @param {string} id - Model id or alias.
 * @returns {object} The matching profile.
 * @throws {Error} When neither source has a profile; the message lists the
 *   static profile ids followed by the static alias keys.
 */
function getProfile(id) {
  const resolvedId = canonicalId(id);
  const profile =
    brainHook.getProfile?.(resolvedId) || PROFILE_INDEX.get(resolvedId);
  if (profile) {
    return profile;
  }
  const known = Array.from(PROFILE_INDEX.keys())
    .concat(Object.keys(ALIASES))
    .join(", ");
  throw new Error(`Unknown model id: "${id}". Known: ${known}`);
}
|
|
1194
|
+
/**
 * Non-throwing variant of the profile lookup.
 *
 * @param {string} id - Model id or alias; canonicalized before lookup.
 * @returns {object|undefined} The hook's `getProfile` result when it is
 *   neither `null` nor `undefined`, otherwise the static index entry
 *   (`undefined` when the id is unknown).
 */
function tryGetProfile(id) {
  const resolvedId = canonicalId(id);
  const hookProfile = brainHook.getProfile?.(resolvedId);
  if (hookProfile !== null && hookProfile !== undefined) {
    return hookProfile;
  }
  return PROFILE_INDEX.get(resolvedId);
}
|
|
1198
|
+
/**
 * Returns the static profile table.
 *
 * This is the PROFILES_RAW array itself, not a copy, and the override hook
 * is not consulted.
 *
 * @returns {object[]} The PROFILES_RAW array.
 */
function allProfiles() {
  const staticTable = PROFILES_RAW;
  return staticTable;
}
|
|
1201
|
+
/**
 * Returns the unmodified static profile table (the PROFILES_RAW array
 * itself, not a copy).
 *
 * @returns {object[]} The PROFILES_RAW array.
 */
function allProfilesRaw() {
  const rawTable = PROFILES_RAW;
  return rawTable;
}
|
|
1204
|
+
/**
 * Selects the static profiles that belong to one provider.
 *
 * @param {string} provider - Provider name, compared with strict equality
 *   against each profile's `provider` field.
 * @returns {object[]} A new array of matching PROFILES_RAW entries, in table
 *   order; empty when nothing matches.
 */
function profilesByProvider(provider) {
  const belongsToProvider = ({ provider: owner }) => owner === provider;
  return PROFILES_RAW.filter(belongsToProvider);
}
|
|
1207
|
+
|
|
1208
|
+
export {
|
|
1209
|
+
ALIASES,
|
|
1210
|
+
_setProfileBrainHook,
|
|
1211
|
+
getProfile,
|
|
1212
|
+
tryGetProfile,
|
|
1213
|
+
allProfiles,
|
|
1214
|
+
allProfilesRaw,
|
|
1215
|
+
profilesByProvider
|
|
1216
|
+
};
|