@warmdrift/kgauto-compiler 2.0.0-alpha.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,2 @@
1
+ export { g as ALIASES, h as CacheStrategy, k as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-DO6R9moS.mjs';
2
+ import './dialect.mjs';
@@ -0,0 +1,2 @@
1
+ export { g as ALIASES, h as CacheStrategy, k as CliffRule, L as LoweringSpec, M as ModelProfile, q as RecoveryRule, S as StructuredOutputCapability, r as SystemPromptMode, t as allProfiles, u as getProfile, v as profilesByProvider, w as tryGetProfile } from './profiles-Bgri1pe7.js';
2
+ import './dialect.js';
@@ -0,0 +1,685 @@
1
+ "use strict";
2
// Bundler-generated CommonJS interop prelude (the helper names match the ones
// esbuild emits — NOTE(review): confirm the bundler before hand-editing).
// Short aliases for the reflection primitives used by the helpers below.
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __hasOwnProp = Object.prototype.hasOwnProperty;

/**
 * Define one enumerable getter on `target` for every enumerable key of `all`.
 * Each value in `all` is used directly as the getter, so the exported value
 * is read lazily at access time rather than captured at definition time.
 */
var __export = (target, all) => {
  for (var name in all) {
    __defProp(target, name, { get: all[name], enumerable: true });
  }
};

/**
 * Mirror the own properties of `from` onto `to` as live getters.
 * Keys already owned by `to`, and the key equal to `except`, are skipped.
 * Enumerability follows the source descriptor (enumerable when none exists).
 * `desc` is only scratch storage for the current descriptor.
 * Returns `to`; a `from` that is neither an object nor a function is ignored.
 */
var __copyProps = (to, from, except, desc) => {
  const copyable = (from && typeof from === "object") || typeof from === "function";
  if (!copyable) {
    return to;
  }
  for (const key of __getOwnPropNames(from)) {
    if (key === except || __hasOwnProp.call(to, key)) {
      continue;
    }
    desc = __getOwnPropDesc(from, key);
    __defProp(to, key, {
      get: () => from[key],
      enumerable: !desc || desc.enumerable
    });
  }
  return to;
};

/**
 * Build a fresh object flagged with a non-enumerable `__esModule: true`
 * and populate it with live getters for everything on `mod`.
 */
var __toCommonJS = (mod) => {
  const flagged = __defProp({}, "__esModule", { value: true });
  return __copyProps(flagged, mod);
};
19
+
20
+ // src/profiles.ts
21
+ var profiles_exports = {};
22
+ __export(profiles_exports, {
23
+ ALIASES: () => ALIASES,
24
+ allProfiles: () => allProfiles,
25
+ getProfile: () => getProfile,
26
+ profilesByProvider: () => profilesByProvider,
27
+ tryGetProfile: () => tryGetProfile
28
+ });
29
+ module.exports = __toCommonJS(profiles_exports);
30
/**
 * Lowering defaults shared (by reference) by every Anthropic profile in
 * PROFILES_RAW: inline system prompt, `cache_control` caching, and
 * Anthropic-format tools.
 */
var ANTHROPIC_LOWERING_BASE = {
  system: { mode: "inline" },
  cache: {
    strategy: "cache_control",
    // presumably the smallest prompt size worth caching — TODO confirm against the consumer
    minTokens: 1024,
    // appears to be the cached-input price as a fraction of the normal input
    // price (profile comments equate 0.1 with a "10×" discount) — verify
    discount: 0.1,
    ttlSeconds: 300 // five minutes
  },
  tools: { format: "anthropic" }
};
40
/**
 * Lowering defaults spread into every Google profile in PROFILES_RAW:
 * system prompt carried in the separate `systemInstruction` field,
 * `cachedContent` caching, and Google-format tools. Individual profiles
 * override `cache` and add `thinking` on top of this base.
 */
var GOOGLE_LOWERING_BASE = {
  system: { mode: "separate", field: "systemInstruction" },
  cache: {
    strategy: "cachedContent",
    // presumably the smallest prompt size worth caching — TODO confirm against the consumer
    minTokens: 4096,
    // appears to be the cached-input price as a fraction of the normal input
    // price (profile comments call 0.25 a "4×" discount) — verify
    discount: 0.25,
    ttlSeconds: 3600 // one hour
  },
  tools: { format: "google" }
};
50
+ var PROFILES_RAW = [
51
+ // ── Anthropic ──
52
+ {
53
+ id: "claude-opus-4-7",
54
+ verifiedAgainstDocs: "2026-05-08",
55
+ provider: "anthropic",
56
+ status: "current",
57
+ maxContextTokens: 1e6,
58
+ maxOutputTokens: 128e3,
59
+ maxTools: 64,
60
+ parallelToolCalls: true,
61
+ structuredOutput: "grammar",
62
+ systemPromptMode: "inline",
63
+ streaming: true,
64
+ cliffs: [],
65
+ costInputPer1m: 5,
66
+ costOutputPer1m: 25,
67
+ lowering: ANTHROPIC_LOWERING_BASE,
68
+ recovery: [
69
+ {
70
+ signal: "rate_limit",
71
+ action: "escalate",
72
+ reason: "429 from Anthropic \u2014 escalate to fallback chain"
73
+ },
74
+ {
75
+ signal: "model_not_found",
76
+ action: "escalate",
77
+ reason: "Model deprecated/renamed \u2014 escalate (L-061)"
78
+ }
79
+ ],
80
+ strengths: ["reasoning", "agentic_coding", "long_context", "reliable_tool_use", "structured_output"],
81
+ weaknesses: ["cost", "latency"],
82
+ notes: "Frontier (2026-05). Step-change improvement over 4.6 in agentic coding. Adaptive thinking only \u2014 no extended-thinking toggle. 1M context, 128k max output.",
83
+ // Frontier perf. Drops on archetypes where parallel-tool throughput
84
+ // (hunt) or low-budget cost-sensitivity (classify/summarize) matters
85
+ // more than reasoning depth.
86
+ archetypePerf: {
87
+ critique: 10,
88
+ plan: 10,
89
+ generate: 9,
90
+ ask: 9,
91
+ extract: 9,
92
+ transform: 9,
93
+ hunt: 8,
94
+ // strong but Flash dominates parallel tool throughput
95
+ summarize: 8,
96
+ // overkill for tolerant archetype; cost-out of frontier
97
+ classify: 8
98
+ // overkill; brain-validated cheaper models cover this
99
+ }
100
+ },
101
+ {
102
+ id: "claude-opus-4-6",
103
+ verifiedAgainstDocs: "2026-05-08",
104
+ provider: "anthropic",
105
+ status: "legacy",
106
+ maxContextTokens: 1e6,
107
+ maxOutputTokens: 128e3,
108
+ maxTools: 64,
109
+ parallelToolCalls: true,
110
+ structuredOutput: "grammar",
111
+ systemPromptMode: "inline",
112
+ streaming: true,
113
+ cliffs: [],
114
+ costInputPer1m: 5,
115
+ costOutputPer1m: 25,
116
+ lowering: ANTHROPIC_LOWERING_BASE,
117
+ recovery: [
118
+ {
119
+ signal: "rate_limit",
120
+ action: "escalate",
121
+ reason: "429 from Anthropic \u2014 escalate to fallback chain"
122
+ },
123
+ {
124
+ signal: "model_not_found",
125
+ action: "escalate",
126
+ reason: "Model deprecated/renamed \u2014 escalate (L-061)"
127
+ }
128
+ ],
129
+ strengths: ["reasoning", "long_context", "reliable_tool_use", "structured_output", "extended_thinking"],
130
+ weaknesses: ["cost", "latency"],
131
+ notes: "Predecessor to 4.7. Still current in Anthropic legacy table. Same pricing as 4.7 \u2014 choose 4.7 unless you need extended-thinking budget control (4.7 is adaptive-only).",
132
+ // One notch below 4.7 on critique, plan and hunt; level with it on every
133
+ // other archetype. Extended-thinking edge does not flip any ranking. Legacy: chains should prefer 4.7.
134
+ archetypePerf: {
135
+ critique: 9,
136
+ plan: 9,
137
+ generate: 9,
138
+ ask: 9,
139
+ extract: 9,
140
+ transform: 9,
141
+ hunt: 7,
142
+ summarize: 8,
143
+ classify: 8
144
+ }
145
+ },
146
+ {
147
+ id: "claude-sonnet-4-6",
148
+ verifiedAgainstDocs: "2026-05-08",
149
+ provider: "anthropic",
150
+ status: "current",
151
+ maxContextTokens: 1e6,
152
+ maxOutputTokens: 64e3,
153
+ maxTools: 64,
154
+ parallelToolCalls: true,
155
+ structuredOutput: "grammar",
156
+ systemPromptMode: "inline",
157
+ streaming: true,
158
+ cliffs: [],
159
+ costInputPer1m: 3,
160
+ costOutputPer1m: 15,
161
+ lowering: ANTHROPIC_LOWERING_BASE,
162
+ recovery: [
163
+ { signal: "rate_limit", action: "escalate", reason: "429 \u2014 escalate" },
164
+ { signal: "model_not_found", action: "escalate", reason: "Deprecated \u2014 escalate (L-061)" }
165
+ ],
166
+ strengths: ["quality", "tool_use", "long_context", "cache_friendly", "extended_thinking"],
167
+ weaknesses: [],
168
+ notes: "Workhorse. Best price/quality for most multi-turn agentic work. 1M context, 64k max output.",
169
+ // Master plan §6.2 anchor. Tier 0 for plan/generate/ask/extract/transform
170
+ // in starter chains; tier 1 cross-provider for hunt/summarize/classify.
171
+ archetypePerf: {
172
+ ask: 9,
173
+ generate: 9,
174
+ plan: 9,
175
+ critique: 9,
176
+ extract: 9,
177
+ transform: 9,
178
+ hunt: 7,
179
+ // strong but Flash beats on parallel tool throughput
180
+ summarize: 8,
181
+ // overkill for tolerant archetype
182
+ classify: 8
183
+ // overkill
184
+ }
185
+ },
186
+ {
187
+ id: "claude-haiku-4-5",
188
+ verifiedAgainstDocs: "2026-05-08",
189
+ provider: "anthropic",
190
+ status: "current",
191
+ maxContextTokens: 2e5,
192
+ maxOutputTokens: 64e3,
193
+ maxTools: 32,
194
+ parallelToolCalls: true,
195
+ structuredOutput: "grammar",
196
+ systemPromptMode: "inline",
197
+ streaming: true,
198
+ cliffs: [
199
+ {
200
+ metric: "tool_count",
201
+ threshold: 16,
202
+ action: "drop_to_top_relevant",
203
+ reason: "Haiku reliability degrades above ~16 tools"
204
+ }
205
+ ],
206
+ costInputPer1m: 1,
207
+ costOutputPer1m: 5,
208
+ lowering: ANTHROPIC_LOWERING_BASE,
209
+ recovery: [
210
+ { signal: "rate_limit", action: "escalate", reason: "429 \u2014 escalate to Sonnet" }
211
+ ],
212
+ strengths: ["speed", "cost", "classification", "cache_friendly", "extended_thinking"],
213
+ weaknesses: ["complex_reasoning", "large_tool_sets"],
214
+ notes: "Cheapest Anthropic. Great for classify, summarize, ask shapes. 200k context, 64k max output. API alias `claude-haiku-4-5` resolves to dated snapshot `claude-haiku-4-5-20251001`.",
215
+ // Tier 1 cross-provider anchor for short-output chains (classify/
216
+ // summarize/extract/transform). Falls off on plan/critique where
217
+ // reasoning depth matters; competes with Pro on cost+latency.
218
+ archetypePerf: {
219
+ classify: 8,
220
+ summarize: 8,
221
+ ask: 7,
222
+ transform: 7,
223
+ extract: 7,
224
+ hunt: 6,
225
+ // tool reliability drops at 16 — cliff guard fires
226
+ generate: 6,
227
+ plan: 5,
228
+ critique: 4
229
+ // reasoning depth gap vs Sonnet/Opus
230
+ }
231
+ },
232
+ // ── Google ──
233
+ {
234
+ id: "gemini-2.5-flash",
235
+ verifiedAgainstDocs: "2026-05-08",
236
+ provider: "google",
237
+ status: "current",
238
+ maxContextTokens: 1048576,
239
+ maxOutputTokens: 65535,
240
+ maxTools: 128,
241
+ parallelToolCalls: true,
242
+ structuredOutput: "native",
243
+ systemPromptMode: "separate",
244
+ streaming: true,
245
+ cliffs: [
246
+ {
247
+ metric: "input_tokens",
248
+ threshold: 8e3,
249
+ action: "downgrade_quality_warning",
250
+ reason: "Quality degrades significantly above ~8K context tokens"
251
+ },
252
+ {
253
+ metric: "tool_count",
254
+ threshold: 20,
255
+ action: "drop_to_top_relevant",
256
+ reason: "Tool reliability drops above ~20 tools (despite 128 hard limit)"
257
+ },
258
+ {
259
+ metric: "thinking_with_short_output",
260
+ threshold: 1,
261
+ action: "force_thinking_budget_zero",
262
+ reason: "Thinking tokens consume maxOutputTokens \u2014 empty response if drained"
263
+ },
264
+ {
265
+ // s11 trust artifact (2026-05-02): brain showed 5/5 empty rate on
266
+ // tt-intelligence/summarize/gemini-2.5-flash with tools offered.
267
+ // v1's disable_thinking_for_short_output already fired and didn't
268
+ // help — disabling thinking is necessary but not sufficient. Tools
269
+ // present + summarize intent confuses Flash into a no-output state
270
+ // (likely tool-decision purgatory). Strip tools entirely for this
271
+ // archetype on this model.
272
+ metric: "tool_count",
273
+ threshold: 1,
274
+ whenIntent: "summarize",
275
+ action: "strip_tools",
276
+ reason: "Gemini Flash returns empty when summarize intent has tools offered (5/5 empty rate observed in v1 prod 2026-04-19, replayed into v2 brain 2026-04-29)"
277
+ }
278
+ ],
279
+ costInputPer1m: 0.3,
280
+ costOutputPer1m: 2.5,
281
+ lowering: {
282
+ ...GOOGLE_LOWERING_BASE,
283
+ thinking: { field: "generationConfig.thinkingConfig.thinkingBudget", default: "auto" }
284
+ },
285
+ recovery: [
286
+ {
287
+ signal: "empty_response_after_tool",
288
+ action: "retry_with_params",
289
+ retryParams: { "generationConfig.thinkingConfig.thinkingBudget": 0 },
290
+ maxRetries: 1,
291
+ reason: "Known: empty after tool result \u2014 retry with thinking off"
292
+ },
293
+ {
294
+ signal: "empty_response",
295
+ action: "retry_with_params",
296
+ retryParams: { "generationConfig.thinkingConfig.thinkingBudget": 0 },
297
+ maxRetries: 1,
298
+ reason: "Empty response \u2014 try with thinking off"
299
+ },
300
+ {
301
+ signal: "malformed_function_call",
302
+ action: "escalate",
303
+ reason: "MALFORMED_FUNCTION_CALL maps to stop \u2014 escalate to next target"
304
+ }
305
+ ],
306
+ strengths: ["speed", "volume", "classification", "1m_context", "cost"],
307
+ weaknesses: ["complex_schemas", "large_tool_sets", "high_context_quality"],
308
+ notes: "Fast and cheap with 1M context. Quality cliffs at 8K context and 20 tools \u2014 guard with cliffs.",
309
+ // Master plan §6.2 anchor. Tier 0 for hunt (parallel tool throughput
310
+ // 15-75 calls/step beats Sonnet — L-040), summarize, classify.
311
+ archetypePerf: {
312
+ hunt: 9,
313
+ // L-040: parallel tool throughput 15-75/step
314
+ classify: 7,
315
+ // brain-validated, 218 rows
316
+ summarize: 7,
317
+ // brain-validated; cliff strips tools when present
318
+ transform: 7,
319
+ ask: 7,
320
+ generate: 6,
321
+ plan: 5,
322
+ extract: 6,
323
+ // alpha.8 MAX_TOKENS history on structured output
324
+ critique: 4
325
+ // reasoning shallower than Sonnet/Opus
326
+ }
327
+ },
328
+ {
329
+ // ── Gemini 2.5 Flash-Lite ──
330
+ // Onboarded 2026-05-13 (s22) after the model-release watcher surfaced
331
+ // it as a UNREGISTERED + NEW candidate. Released by Google July 2025,
332
+ // stable. Positioned BELOW Flash on the cost/perf frontier:
333
+ // input $0.10/M (Flash $0.30/M) — 3× cheaper
334
+ // output $0.40/M (Flash $2.50/M) — 6× cheaper
335
+ // cache $0.01/M — 1/10 of input (vs Flash 0.25 discount)
336
+ // Cliffs are HYPOTHESIZED from Flash's known failure modes — Flash-Lite
337
+ // is a smaller sibling, so we inherit Flash's cliffs at equal-or-tighter
338
+ // thresholds. The brain will validate/relax these as evidence accumulates
339
+ // per (archetype, model) tuple. Currently ZERO brain rows for this model.
340
+ id: "gemini-2.5-flash-lite",
341
+ verifiedAgainstDocs: "2026-05-13",
342
+ provider: "google",
343
+ status: "current",
344
+ maxContextTokens: 1048576,
345
+ maxOutputTokens: 65536,
346
+ maxTools: 128,
347
+ parallelToolCalls: true,
348
+ structuredOutput: "native",
349
+ systemPromptMode: "separate",
350
+ streaming: true,
351
+ cliffs: [
352
+ {
353
+ metric: "input_tokens",
354
+ threshold: 8e3,
355
+ action: "downgrade_quality_warning",
356
+ reason: "Inherited from Flash: quality degrades above ~8K. Smaller model \u2014 likely degrades faster. Re-tune from brain after n\u226520."
357
+ },
358
+ {
359
+ metric: "tool_count",
360
+ threshold: 10,
361
+ action: "drop_to_top_relevant",
362
+ reason: "Conservative: Flash drops at 20, Flash-Lite is smaller \u2014 assume tighter ceiling until brain proves otherwise."
363
+ },
364
+ {
365
+ metric: "thinking_with_short_output",
366
+ threshold: 1,
367
+ action: "force_thinking_budget_zero",
368
+ reason: "Thinking enabled per Google API (thinking: true). Same drain risk as Flash \u2014 thinking tokens consume maxOutputTokens."
369
+ },
370
+ {
371
+ // Strong prior: Flash hit 5/5 empty rate on summarize+tools (s11
372
+ // trust artifact, kgauto commit 3872832). Flash-Lite shares the
373
+ // same architectural family — almost certainly inherits this cliff.
374
+ // Ship the guard preemptively; brain telemetry confirms or relaxes.
375
+ metric: "tool_count",
376
+ threshold: 1,
377
+ whenIntent: "summarize",
378
+ action: "strip_tools",
379
+ reason: "Inherited from Flash s11 cliff: summarize+tools \u2192 empty response. Preemptive guard until brain evidence on Flash-Lite specifically."
380
+ }
381
+ ],
382
+ costInputPer1m: 0.1,
383
+ costOutputPer1m: 0.4,
384
+ lowering: {
385
+ ...GOOGLE_LOWERING_BASE,
386
+ // Cache discount 10× (vs Flash 4×) — Google's spec is $0.01/M cache vs
387
+ // $0.10/M input. Material for repeat-prompt workloads (classify shape).
388
+ cache: { ...GOOGLE_LOWERING_BASE.cache, discount: 0.1 },
389
+ thinking: { field: "generationConfig.thinkingConfig.thinkingBudget", default: "auto" }
390
+ },
391
+ recovery: [
392
+ {
393
+ signal: "empty_response_after_tool",
394
+ action: "retry_with_params",
395
+ retryParams: { "generationConfig.thinkingConfig.thinkingBudget": 0 },
396
+ maxRetries: 1,
397
+ reason: "Known on Flash family: empty after tool result \u2014 retry with thinking off."
398
+ },
399
+ {
400
+ signal: "empty_response",
401
+ action: "retry_with_params",
402
+ retryParams: { "generationConfig.thinkingConfig.thinkingBudget": 0 },
403
+ maxRetries: 1,
404
+ reason: "Empty response \u2014 try with thinking off."
405
+ },
406
+ {
407
+ signal: "malformed_function_call",
408
+ action: "escalate",
409
+ reason: "MALFORMED_FUNCTION_CALL maps to stop \u2014 escalate to next target."
410
+ }
411
+ ],
412
+ strengths: ["lowest_cost", "speed", "volume", "classification", "summarize", "1m_context", "cache_friendly"],
413
+ weaknesses: ["complex_reasoning", "large_tool_sets", "complex_schemas", "structured_output_unproven", "long_context_quality"],
414
+ notes: "Bottom-frontier anchor on cost: $0.10/$0.40 per 1M tokens, 1M context, 65K max output. Released July 2025 (stable). Positioned for classify / summarize / transform archetypes where quality bar is forgiving. Cliffs inherited from Flash at equal-or-tighter thresholds \u2014 re-tune per (archetype) once brain has n\u226520 rows. Alpha.8 contract layer handles MAX_TOKENS-on-structured-output via fallback chain, so structuredOutput=native is safe to declare even though Flash had alpha.8 history. Cache discount in spec: $0.01/M = 1/10 of input (richer than Flash 25%) \u2014 meaningful for repeat-prompt workloads.",
415
+ // Tier 3 emergency floor for summarize/classify chains. ZERO brain
416
+ // rows — all values are starter hypotheses anchored to "smaller
417
+ // sibling of Flash, at-or-below Flash perf on every archetype." The
418
+ // first 50 brain rows per archetype will validate or relax these.
419
+ archetypePerf: {
420
+ classify: 6,
421
+ // starter hypothesis — verify (Flash is 7, lite likely ≤)
422
+ summarize: 6,
423
+ // starter hypothesis — verify; cliff strips tools
424
+ transform: 6,
425
+ // starter hypothesis — verify
426
+ ask: 5,
427
+ hunt: 5,
428
+ generate: 4,
429
+ extract: 4,
430
+ plan: 3,
431
+ critique: 3
432
+ }
433
+ },
434
+ {
435
+ id: "gemini-2.5-pro",
436
+ verifiedAgainstDocs: "2026-05-08",
437
+ provider: "google",
438
+ status: "current",
439
+ maxContextTokens: 1048576,
440
+ maxOutputTokens: 65535,
441
+ maxTools: 128,
442
+ parallelToolCalls: true,
443
+ structuredOutput: "native",
444
+ systemPromptMode: "separate",
445
+ streaming: true,
446
+ cliffs: [
447
+ {
448
+ metric: "input_tokens",
449
+ threshold: 2e5,
450
+ action: "downgrade_quality_warning",
451
+ reason: "Pricing doubles above 200K: input $1.25\u2192$2.50/M, output $10\u2192$15/M"
452
+ }
453
+ ],
454
+ costInputPer1m: 1.25,
455
+ costOutputPer1m: 10,
456
+ lowering: {
457
+ ...GOOGLE_LOWERING_BASE,
458
+ thinking: { field: "generationConfig.thinkingConfig.thinkingBudget", default: "auto" }
459
+ },
460
+ recovery: [
461
+ {
462
+ signal: "malformed_function_call",
463
+ action: "escalate",
464
+ reason: "MALFORMED_FUNCTION_CALL \u2014 escalate"
465
+ }
466
+ ],
467
+ strengths: ["reasoning", "1m_context", "structured_output", "tool_use"],
468
+ weaknesses: ["pricing_above_200k"],
469
+ // Master plan §3.3 anchor: tier-2 cross-provider in almost every chain.
470
+ // Sits on the frontier at perf-9 — close to Sonnet but cheaper input.
471
+ archetypePerf: {
472
+ critique: 9,
473
+ plan: 9,
474
+ ask: 8,
475
+ generate: 8,
476
+ extract: 8,
477
+ transform: 8,
478
+ hunt: 8,
479
+ // tier 1 cross-provider for hunt chain
480
+ summarize: 7,
481
+ classify: 7
482
+ }
483
+ },
484
+ {
485
+ id: "gemini-3.1-pro-preview",
486
+ verifiedAgainstDocs: "2026-05-08",
487
+ provider: "google",
488
+ status: "preview",
489
+ maxContextTokens: 1048576,
490
+ maxOutputTokens: 65535,
491
+ maxTools: 128,
492
+ parallelToolCalls: true,
493
+ structuredOutput: "native",
494
+ systemPromptMode: "separate",
495
+ streaming: true,
496
+ cliffs: [
497
+ {
498
+ metric: "input_tokens",
499
+ threshold: 2e5,
500
+ action: "downgrade_quality_warning",
501
+ reason: "Pricing doubles above 200K: input $2\u2192$4/M, output $12\u2192$18/M"
502
+ }
503
+ ],
504
+ costInputPer1m: 2,
505
+ costOutputPer1m: 12,
506
+ lowering: {
507
+ ...GOOGLE_LOWERING_BASE,
508
+ cache: { ...GOOGLE_LOWERING_BASE.cache, discount: 0.1 },
509
+ thinking: { field: "generationConfig.thinkingConfig.thinkingBudget", default: "auto" }
510
+ },
511
+ recovery: [
512
+ {
513
+ signal: "malformed_function_call",
514
+ action: "escalate",
515
+ reason: "MALFORMED_FUNCTION_CALL \u2014 escalate"
516
+ }
517
+ ],
518
+ strengths: ["reasoning", "1m_context", "agentic_coding", "structured_output", "tool_use"],
519
+ weaknesses: ["cost", "preview_status", "pricing_above_200k"],
520
+ notes: "Frontier Gemini (preview, 2026-Q2). Step-change agentic coding per Google. Cache discount 10\xD7 (vs 4\xD7 for 2.5 Pro). Use status=preview to flag rollback path until GA.",
521
+ // Frontier-Gemini preview — bumped one notch over 2.5 Pro on agentic
522
+ // coding / reasoning per Google's release notes. Preview status:
523
+ // chains should stay on 2.5 Pro until GA. Starter hypothesis.
524
+ archetypePerf: {
525
+ critique: 10,
526
+ // Google claims step-change on reasoning
527
+ plan: 10,
528
+ ask: 9,
529
+ generate: 9,
530
+ extract: 9,
531
+ transform: 8,
532
+ hunt: 9,
533
+ // step-change agentic per Google
534
+ summarize: 8,
535
+ classify: 7
536
+ }
537
+ },
538
+ // ── DeepSeek ──
539
+ // 2026-05-08 audit (L-073): DeepSeek's `deepseek-chat` was silently aliased
540
+ // to `deepseek-v4-flash` non-thinking mode. Old kgauto profile claimed 64k
541
+ // context + $0.27/$1.10 — actual is 1M context + $0.14/$0.28. Now modeled
542
+ // as: V4-Flash + V4-Pro as canonical profiles; deepseek-chat and
543
+ // deepseek-reasoner registered as aliases (see ALIASES below).
544
+ {
545
+ id: "deepseek-v4-flash",
546
+ verifiedAgainstDocs: "2026-05-08",
547
+ provider: "deepseek",
548
+ status: "current",
549
+ maxContextTokens: 1e6,
550
+ maxOutputTokens: 384e3,
551
+ maxTools: 16,
552
+ parallelToolCalls: false,
553
+ structuredOutput: "native",
554
+ systemPromptMode: "inline",
555
+ streaming: true,
556
+ cliffs: [
557
+ {
558
+ metric: "tool_count",
559
+ threshold: 1,
560
+ action: "drop_to_top_relevant",
561
+ reason: "Sequential tool calls only \u2014 L-040"
562
+ }
563
+ ],
564
+ costInputPer1m: 0.14,
565
+ costOutputPer1m: 0.28,
566
+ lowering: {
567
+ system: { mode: "inline" },
568
+ cache: { strategy: "unsupported" },
569
+ tools: { format: "deepseek" }
570
+ },
571
+ recovery: [
572
+ { signal: "rate_limit", action: "escalate", reason: "429 \u2014 escalate" }
573
+ ],
574
+ strengths: ["cost", "1m_context", "json_output", "code", "reasoning"],
575
+ weaknesses: ["parallel_tools", "large_tool_sets"],
576
+ notes: "Cheap workhorse. 1M context, 384k max output. Cache-hit input $0.0028/M (1/50\xD7 of miss). Aliased as `deepseek-chat` (non-thinking) and `deepseek-reasoner` (thinking) \u2014 see ALIASES.",
577
+ // Master plan §6.2 anchor. Brain-validated tier 1 cross-provider for
578
+ // classify (169 rows, 0% empty). Tier 0 for summarize-with-no-tools.
579
+ // Falls off on hunt (sequential tools — L-040) and reasoning depth.
580
+ archetypePerf: {
581
+ classify: 7,
582
+ // brain-validated, 169 rows
583
+ summarize: 7,
584
+ // archetype-tolerant, no brain evidence yet
585
+ ask: 6,
586
+ transform: 6,
587
+ generate: 5,
588
+ plan: 5,
589
+ extract: 5,
590
+ critique: 4,
591
+ hunt: 4
592
+ // sequential tool calls only — L-040
593
+ }
594
+ },
595
+ {
596
+ id: "deepseek-v4-pro",
597
+ verifiedAgainstDocs: "2026-05-08",
598
+ provider: "deepseek",
599
+ status: "current",
600
+ maxContextTokens: 1e6,
601
+ maxOutputTokens: 384e3,
602
+ maxTools: 16,
603
+ parallelToolCalls: false,
604
+ structuredOutput: "native",
605
+ systemPromptMode: "inline",
606
+ streaming: true,
607
+ cliffs: [
608
+ {
609
+ metric: "tool_count",
610
+ threshold: 1,
611
+ action: "drop_to_top_relevant",
612
+ reason: "Sequential tool calls only \u2014 L-040"
613
+ }
614
+ ],
615
+ // Profile carries REGULAR pricing, not the 75%-off promo (ends 2026-05-31).
616
+ // Under-estimating cost is worse than over-estimating for budget caps.
617
+ costInputPer1m: 1.74,
618
+ costOutputPer1m: 3.48,
619
+ lowering: {
620
+ system: { mode: "inline" },
621
+ cache: { strategy: "unsupported" },
622
+ tools: { format: "deepseek" }
623
+ },
624
+ recovery: [
625
+ { signal: "rate_limit", action: "escalate", reason: "429 \u2014 escalate" }
626
+ ],
627
+ strengths: ["quality", "reasoning", "1m_context", "json_output", "code", "extended_thinking"],
628
+ weaknesses: ["parallel_tools", "large_tool_sets"],
629
+ notes: "Pro tier. 1M context, 384k max output. Regular pricing $1.74/$3.48; 75% promo through 2026-05-31 ($0.435/$0.87). Default mode = thinking.",
630
+ // Master plan §3.3: tier 3 cross-provider for plan chain. Plan/critique
631
+ // bumped two notches over V4-Flash (ask/generate/extract one); same parallel-tool ceiling.
632
+ archetypePerf: {
633
+ plan: 7,
634
+ // §3.3 tier 3 for plan
635
+ critique: 6,
636
+ ask: 7,
637
+ generate: 6,
638
+ classify: 7,
639
+ summarize: 7,
640
+ extract: 6,
641
+ transform: 6,
642
+ hunt: 4
643
+ // sequential tools — same as V4-Flash
644
+ }
645
+ }
646
+ ];
647
/**
 * Alternate model ids mapped to the canonical profile id they resolve to.
 * Kept as a plain, mutable object because it is part of the module's
 * exports; canonicalId() reads it on every call.
 */
var ALIASES = {
  // DeepSeek's own model routing — both names served by V4-Flash.
  "deepseek-chat": "deepseek-v4-flash",
  "deepseek-reasoner": "deepseek-v4-flash",
  // Legacy kgauto typo — actual API alias is dash-form (alpha.1 had dot).
  "claude-haiku-4.5": "claude-haiku-4-5"
};

/**
 * Resolve a model id to its canonical form.
 *
 * Only keys that ALIASES owns are treated as aliases. A bare `ALIASES[id]`
 * would also hit inherited Object.prototype members, so ids such as
 * "constructor" or "__proto__" would come back as functions/objects instead
 * of strings.
 *
 * @param {string} id - Model id or alias.
 * @returns {string} The alias target when `id` is a registered alias with a
 *   non-nullish target; otherwise `id` unchanged.
 */
function canonicalId(id) {
  const isAlias = Object.prototype.hasOwnProperty.call(ALIASES, id);
  const target = isAlias ? ALIASES[id] : undefined;
  // Preserve the original fallback: a nullish alias target yields the id itself.
  return target ?? id;
}
657
// Lookup table from canonical profile id to its profile object, filled once
// at module load in PROFILES_RAW order. A repeated id keeps its first
// position but ends up holding the later profile.
var PROFILE_INDEX = new Map();
for (const profile of PROFILES_RAW) {
  PROFILE_INDEX.set(profile.id, profile);
}
660
/**
 * Fetch the profile registered for a model id or alias.
 *
 * @param {string} id - Canonical model id or one of the ALIASES keys.
 * @returns {object} The matching profile from PROFILE_INDEX.
 * @throws {Error} When nothing is registered under the resolved id; the
 *   message lists every canonical id followed by every alias.
 */
function getProfile(id) {
  const profile = PROFILE_INDEX.get(canonicalId(id));
  if (profile) {
    return profile;
  }
  const known = Array.from(PROFILE_INDEX.keys())
    .concat(Object.keys(ALIASES))
    .join(", ");
  throw new Error(`Unknown model id: "${id}". Known: ${known}`);
}
669
/**
 * Non-throwing counterpart of getProfile().
 *
 * @param {string} id - Canonical model id or alias.
 * @returns {object|undefined} The matching profile, or `undefined` when the
 *   resolved id is not in PROFILE_INDEX.
 */
function tryGetProfile(id) {
  const resolvedId = canonicalId(id);
  return PROFILE_INDEX.get(resolvedId);
}
672
/**
 * List every registered profile in declaration order.
 *
 * Returns a fresh array on each call so that a caller's in-place `sort`,
 * `push` or `splice` cannot alter the registry that profilesByProvider()
 * filters (and thereby drift away from PROFILE_INDEX). The copy is shallow:
 * the profile objects themselves are still the shared originals.
 *
 * @returns {object[]} New array containing the registered profile objects.
 */
function allProfiles() {
  return [...PROFILES_RAW];
}
675
/**
 * Collect the profiles whose `provider` field strictly equals `provider`.
 *
 * @param {string} provider - Provider key, e.g. "anthropic", "google", "deepseek".
 * @returns {object[]} New array of matching profiles in declaration order;
 *   empty when none match.
 */
function profilesByProvider(provider) {
  const matches = [];
  PROFILES_RAW.forEach((profile) => {
    if (profile.provider === provider) {
      matches.push(profile);
    }
  });
  return matches;
}
678
+ // Annotate the CommonJS export names for ESM import in node:
679
+ 0 && (module.exports = {
680
+ ALIASES,
681
+ allProfiles,
682
+ getProfile,
683
+ profilesByProvider,
684
+ tryGetProfile
685
+ });