@warmdrift/kgauto 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.mts +229 -0
- package/dist/index.d.ts +229 -0
- package/dist/index.js +1131 -0
- package/dist/index.js.map +1 -0
- package/dist/index.mjs +1090 -0
- package/dist/index.mjs.map +1 -0
- package/package.json +41 -0
- package/profiles.json +672 -0
package/profiles.json
ADDED
|
@@ -0,0 +1,672 @@
|
|
|
1
|
+
{
|
|
2
|
+
"claude-opus-4-6": {
|
|
3
|
+
"provider": "anthropic",
|
|
4
|
+
"status": "current",
|
|
5
|
+
"max_tools": 20,
|
|
6
|
+
"max_context_tokens": 1000000,
|
|
7
|
+
"max_output_tokens": 128000,
|
|
8
|
+
"parallel_tool_calls": true,
|
|
9
|
+
"output_modes": [
|
|
10
|
+
"generateText",
|
|
11
|
+
"generateObject"
|
|
12
|
+
],
|
|
13
|
+
"prompt_rules": [],
|
|
14
|
+
"known_failures": [
|
|
15
|
+
"refusal returns stop_reason 'refusal' with schema-violating output",
|
|
16
|
+
"rate limits pooled across all Opus 4.x versions"
|
|
17
|
+
],
|
|
18
|
+
"strengths": [
|
|
19
|
+
"complex_reasoning",
|
|
20
|
+
"judgment",
|
|
21
|
+
"nuance",
|
|
22
|
+
"reliability",
|
|
23
|
+
"1m_context",
|
|
24
|
+
"adaptive_thinking"
|
|
25
|
+
],
|
|
26
|
+
"weaknesses": [
|
|
27
|
+
"cost",
|
|
28
|
+
"latency"
|
|
29
|
+
],
|
|
30
|
+
"cost_input_per_1m": 5.0,
|
|
31
|
+
"cost_output_per_1m": 25.0,
|
|
32
|
+
"step_limit_default": 10,
|
|
33
|
+
"notes": "Current frontier. 1M context. Adaptive thinking. Fast mode available at 6x rates. Auditor model for KG."
|
|
34
|
+
},
|
|
35
|
+
"claude-sonnet-4-6": {
|
|
36
|
+
"provider": "anthropic",
|
|
37
|
+
"status": "current",
|
|
38
|
+
"max_tools": 20,
|
|
39
|
+
"max_context_tokens": 1000000,
|
|
40
|
+
"max_output_tokens": 64000,
|
|
41
|
+
"parallel_tool_calls": true,
|
|
42
|
+
"output_modes": [
|
|
43
|
+
"generateText",
|
|
44
|
+
"generateObject"
|
|
45
|
+
],
|
|
46
|
+
"prompt_rules": [],
|
|
47
|
+
"known_failures": [
|
|
48
|
+
"refusal returns stop_reason 'refusal' with schema-violating output",
|
|
49
|
+
"rate limits pooled across all Sonnet 4.x versions",
|
|
50
|
+
"pricing jumps to $6/$22.50 per 1M tokens above 200k context"
|
|
51
|
+
],
|
|
52
|
+
"strengths": [
|
|
53
|
+
"structured_output",
|
|
54
|
+
"tool_use",
|
|
55
|
+
"instruction_following",
|
|
56
|
+
"reliability",
|
|
57
|
+
"1m_context",
|
|
58
|
+
"adaptive_thinking"
|
|
59
|
+
],
|
|
60
|
+
"weaknesses": [
|
|
61
|
+
"cost_for_simple_tasks"
|
|
62
|
+
],
|
|
63
|
+
"cost_input_per_1m": 3.0,
|
|
64
|
+
"cost_output_per_1m": 15.0,
|
|
65
|
+
"step_limit_default": 10,
|
|
66
|
+
"notes": "Current recommended workhorse. 1M context. generateObject works with complex schemas. Adaptive thinking."
|
|
67
|
+
},
|
|
68
|
+
"claude-haiku-4-5": {
|
|
69
|
+
"provider": "anthropic",
|
|
70
|
+
"status": "current",
|
|
71
|
+
"max_tools": 20,
|
|
72
|
+
"max_context_tokens": 200000,
|
|
73
|
+
"max_output_tokens": 64000,
|
|
74
|
+
"parallel_tool_calls": true,
|
|
75
|
+
"output_modes": [
|
|
76
|
+
"generateText",
|
|
77
|
+
"generateObject"
|
|
78
|
+
],
|
|
79
|
+
"prompt_rules": [],
|
|
80
|
+
"known_failures": [
|
|
81
|
+
"refusal returns stop_reason 'refusal' with schema-violating output",
|
|
82
|
+
"no adaptive thinking \u2014 only extended thinking"
|
|
83
|
+
],
|
|
84
|
+
"strengths": [
|
|
85
|
+
"speed",
|
|
86
|
+
"classification",
|
|
87
|
+
"simple_routing",
|
|
88
|
+
"cost",
|
|
89
|
+
"structured_output"
|
|
90
|
+
],
|
|
91
|
+
"weaknesses": [
|
|
92
|
+
"complex_reasoning",
|
|
93
|
+
"nuance",
|
|
94
|
+
"200k_context_limit"
|
|
95
|
+
],
|
|
96
|
+
"cost_input_per_1m": 1.0,
|
|
97
|
+
"cost_output_per_1m": 5.0,
|
|
98
|
+
"step_limit_default": 6,
|
|
99
|
+
"notes": "Fast and cheap. Good for classification, routing, simple tasks. 200k context (not 1M)."
|
|
100
|
+
},
|
|
101
|
+
"claude-sonnet-4": {
|
|
102
|
+
"provider": "anthropic",
|
|
103
|
+
"status": "legacy",
|
|
104
|
+
"max_tools": 20,
|
|
105
|
+
"max_context_tokens": 200000,
|
|
106
|
+
"max_output_tokens": 64000,
|
|
107
|
+
"parallel_tool_calls": true,
|
|
108
|
+
"output_modes": [
|
|
109
|
+
"generateText",
|
|
110
|
+
"generateObject"
|
|
111
|
+
],
|
|
112
|
+
"prompt_rules": [],
|
|
113
|
+
"known_failures": [
|
|
114
|
+
"1M context beta retiring April 30 2026 \u2014 requests over 200k will error",
|
|
115
|
+
"refusal returns stop_reason 'refusal' with schema-violating output"
|
|
116
|
+
],
|
|
117
|
+
"strengths": [
|
|
118
|
+
"structured_output",
|
|
119
|
+
"tool_use",
|
|
120
|
+
"instruction_following",
|
|
121
|
+
"reliability"
|
|
122
|
+
],
|
|
123
|
+
"weaknesses": [
|
|
124
|
+
"legacy",
|
|
125
|
+
"cost_for_simple_tasks"
|
|
126
|
+
],
|
|
127
|
+
"cost_input_per_1m": 3.0,
|
|
128
|
+
"cost_output_per_1m": 15.0,
|
|
129
|
+
"step_limit_default": 10,
|
|
130
|
+
"notes": "Legacy \u2014 use claude-sonnet-4-6 instead. 1M context beta expiring."
|
|
131
|
+
},
|
|
132
|
+
"claude-opus-4": {
|
|
133
|
+
"provider": "anthropic",
|
|
134
|
+
"status": "legacy",
|
|
135
|
+
"max_tools": 20,
|
|
136
|
+
"max_context_tokens": 200000,
|
|
137
|
+
"max_output_tokens": 32000,
|
|
138
|
+
"parallel_tool_calls": true,
|
|
139
|
+
"output_modes": [
|
|
140
|
+
"generateText",
|
|
141
|
+
"generateObject"
|
|
142
|
+
],
|
|
143
|
+
"prompt_rules": [],
|
|
144
|
+
"known_failures": [
|
|
145
|
+
"refusal returns stop_reason 'refusal' with schema-violating output"
|
|
146
|
+
],
|
|
147
|
+
"strengths": [
|
|
148
|
+
"complex_reasoning",
|
|
149
|
+
"judgment"
|
|
150
|
+
],
|
|
151
|
+
"weaknesses": [
|
|
152
|
+
"legacy",
|
|
153
|
+
"expensive_vs_4.6",
|
|
154
|
+
"lower_output_limit"
|
|
155
|
+
],
|
|
156
|
+
"cost_input_per_1m": 15.0,
|
|
157
|
+
"cost_output_per_1m": 75.0,
|
|
158
|
+
"step_limit_default": 10,
|
|
159
|
+
"notes": "Legacy \u2014 use claude-opus-4-6 instead. 3x more expensive for same tier."
|
|
160
|
+
},
|
|
161
|
+
"gpt-4.1": {
|
|
162
|
+
"provider": "openai",
|
|
163
|
+
"status": "current",
|
|
164
|
+
"max_tools": 128,
|
|
165
|
+
"max_context_tokens": 1047576,
|
|
166
|
+
"max_output_tokens": 32768,
|
|
167
|
+
"parallel_tool_calls": true,
|
|
168
|
+
"output_modes": [
|
|
169
|
+
"generateText",
|
|
170
|
+
"generateObject"
|
|
171
|
+
],
|
|
172
|
+
"prompt_rules": [],
|
|
173
|
+
"known_failures": [
|
|
174
|
+
"structured output incompatible with parallel_tool_calls \u2014 must set parallel_tool_calls=false",
|
|
175
|
+
"first request with new JSON schema has preprocessing delay up to 60s",
|
|
176
|
+
"structured output truncates silently at max_tokens \u2014 unparseable JSON",
|
|
177
|
+
"safety refusal returns 'refusal' field instead of schema-conforming object",
|
|
178
|
+
"model alias points to latest snapshot \u2014 behavior can change without warning"
|
|
179
|
+
],
|
|
180
|
+
"strengths": [
|
|
181
|
+
"all_rounder",
|
|
182
|
+
"structured_output",
|
|
183
|
+
"tool_use",
|
|
184
|
+
"reliability",
|
|
185
|
+
"1m_context",
|
|
186
|
+
"coding"
|
|
187
|
+
],
|
|
188
|
+
"weaknesses": [],
|
|
189
|
+
"cost_input_per_1m": 2.0,
|
|
190
|
+
"cost_output_per_1m": 8.0,
|
|
191
|
+
"step_limit_default": 10,
|
|
192
|
+
"notes": "OpenAI stable flagship. 1M context. Strong coding benchmarks. Natural shadow-test candidate for complex tasks vs Opus."
|
|
193
|
+
},
|
|
194
|
+
"gpt-4.1-mini": {
|
|
195
|
+
"provider": "openai",
|
|
196
|
+
"status": "current",
|
|
197
|
+
"max_tools": 128,
|
|
198
|
+
"max_context_tokens": 1047576,
|
|
199
|
+
"max_output_tokens": 32768,
|
|
200
|
+
"parallel_tool_calls": true,
|
|
201
|
+
"output_modes": [
|
|
202
|
+
"generateText",
|
|
203
|
+
"generateObject"
|
|
204
|
+
],
|
|
205
|
+
"prompt_rules": [],
|
|
206
|
+
"known_failures": [
|
|
207
|
+
"structured output incompatible with parallel_tool_calls \u2014 must set parallel_tool_calls=false",
|
|
208
|
+
"structured output truncates silently at max_tokens",
|
|
209
|
+
"safety refusal returns 'refusal' field instead of schema-conforming object"
|
|
210
|
+
],
|
|
211
|
+
"strengths": [
|
|
212
|
+
"speed",
|
|
213
|
+
"cost",
|
|
214
|
+
"structured_output",
|
|
215
|
+
"1m_context"
|
|
216
|
+
],
|
|
217
|
+
"weaknesses": [
|
|
218
|
+
"complex_reasoning"
|
|
219
|
+
],
|
|
220
|
+
"cost_input_per_1m": 0.4,
|
|
221
|
+
"cost_output_per_1m": 1.6,
|
|
222
|
+
"step_limit_default": 8,
|
|
223
|
+
"notes": "OpenAI mid-tier. 1M context. Good for standard tasks."
|
|
224
|
+
},
|
|
225
|
+
"gpt-4.1-nano": {
|
|
226
|
+
"provider": "openai",
|
|
227
|
+
"status": "current",
|
|
228
|
+
"max_tools": 128,
|
|
229
|
+
"max_context_tokens": 1047576,
|
|
230
|
+
"max_output_tokens": 32768,
|
|
231
|
+
"parallel_tool_calls": true,
|
|
232
|
+
"output_modes": [
|
|
233
|
+
"generateText",
|
|
234
|
+
"generateObject"
|
|
235
|
+
],
|
|
236
|
+
"prompt_rules": [],
|
|
237
|
+
"known_failures": [
|
|
238
|
+
"structured output incompatible with parallel_tool_calls \u2014 must set parallel_tool_calls=false",
|
|
239
|
+
"instruction drift in long conversations"
|
|
240
|
+
],
|
|
241
|
+
"strengths": [
|
|
242
|
+
"speed",
|
|
243
|
+
"cost",
|
|
244
|
+
"1m_context"
|
|
245
|
+
],
|
|
246
|
+
"weaknesses": [
|
|
247
|
+
"complex_reasoning",
|
|
248
|
+
"nuance"
|
|
249
|
+
],
|
|
250
|
+
"cost_input_per_1m": 0.1,
|
|
251
|
+
"cost_output_per_1m": 0.4,
|
|
252
|
+
"step_limit_default": 6,
|
|
253
|
+
"notes": "OpenAI cheapest. 1M context. Simple tasks only."
|
|
254
|
+
},
|
|
255
|
+
"gpt-4o": {
|
|
256
|
+
"provider": "openai",
|
|
257
|
+
"status": "legacy",
|
|
258
|
+
"max_tools": 128,
|
|
259
|
+
"max_context_tokens": 128000,
|
|
260
|
+
"max_output_tokens": 16384,
|
|
261
|
+
"parallel_tool_calls": true,
|
|
262
|
+
"output_modes": [
|
|
263
|
+
"generateText",
|
|
264
|
+
"generateObject"
|
|
265
|
+
],
|
|
266
|
+
"prompt_rules": [],
|
|
267
|
+
"known_failures": [
|
|
268
|
+
"structured output incompatible with parallel_tool_calls \u2014 must set parallel_tool_calls=false",
|
|
269
|
+
"structured output truncates silently at max_tokens",
|
|
270
|
+
"safety refusal returns 'refusal' field instead of schema-conforming object",
|
|
271
|
+
"hallucination rate higher than expected at temperature=0"
|
|
272
|
+
],
|
|
273
|
+
"strengths": [
|
|
274
|
+
"all_rounder",
|
|
275
|
+
"structured_output",
|
|
276
|
+
"tool_use",
|
|
277
|
+
"reliability"
|
|
278
|
+
],
|
|
279
|
+
"weaknesses": [
|
|
280
|
+
"legacy",
|
|
281
|
+
"128k_context_limit"
|
|
282
|
+
],
|
|
283
|
+
"cost_input_per_1m": 2.5,
|
|
284
|
+
"cost_output_per_1m": 10.0,
|
|
285
|
+
"step_limit_default": 10,
|
|
286
|
+
"notes": "Legacy \u2014 use gpt-4.1 instead. Better quality, cheaper, 1M context."
|
|
287
|
+
},
|
|
288
|
+
"gpt-4o-mini": {
|
|
289
|
+
"provider": "openai",
|
|
290
|
+
"status": "legacy",
|
|
291
|
+
"max_tools": 128,
|
|
292
|
+
"max_context_tokens": 128000,
|
|
293
|
+
"max_output_tokens": 16384,
|
|
294
|
+
"parallel_tool_calls": true,
|
|
295
|
+
"output_modes": [
|
|
296
|
+
"generateText",
|
|
297
|
+
"generateObject"
|
|
298
|
+
],
|
|
299
|
+
"prompt_rules": [],
|
|
300
|
+
"known_failures": [
|
|
301
|
+
"structured output incompatible with parallel_tool_calls \u2014 must set parallel_tool_calls=false",
|
|
302
|
+
"instruction drift in long conversations \u2014 worse than gpt-4o"
|
|
303
|
+
],
|
|
304
|
+
"strengths": [
|
|
305
|
+
"speed",
|
|
306
|
+
"cost",
|
|
307
|
+
"simple_tasks"
|
|
308
|
+
],
|
|
309
|
+
"weaknesses": [
|
|
310
|
+
"legacy",
|
|
311
|
+
"complex_reasoning",
|
|
312
|
+
"128k_context_limit"
|
|
313
|
+
],
|
|
314
|
+
"cost_input_per_1m": 0.15,
|
|
315
|
+
"cost_output_per_1m": 0.6,
|
|
316
|
+
"step_limit_default": 6,
|
|
317
|
+
"notes": "Legacy \u2014 use gpt-4.1-mini or gpt-4.1-nano instead."
|
|
318
|
+
},
|
|
319
|
+
"o3": {
|
|
320
|
+
"provider": "openai",
|
|
321
|
+
"status": "current",
|
|
322
|
+
"max_tools": 128,
|
|
323
|
+
"max_context_tokens": 200000,
|
|
324
|
+
"max_output_tokens": 100000,
|
|
325
|
+
"parallel_tool_calls": false,
|
|
326
|
+
"output_modes": [
|
|
327
|
+
"generateText",
|
|
328
|
+
"generateObject"
|
|
329
|
+
],
|
|
330
|
+
"prompt_rules": [
|
|
331
|
+
"no_system_with_developer"
|
|
332
|
+
],
|
|
333
|
+
"known_failures": [
|
|
334
|
+
"reasoning tokens unpredictable and unbounded \u2014 single call cost variance 10-50x",
|
|
335
|
+
"parallel_tool_calls=true errors or silently ignored \u2014 always set false",
|
|
336
|
+
"reasoning summaries unreliable \u2014 omitted >90% of cases",
|
|
337
|
+
"abandons hard tasks mid-work \u2014 non-deterministic behavioral pattern",
|
|
338
|
+
"reasoning tokens discarded between turns \u2014 cannot reference prior reasoning chain",
|
|
339
|
+
"hallucinates tool invocations for tools not in schema"
|
|
340
|
+
],
|
|
341
|
+
"strengths": [
|
|
342
|
+
"reasoning",
|
|
343
|
+
"complex_analysis",
|
|
344
|
+
"code",
|
|
345
|
+
"math"
|
|
346
|
+
],
|
|
347
|
+
"weaknesses": [
|
|
348
|
+
"cost_variance",
|
|
349
|
+
"sequential_tools",
|
|
350
|
+
"unpredictable_latency"
|
|
351
|
+
],
|
|
352
|
+
"cost_input_per_1m": 2.0,
|
|
353
|
+
"cost_output_per_1m": 8.0,
|
|
354
|
+
"step_limit_default": 8,
|
|
355
|
+
"notes": "Frontier reasoning. Cost is unpredictable due to reasoning tokens. Budget with max_completion_tokens. reasoning_effort param controls cost/quality tradeoff."
|
|
356
|
+
},
|
|
357
|
+
"gemini-2.5-flash": {
|
|
358
|
+
"provider": "google",
|
|
359
|
+
"status": "current",
|
|
360
|
+
"max_tools": 128,
|
|
361
|
+
"max_context_tokens": 1048576,
|
|
362
|
+
"max_output_tokens": 65535,
|
|
363
|
+
"parallel_tool_calls": true,
|
|
364
|
+
"output_modes": [
|
|
365
|
+
"generateText",
|
|
366
|
+
"generateObject"
|
|
367
|
+
],
|
|
368
|
+
"prompt_rules": [
|
|
369
|
+
"ban_cot_phrases",
|
|
370
|
+
"hard_word_limit",
|
|
371
|
+
"explicit_format",
|
|
372
|
+
"disable_thinking_for_short_output"
|
|
373
|
+
],
|
|
374
|
+
"known_failures": [
|
|
375
|
+
"thinking tokens consume maxOutputTokens \u2014 empty response if budget exhausted by reasoning",
|
|
376
|
+
"MALFORMED_FUNCTION_CALL maps to 'stop' in LiteLLM \u2014 silent failure in agentic loops",
|
|
377
|
+
"empty response after tool call result submission \u2014 reproducible for specific inputs",
|
|
378
|
+
"parallel tool call parsing broken in streaming \u2014 SDKs only parse first functionCall in parts array",
|
|
379
|
+
"deeply nested or large schemas rejected at API level",
|
|
380
|
+
"unsupported JSON Schema properties silently ignored \u2014 partial silent failure",
|
|
381
|
+
"quality degrades significantly with large documents or high-context prompts",
|
|
382
|
+
"10-20 tools recommended despite 128 hard limit \u2014 reliability drops above 20"
|
|
383
|
+
],
|
|
384
|
+
"strengths": [
|
|
385
|
+
"speed",
|
|
386
|
+
"volume",
|
|
387
|
+
"classification",
|
|
388
|
+
"parallel_tool_calls",
|
|
389
|
+
"1m_context",
|
|
390
|
+
"cost"
|
|
391
|
+
],
|
|
392
|
+
"weaknesses": [
|
|
393
|
+
"complex_schemas",
|
|
394
|
+
"large_tool_sets_unreliable",
|
|
395
|
+
"thinking_token_drain",
|
|
396
|
+
"streaming_tool_parsing"
|
|
397
|
+
],
|
|
398
|
+
"cost_input_per_1m": 0.3,
|
|
399
|
+
"cost_output_per_1m": 2.5,
|
|
400
|
+
"step_limit_default": 6,
|
|
401
|
+
"notes": "Fast and cheap with 1M context. Thinking ON by default \u2014 set thinkingBudget=0 for short outputs or max_tokens>=1024. Tool reliability drops above 10-20 tools despite 128 limit. The primary silent failure model in the pool."
|
|
402
|
+
},
|
|
403
|
+
"gemini-2.5-pro": {
|
|
404
|
+
"provider": "google",
|
|
405
|
+
"status": "current",
|
|
406
|
+
"max_tools": 128,
|
|
407
|
+
"max_context_tokens": 1048576,
|
|
408
|
+
"max_output_tokens": 65535,
|
|
409
|
+
"parallel_tool_calls": true,
|
|
410
|
+
"output_modes": [
|
|
411
|
+
"generateText",
|
|
412
|
+
"generateObject"
|
|
413
|
+
],
|
|
414
|
+
"prompt_rules": [
|
|
415
|
+
"disable_thinking_for_short_output"
|
|
416
|
+
],
|
|
417
|
+
"known_failures": [
|
|
418
|
+
"thinking tokens consume maxOutputTokens",
|
|
419
|
+
"MALFORMED_FUNCTION_CALL maps to 'stop' in LiteLLM",
|
|
420
|
+
"pricing doubles above 200k context tokens"
|
|
421
|
+
],
|
|
422
|
+
"strengths": [
|
|
423
|
+
"reasoning",
|
|
424
|
+
"1m_context",
|
|
425
|
+
"structured_output",
|
|
426
|
+
"tool_use"
|
|
427
|
+
],
|
|
428
|
+
"weaknesses": [
|
|
429
|
+
"pricing_above_200k"
|
|
430
|
+
],
|
|
431
|
+
"cost_input_per_1m": 1.25,
|
|
432
|
+
"cost_output_per_1m": 10.0,
|
|
433
|
+
"step_limit_default": 10,
|
|
434
|
+
"notes": "Google stable frontier. 1M context. Pricing doubles above 200k tokens \u2014 budget carefully for long-context use."
|
|
435
|
+
},
|
|
436
|
+
"deepseek-v4": {
|
|
437
|
+
"provider": "deepseek",
|
|
438
|
+
"status": "current",
|
|
439
|
+
"max_tools": 128,
|
|
440
|
+
"max_context_tokens": 1000000,
|
|
441
|
+
"max_output_tokens": 64000,
|
|
442
|
+
"parallel_tool_calls": false,
|
|
443
|
+
"output_modes": [
|
|
444
|
+
"generateText",
|
|
445
|
+
"generateObject"
|
|
446
|
+
],
|
|
447
|
+
"prompt_rules": [
|
|
448
|
+
"explicit_format"
|
|
449
|
+
],
|
|
450
|
+
"known_failures": [
|
|
451
|
+
"uptime is primary production risk \u2014 documented multi-day outages",
|
|
452
|
+
"503 server overloaded without backpressure \u2014 no clean 429s, just silent slow responses",
|
|
453
|
+
"tool call JSON occasionally malformed \u2014 parse errors must be handled explicitly"
|
|
454
|
+
],
|
|
455
|
+
"strengths": [
|
|
456
|
+
"cost",
|
|
457
|
+
"reasoning",
|
|
458
|
+
"1m_context",
|
|
459
|
+
"coding"
|
|
460
|
+
],
|
|
461
|
+
"weaknesses": [
|
|
462
|
+
"uptime",
|
|
463
|
+
"sequential_tool_calls",
|
|
464
|
+
"no_backpressure"
|
|
465
|
+
],
|
|
466
|
+
"cost_input_per_1m": 0.3,
|
|
467
|
+
"cost_output_per_1m": 0.5,
|
|
468
|
+
"step_limit_default": 8,
|
|
469
|
+
"notes": "Extraordinary value \u2014 frontier quality at 10-50x cheaper than Western providers. 1M context. Uptime is the risk."
|
|
470
|
+
},
|
|
471
|
+
"deepseek-chat": {
|
|
472
|
+
"provider": "deepseek",
|
|
473
|
+
"status": "legacy",
|
|
474
|
+
"max_tools": 128,
|
|
475
|
+
"max_context_tokens": 128000,
|
|
476
|
+
"max_output_tokens": 8000,
|
|
477
|
+
"parallel_tool_calls": false,
|
|
478
|
+
"output_modes": [
|
|
479
|
+
"generateText",
|
|
480
|
+
"generateObject"
|
|
481
|
+
],
|
|
482
|
+
"prompt_rules": [
|
|
483
|
+
"explicit_format"
|
|
484
|
+
],
|
|
485
|
+
"known_failures": [
|
|
486
|
+
"uptime is primary production risk \u2014 documented multi-day outages",
|
|
487
|
+
"503 server overloaded errors \u2014 streaming stalls mid-response without error",
|
|
488
|
+
"tool call JSON occasionally malformed",
|
|
489
|
+
"7-8 sequential tool calls \u2014 5x latency vs parallel models for same task"
|
|
490
|
+
],
|
|
491
|
+
"strengths": [
|
|
492
|
+
"cost",
|
|
493
|
+
"general_reasoning"
|
|
494
|
+
],
|
|
495
|
+
"weaknesses": [
|
|
496
|
+
"sequential_tool_calls",
|
|
497
|
+
"latency_with_tools",
|
|
498
|
+
"uptime",
|
|
499
|
+
"legacy"
|
|
500
|
+
],
|
|
501
|
+
"cost_input_per_1m": 0.28,
|
|
502
|
+
"cost_output_per_1m": 0.42,
|
|
503
|
+
"step_limit_default": 6,
|
|
504
|
+
"notes": "Legacy V3.2. Use deepseek-v4 instead. 10x cheaper cache hits ($0.028/1M). Sequential tool calls only."
|
|
505
|
+
},
|
|
506
|
+
"deepseek-reasoner": {
|
|
507
|
+
"provider": "deepseek",
|
|
508
|
+
"status": "legacy",
|
|
509
|
+
"max_tools": 128,
|
|
510
|
+
"max_context_tokens": 128000,
|
|
511
|
+
"max_output_tokens": 64000,
|
|
512
|
+
"parallel_tool_calls": false,
|
|
513
|
+
"output_modes": [
|
|
514
|
+
"generateText",
|
|
515
|
+
"generateObject"
|
|
516
|
+
],
|
|
517
|
+
"prompt_rules": [
|
|
518
|
+
"explicit_format"
|
|
519
|
+
],
|
|
520
|
+
"known_failures": [
|
|
521
|
+
"uptime is primary production risk",
|
|
522
|
+
"tool calling added in V3.2 \u2014 older integrations may not support it",
|
|
523
|
+
"the V3.2-Speciale variant does NOT support tool calling \u2014 do not confuse"
|
|
524
|
+
],
|
|
525
|
+
"strengths": [
|
|
526
|
+
"reasoning",
|
|
527
|
+
"cost_effective_reasoning"
|
|
528
|
+
],
|
|
529
|
+
"weaknesses": [
|
|
530
|
+
"sequential_tool_calls",
|
|
531
|
+
"latency",
|
|
532
|
+
"uptime",
|
|
533
|
+
"legacy"
|
|
534
|
+
],
|
|
535
|
+
"cost_input_per_1m": 0.28,
|
|
536
|
+
"cost_output_per_1m": 0.42,
|
|
537
|
+
"step_limit_default": 8,
|
|
538
|
+
"notes": "Legacy V3.2 reasoning mode. Same pricing as deepseek-chat in V3.2. Use deepseek-v4 instead."
|
|
539
|
+
},
|
|
540
|
+
"mistral-small": {
|
|
541
|
+
"provider": "mistral",
|
|
542
|
+
"status": "current",
|
|
543
|
+
"max_tools": 128,
|
|
544
|
+
"max_context_tokens": 256000,
|
|
545
|
+
"max_output_tokens": 16000,
|
|
546
|
+
"parallel_tool_calls": true,
|
|
547
|
+
"output_modes": [
|
|
548
|
+
"generateText",
|
|
549
|
+
"generateObject"
|
|
550
|
+
],
|
|
551
|
+
"prompt_rules": [
|
|
552
|
+
"reinforce_json_in_prompt"
|
|
553
|
+
],
|
|
554
|
+
"known_failures": [
|
|
555
|
+
"json_object mode only 64% reliable on complex schemas \u2014 must use json_schema strict mode",
|
|
556
|
+
"must explicitly instruct JSON output in prompt even when using JSON mode",
|
|
557
|
+
"max output governed by context remainder \u2014 250k input leaves only 6k for output"
|
|
558
|
+
],
|
|
559
|
+
"strengths": [
|
|
560
|
+
"speed",
|
|
561
|
+
"cost",
|
|
562
|
+
"european_hosting",
|
|
563
|
+
"256k_context",
|
|
564
|
+
"structured_output_strict"
|
|
565
|
+
],
|
|
566
|
+
"weaknesses": [
|
|
567
|
+
"complex_reasoning",
|
|
568
|
+
"json_object_unreliable"
|
|
569
|
+
],
|
|
570
|
+
"cost_input_per_1m": 0.2,
|
|
571
|
+
"cost_output_per_1m": 0.6,
|
|
572
|
+
"step_limit_default": 6,
|
|
573
|
+
"notes": "Mistral Small 4 (2603). 256k context. European hosting. Always use json_schema strict mode, never json_object."
|
|
574
|
+
},
|
|
575
|
+
"mistral-large": {
|
|
576
|
+
"provider": "mistral",
|
|
577
|
+
"status": "current",
|
|
578
|
+
"max_tools": 128,
|
|
579
|
+
"max_context_tokens": 256000,
|
|
580
|
+
"max_output_tokens": 16000,
|
|
581
|
+
"parallel_tool_calls": true,
|
|
582
|
+
"output_modes": [
|
|
583
|
+
"generateText",
|
|
584
|
+
"generateObject"
|
|
585
|
+
],
|
|
586
|
+
"prompt_rules": [
|
|
587
|
+
"reinforce_json_in_prompt"
|
|
588
|
+
],
|
|
589
|
+
"known_failures": [
|
|
590
|
+
"json_object mode only 64% reliable \u2014 must use json_schema strict mode",
|
|
591
|
+
"must explicitly instruct JSON output in prompt even when using JSON mode"
|
|
592
|
+
],
|
|
593
|
+
"strengths": [
|
|
594
|
+
"reasoning",
|
|
595
|
+
"european_hosting",
|
|
596
|
+
"structured_output",
|
|
597
|
+
"256k_context"
|
|
598
|
+
],
|
|
599
|
+
"weaknesses": [
|
|
600
|
+
"cost_vs_gpt41"
|
|
601
|
+
],
|
|
602
|
+
"cost_input_per_1m": 2.0,
|
|
603
|
+
"cost_output_per_1m": 6.0,
|
|
604
|
+
"step_limit_default": 10,
|
|
605
|
+
"notes": "Mistral Large 3 (2512). 256k context. European hosting. Strong reasoning."
|
|
606
|
+
},
|
|
607
|
+
"grok-3": {
|
|
608
|
+
"provider": "xai",
|
|
609
|
+
"status": "current",
|
|
610
|
+
"max_tools": 128,
|
|
611
|
+
"max_context_tokens": 131072,
|
|
612
|
+
"max_output_tokens": 16000,
|
|
613
|
+
"parallel_tool_calls": true,
|
|
614
|
+
"output_modes": [
|
|
615
|
+
"generateText",
|
|
616
|
+
"generateObject"
|
|
617
|
+
],
|
|
618
|
+
"prompt_rules": [],
|
|
619
|
+
"known_failures": [
|
|
620
|
+
"function call tags leak into thinking blocks \u2014 breaks tool-call parsers",
|
|
621
|
+
"enters repetitive tool-call generation loops \u2014 add hard ceiling on iterations",
|
|
622
|
+
"reasoning_effort param NOT supported \u2014 only on grok-3-mini",
|
|
623
|
+
"100k TPM cap per customer \u2014 lower than comparable models"
|
|
624
|
+
],
|
|
625
|
+
"strengths": [
|
|
626
|
+
"reasoning",
|
|
627
|
+
"speed"
|
|
628
|
+
],
|
|
629
|
+
"weaknesses": [
|
|
630
|
+
"tool_call_tag_leaks",
|
|
631
|
+
"tool_loop_risk",
|
|
632
|
+
"tpm_cap"
|
|
633
|
+
],
|
|
634
|
+
"cost_input_per_1m": 3.0,
|
|
635
|
+
"cost_output_per_1m": 15.0,
|
|
636
|
+
"step_limit_default": 10,
|
|
637
|
+
"notes": "Strong reasoning. Young API with thinner operational track record. Watch for tool-call parsing issues."
|
|
638
|
+
},
|
|
639
|
+
"grok-3-mini": {
|
|
640
|
+
"provider": "xai",
|
|
641
|
+
"status": "current",
|
|
642
|
+
"max_tools": 128,
|
|
643
|
+
"max_context_tokens": 131072,
|
|
644
|
+
"max_output_tokens": 16000,
|
|
645
|
+
"parallel_tool_calls": true,
|
|
646
|
+
"output_modes": [
|
|
647
|
+
"generateText",
|
|
648
|
+
"generateObject"
|
|
649
|
+
],
|
|
650
|
+
"prompt_rules": [],
|
|
651
|
+
"known_failures": [
|
|
652
|
+
"function call tags leak into thinking blocks",
|
|
653
|
+
"enters repetitive tool-call generation loops",
|
|
654
|
+
"100k TPM cap per customer"
|
|
655
|
+
],
|
|
656
|
+
"strengths": [
|
|
657
|
+
"speed",
|
|
658
|
+
"cost",
|
|
659
|
+
"medium_reasoning",
|
|
660
|
+
"reasoning_effort_control"
|
|
661
|
+
],
|
|
662
|
+
"weaknesses": [
|
|
663
|
+
"tool_call_tag_leaks",
|
|
664
|
+
"tool_loop_risk",
|
|
665
|
+
"tpm_cap"
|
|
666
|
+
],
|
|
667
|
+
"cost_input_per_1m": 0.3,
|
|
668
|
+
"cost_output_per_1m": 0.5,
|
|
669
|
+
"step_limit_default": 8,
|
|
670
|
+
"notes": "Fast, cheap. reasoning_effort param (low/high) controls reasoning token budget. Full CoT exposed in every response."
|
|
671
|
+
}
|
|
672
|
+
}
|