@sudobility/shapeshyft_types 1.0.21 → 1.0.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -4,9 +4,17 @@
   * TypeScript types for ShapeShyft API - LLM structured output platform
   */
  Object.defineProperty(exports, "__esModule", { value: true });
- exports.COST_PER_MILLION_TOKENS = exports.PROVIDER_ALLOWS_CUSTOM_MODEL = exports.DEFAULT_PROVIDER_MODEL = exports.PROVIDER_MODELS = exports.LLM_PROVIDERS = void 0;
+ exports.COST_PER_MILLION_TOKENS = exports.MODEL_CAPABILITIES = exports.PROVIDER_ALLOWS_CUSTOM_MODEL = exports.DEFAULT_PROVIDER_MODEL = exports.PROVIDER_MODELS = exports.LLM_PROVIDERS = void 0;
+ exports.getModelCapabilities = getModelCapabilities;
+ exports.hasInputCapability = hasInputCapability;
+ exports.hasOutputCapability = hasOutputCapability;
+ exports.getMediaFormats = getMediaFormats;
+ exports.supportsMediaFormat = supportsMediaFormat;
+ exports.getDefaultMediaFormat = getDefaultMediaFormat;
+ exports.supportsMediaUrl = supportsMediaUrl;
  exports.getModelPricing = getModelPricing;
  exports.estimateCost = estimateCost;
+ exports.estimateMultimodalCost = estimateMultimodalCost;
  exports.formatCost = formatCost;
  exports.formatCostPerMillion = formatCostPerMillion;
  exports.successResponse = successResponse;
@@ -24,30 +32,75 @@ exports.LLM_PROVIDERS = [
      'perplexity',
      'llm_server',
  ];
- /** Models available for each provider */
+ /** Models available for each provider (January 2026) */
  exports.PROVIDER_MODELS = {
-     openai: ['gpt-4o', 'gpt-4o-mini', 'gpt-4-turbo', 'gpt-3.5-turbo', 'o1', 'o1-mini', 'o1-pro'],
-     anthropic: ['claude-sonnet-4-20250514', 'claude-opus-4-20250514', 'claude-3-5-haiku-20241022'],
-     gemini: ['gemini-2.0-flash', 'gemini-2.0-flash-lite', 'gemini-1.5-pro', 'gemini-1.5-flash'],
-     mistral: ['mistral-large-latest', 'mistral-medium-latest', 'mistral-small-latest', 'codestral-latest', 'mistral-nemo'],
-     cohere: ['command-r-plus', 'command-r', 'command', 'command-light'],
-     groq: ['llama-3.3-70b-versatile', 'llama-3.1-8b-instant', 'mixtral-8x7b-32768', 'gemma2-9b-it'],
-     xai: ['grok-2', 'grok-2-mini'],
-     deepseek: ['deepseek-chat', 'deepseek-coder', 'deepseek-reasoner'],
-     perplexity: ['llama-3.1-sonar-small-128k-online', 'llama-3.1-sonar-large-128k-online', 'llama-3.1-sonar-huge-128k-online'],
-     llm_server: ['qwen/qwen3-30b-a3b-2507', 'meta-llama-3.1-8b-instruct', 'qwen-32b-everything', 'openai/gpt-oss-20b'],
+     openai: [
+         'gpt-4.1', 'gpt-4.1-mini', 'gpt-4.1-nano',
+         'gpt-4o', 'gpt-4o-mini',
+         'o3', 'o3-pro', 'o4-mini',
+         'gpt-4-turbo', 'o1',
+     ],
+     anthropic: [
+         'claude-opus-4-5-20251124', 'claude-sonnet-4-5-20251124',
+         'claude-opus-4-1-20250805',
+         'claude-sonnet-4-20250514', 'claude-opus-4-20250514',
+         'claude-3-5-haiku-20241022',
+     ],
+     gemini: [
+         'gemini-3-pro-preview', 'gemini-3-flash-preview', 'gemini-3-pro-image-preview',
+         'gemini-2.5-pro', 'gemini-2.5-flash', 'gemini-2.5-flash-lite', 'gemini-2.5-flash-image', 'gemini-2.5-flash-native-audio-preview',
+         'gemini-2.0-flash', 'gemini-2.0-flash-lite',
+     ],
+     mistral: [
+         'mistral-large-2512', 'mistral-large-latest',
+         'mistral-medium-3.1', 'mistral-medium-latest',
+         'mistral-small-3.2', 'mistral-small-latest',
+         'ministral-3b-2512', 'ministral-8b-2512', 'ministral-14b-2512',
+         'codestral-2501', 'codestral-latest',
+         'pixtral-large-2411', 'pixtral-large-latest',
+         'voxtral-small', 'voxtral-mini',
+         'mistral-ocr-2512',
+     ],
+     cohere: [
+         'command-a-03-2025', 'command-a-reasoning', 'command-a-vision',
+         'command-r-plus-08-2024', 'command-r-08-2024',
+         'command-r-plus', 'command-r',
+     ],
+     groq: [
+         'llama-3.3-70b-versatile', 'llama-3.1-8b-instant',
+         'openai/gpt-oss-120b', 'openai/gpt-oss-20b',
+         'groq/compound', 'groq/compound-mini',
+         'meta-llama/llama-guard-4-12b',
+         'whisper-large-v3', 'whisper-large-v3-turbo',
+     ],
+     xai: [
+         'grok-4', 'grok-4.1-fast',
+         'grok-3', 'grok-3-mini', 'grok-3-vision',
+         'grok-2', 'grok-2-vision',
+     ],
+     deepseek: ['deepseek-chat', 'deepseek-reasoner'],
+     perplexity: [
+         'sonar', 'sonar-pro',
+         'sonar-reasoning', 'sonar-reasoning-pro',
+         'sonar-deep-research',
+     ],
+     llm_server: [
+         'qwen/qwen3-30b-a3b-2507', 'meta-llama-3.1-8b-instruct', 'qwen-32b-everything', 'openai/gpt-oss-20b',
+         'llava-v1.6-mistral-7b', 'llava-v1.6-vicuna-13b', 'qwen2-vl-7b-instruct', 'phi-3-vision-128k-instruct',
+         'minicpm-v-2_6', 'moondream2', 'internvl2-8b', 'llama-3.2-11b-vision', 'pixtral-12b',
+     ],
  };
- /** Default model for each provider */
+ /** Default model for each provider (January 2026) */
  exports.DEFAULT_PROVIDER_MODEL = {
-     openai: 'gpt-4o-mini',
-     anthropic: 'claude-sonnet-4-20250514',
-     gemini: 'gemini-2.0-flash',
+     openai: 'gpt-4.1-mini',
+     anthropic: 'claude-sonnet-4-5-20251124',
+     gemini: 'gemini-2.5-flash',
      mistral: 'mistral-small-latest',
-     cohere: 'command-r',
+     cohere: 'command-r-08-2024',
      groq: 'llama-3.3-70b-versatile',
-     xai: 'grok-2-mini',
+     xai: 'grok-3-mini',
      deepseek: 'deepseek-chat',
-     perplexity: 'llama-3.1-sonar-small-128k-online',
+     perplexity: 'sonar',
      llm_server: 'meta-llama-3.1-8b-instruct',
  };
  /** Providers that allow custom model input (user can type any model name) */
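
Since the catalogs above are plain data, a consumer can drive a provider/model picker directly from them. A minimal sketch, assuming the package's TypeScript entry point exposes the same named exports as this CJS build (the `buildPickerState` helper is hypothetical, not part of the package):

    import {
      LLM_PROVIDERS,
      PROVIDER_MODELS,
      DEFAULT_PROVIDER_MODEL,
      PROVIDER_ALLOWS_CUSTOM_MODEL,
    } from '@sudobility/shapeshyft_types';

    // Hypothetical helper: one entry per provider for a settings UI.
    function buildPickerState() {
      return LLM_PROVIDERS.map((provider) => ({
        provider,
        models: PROVIDER_MODELS[provider],                    // dropdown options
        selected: DEFAULT_PROVIDER_MODEL[provider],           // preselected default
        allowCustom: PROVIDER_ALLOWS_CUSTOM_MODEL[provider],  // show a free-text input?
      }));
    }
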
@@ -63,84 +116,679 @@ exports.PROVIDER_ALLOWS_CUSTOM_MODEL = {
      perplexity: false,
      llm_server: true, // Users can type any model name for custom LLM servers
  };
+ /**
+  * Model capabilities map (January 2026).
+  * Maps model names to their multimodal capabilities.
+  * Models not in this map have undefined capabilities.
+  */
+ exports.MODEL_CAPABILITIES = {
+     // ===========================================================================
+     // OpenAI (January 2026)
+     // OpenAI supports URL and base64 for images
+     // ===========================================================================
+     // GPT-4.1 family
+     'gpt-4.1': {
+         visionInput: true, audioInput: false, videoInput: false,
+         imageOutput: false, audioOutput: false, videoOutput: false,
+         mediaFormats: { imageFormats: ['url', 'base64'] },
+     },
+     'gpt-4.1-mini': {
+         visionInput: true, audioInput: false, videoInput: false,
+         imageOutput: false, audioOutput: false, videoOutput: false,
+         mediaFormats: { imageFormats: ['url', 'base64'] },
+     },
+     'gpt-4.1-nano': {
+         visionInput: false, audioInput: false, videoInput: false,
+         imageOutput: false, audioOutput: false, videoOutput: false,
+     },
+     // GPT-4o (omni - multimodal)
+     'gpt-4o': {
+         visionInput: true, audioInput: true, videoInput: false,
+         imageOutput: false, audioOutput: true, videoOutput: false,
+         mediaFormats: { imageFormats: ['url', 'base64'], audioFormats: ['base64', 'file'] },
+     },
+     'gpt-4o-mini': {
+         visionInput: true, audioInput: true, videoInput: false,
+         imageOutput: false, audioOutput: true, videoOutput: false,
+         mediaFormats: { imageFormats: ['url', 'base64'], audioFormats: ['base64', 'file'] },
+     },
+     // Reasoning models (o-series)
+     'o3': {
+         visionInput: true, audioInput: false, videoInput: false,
+         imageOutput: false, audioOutput: false, videoOutput: false,
+         mediaFormats: { imageFormats: ['url', 'base64'] },
+     },
+     'o3-pro': {
+         visionInput: true, audioInput: false, videoInput: false,
+         imageOutput: false, audioOutput: false, videoOutput: false,
+         mediaFormats: { imageFormats: ['url', 'base64'] },
+     },
+     'o4-mini': {
+         visionInput: true, audioInput: false, videoInput: false,
+         imageOutput: false, audioOutput: false, videoOutput: false,
+         mediaFormats: { imageFormats: ['url', 'base64'] },
+     },
+     // Legacy
+     'gpt-4-turbo': {
+         visionInput: true, audioInput: false, videoInput: false,
+         imageOutput: false, audioOutput: false, videoOutput: false,
+         mediaFormats: { imageFormats: ['url', 'base64'] },
+     },
+     'o1': {
+         visionInput: true, audioInput: false, videoInput: false,
+         imageOutput: false, audioOutput: false, videoOutput: false,
+         mediaFormats: { imageFormats: ['url', 'base64'] },
+     },
+     // ===========================================================================
+     // Anthropic (January 2026)
+     // Anthropic supports URL and base64 for images
+     // ===========================================================================
+     // Claude 4.5
+     'claude-opus-4-5-20251124': {
+         visionInput: true, audioInput: false, videoInput: false,
+         imageOutput: false, audioOutput: false, videoOutput: false,
+         mediaFormats: { imageFormats: ['url', 'base64'] },
+     },
+     'claude-sonnet-4-5-20251124': {
+         visionInput: true, audioInput: false, videoInput: false,
+         imageOutput: false, audioOutput: false, videoOutput: false,
+         mediaFormats: { imageFormats: ['url', 'base64'] },
+     },
+     // Claude 4.1
+     'claude-opus-4-1-20250805': {
+         visionInput: true, audioInput: false, videoInput: false,
+         imageOutput: false, audioOutput: false, videoOutput: false,
+         mediaFormats: { imageFormats: ['url', 'base64'] },
+     },
+     // Claude 4
+     'claude-sonnet-4-20250514': {
+         visionInput: true, audioInput: false, videoInput: false,
+         imageOutput: false, audioOutput: false, videoOutput: false,
+         mediaFormats: { imageFormats: ['url', 'base64'] },
+     },
+     'claude-opus-4-20250514': {
+         visionInput: true, audioInput: false, videoInput: false,
+         imageOutput: false, audioOutput: false, videoOutput: false,
+         mediaFormats: { imageFormats: ['url', 'base64'] },
+     },
+     // Claude 3.5
+     'claude-3-5-haiku-20241022': {
+         visionInput: true, audioInput: false, videoInput: false,
+         imageOutput: false, audioOutput: false, videoOutput: false,
+         mediaFormats: { imageFormats: ['url', 'base64'] },
+     },
+     // ===========================================================================
+     // Google Gemini (January 2026)
+     // Gemini supports URL, base64, and GCS URIs for all media types
+     // ===========================================================================
+     // Gemini 3 (latest)
+     'gemini-3-pro-preview': {
+         visionInput: true, audioInput: true, videoInput: true,
+         imageOutput: false, audioOutput: false, videoOutput: false,
+         mediaFormats: {
+             imageFormats: ['url', 'base64', 'gcs'],
+             audioFormats: ['url', 'base64', 'gcs'],
+             videoFormats: ['url', 'gcs'],
+         },
+     },
+     'gemini-3-flash-preview': {
+         visionInput: true, audioInput: true, videoInput: true,
+         imageOutput: false, audioOutput: false, videoOutput: false,
+         mediaFormats: {
+             imageFormats: ['url', 'base64', 'gcs'],
+             audioFormats: ['url', 'base64', 'gcs'],
+             videoFormats: ['url', 'gcs'],
+         },
+     },
+     'gemini-3-pro-image-preview': {
+         visionInput: true, audioInput: false, videoInput: false,
+         imageOutput: true, audioOutput: false, videoOutput: false,
+         mediaFormats: { imageFormats: ['url', 'base64', 'gcs'] },
+     },
+     // Gemini 2.5
+     'gemini-2.5-pro': {
+         visionInput: true, audioInput: true, videoInput: true,
+         imageOutput: false, audioOutput: false, videoOutput: false,
+         mediaFormats: {
+             imageFormats: ['url', 'base64', 'gcs'],
+             audioFormats: ['url', 'base64', 'gcs'],
+             videoFormats: ['url', 'gcs'],
+         },
+     },
+     'gemini-2.5-flash': {
+         visionInput: true, audioInput: true, videoInput: true,
+         imageOutput: false, audioOutput: false, videoOutput: false,
+         mediaFormats: {
+             imageFormats: ['url', 'base64', 'gcs'],
+             audioFormats: ['url', 'base64', 'gcs'],
+             videoFormats: ['url', 'gcs'],
+         },
+     },
+     'gemini-2.5-flash-lite': {
+         visionInput: true, audioInput: true, videoInput: true,
+         imageOutput: false, audioOutput: false, videoOutput: false,
+         mediaFormats: {
+             imageFormats: ['url', 'base64', 'gcs'],
+             audioFormats: ['url', 'base64', 'gcs'],
+             videoFormats: ['url', 'gcs'],
+         },
+     },
+     'gemini-2.5-flash-image': {
+         visionInput: true, audioInput: false, videoInput: false,
+         imageOutput: true, audioOutput: false, videoOutput: false,
+         mediaFormats: { imageFormats: ['url', 'base64', 'gcs'] },
+     },
+     'gemini-2.5-flash-native-audio-preview': {
+         visionInput: true, audioInput: true, videoInput: true,
+         imageOutput: false, audioOutput: true, videoOutput: false,
+         mediaFormats: {
+             imageFormats: ['url', 'base64', 'gcs'],
+             audioFormats: ['url', 'base64', 'gcs'],
+             videoFormats: ['url', 'gcs'],
+         },
+     },
+     // Gemini 2.0
+     'gemini-2.0-flash': {
+         visionInput: true, audioInput: true, videoInput: true,
+         imageOutput: true, audioOutput: true, videoOutput: false,
+         mediaFormats: {
+             imageFormats: ['url', 'base64', 'gcs'],
+             audioFormats: ['url', 'base64', 'gcs'],
+             videoFormats: ['url', 'gcs'],
+         },
+     },
+     'gemini-2.0-flash-lite': {
+         visionInput: true, audioInput: true, videoInput: true,
+         imageOutput: false, audioOutput: false, videoOutput: false,
+         mediaFormats: {
+             imageFormats: ['url', 'base64', 'gcs'],
+             audioFormats: ['url', 'base64', 'gcs'],
+             videoFormats: ['url', 'gcs'],
+         },
+     },
+     // ===========================================================================
+     // Mistral AI (January 2026)
+     // Mistral supports URL and base64 for images
+     // ===========================================================================
+     // Large models (Mistral Large 3 has vision)
+     'mistral-large-2512': {
+         visionInput: true, audioInput: false, videoInput: false,
+         imageOutput: false, audioOutput: false, videoOutput: false,
+         mediaFormats: { imageFormats: ['url', 'base64'] },
+     },
+     'mistral-large-latest': {
+         visionInput: true, audioInput: false, videoInput: false,
+         imageOutput: false, audioOutput: false, videoOutput: false,
+         mediaFormats: { imageFormats: ['url', 'base64'] },
+     },
+     // Medium models (vision capable)
+     'mistral-medium-3.1': {
+         visionInput: true, audioInput: false, videoInput: false,
+         imageOutput: false, audioOutput: false, videoOutput: false,
+         mediaFormats: { imageFormats: ['url', 'base64'] },
+     },
+     'mistral-medium-latest': {
+         visionInput: true, audioInput: false, videoInput: false,
+         imageOutput: false, audioOutput: false, videoOutput: false,
+         mediaFormats: { imageFormats: ['url', 'base64'] },
+     },
+     // Small models (vision capable)
+     'mistral-small-3.2': {
+         visionInput: true, audioInput: false, videoInput: false,
+         imageOutput: false, audioOutput: false, videoOutput: false,
+         mediaFormats: { imageFormats: ['url', 'base64'] },
+     },
+     'mistral-small-latest': {
+         visionInput: false, audioInput: false, videoInput: false,
+         imageOutput: false, audioOutput: false, videoOutput: false,
+     },
+     // Ministral (vision capable)
+     'ministral-3b-2512': {
+         visionInput: true, audioInput: false, videoInput: false,
+         imageOutput: false, audioOutput: false, videoOutput: false,
+         mediaFormats: { imageFormats: ['url', 'base64'] },
+     },
+     'ministral-8b-2512': {
+         visionInput: true, audioInput: false, videoInput: false,
+         imageOutput: false, audioOutput: false, videoOutput: false,
+         mediaFormats: { imageFormats: ['url', 'base64'] },
+     },
+     'ministral-14b-2512': {
+         visionInput: true, audioInput: false, videoInput: false,
+         imageOutput: false, audioOutput: false, videoOutput: false,
+         mediaFormats: { imageFormats: ['url', 'base64'] },
+     },
+     // Code models
+     'codestral-2501': {
+         visionInput: false, audioInput: false, videoInput: false,
+         imageOutput: false, audioOutput: false, videoOutput: false,
+     },
+     'codestral-latest': {
+         visionInput: false, audioInput: false, videoInput: false,
+         imageOutput: false, audioOutput: false, videoOutput: false,
+     },
+     // Vision models (Pixtral)
+     'pixtral-large-2411': {
+         visionInput: true, audioInput: false, videoInput: false,
+         imageOutput: false, audioOutput: false, videoOutput: false,
+         mediaFormats: { imageFormats: ['url', 'base64'] },
+     },
+     'pixtral-large-latest': {
+         visionInput: true, audioInput: false, videoInput: false,
+         imageOutput: false, audioOutput: false, videoOutput: false,
+         mediaFormats: { imageFormats: ['url', 'base64'] },
+     },
+     // Audio models (Voxtral)
+     'voxtral-small': {
+         visionInput: false, audioInput: true, videoInput: false,
+         imageOutput: false, audioOutput: false, videoOutput: false,
+         mediaFormats: { audioFormats: ['base64', 'file'] },
+     },
+     'voxtral-mini': {
+         visionInput: false, audioInput: true, videoInput: false,
+         imageOutput: false, audioOutput: false, videoOutput: false,
+         mediaFormats: { audioFormats: ['base64', 'file'] },
+     },
+     // Document AI
+     'mistral-ocr-2512': {
+         visionInput: true, audioInput: false, videoInput: false,
+         imageOutput: false, audioOutput: false, videoOutput: false,
+         mediaFormats: { imageFormats: ['url', 'base64'] },
+     },
+     // ===========================================================================
+     // Cohere (January 2026)
+     // ===========================================================================
+     // Command A family
+     'command-a-03-2025': { visionInput: false, audioInput: false, videoInput: false, imageOutput: false, audioOutput: false, videoOutput: false },
+     'command-a-reasoning': { visionInput: false, audioInput: false, videoInput: false, imageOutput: false, audioOutput: false, videoOutput: false },
+     'command-a-vision': {
+         visionInput: true, audioInput: false, videoInput: false,
+         imageOutput: false, audioOutput: false, videoOutput: false,
+         mediaFormats: { imageFormats: ['base64'] },
+     },
+     // Command R family
+     'command-r-plus-08-2024': { visionInput: false, audioInput: false, videoInput: false, imageOutput: false, audioOutput: false, videoOutput: false },
+     'command-r-08-2024': { visionInput: false, audioInput: false, videoInput: false, imageOutput: false, audioOutput: false, videoOutput: false },
+     'command-r-plus': { visionInput: false, audioInput: false, videoInput: false, imageOutput: false, audioOutput: false, videoOutput: false },
+     'command-r': { visionInput: false, audioInput: false, videoInput: false, imageOutput: false, audioOutput: false, videoOutput: false },
+     // ===========================================================================
+     // Groq (January 2026)
+     // ===========================================================================
+     'llama-3.3-70b-versatile': { visionInput: false, audioInput: false, videoInput: false, imageOutput: false, audioOutput: false, videoOutput: false },
+     'llama-3.1-8b-instant': { visionInput: false, audioInput: false, videoInput: false, imageOutput: false, audioOutput: false, videoOutput: false },
+     'openai/gpt-oss-120b': { visionInput: false, audioInput: false, videoInput: false, imageOutput: false, audioOutput: false, videoOutput: false },
+     'openai/gpt-oss-20b': { visionInput: false, audioInput: false, videoInput: false, imageOutput: false, audioOutput: false, videoOutput: false },
+     'groq/compound': { visionInput: false, audioInput: false, videoInput: false, imageOutput: false, audioOutput: false, videoOutput: false },
+     'groq/compound-mini': { visionInput: false, audioInput: false, videoInput: false, imageOutput: false, audioOutput: false, videoOutput: false },
+     'meta-llama/llama-guard-4-12b': {
+         visionInput: true, audioInput: false, videoInput: false,
+         imageOutput: false, audioOutput: false, videoOutput: false,
+         mediaFormats: { imageFormats: ['base64'] },
+     },
+     'whisper-large-v3': {
+         visionInput: false, audioInput: true, videoInput: false,
+         imageOutput: false, audioOutput: false, videoOutput: false,
+         mediaFormats: { audioFormats: ['file'] },
+     },
+     'whisper-large-v3-turbo': {
+         visionInput: false, audioInput: true, videoInput: false,
+         imageOutput: false, audioOutput: false, videoOutput: false,
+         mediaFormats: { audioFormats: ['file'] },
+     },
+     // ===========================================================================
+     // xAI Grok (January 2026)
+     // ===========================================================================
+     'grok-4': {
+         visionInput: true, audioInput: false, videoInput: false,
+         imageOutput: false, audioOutput: false, videoOutput: false,
+         mediaFormats: { imageFormats: ['url', 'base64'] },
+     },
+     'grok-4.1-fast': {
+         visionInput: true, audioInput: false, videoInput: false,
+         imageOutput: false, audioOutput: false, videoOutput: false,
+         mediaFormats: { imageFormats: ['url', 'base64'] },
+     },
+     'grok-3': {
+         visionInput: true, audioInput: false, videoInput: false,
+         imageOutput: false, audioOutput: false, videoOutput: false,
+         mediaFormats: { imageFormats: ['url', 'base64'] },
+     },
+     'grok-3-mini': { visionInput: false, audioInput: false, videoInput: false, imageOutput: false, audioOutput: false, videoOutput: false },
+     'grok-3-vision': {
+         visionInput: true, audioInput: false, videoInput: false,
+         imageOutput: false, audioOutput: false, videoOutput: false,
+         mediaFormats: { imageFormats: ['url', 'base64'] },
+     },
+     'grok-2': { visionInput: false, audioInput: false, videoInput: false, imageOutput: false, audioOutput: false, videoOutput: false },
+     'grok-2-vision': {
+         visionInput: true, audioInput: false, videoInput: false,
+         imageOutput: false, audioOutput: false, videoOutput: false,
+         mediaFormats: { imageFormats: ['url', 'base64'] },
+     },
+     // ===========================================================================
+     // DeepSeek (January 2026 - V3.2)
+     // ===========================================================================
+     'deepseek-chat': { visionInput: false, audioInput: false, videoInput: false, imageOutput: false, audioOutput: false, videoOutput: false },
+     'deepseek-reasoner': { visionInput: false, audioInput: false, videoInput: false, imageOutput: false, audioOutput: false, videoOutput: false },
+     // ===========================================================================
+     // Perplexity (January 2026 - Sonar family)
+     // ===========================================================================
+     'sonar': { visionInput: false, audioInput: false, videoInput: false, imageOutput: false, audioOutput: false, videoOutput: false },
+     'sonar-pro': { visionInput: false, audioInput: false, videoInput: false, imageOutput: false, audioOutput: false, videoOutput: false },
+     'sonar-reasoning': { visionInput: false, audioInput: false, videoInput: false, imageOutput: false, audioOutput: false, videoOutput: false },
+     'sonar-reasoning-pro': { visionInput: false, audioInput: false, videoInput: false, imageOutput: false, audioOutput: false, videoOutput: false },
+     'sonar-deep-research': { visionInput: false, audioInput: false, videoInput: false, imageOutput: false, audioOutput: false, videoOutput: false },
+     // ===========================================================================
+     // LM Studio / Local Models (common vision models)
+     // Local models typically only support base64 as they don't have internet access
+     // ===========================================================================
+     'llava-v1.6-mistral-7b': {
+         visionInput: true, audioInput: false, videoInput: false,
+         imageOutput: false, audioOutput: false, videoOutput: false,
+         mediaFormats: { imageFormats: ['base64'] },
+     },
+     'llava-v1.6-vicuna-13b': {
+         visionInput: true, audioInput: false, videoInput: false,
+         imageOutput: false, audioOutput: false, videoOutput: false,
+         mediaFormats: { imageFormats: ['base64'] },
+     },
+     'qwen2-vl-7b-instruct': {
+         visionInput: true, audioInput: false, videoInput: false,
+         imageOutput: false, audioOutput: false, videoOutput: false,
+         mediaFormats: { imageFormats: ['base64'] },
+     },
+     'phi-3-vision-128k-instruct': {
+         visionInput: true, audioInput: false, videoInput: false,
+         imageOutput: false, audioOutput: false, videoOutput: false,
+         mediaFormats: { imageFormats: ['base64'] },
+     },
+     'minicpm-v-2_6': {
+         visionInput: true, audioInput: false, videoInput: false,
+         imageOutput: false, audioOutput: false, videoOutput: false,
+         mediaFormats: { imageFormats: ['base64'] },
+     },
+     'moondream2': {
+         visionInput: true, audioInput: false, videoInput: false,
+         imageOutput: false, audioOutput: false, videoOutput: false,
+         mediaFormats: { imageFormats: ['base64'] },
+     },
+     'internvl2-8b': {
+         visionInput: true, audioInput: false, videoInput: false,
+         imageOutput: false, audioOutput: false, videoOutput: false,
+         mediaFormats: { imageFormats: ['base64'] },
+     },
+     'llama-3.2-11b-vision': {
+         visionInput: true, audioInput: false, videoInput: false,
+         imageOutput: false, audioOutput: false, videoOutput: false,
+         mediaFormats: { imageFormats: ['base64'] },
+     },
+     'pixtral-12b': {
+         visionInput: true, audioInput: false, videoInput: false,
+         imageOutput: false, audioOutput: false, videoOutput: false,
+         mediaFormats: { imageFormats: ['base64'] },
+     },
+ };
+ /**
+  * Get capabilities for a model.
+  * Returns undefined for each capability if model is not in the map (unknown).
+  */
+ function getModelCapabilities(model) {
+     return exports.MODEL_CAPABILITIES[model] ?? {};
+ }
+ /**
+  * Check if a model has a specific input capability.
+  * Returns undefined if unknown, true/false if known.
+  */
+ function hasInputCapability(model, capability) {
+     const caps = exports.MODEL_CAPABILITIES[model];
+     if (!caps)
+         return undefined;
+     switch (capability) {
+         case 'vision': return caps.visionInput;
+         case 'audio': return caps.audioInput;
+         case 'video': return caps.videoInput;
+     }
+ }
+ /**
+  * Check if a model has a specific output capability.
+  * Returns undefined if unknown, true/false if known.
+  */
+ function hasOutputCapability(model, capability) {
+     const caps = exports.MODEL_CAPABILITIES[model];
+     if (!caps)
+         return undefined;
+     switch (capability) {
+         case 'image': return caps.imageOutput;
+         case 'audio': return caps.audioOutput;
+         case 'video': return caps.videoOutput;
+     }
+ }
+ /**
+  * Get supported media input formats for a specific media type.
+  * Returns the list of supported formats, or undefined if model is unknown.
+  * Returns empty array if model is known but doesn't support that media type.
+  */
+ function getMediaFormats(model, mediaType) {
+     const caps = exports.MODEL_CAPABILITIES[model];
+     if (!caps)
+         return undefined;
+     if (!caps.mediaFormats)
+         return [];
+     switch (mediaType) {
+         case 'image': return caps.mediaFormats.imageFormats ?? [];
+         case 'audio': return caps.mediaFormats.audioFormats ?? [];
+         case 'video': return caps.mediaFormats.videoFormats ?? [];
+     }
+ }
+ /**
+  * Check if a model supports a specific media input format.
+  * Returns undefined if model is unknown, true/false if known.
+  */
+ function supportsMediaFormat(model, mediaType, format) {
+     const formats = getMediaFormats(model, mediaType);
+     if (formats === undefined)
+         return undefined;
+     return formats.includes(format);
+ }
+ /**
+  * Get the preferred/default media input format for a model.
+  * Returns 'url' if supported, otherwise the first supported format.
+  * Returns undefined if model is unknown or doesn't support the media type.
+  */
+ function getDefaultMediaFormat(model, mediaType) {
+     const formats = getMediaFormats(model, mediaType);
+     if (!formats || formats.length === 0)
+         return undefined;
+     // Prefer URL if available (more efficient for providers)
+     if (formats.includes('url'))
+         return 'url';
+     // Otherwise return first available format
+     return formats[0];
+ }
+ /**
+  * Check if a model supports URL-based media input for a specific media type.
+  * This is a convenience function since URL support affects UI behavior.
+  */
+ function supportsMediaUrl(model, mediaType) {
+     return supportsMediaFormat(model, mediaType, 'url');
+ }
  /**
   * Cost estimation per 1M tokens (in cents)
-  * Prices sourced from official provider pricing pages as of Jan 2025
+  * Prices sourced from official provider pricing pages as of January 2026
   */
  exports.COST_PER_MILLION_TOKENS = {
      // ==========================================================================
-     // OpenAI (https://openai.com/pricing)
+     // OpenAI (https://openai.com/pricing) - January 2026
+     // Images: counted as tokens (~765 tokens per 512x512 image)
+     // Audio: Realtime API pricing
      // ==========================================================================
-     'gpt-4o': { input: 250, output: 1000 },
-     'gpt-4o-mini': { input: 15, output: 60 },
+     // GPT-4.1 family (latest non-reasoning)
+     'gpt-4.1': { input: 200, output: 800 },
+     'gpt-4.1-mini': { input: 40, output: 160 },
+     'gpt-4.1-nano': { input: 10, output: 40 },
+     // GPT-4o (omni - multimodal)
+     'gpt-4o': {
+         input: 250, output: 1000,
+         audioInput: 600, // ~$6.00 per minute (Realtime API)
+         audioOutput: 1200, // ~$12.00 per minute (Realtime API)
+     },
+     'gpt-4o-mini': {
+         input: 15, output: 60,
+         audioInput: 60, // ~$0.60 per minute (Realtime API)
+         audioOutput: 120, // ~$1.20 per minute (Realtime API)
+     },
+     // Reasoning models (o-series)
+     'o3': { input: 1000, output: 4000 },
+     'o3-pro': { input: 15000, output: 60000 },
+     'o4-mini': { input: 150, output: 600 },
+     // Legacy
      'gpt-4-turbo': { input: 1000, output: 3000 },
-     'gpt-3.5-turbo': { input: 50, output: 150 },
      'o1': { input: 1500, output: 6000 },
-     'o1-mini': { input: 300, output: 1200 },
-     'o1-pro': { input: 15000, output: 60000 },
      // ==========================================================================
-     // Anthropic (https://anthropic.com/pricing)
+     // Anthropic (https://anthropic.com/pricing) - January 2026
      // ==========================================================================
+     // Claude 4.5
+     'claude-opus-4-5-20251124': { input: 2000, output: 10000 },
+     'claude-sonnet-4-5-20251124': { input: 400, output: 2000 },
+     // Claude 4.1
+     'claude-opus-4-1-20250805': { input: 1800, output: 9000 },
+     // Claude 4
      'claude-sonnet-4-20250514': { input: 300, output: 1500 },
      'claude-opus-4-20250514': { input: 1500, output: 7500 },
+     // Claude 3.5
      'claude-3-5-haiku-20241022': { input: 80, output: 400 },
-     // Legacy model names (for backwards compatibility)
-     'claude-3-5-sonnet-20241022': { input: 300, output: 1500 },
-     'claude-3-opus-20240229': { input: 1500, output: 7500 },
-     'claude-3-haiku-20240307': { input: 25, output: 125 },
      // ==========================================================================
-     // Google Gemini (https://ai.google.dev/pricing)
+     // Google Gemini (https://ai.google.dev/pricing) - January 2026
+     // Images: ~258 tokens/image, Audio: ~32 tokens/sec, Video: ~263 tokens/sec
      // ==========================================================================
-     'gemini-2.0-flash': { input: 10, output: 40 },
+     // Gemini 3 (latest)
+     'gemini-3-pro-preview': { input: 175, output: 700 },
+     'gemini-3-flash-preview': { input: 15, output: 60 },
+     'gemini-3-pro-image-preview': {
+         input: 175, output: 700,
+         imageOutput: 8, // ~$0.08 per generated image
+     },
+     // Gemini 2.5
+     'gemini-2.5-pro': { input: 125, output: 500 },
+     'gemini-2.5-flash': { input: 7.5, output: 30 },
+     'gemini-2.5-flash-lite': { input: 3.75, output: 15 },
+     'gemini-2.5-flash-image': {
+         input: 10, output: 40,
+         imageOutput: 4, // ~$0.04 per generated image
+     },
+     'gemini-2.5-flash-native-audio-preview': {
+         input: 10, output: 40,
+         audioInput: 10, // ~$0.10 per minute audio input
+         audioOutput: 60, // ~$0.60 per minute audio output
+     },
+     // Gemini 2.0
+     'gemini-2.0-flash': {
+         input: 10, output: 40,
+         imageOutput: 4, // ~$0.04 per generated image
+         audioOutput: 60, // ~$0.60 per minute of audio output
+     },
      'gemini-2.0-flash-lite': { input: 5, output: 20 },
-     'gemini-1.5-pro': { input: 125, output: 500 },
-     'gemini-1.5-flash': { input: 7.5, output: 30 },
      // ==========================================================================
-     // Mistral AI (https://mistral.ai/technology/#pricing)
+     // Mistral AI (https://mistral.ai/technology/#pricing) - January 2026
      // ==========================================================================
+     // Large models (Mistral Large 3)
+     'mistral-large-2512': { input: 200, output: 600 },
      'mistral-large-latest': { input: 200, output: 600 },
-     'mistral-medium-latest': { input: 270, output: 810 },
+     // Medium models
+     'mistral-medium-3.1': { input: 100, output: 300 },
+     'mistral-medium-latest': { input: 100, output: 300 },
+     // Small models
+     'mistral-small-3.2': { input: 20, output: 60 },
      'mistral-small-latest': { input: 10, output: 30 },
+     // Ministral family (small, efficient)
+     'ministral-3b-2512': { input: 4, output: 12 },
+     'ministral-8b-2512': { input: 10, output: 30 },
+     'ministral-14b-2512': { input: 15, output: 45 },
+     // Code models
+     'codestral-2501': { input: 30, output: 90 },
      'codestral-latest': { input: 30, output: 90 },
-     'mistral-nemo': { input: 15, output: 15 },
+     // Vision models (Pixtral)
+     'pixtral-large-2411': { input: 200, output: 600 },
+     'pixtral-large-latest': { input: 200, output: 600 },
+     // Audio models (Voxtral)
+     'voxtral-small': { input: 20, output: 60, audioInput: 15 },
+     'voxtral-mini': { input: 10, output: 30, audioInput: 8 },
+     // Document AI
+     'mistral-ocr-2512': { input: 15, output: 45 },
      // ==========================================================================
-     // Cohere (https://cohere.com/pricing)
+     // Cohere (https://cohere.com/pricing) - January 2026
      // ==========================================================================
+     // Command A family (latest)
+     'command-a-03-2025': { input: 250, output: 1000 },
+     'command-a-reasoning': { input: 400, output: 1600 },
+     'command-a-vision': { input: 300, output: 1200 },
+     // Command R family
+     'command-r-plus-08-2024': { input: 250, output: 1000 },
+     'command-r-08-2024': { input: 15, output: 60 },
      'command-r-plus': { input: 250, output: 1000 },
      'command-r': { input: 15, output: 60 },
-     'command': { input: 100, output: 200 },
-     'command-light': { input: 30, output: 60 },
      // ==========================================================================
-     // Groq (https://groq.com/pricing) - Fast inference, competitive pricing
+     // Groq (https://groq.com/pricing) - Fast inference, January 2026
      // ==========================================================================
+     // Llama models
      'llama-3.3-70b-versatile': { input: 59, output: 79 },
      'llama-3.1-8b-instant': { input: 5, output: 8 },
-     'mixtral-8x7b-32768': { input: 24, output: 24 },
-     'gemma2-9b-it': { input: 20, output: 20 },
+     // GPT OSS models
+     'openai/gpt-oss-120b': { input: 150, output: 200 },
+     'openai/gpt-oss-20b': { input: 30, output: 40 },
+     // Compound (agentic)
+     'groq/compound': { input: 100, output: 150 },
+     'groq/compound-mini': { input: 30, output: 50 },
+     // Safety
+     'meta-llama/llama-guard-4-12b': { input: 20, output: 20 },
+     // Audio (Whisper - speech-to-text, per minute)
+     'whisper-large-v3': { input: 11, output: 0, audioInput: 11 },
+     'whisper-large-v3-turbo': { input: 4, output: 0, audioInput: 4 },
      // ==========================================================================
-     // xAI Grok (https://x.ai/api)
+     // xAI Grok (https://x.ai/api) - January 2026
      // ==========================================================================
+     // Grok 4 (latest)
+     'grok-4': { input: 500, output: 2000 },
+     'grok-4.1-fast': { input: 100, output: 400 },
+     // Grok 3
+     'grok-3': { input: 300, output: 1200 },
+     'grok-3-mini': { input: 30, output: 120 },
+     'grok-3-vision': { input: 350, output: 1400 },
+     // Grok 2 (legacy)
      'grok-2': { input: 200, output: 1000 },
-     'grok-2-mini': { input: 20, output: 100 },
+     'grok-2-vision': { input: 200, output: 1000 },
      // ==========================================================================
-     // DeepSeek (https://platform.deepseek.com/api-docs/pricing)
+     // DeepSeek (https://platform.deepseek.com/api-docs/pricing) - January 2026
+     // V3.2 pricing (very competitive)
      // ==========================================================================
      'deepseek-chat': { input: 14, output: 28 },
-     'deepseek-coder': { input: 14, output: 28 },
      'deepseek-reasoner': { input: 55, output: 219 },
      // ==========================================================================
-     // Perplexity (https://docs.perplexity.ai/guides/pricing)
+     // Perplexity (https://docs.perplexity.ai/guides/pricing) - January 2026
+     // Sonar family (includes search costs)
      // ==========================================================================
-     'llama-3.1-sonar-small-128k-online': { input: 20, output: 20 },
-     'llama-3.1-sonar-large-128k-online': { input: 100, output: 100 },
-     'llama-3.1-sonar-huge-128k-online': { input: 500, output: 500 },
+     'sonar': { input: 100, output: 100 },
+     'sonar-pro': { input: 300, output: 300 },
+     'sonar-reasoning': { input: 500, output: 500 },
+     'sonar-reasoning-pro': { input: 800, output: 800 },
+     'sonar-deep-research': { input: 1200, output: 1200 },
      // ==========================================================================
-     // LLM Server (custom) - Default/estimated pricing
+     // LLM Server (custom) - Default/estimated pricing for local models
      // ==========================================================================
+     // Text models
      'qwen/qwen3-30b-a3b-2507': { input: 50, output: 100 },
      'meta-llama-3.1-8b-instruct': { input: 20, output: 40 },
      'qwen-32b-everything': { input: 50, output: 100 },
-     'openai/gpt-oss-20b': { input: 30, output: 60 },
+     // Vision models (LM Studio / local) - estimated based on model size
+     'llava-v1.6-mistral-7b': { input: 25, output: 50 },
+     'llava-v1.6-vicuna-13b': { input: 40, output: 80 },
+     'qwen2-vl-7b-instruct': { input: 25, output: 50 },
+     'phi-3-vision-128k-instruct': { input: 20, output: 40 },
+     'minicpm-v-2_6': { input: 15, output: 30 },
+     'moondream2': { input: 10, output: 20 },
+     'internvl2-8b': { input: 25, output: 50 },
+     'llama-3.2-11b-vision': { input: 30, output: 60 },
+     'pixtral-12b': { input: 35, output: 70 },
      // ==========================================================================
      // Default for unknown models
      // ==========================================================================
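
The capability helpers added in the hunk above are deliberately tri-state: they return true or false for models listed in MODEL_CAPABILITIES and undefined for unknown models (for example, custom llm_server entries), so callers can distinguish "unsupported" from "unknown". A minimal usage sketch, assuming the package's TypeScript build exposes the same named exports as this CJS output:

    import {
      hasInputCapability,
      getDefaultMediaFormat,
      supportsMediaUrl,
    } from '@sudobility/shapeshyft_types';

    // Known models resolve to true/false; unknown models resolve to undefined.
    hasInputCapability('gpt-4.1', 'vision');          // true
    hasInputCapability('my-custom-model', 'vision');  // undefined (not in the map)

    // Pick an upload strategy per model: URL when the provider accepts it,
    // otherwise fall back to base64 (local models are listed as base64-only).
    getDefaultMediaFormat('gemini-2.5-flash', 'image');      // 'url'
    getDefaultMediaFormat('llava-v1.6-mistral-7b', 'image'); // 'base64'
    supportsMediaUrl('command-a-vision', 'image');           // false
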
@@ -156,7 +804,7 @@ function getModelPricing(model) {
      return exports.COST_PER_MILLION_TOKENS[model] ?? DEFAULT_MODEL_PRICING;
  }
  /**
-  * Estimate cost in cents for token usage
+  * Estimate cost in cents for token usage (text only, for backwards compatibility)
   */
  function estimateCost(model, inputTokens, outputTokens) {
      const costs = getModelPricing(model);
@@ -164,6 +812,42 @@ function estimateCost(model, inputTokens, outputTokens) {
      const outputCost = (outputTokens / 1000000) * costs.output;
      return Math.round((inputCost + outputCost) * 100) / 100; // Round to 2 decimal places
  }
+ /**
+  * Estimate cost in cents for multimodal usage
+  */
+ function estimateMultimodalCost(model, usage) {
+     const pricing = getModelPricing(model);
+     let totalCost = 0;
+     // Text token costs
+     if (usage.inputTokens) {
+         totalCost += (usage.inputTokens / 1000000) * pricing.input;
+     }
+     if (usage.outputTokens) {
+         totalCost += (usage.outputTokens / 1000000) * pricing.output;
+     }
+     // Image costs
+     if (usage.imagesInput && pricing.imageInput) {
+         totalCost += usage.imagesInput * pricing.imageInput;
+     }
+     if (usage.imagesOutput && pricing.imageOutput) {
+         totalCost += usage.imagesOutput * pricing.imageOutput;
+     }
+     // Audio costs
+     if (usage.audioInputMinutes && pricing.audioInput) {
+         totalCost += usage.audioInputMinutes * pricing.audioInput;
+     }
+     if (usage.audioOutputMinutes && pricing.audioOutput) {
+         totalCost += usage.audioOutputMinutes * pricing.audioOutput;
+     }
+     // Video costs
+     if (usage.videoInputMinutes && pricing.videoInput) {
+         totalCost += usage.videoInputMinutes * pricing.videoInput;
+     }
+     if (usage.videoOutputMinutes && pricing.videoOutput) {
+         totalCost += usage.videoOutputMinutes * pricing.videoOutput;
+     }
+     return Math.round(totalCost * 100) / 100; // Round to 2 decimal places
+ }
  /**
   * Format cost in cents to a readable string (e.g., "$0.0015" or "$1.50")
   */
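
All prices are expressed in cents per million tokens, plus per-minute and per-image rates for media, so the two estimators compose as follows. A minimal worked sketch, assuming the TypeScript build exposes the same named exports as this CJS output, using the gpt-4o-mini rates above (input 15, output 60, audioInput 60):

    import { estimateCost, estimateMultimodalCost } from '@sudobility/shapeshyft_types';

    // Text-only path: (10_000 / 1e6) * 15 + (2_000 / 1e6) * 60
    //               = 0.15 + 0.12 = 0.27 cents.
    const textCents = estimateCost('gpt-4o-mini', 10_000, 2_000); // 0.27

    // Multimodal path adds one minute of audio input at 60 cents/minute:
    // 0.27 + 60 = 60.27 cents.
    const totalCents = estimateMultimodalCost('gpt-4o-mini', {
      inputTokens: 10_000,
      outputTokens: 2_000,
      audioInputMinutes: 1,
    });
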