@roo-code/types 1.109.0 → 1.111.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -2,7 +2,7 @@
  import { z as z16 } from "zod";

  // src/events.ts
- import { z as z3 } from "zod";
+ import { z as z4 } from "zod";

  // src/message.ts
  import { z } from "zod";
@@ -141,16 +141,97 @@ var queuedMessageSchema = z.object({
  images: z.array(z.string()).optional()
  });

- // src/tool.ts
+ // src/model.ts
  import { z as z2 } from "zod";
+ var reasoningEfforts = ["low", "medium", "high"];
+ var reasoningEffortsSchema = z2.enum(reasoningEfforts);
+ var reasoningEffortWithMinimalSchema = z2.union([reasoningEffortsSchema, z2.literal("minimal")]);
+ var reasoningEffortsExtended = ["none", "minimal", "low", "medium", "high", "xhigh"];
+ var reasoningEffortExtendedSchema = z2.enum(reasoningEffortsExtended);
+ var reasoningEffortSettingValues = ["disable", "none", "minimal", "low", "medium", "high", "xhigh"];
+ var reasoningEffortSettingSchema = z2.enum(reasoningEffortSettingValues);
+ var verbosityLevels = ["low", "medium", "high"];
+ var verbosityLevelsSchema = z2.enum(verbosityLevels);
+ var serviceTiers = ["default", "flex", "priority"];
+ var serviceTierSchema = z2.enum(serviceTiers);
+ var modelParameters = ["max_tokens", "temperature", "reasoning", "include_reasoning"];
+ var modelParametersSchema = z2.enum(modelParameters);
+ var isModelParameter = (value) => modelParameters.includes(value);
+ var modelInfoSchema = z2.object({
+ maxTokens: z2.number().nullish(),
+ maxThinkingTokens: z2.number().nullish(),
+ contextWindow: z2.number(),
+ supportsImages: z2.boolean().optional(),
+ supportsPromptCache: z2.boolean(),
+ // Optional default prompt cache retention policy for providers that support it.
+ // When set to "24h", extended prompt caching will be requested; when omitted
+ // or set to "in_memory", the default in‑memory cache is used.
+ promptCacheRetention: z2.enum(["in_memory", "24h"]).optional(),
+ // Capability flag to indicate whether the model supports an output verbosity parameter
+ supportsVerbosity: z2.boolean().optional(),
+ supportsReasoningBudget: z2.boolean().optional(),
+ // Capability flag to indicate whether the model supports simple on/off binary reasoning
+ supportsReasoningBinary: z2.boolean().optional(),
+ // Capability flag to indicate whether the model supports temperature parameter
+ supportsTemperature: z2.boolean().optional(),
+ defaultTemperature: z2.number().optional(),
+ requiredReasoningBudget: z2.boolean().optional(),
+ supportsReasoningEffort: z2.union([z2.boolean(), z2.array(z2.enum(["disable", "none", "minimal", "low", "medium", "high", "xhigh"]))]).optional(),
+ requiredReasoningEffort: z2.boolean().optional(),
+ preserveReasoning: z2.boolean().optional(),
+ supportedParameters: z2.array(modelParametersSchema).optional(),
+ inputPrice: z2.number().optional(),
+ outputPrice: z2.number().optional(),
+ cacheWritesPrice: z2.number().optional(),
+ cacheReadsPrice: z2.number().optional(),
+ description: z2.string().optional(),
+ // Default effort value for models that support reasoning effort
+ reasoningEffort: reasoningEffortExtendedSchema.optional(),
+ minTokensPerCachePoint: z2.number().optional(),
+ maxCachePoints: z2.number().optional(),
+ cachableFields: z2.array(z2.string()).optional(),
+ // Flag to indicate if the model is deprecated and should not be used
+ deprecated: z2.boolean().optional(),
+ // Flag to indicate if the model should hide vendor/company identity in responses
+ isStealthModel: z2.boolean().optional(),
+ // Flag to indicate if the model is free (no cost)
+ isFree: z2.boolean().optional(),
+ // Exclude specific native tools from being available (only applies to native protocol)
+ // These tools will be removed from the set of tools available to the model
+ excludedTools: z2.array(z2.string()).optional(),
+ // Include specific native tools (only applies to native protocol)
+ // These tools will be added if they belong to an allowed group in the current mode
+ // Cannot force-add tools from groups the mode doesn't allow
+ includedTools: z2.array(z2.string()).optional(),
+ /**
+ * Service tiers with pricing information.
+ * Each tier can have a name (for OpenAI service tiers) and pricing overrides.
+ * The top-level input/output/cache* fields represent the default/standard tier.
+ */
+ tiers: z2.array(
+ z2.object({
+ name: serviceTierSchema.optional(),
+ // Service tier name (flex, priority, etc.)
+ contextWindow: z2.number(),
+ inputPrice: z2.number().optional(),
+ outputPrice: z2.number().optional(),
+ cacheWritesPrice: z2.number().optional(),
+ cacheReadsPrice: z2.number().optional()
+ })
+ ).optional()
+ });
+
+ // src/tool.ts
+ import { z as z3 } from "zod";
  var toolGroups = ["read", "edit", "browser", "command", "mcp", "modes"];
- var toolGroupsSchema = z2.enum(toolGroups);
+ var toolGroupsSchema = z3.enum(toolGroups);
  var toolNames = [
  "execute_command",
  "read_file",
  "read_command_output",
  "write_to_file",
  "apply_diff",
+ "edit",
  "search_and_replace",
  "search_replace",
  "edit_file",
@@ -171,12 +252,12 @@ var toolNames = [
  "generate_image",
  "custom_tool"
  ];
- var toolNamesSchema = z2.enum(toolNames);
- var toolUsageSchema = z2.record(
+ var toolNamesSchema = z3.enum(toolNames);
+ var toolUsageSchema = z3.record(
  toolNamesSchema,
- z2.object({
- attempts: z2.number(),
- failures: z2.number()
+ z3.object({
+ attempts: z3.number(),
+ failures: z3.number()
  })
  );

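The substantive addition in this hunk is the relocated `src/model.ts` module: a `modelInfoSchema` plus enums for reasoning effort, verbosity, service tiers, and model parameters, along with a new `edit` entry in `toolNames`. A minimal usage sketch follows, assuming these schemas are re-exported from the package entry point (the diff shows their definitions in `dist/index.js`, not the export list):

```ts
import { modelInfoSchema } from "@roo-code/types";

const candidate = {
  contextWindow: 200_000, // required: plain number
  supportsPromptCache: true, // required: boolean
  maxTokens: 8192, // nullish: number, null, or omitted
  supportsReasoningEffort: ["low", "medium", "high"], // boolean or effort allow-list
  reasoningEffort: "medium", // default effort, drawn from the extended enum
  promptCacheRetention: "24h", // opts into extended prompt caching
};

const parsed = modelInfoSchema.safeParse(candidate);
if (!parsed.success) {
  console.error(parsed.error.issues);
}
```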
@@ -207,198 +288,230 @@ var RooCodeEventName = /* @__PURE__ */ ((RooCodeEventName2) => {
  RooCodeEventName2["TaskToolFailed"] = "taskToolFailed";
  RooCodeEventName2["ModeChanged"] = "modeChanged";
  RooCodeEventName2["ProviderProfileChanged"] = "providerProfileChanged";
+ RooCodeEventName2["CommandsResponse"] = "commandsResponse";
+ RooCodeEventName2["ModesResponse"] = "modesResponse";
+ RooCodeEventName2["ModelsResponse"] = "modelsResponse";
  RooCodeEventName2["EvalPass"] = "evalPass";
  RooCodeEventName2["EvalFail"] = "evalFail";
  return RooCodeEventName2;
  })(RooCodeEventName || {});
- var rooCodeEventsSchema = z3.object({
- ["taskCreated" /* TaskCreated */]: z3.tuple([z3.string()]),
- ["taskStarted" /* TaskStarted */]: z3.tuple([z3.string()]),
- ["taskCompleted" /* TaskCompleted */]: z3.tuple([
- z3.string(),
+ var rooCodeEventsSchema = z4.object({
+ ["taskCreated" /* TaskCreated */]: z4.tuple([z4.string()]),
+ ["taskStarted" /* TaskStarted */]: z4.tuple([z4.string()]),
+ ["taskCompleted" /* TaskCompleted */]: z4.tuple([
+ z4.string(),
  tokenUsageSchema,
  toolUsageSchema,
- z3.object({
- isSubtask: z3.boolean()
+ z4.object({
+ isSubtask: z4.boolean()
  })
  ]),
- ["taskAborted" /* TaskAborted */]: z3.tuple([z3.string()]),
- ["taskFocused" /* TaskFocused */]: z3.tuple([z3.string()]),
- ["taskUnfocused" /* TaskUnfocused */]: z3.tuple([z3.string()]),
- ["taskActive" /* TaskActive */]: z3.tuple([z3.string()]),
- ["taskInteractive" /* TaskInteractive */]: z3.tuple([z3.string()]),
- ["taskResumable" /* TaskResumable */]: z3.tuple([z3.string()]),
- ["taskIdle" /* TaskIdle */]: z3.tuple([z3.string()]),
- ["taskPaused" /* TaskPaused */]: z3.tuple([z3.string()]),
- ["taskUnpaused" /* TaskUnpaused */]: z3.tuple([z3.string()]),
- ["taskSpawned" /* TaskSpawned */]: z3.tuple([z3.string(), z3.string()]),
- ["taskDelegated" /* TaskDelegated */]: z3.tuple([
- z3.string(),
+ ["taskAborted" /* TaskAborted */]: z4.tuple([z4.string()]),
+ ["taskFocused" /* TaskFocused */]: z4.tuple([z4.string()]),
+ ["taskUnfocused" /* TaskUnfocused */]: z4.tuple([z4.string()]),
+ ["taskActive" /* TaskActive */]: z4.tuple([z4.string()]),
+ ["taskInteractive" /* TaskInteractive */]: z4.tuple([z4.string()]),
+ ["taskResumable" /* TaskResumable */]: z4.tuple([z4.string()]),
+ ["taskIdle" /* TaskIdle */]: z4.tuple([z4.string()]),
+ ["taskPaused" /* TaskPaused */]: z4.tuple([z4.string()]),
+ ["taskUnpaused" /* TaskUnpaused */]: z4.tuple([z4.string()]),
+ ["taskSpawned" /* TaskSpawned */]: z4.tuple([z4.string(), z4.string()]),
+ ["taskDelegated" /* TaskDelegated */]: z4.tuple([
+ z4.string(),
  // parentTaskId
- z3.string()
+ z4.string()
  // childTaskId
  ]),
- ["taskDelegationCompleted" /* TaskDelegationCompleted */]: z3.tuple([
- z3.string(),
+ ["taskDelegationCompleted" /* TaskDelegationCompleted */]: z4.tuple([
+ z4.string(),
  // parentTaskId
- z3.string(),
+ z4.string(),
  // childTaskId
- z3.string()
+ z4.string()
  // completionResultSummary
  ]),
- ["taskDelegationResumed" /* TaskDelegationResumed */]: z3.tuple([
- z3.string(),
+ ["taskDelegationResumed" /* TaskDelegationResumed */]: z4.tuple([
+ z4.string(),
  // parentTaskId
- z3.string()
+ z4.string()
  // childTaskId
  ]),
- ["message" /* Message */]: z3.tuple([
- z3.object({
- taskId: z3.string(),
- action: z3.union([z3.literal("created"), z3.literal("updated")]),
+ ["message" /* Message */]: z4.tuple([
+ z4.object({
+ taskId: z4.string(),
+ action: z4.union([z4.literal("created"), z4.literal("updated")]),
  message: clineMessageSchema
  })
  ]),
- ["taskModeSwitched" /* TaskModeSwitched */]: z3.tuple([z3.string(), z3.string()]),
- ["taskAskResponded" /* TaskAskResponded */]: z3.tuple([z3.string()]),
- ["taskUserMessage" /* TaskUserMessage */]: z3.tuple([z3.string()]),
- ["queuedMessagesUpdated" /* QueuedMessagesUpdated */]: z3.tuple([z3.string(), z3.array(queuedMessageSchema)]),
- ["taskToolFailed" /* TaskToolFailed */]: z3.tuple([z3.string(), toolNamesSchema, z3.string()]),
- ["taskTokenUsageUpdated" /* TaskTokenUsageUpdated */]: z3.tuple([z3.string(), tokenUsageSchema, toolUsageSchema]),
- ["modeChanged" /* ModeChanged */]: z3.tuple([z3.string()]),
- ["providerProfileChanged" /* ProviderProfileChanged */]: z3.tuple([z3.object({ name: z3.string(), provider: z3.string() })])
+ ["taskModeSwitched" /* TaskModeSwitched */]: z4.tuple([z4.string(), z4.string()]),
+ ["taskAskResponded" /* TaskAskResponded */]: z4.tuple([z4.string()]),
+ ["taskUserMessage" /* TaskUserMessage */]: z4.tuple([z4.string()]),
+ ["queuedMessagesUpdated" /* QueuedMessagesUpdated */]: z4.tuple([z4.string(), z4.array(queuedMessageSchema)]),
+ ["taskToolFailed" /* TaskToolFailed */]: z4.tuple([z4.string(), toolNamesSchema, z4.string()]),
+ ["taskTokenUsageUpdated" /* TaskTokenUsageUpdated */]: z4.tuple([z4.string(), tokenUsageSchema, toolUsageSchema]),
+ ["modeChanged" /* ModeChanged */]: z4.tuple([z4.string()]),
+ ["providerProfileChanged" /* ProviderProfileChanged */]: z4.tuple([z4.object({ name: z4.string(), provider: z4.string() })]),
+ ["commandsResponse" /* CommandsResponse */]: z4.tuple([
+ z4.array(
+ z4.object({
+ name: z4.string(),
+ source: z4.enum(["global", "project", "built-in"]),
+ filePath: z4.string().optional(),
+ description: z4.string().optional(),
+ argumentHint: z4.string().optional()
+ })
+ )
+ ]),
+ ["modesResponse" /* ModesResponse */]: z4.tuple([z4.array(z4.object({ slug: z4.string(), name: z4.string() }))]),
+ ["modelsResponse" /* ModelsResponse */]: z4.tuple([z4.record(z4.string(), modelInfoSchema)])
  });
- var taskEventSchema = z3.discriminatedUnion("eventName", [
+ var taskEventSchema = z4.discriminatedUnion("eventName", [
  // Task Provider Lifecycle
- z3.object({
- eventName: z3.literal("taskCreated" /* TaskCreated */),
+ z4.object({
+ eventName: z4.literal("taskCreated" /* TaskCreated */),
  payload: rooCodeEventsSchema.shape["taskCreated" /* TaskCreated */],
- taskId: z3.number().optional()
+ taskId: z4.number().optional()
  }),
  // Task Lifecycle
- z3.object({
- eventName: z3.literal("taskStarted" /* TaskStarted */),
+ z4.object({
+ eventName: z4.literal("taskStarted" /* TaskStarted */),
  payload: rooCodeEventsSchema.shape["taskStarted" /* TaskStarted */],
- taskId: z3.number().optional()
+ taskId: z4.number().optional()
  }),
- z3.object({
- eventName: z3.literal("taskCompleted" /* TaskCompleted */),
+ z4.object({
+ eventName: z4.literal("taskCompleted" /* TaskCompleted */),
  payload: rooCodeEventsSchema.shape["taskCompleted" /* TaskCompleted */],
- taskId: z3.number().optional()
+ taskId: z4.number().optional()
  }),
- z3.object({
- eventName: z3.literal("taskAborted" /* TaskAborted */),
+ z4.object({
+ eventName: z4.literal("taskAborted" /* TaskAborted */),
  payload: rooCodeEventsSchema.shape["taskAborted" /* TaskAborted */],
- taskId: z3.number().optional()
+ taskId: z4.number().optional()
  }),
- z3.object({
- eventName: z3.literal("taskFocused" /* TaskFocused */),
+ z4.object({
+ eventName: z4.literal("taskFocused" /* TaskFocused */),
  payload: rooCodeEventsSchema.shape["taskFocused" /* TaskFocused */],
- taskId: z3.number().optional()
+ taskId: z4.number().optional()
  }),
- z3.object({
- eventName: z3.literal("taskUnfocused" /* TaskUnfocused */),
+ z4.object({
+ eventName: z4.literal("taskUnfocused" /* TaskUnfocused */),
  payload: rooCodeEventsSchema.shape["taskUnfocused" /* TaskUnfocused */],
- taskId: z3.number().optional()
+ taskId: z4.number().optional()
  }),
- z3.object({
- eventName: z3.literal("taskActive" /* TaskActive */),
+ z4.object({
+ eventName: z4.literal("taskActive" /* TaskActive */),
  payload: rooCodeEventsSchema.shape["taskActive" /* TaskActive */],
- taskId: z3.number().optional()
+ taskId: z4.number().optional()
  }),
- z3.object({
- eventName: z3.literal("taskInteractive" /* TaskInteractive */),
+ z4.object({
+ eventName: z4.literal("taskInteractive" /* TaskInteractive */),
  payload: rooCodeEventsSchema.shape["taskInteractive" /* TaskInteractive */],
- taskId: z3.number().optional()
+ taskId: z4.number().optional()
  }),
- z3.object({
- eventName: z3.literal("taskResumable" /* TaskResumable */),
+ z4.object({
+ eventName: z4.literal("taskResumable" /* TaskResumable */),
  payload: rooCodeEventsSchema.shape["taskResumable" /* TaskResumable */],
- taskId: z3.number().optional()
+ taskId: z4.number().optional()
  }),
- z3.object({
- eventName: z3.literal("taskIdle" /* TaskIdle */),
+ z4.object({
+ eventName: z4.literal("taskIdle" /* TaskIdle */),
  payload: rooCodeEventsSchema.shape["taskIdle" /* TaskIdle */],
- taskId: z3.number().optional()
+ taskId: z4.number().optional()
  }),
  // Subtask Lifecycle
- z3.object({
- eventName: z3.literal("taskPaused" /* TaskPaused */),
+ z4.object({
+ eventName: z4.literal("taskPaused" /* TaskPaused */),
  payload: rooCodeEventsSchema.shape["taskPaused" /* TaskPaused */],
- taskId: z3.number().optional()
+ taskId: z4.number().optional()
  }),
- z3.object({
- eventName: z3.literal("taskUnpaused" /* TaskUnpaused */),
+ z4.object({
+ eventName: z4.literal("taskUnpaused" /* TaskUnpaused */),
  payload: rooCodeEventsSchema.shape["taskUnpaused" /* TaskUnpaused */],
- taskId: z3.number().optional()
+ taskId: z4.number().optional()
  }),
- z3.object({
- eventName: z3.literal("taskSpawned" /* TaskSpawned */),
+ z4.object({
+ eventName: z4.literal("taskSpawned" /* TaskSpawned */),
  payload: rooCodeEventsSchema.shape["taskSpawned" /* TaskSpawned */],
- taskId: z3.number().optional()
+ taskId: z4.number().optional()
  }),
- z3.object({
- eventName: z3.literal("taskDelegated" /* TaskDelegated */),
+ z4.object({
+ eventName: z4.literal("taskDelegated" /* TaskDelegated */),
  payload: rooCodeEventsSchema.shape["taskDelegated" /* TaskDelegated */],
- taskId: z3.number().optional()
+ taskId: z4.number().optional()
  }),
- z3.object({
- eventName: z3.literal("taskDelegationCompleted" /* TaskDelegationCompleted */),
+ z4.object({
+ eventName: z4.literal("taskDelegationCompleted" /* TaskDelegationCompleted */),
  payload: rooCodeEventsSchema.shape["taskDelegationCompleted" /* TaskDelegationCompleted */],
- taskId: z3.number().optional()
+ taskId: z4.number().optional()
  }),
- z3.object({
- eventName: z3.literal("taskDelegationResumed" /* TaskDelegationResumed */),
+ z4.object({
+ eventName: z4.literal("taskDelegationResumed" /* TaskDelegationResumed */),
  payload: rooCodeEventsSchema.shape["taskDelegationResumed" /* TaskDelegationResumed */],
- taskId: z3.number().optional()
+ taskId: z4.number().optional()
  }),
  // Task Execution
- z3.object({
- eventName: z3.literal("message" /* Message */),
+ z4.object({
+ eventName: z4.literal("message" /* Message */),
  payload: rooCodeEventsSchema.shape["message" /* Message */],
- taskId: z3.number().optional()
+ taskId: z4.number().optional()
  }),
- z3.object({
- eventName: z3.literal("taskModeSwitched" /* TaskModeSwitched */),
+ z4.object({
+ eventName: z4.literal("taskModeSwitched" /* TaskModeSwitched */),
  payload: rooCodeEventsSchema.shape["taskModeSwitched" /* TaskModeSwitched */],
- taskId: z3.number().optional()
+ taskId: z4.number().optional()
  }),
- z3.object({
- eventName: z3.literal("taskAskResponded" /* TaskAskResponded */),
+ z4.object({
+ eventName: z4.literal("taskAskResponded" /* TaskAskResponded */),
  payload: rooCodeEventsSchema.shape["taskAskResponded" /* TaskAskResponded */],
- taskId: z3.number().optional()
+ taskId: z4.number().optional()
  }),
- z3.object({
- eventName: z3.literal("queuedMessagesUpdated" /* QueuedMessagesUpdated */),
+ z4.object({
+ eventName: z4.literal("queuedMessagesUpdated" /* QueuedMessagesUpdated */),
  payload: rooCodeEventsSchema.shape["queuedMessagesUpdated" /* QueuedMessagesUpdated */],
- taskId: z3.number().optional()
+ taskId: z4.number().optional()
  }),
  // Task Analytics
- z3.object({
- eventName: z3.literal("taskToolFailed" /* TaskToolFailed */),
+ z4.object({
+ eventName: z4.literal("taskToolFailed" /* TaskToolFailed */),
  payload: rooCodeEventsSchema.shape["taskToolFailed" /* TaskToolFailed */],
- taskId: z3.number().optional()
+ taskId: z4.number().optional()
  }),
- z3.object({
- eventName: z3.literal("taskTokenUsageUpdated" /* TaskTokenUsageUpdated */),
+ z4.object({
+ eventName: z4.literal("taskTokenUsageUpdated" /* TaskTokenUsageUpdated */),
  payload: rooCodeEventsSchema.shape["taskTokenUsageUpdated" /* TaskTokenUsageUpdated */],
- taskId: z3.number().optional()
+ taskId: z4.number().optional()
+ }),
+ // Query Responses
+ z4.object({
+ eventName: z4.literal("commandsResponse" /* CommandsResponse */),
+ payload: rooCodeEventsSchema.shape["commandsResponse" /* CommandsResponse */],
+ taskId: z4.number().optional()
+ }),
+ z4.object({
+ eventName: z4.literal("modesResponse" /* ModesResponse */),
+ payload: rooCodeEventsSchema.shape["modesResponse" /* ModesResponse */],
+ taskId: z4.number().optional()
+ }),
+ z4.object({
+ eventName: z4.literal("modelsResponse" /* ModelsResponse */),
+ payload: rooCodeEventsSchema.shape["modelsResponse" /* ModelsResponse */],
+ taskId: z4.number().optional()
  }),
  // Evals
- z3.object({
- eventName: z3.literal("evalPass" /* EvalPass */),
- payload: z3.undefined(),
- taskId: z3.number()
+ z4.object({
+ eventName: z4.literal("evalPass" /* EvalPass */),
+ payload: z4.undefined(),
+ taskId: z4.number()
  }),
- z3.object({
- eventName: z3.literal("evalFail" /* EvalFail */),
- payload: z3.undefined(),
- taskId: z3.number()
+ z4.object({
+ eventName: z4.literal("evalFail" /* EvalFail */),
+ payload: z4.undefined(),
+ taskId: z4.number()
  })
  ]);

  // src/task.ts
- import { z as z4 } from "zod";
+ import { z as z5 } from "zod";
  var TaskStatus = /* @__PURE__ */ ((TaskStatus2) => {
  TaskStatus2["Running"] = "running";
  TaskStatus2["Interactive"] = "interactive";
@@ -407,9 +520,9 @@ var TaskStatus = /* @__PURE__ */ ((TaskStatus2) => {
  TaskStatus2["None"] = "none";
  return TaskStatus2;
  })(TaskStatus || {});
- var taskMetadataSchema = z4.object({
- task: z4.string().optional(),
- images: z4.array(z4.string()).optional()
+ var taskMetadataSchema = z5.object({
+ task: z5.string().optional(),
+ images: z5.array(z5.string()).optional()
  });

  // src/global-settings.ts
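Alongside the alias bump from `z3` to `z4`, the event layer gains three query-response events (`commandsResponse`, `modesResponse`, `modelsResponse`) in both `rooCodeEventsSchema` and the `taskEventSchema` discriminated union. A sketch of how a `modelsResponse` event would validate, assuming `taskEventSchema` and `RooCodeEventName` are exported from the package:

```ts
import { RooCodeEventName, taskEventSchema } from "@roo-code/types";

// A modelsResponse payload is a one-element tuple wrapping a record of
// model IDs to ModelInfo objects (see rooCodeEventsSchema above).
const event = taskEventSchema.parse({
  eventName: RooCodeEventName.ModelsResponse,
  payload: [
    { "gpt-5.1": { contextWindow: 272_000, supportsPromptCache: true } },
  ],
});

// The union discriminates on eventName, so a literal check narrows
// event.payload to the modelsResponse tuple type rather than a wide union.
if (event.eventName === RooCodeEventName.ModelsResponse) {
  const [models] = event.payload;
  console.log(Object.keys(models));
}
```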
@@ -418,86 +531,6 @@ import { z as z14 } from "zod";
  // src/provider-settings.ts
  import { z as z8 } from "zod";

- // src/model.ts
- import { z as z5 } from "zod";
- var reasoningEfforts = ["low", "medium", "high"];
- var reasoningEffortsSchema = z5.enum(reasoningEfforts);
- var reasoningEffortWithMinimalSchema = z5.union([reasoningEffortsSchema, z5.literal("minimal")]);
- var reasoningEffortsExtended = ["none", "minimal", "low", "medium", "high", "xhigh"];
- var reasoningEffortExtendedSchema = z5.enum(reasoningEffortsExtended);
- var reasoningEffortSettingValues = ["disable", "none", "minimal", "low", "medium", "high", "xhigh"];
- var reasoningEffortSettingSchema = z5.enum(reasoningEffortSettingValues);
- var verbosityLevels = ["low", "medium", "high"];
- var verbosityLevelsSchema = z5.enum(verbosityLevels);
- var serviceTiers = ["default", "flex", "priority"];
- var serviceTierSchema = z5.enum(serviceTiers);
- var modelParameters = ["max_tokens", "temperature", "reasoning", "include_reasoning"];
- var modelParametersSchema = z5.enum(modelParameters);
- var isModelParameter = (value) => modelParameters.includes(value);
- var modelInfoSchema = z5.object({
- maxTokens: z5.number().nullish(),
- maxThinkingTokens: z5.number().nullish(),
- contextWindow: z5.number(),
- supportsImages: z5.boolean().optional(),
- supportsPromptCache: z5.boolean(),
- // Optional default prompt cache retention policy for providers that support it.
- // When set to "24h", extended prompt caching will be requested; when omitted
- // or set to "in_memory", the default in‑memory cache is used.
- promptCacheRetention: z5.enum(["in_memory", "24h"]).optional(),
- // Capability flag to indicate whether the model supports an output verbosity parameter
- supportsVerbosity: z5.boolean().optional(),
- supportsReasoningBudget: z5.boolean().optional(),
- // Capability flag to indicate whether the model supports simple on/off binary reasoning
- supportsReasoningBinary: z5.boolean().optional(),
- // Capability flag to indicate whether the model supports temperature parameter
- supportsTemperature: z5.boolean().optional(),
- defaultTemperature: z5.number().optional(),
- requiredReasoningBudget: z5.boolean().optional(),
- supportsReasoningEffort: z5.union([z5.boolean(), z5.array(z5.enum(["disable", "none", "minimal", "low", "medium", "high", "xhigh"]))]).optional(),
- requiredReasoningEffort: z5.boolean().optional(),
- preserveReasoning: z5.boolean().optional(),
- supportedParameters: z5.array(modelParametersSchema).optional(),
- inputPrice: z5.number().optional(),
- outputPrice: z5.number().optional(),
- cacheWritesPrice: z5.number().optional(),
- cacheReadsPrice: z5.number().optional(),
- description: z5.string().optional(),
- // Default effort value for models that support reasoning effort
- reasoningEffort: reasoningEffortExtendedSchema.optional(),
- minTokensPerCachePoint: z5.number().optional(),
- maxCachePoints: z5.number().optional(),
- cachableFields: z5.array(z5.string()).optional(),
- // Flag to indicate if the model is deprecated and should not be used
- deprecated: z5.boolean().optional(),
- // Flag to indicate if the model should hide vendor/company identity in responses
- isStealthModel: z5.boolean().optional(),
- // Flag to indicate if the model is free (no cost)
- isFree: z5.boolean().optional(),
- // Exclude specific native tools from being available (only applies to native protocol)
- // These tools will be removed from the set of tools available to the model
- excludedTools: z5.array(z5.string()).optional(),
- // Include specific native tools (only applies to native protocol)
- // These tools will be added if they belong to an allowed group in the current mode
- // Cannot force-add tools from groups the mode doesn't allow
- includedTools: z5.array(z5.string()).optional(),
- /**
- * Service tiers with pricing information.
- * Each tier can have a name (for OpenAI service tiers) and pricing overrides.
- * The top-level input/output/cache* fields represent the default/standard tier.
- */
- tiers: z5.array(
- z5.object({
- name: serviceTierSchema.optional(),
- // Service tier name (flex, priority, etc.)
- contextWindow: z5.number(),
- inputPrice: z5.number().optional(),
- outputPrice: z5.number().optional(),
- cacheWritesPrice: z5.number().optional(),
- cacheReadsPrice: z5.number().optional()
- })
- ).optional()
- });
-
  // src/codebase-index.ts
  import { z as z6 } from "zod";
  var CODEBASE_INDEX_DEFAULTS = {
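This hunk is the other half of the move: the `modelInfoSchema` block deleted here (old lines 421-499) is the same one added near the top of the bundle, because `rooCodeEventsSchema` now references it for the `modelsResponse` payload and the bundler must emit `src/model.ts` first. That reorder is also what renumbered the zod aliases (`z3` to `z4`, `z4` to `z5`, and so on). One field worth calling out is `supportsReasoningEffort`, a union of a boolean and an effort allow-list; a hedged narrowing sketch, where `ModelInfo` is assumed to be the exported inferred type and the fallback list for `true` is an assumption rather than documented package behavior:

```ts
import type { ModelInfo } from "@roo-code/types";

// "disable"/"none" style values can appear in the allow-list form, so the
// array is returned as-is; callers decide how to surface each option.
function allowedReasoningEfforts(model: ModelInfo): readonly string[] {
  const support = model.supportsReasoningEffort;
  if (Array.isArray(support)) return support; // explicit allow-list form
  if (support === true) return ["low", "medium", "high"]; // assumed default set
  return []; // false or undefined: no effort control
}
```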
@@ -797,1090 +830,1051 @@ var anthropicModels = {
797
830
  };
798
831
  var ANTHROPIC_DEFAULT_MAX_TOKENS = 8192;
799
832
 
800
- // src/providers/baseten.ts
801
- var basetenModels = {
802
- "moonshotai/Kimi-K2-Thinking": {
803
- maxTokens: 16384,
804
- contextWindow: 262e3,
805
- supportsImages: false,
806
- supportsPromptCache: false,
807
- inputPrice: 0.6,
808
- outputPrice: 2.5,
809
- cacheWritesPrice: 0,
810
- cacheReadsPrice: 0,
811
- description: "Kimi K2 Thinking - A model with enhanced reasoning capabilities from Kimi K2"
812
- },
813
- "zai-org/GLM-4.6": {
814
- maxTokens: 16384,
833
+ // src/providers/azure.ts
834
+ var azureModels = {
835
+ "codex-mini": {
836
+ maxTokens: 1e5,
815
837
  contextWindow: 2e5,
816
838
  supportsImages: false,
817
- supportsPromptCache: false,
818
- inputPrice: 0.6,
819
- outputPrice: 2.2,
820
- cacheWritesPrice: 0,
821
- cacheReadsPrice: 0,
822
- description: "Frontier open model with advanced agentic, reasoning and coding capabilities"
839
+ supportsPromptCache: true,
840
+ inputPrice: 1.5,
841
+ outputPrice: 6,
842
+ cacheReadsPrice: 0.375,
843
+ supportsTemperature: false,
844
+ description: "Codex Mini: Cloud-based software engineering agent powered by codex-1, a version of o3 optimized for coding tasks"
823
845
  },
824
- "deepseek-ai/DeepSeek-R1": {
825
- maxTokens: 16384,
826
- contextWindow: 163840,
846
+ "gpt-4": {
847
+ maxTokens: 8192,
848
+ contextWindow: 8192,
827
849
  supportsImages: false,
828
850
  supportsPromptCache: false,
829
- inputPrice: 2.55,
830
- outputPrice: 5.95,
831
- cacheWritesPrice: 0,
832
- cacheReadsPrice: 0,
833
- description: "DeepSeek's first-generation reasoning model"
851
+ inputPrice: 60,
852
+ outputPrice: 120,
853
+ supportsTemperature: true,
854
+ description: "GPT-4"
834
855
  },
835
- "deepseek-ai/DeepSeek-R1-0528": {
836
- maxTokens: 16384,
837
- contextWindow: 163840,
856
+ "gpt-4-32k": {
857
+ maxTokens: 32768,
858
+ contextWindow: 32768,
838
859
  supportsImages: false,
839
860
  supportsPromptCache: false,
840
- inputPrice: 2.55,
841
- outputPrice: 5.95,
842
- cacheWritesPrice: 0,
843
- cacheReadsPrice: 0,
844
- description: "The latest revision of DeepSeek's first-generation reasoning model"
861
+ inputPrice: 60,
862
+ outputPrice: 120,
863
+ supportsTemperature: true,
864
+ description: "GPT-4 32K"
845
865
  },
846
- "deepseek-ai/DeepSeek-V3-0324": {
847
- maxTokens: 16384,
848
- contextWindow: 163840,
849
- supportsImages: false,
866
+ "gpt-4-turbo": {
867
+ maxTokens: 4096,
868
+ contextWindow: 128e3,
869
+ supportsImages: true,
850
870
  supportsPromptCache: false,
851
- inputPrice: 0.77,
852
- outputPrice: 0.77,
853
- cacheWritesPrice: 0,
854
- cacheReadsPrice: 0,
855
- description: "Fast general-purpose LLM with enhanced reasoning capabilities"
871
+ inputPrice: 10,
872
+ outputPrice: 30,
873
+ supportsTemperature: true,
874
+ description: "GPT-4 Turbo"
856
875
  },
857
- "deepseek-ai/DeepSeek-V3.1": {
858
- maxTokens: 16384,
859
- contextWindow: 163840,
860
- supportsImages: false,
876
+ "gpt-4-turbo-vision": {
877
+ maxTokens: 4096,
878
+ contextWindow: 128e3,
879
+ supportsImages: true,
861
880
  supportsPromptCache: false,
862
- inputPrice: 0.5,
863
- outputPrice: 1.5,
864
- cacheWritesPrice: 0,
865
- cacheReadsPrice: 0,
866
- description: "Extremely capable general-purpose LLM with hybrid reasoning capabilities and advanced tool calling"
881
+ inputPrice: 10,
882
+ outputPrice: 30,
883
+ supportsTemperature: true,
884
+ description: "GPT-4 Turbo Vision"
867
885
  },
868
- "deepseek-ai/DeepSeek-V3.2": {
869
- maxTokens: 16384,
870
- contextWindow: 163840,
871
- supportsImages: false,
872
- supportsPromptCache: false,
873
- inputPrice: 0.3,
874
- outputPrice: 0.45,
875
- cacheWritesPrice: 0,
876
- cacheReadsPrice: 0,
877
- description: "DeepSeek's hybrid reasoning model with efficient long context scaling with GPT-5 level performance"
886
+ "gpt-4.1": {
887
+ maxTokens: 32768,
888
+ contextWindow: 1047576,
889
+ supportsImages: true,
890
+ supportsPromptCache: true,
891
+ inputPrice: 2,
892
+ outputPrice: 8,
893
+ cacheReadsPrice: 0.5,
894
+ supportsTemperature: true,
895
+ description: "GPT-4.1"
878
896
  },
879
- "openai/gpt-oss-120b": {
880
- maxTokens: 16384,
881
- contextWindow: 128072,
882
- supportsImages: false,
883
- supportsPromptCache: false,
897
+ "gpt-4.1-mini": {
898
+ maxTokens: 32768,
899
+ contextWindow: 1047576,
900
+ supportsImages: true,
901
+ supportsPromptCache: true,
902
+ inputPrice: 0.4,
903
+ outputPrice: 1.6,
904
+ cacheReadsPrice: 0.1,
905
+ supportsTemperature: true,
906
+ description: "GPT-4.1 mini"
907
+ },
908
+ "gpt-4.1-nano": {
909
+ maxTokens: 32768,
910
+ contextWindow: 1047576,
911
+ supportsImages: true,
912
+ supportsPromptCache: true,
884
913
  inputPrice: 0.1,
885
- outputPrice: 0.5,
886
- cacheWritesPrice: 0,
887
- cacheReadsPrice: 0,
888
- description: "Extremely capable general-purpose LLM with strong, controllable reasoning capabilities"
914
+ outputPrice: 0.4,
915
+ cacheReadsPrice: 0.03,
916
+ supportsTemperature: true,
917
+ description: "GPT-4.1 nano"
889
918
  },
890
- "Qwen/Qwen3-235B-A22B-Instruct-2507": {
919
+ "gpt-4o": {
891
920
  maxTokens: 16384,
892
- contextWindow: 262144,
893
- supportsImages: false,
894
- supportsPromptCache: false,
895
- inputPrice: 0.22,
896
- outputPrice: 0.8,
897
- cacheWritesPrice: 0,
898
- cacheReadsPrice: 0,
899
- description: "Mixture-of-experts LLM with math and reasoning capabilities"
921
+ contextWindow: 128e3,
922
+ supportsImages: true,
923
+ supportsPromptCache: true,
924
+ inputPrice: 2.5,
925
+ outputPrice: 10,
926
+ cacheReadsPrice: 1.25,
927
+ supportsTemperature: true,
928
+ description: "GPT-4o"
900
929
  },
901
- "Qwen/Qwen3-Coder-480B-A35B-Instruct": {
930
+ "gpt-4o-mini": {
902
931
  maxTokens: 16384,
903
- contextWindow: 262144,
904
- supportsImages: false,
905
- supportsPromptCache: false,
906
- inputPrice: 0.38,
907
- outputPrice: 1.53,
908
- cacheWritesPrice: 0,
909
- cacheReadsPrice: 0,
910
- description: "Mixture-of-experts LLM with advanced coding and reasoning capabilities"
911
- },
912
- "moonshotai/Kimi-K2-Instruct-0905": {
913
- maxTokens: 16384,
914
- contextWindow: 262e3,
915
- supportsImages: false,
916
- supportsPromptCache: false,
917
- inputPrice: 0.6,
918
- outputPrice: 2.5,
919
- cacheWritesPrice: 0,
920
- cacheReadsPrice: 0,
921
- description: "State of the art language model for agentic and coding tasks. September Update."
922
- }
923
- };
924
- var basetenDefaultModelId = "zai-org/GLM-4.6";
925
-
926
- // src/providers/bedrock.ts
927
- var bedrockDefaultModelId = "anthropic.claude-sonnet-4-5-20250929-v1:0";
928
- var bedrockDefaultPromptRouterModelId = "anthropic.claude-3-sonnet-20240229-v1:0";
929
- var bedrockModels = {
930
- "anthropic.claude-sonnet-4-5-20250929-v1:0": {
931
- maxTokens: 8192,
932
- contextWindow: 2e5,
932
+ contextWindow: 128e3,
933
933
  supportsImages: true,
934
934
  supportsPromptCache: true,
935
- supportsReasoningBudget: true,
936
- inputPrice: 3,
937
- outputPrice: 15,
938
- cacheWritesPrice: 3.75,
939
- cacheReadsPrice: 0.3,
940
- minTokensPerCachePoint: 1024,
941
- maxCachePoints: 4,
942
- cachableFields: ["system", "messages", "tools"]
935
+ inputPrice: 0.15,
936
+ outputPrice: 0.6,
937
+ cacheReadsPrice: 0.08,
938
+ supportsTemperature: true,
939
+ description: "GPT-4o mini"
943
940
  },
944
- "amazon.nova-pro-v1:0": {
945
- maxTokens: 5e3,
946
- contextWindow: 3e5,
941
+ "gpt-5": {
942
+ maxTokens: 128e3,
943
+ contextWindow: 272e3,
944
+ includedTools: ["apply_patch"],
945
+ excludedTools: ["apply_diff", "write_to_file"],
947
946
  supportsImages: true,
948
947
  supportsPromptCache: true,
949
- inputPrice: 0.8,
950
- outputPrice: 3.2,
951
- cacheWritesPrice: 0.8,
952
- // per million tokens
953
- cacheReadsPrice: 0.2,
954
- // per million tokens
955
- minTokensPerCachePoint: 1,
956
- maxCachePoints: 1,
957
- cachableFields: ["system"]
958
- },
959
- "amazon.nova-pro-latency-optimized-v1:0": {
960
- maxTokens: 5e3,
961
- contextWindow: 3e5,
962
- supportsImages: true,
963
- supportsPromptCache: false,
964
- inputPrice: 1,
965
- outputPrice: 4,
966
- cacheWritesPrice: 1,
967
- // per million tokens
968
- cacheReadsPrice: 0.25,
969
- // per million tokens
970
- description: "Amazon Nova Pro with latency optimized inference"
948
+ supportsReasoningEffort: ["minimal", "low", "medium", "high"],
949
+ reasoningEffort: "medium",
950
+ inputPrice: 1.25,
951
+ outputPrice: 10,
952
+ cacheReadsPrice: 0.13,
953
+ supportsVerbosity: true,
954
+ supportsTemperature: false,
955
+ description: "GPT-5: The best model for coding and agentic tasks across domains"
971
956
  },
972
- "amazon.nova-lite-v1:0": {
973
- maxTokens: 5e3,
974
- contextWindow: 3e5,
957
+ "gpt-5-codex": {
958
+ maxTokens: 128e3,
959
+ contextWindow: 4e5,
960
+ includedTools: ["apply_patch"],
961
+ excludedTools: ["apply_diff", "write_to_file"],
975
962
  supportsImages: true,
976
963
  supportsPromptCache: true,
977
- inputPrice: 0.06,
978
- outputPrice: 0.24,
979
- cacheWritesPrice: 0.06,
980
- // per million tokens
981
- cacheReadsPrice: 0.015,
982
- // per million tokens
983
- minTokensPerCachePoint: 1,
984
- maxCachePoints: 1,
985
- cachableFields: ["system"]
964
+ supportsReasoningEffort: ["low", "medium", "high"],
965
+ reasoningEffort: "medium",
966
+ inputPrice: 1.25,
967
+ outputPrice: 10,
968
+ cacheReadsPrice: 0.13,
969
+ supportsTemperature: false,
970
+ description: "GPT-5-Codex: A version of GPT-5 optimized for agentic coding in Codex"
986
971
  },
987
- "amazon.nova-2-lite-v1:0": {
988
- maxTokens: 65535,
989
- contextWindow: 1e6,
972
+ "gpt-5-mini": {
973
+ maxTokens: 128e3,
974
+ contextWindow: 272e3,
975
+ includedTools: ["apply_patch"],
976
+ excludedTools: ["apply_diff", "write_to_file"],
990
977
  supportsImages: true,
991
978
  supportsPromptCache: true,
992
- inputPrice: 0.33,
993
- outputPrice: 2.75,
994
- cacheWritesPrice: 0,
995
- cacheReadsPrice: 0.0825,
996
- // 75% less than input price
997
- minTokensPerCachePoint: 1,
998
- maxCachePoints: 1,
999
- cachableFields: ["system"],
1000
- description: "Amazon Nova 2 Lite - Comparable to Claude Haiku 4.5"
1001
- },
1002
- "amazon.nova-micro-v1:0": {
1003
- maxTokens: 5e3,
1004
- contextWindow: 128e3,
1005
- supportsImages: false,
1006
- supportsPromptCache: true,
1007
- inputPrice: 0.035,
1008
- outputPrice: 0.14,
1009
- cacheWritesPrice: 0.035,
1010
- // per million tokens
1011
- cacheReadsPrice: 875e-5,
1012
- // per million tokens
1013
- minTokensPerCachePoint: 1,
1014
- maxCachePoints: 1,
1015
- cachableFields: ["system"]
979
+ supportsReasoningEffort: ["minimal", "low", "medium", "high"],
980
+ reasoningEffort: "medium",
981
+ inputPrice: 0.25,
982
+ outputPrice: 2,
983
+ cacheReadsPrice: 0.03,
984
+ supportsVerbosity: true,
985
+ supportsTemperature: false,
986
+ description: "GPT-5 Mini: A faster, more cost-efficient version of GPT-5 for well-defined tasks"
1016
987
  },
1017
- "anthropic.claude-sonnet-4-20250514-v1:0": {
1018
- maxTokens: 8192,
1019
- contextWindow: 2e5,
988
+ "gpt-5-nano": {
989
+ maxTokens: 128e3,
990
+ contextWindow: 272e3,
991
+ includedTools: ["apply_patch"],
992
+ excludedTools: ["apply_diff", "write_to_file"],
1020
993
  supportsImages: true,
1021
994
  supportsPromptCache: true,
1022
- supportsReasoningBudget: true,
1023
- inputPrice: 3,
1024
- outputPrice: 15,
1025
- cacheWritesPrice: 3.75,
1026
- cacheReadsPrice: 0.3,
1027
- minTokensPerCachePoint: 1024,
1028
- maxCachePoints: 4,
1029
- cachableFields: ["system", "messages", "tools"]
995
+ supportsReasoningEffort: ["minimal", "low", "medium", "high"],
996
+ reasoningEffort: "medium",
997
+ inputPrice: 0.05,
998
+ outputPrice: 0.4,
999
+ cacheReadsPrice: 0.01,
1000
+ supportsVerbosity: true,
1001
+ supportsTemperature: false,
1002
+ description: "GPT-5 Nano: Fastest, most cost-efficient version of GPT-5"
1030
1003
  },
1031
- "anthropic.claude-opus-4-1-20250805-v1:0": {
1032
- maxTokens: 8192,
1033
- contextWindow: 2e5,
1004
+ "gpt-5-pro": {
1005
+ maxTokens: 272e3,
1006
+ contextWindow: 4e5,
1007
+ includedTools: ["apply_patch"],
1008
+ excludedTools: ["apply_diff", "write_to_file"],
1034
1009
  supportsImages: true,
1035
- supportsPromptCache: true,
1036
- supportsReasoningBudget: true,
1010
+ supportsPromptCache: false,
1011
+ supportsReasoningEffort: ["minimal", "low", "medium", "high"],
1012
+ reasoningEffort: "medium",
1037
1013
  inputPrice: 15,
1038
- outputPrice: 75,
1039
- cacheWritesPrice: 18.75,
1040
- cacheReadsPrice: 1.5,
1041
- minTokensPerCachePoint: 1024,
1042
- maxCachePoints: 4,
1043
- cachableFields: ["system", "messages", "tools"]
1014
+ outputPrice: 120,
1015
+ supportsVerbosity: true,
1016
+ supportsTemperature: false,
1017
+ description: "GPT-5 Pro"
1044
1018
  },
1045
- "anthropic.claude-opus-4-6-v1": {
1046
- maxTokens: 8192,
1047
- contextWindow: 2e5,
1048
- // Default 200K, extendable to 1M with beta flag 'context-1m-2025-08-07'
1019
+ "gpt-5.1": {
1020
+ maxTokens: 128e3,
1021
+ contextWindow: 272e3,
1022
+ includedTools: ["apply_patch"],
1023
+ excludedTools: ["apply_diff", "write_to_file"],
1049
1024
  supportsImages: true,
1050
1025
  supportsPromptCache: true,
1051
- supportsReasoningBudget: true,
1052
- inputPrice: 5,
1053
- // $5 per million input tokens (≤200K context)
1054
- outputPrice: 25,
1055
- // $25 per million output tokens (≤200K context)
1056
- cacheWritesPrice: 6.25,
1057
- // $6.25 per million tokens
1058
- cacheReadsPrice: 0.5,
1059
- // $0.50 per million tokens
1060
- minTokensPerCachePoint: 1024,
1061
- maxCachePoints: 4,
1062
- cachableFields: ["system", "messages", "tools"],
1063
- // Tiered pricing for extended context (requires beta flag 'context-1m-2025-08-07')
1064
- tiers: [
1065
- {
1066
- contextWindow: 1e6,
1067
- // 1M tokens with beta flag
1068
- inputPrice: 10,
1069
- // $10 per million input tokens (>200K context)
1070
- outputPrice: 37.5,
1071
- // $37.50 per million output tokens (>200K context)
1072
- cacheWritesPrice: 12.5,
1073
- // $12.50 per million tokens (>200K context)
1074
- cacheReadsPrice: 1
1075
- // $1.00 per million tokens (>200K context)
1076
- }
1077
- ]
1026
+ promptCacheRetention: "24h",
1027
+ supportsReasoningEffort: ["none", "low", "medium", "high"],
1028
+ reasoningEffort: "medium",
1029
+ inputPrice: 1.25,
1030
+ outputPrice: 10,
1031
+ cacheReadsPrice: 0.125,
1032
+ supportsVerbosity: true,
1033
+ supportsTemperature: false,
1034
+ description: "GPT-5.1: The best model for coding and agentic tasks across domains"
1078
1035
  },
1079
- "anthropic.claude-opus-4-5-20251101-v1:0": {
1080
- maxTokens: 8192,
1081
- contextWindow: 2e5,
1036
+ "gpt-5.1-chat": {
1037
+ maxTokens: 16384,
1038
+ contextWindow: 128e3,
1039
+ includedTools: ["apply_patch"],
1040
+ excludedTools: ["apply_diff", "write_to_file"],
1082
1041
  supportsImages: true,
1083
1042
  supportsPromptCache: true,
1084
- supportsReasoningBudget: true,
1085
- inputPrice: 5,
1086
- outputPrice: 25,
1087
- cacheWritesPrice: 6.25,
1088
- cacheReadsPrice: 0.5,
1089
- minTokensPerCachePoint: 1024,
1090
- maxCachePoints: 4,
1091
- cachableFields: ["system", "messages", "tools"]
1043
+ promptCacheRetention: "24h",
1044
+ inputPrice: 1.25,
1045
+ outputPrice: 10,
1046
+ cacheReadsPrice: 0.125,
1047
+ supportsTemperature: false,
1048
+ description: "GPT-5.1 Chat: Optimized for conversational AI and chat use cases"
1092
1049
  },
1093
- "anthropic.claude-opus-4-20250514-v1:0": {
1094
- maxTokens: 8192,
1095
- contextWindow: 2e5,
1050
+ "gpt-5.1-codex": {
1051
+ maxTokens: 128e3,
1052
+ contextWindow: 4e5,
1053
+ includedTools: ["apply_patch"],
1054
+ excludedTools: ["apply_diff", "write_to_file"],
1096
1055
  supportsImages: true,
1097
1056
  supportsPromptCache: true,
1098
- supportsReasoningBudget: true,
1099
- inputPrice: 15,
1100
- outputPrice: 75,
1101
- cacheWritesPrice: 18.75,
1102
- cacheReadsPrice: 1.5,
1103
- minTokensPerCachePoint: 1024,
1104
- maxCachePoints: 4,
1105
- cachableFields: ["system", "messages", "tools"]
1106
- },
1107
- "anthropic.claude-3-7-sonnet-20250219-v1:0": {
1108
- maxTokens: 8192,
1109
- contextWindow: 2e5,
1057
+ promptCacheRetention: "24h",
1058
+ supportsReasoningEffort: ["low", "medium", "high"],
1059
+ reasoningEffort: "medium",
1060
+ inputPrice: 1.25,
1061
+ outputPrice: 10,
1062
+ cacheReadsPrice: 0.125,
1063
+ supportsTemperature: false,
1064
+ description: "GPT-5.1 Codex: A version of GPT-5.1 optimized for agentic coding in Codex"
1065
+ },
1066
+ "gpt-5.1-codex-max": {
1067
+ maxTokens: 128e3,
1068
+ contextWindow: 4e5,
1069
+ includedTools: ["apply_patch"],
1070
+ excludedTools: ["apply_diff", "write_to_file"],
1110
1071
  supportsImages: true,
1111
1072
  supportsPromptCache: true,
1112
- supportsReasoningBudget: true,
1113
- inputPrice: 3,
1114
- outputPrice: 15,
1115
- cacheWritesPrice: 3.75,
1116
- cacheReadsPrice: 0.3,
1117
- minTokensPerCachePoint: 1024,
1118
- maxCachePoints: 4,
1119
- cachableFields: ["system", "messages", "tools"]
1073
+ promptCacheRetention: "24h",
1074
+ supportsReasoningEffort: ["low", "medium", "high", "xhigh"],
1075
+ reasoningEffort: "medium",
1076
+ inputPrice: 1.25,
1077
+ outputPrice: 10,
1078
+ cacheReadsPrice: 0.125,
1079
+ supportsTemperature: false,
1080
+ description: "GPT-5.1 Codex Max: Our most intelligent coding model optimized for long-horizon, agentic coding tasks"
1120
1081
  },
1121
- "anthropic.claude-3-5-sonnet-20241022-v2:0": {
1122
- maxTokens: 8192,
1123
- contextWindow: 2e5,
1082
+ "gpt-5.1-codex-mini": {
1083
+ maxTokens: 128e3,
1084
+ contextWindow: 4e5,
1085
+ includedTools: ["apply_patch"],
1086
+ excludedTools: ["apply_diff", "write_to_file"],
1124
1087
  supportsImages: true,
1125
1088
  supportsPromptCache: true,
1126
- inputPrice: 3,
1127
- outputPrice: 15,
1128
- cacheWritesPrice: 3.75,
1129
- cacheReadsPrice: 0.3,
1130
- minTokensPerCachePoint: 1024,
1131
- maxCachePoints: 4,
1132
- cachableFields: ["system", "messages", "tools"]
1089
+ promptCacheRetention: "24h",
1090
+ supportsReasoningEffort: ["low", "medium", "high"],
1091
+ reasoningEffort: "medium",
1092
+ inputPrice: 0.25,
1093
+ outputPrice: 2,
1094
+ cacheReadsPrice: 0.025,
1095
+ supportsTemperature: false,
1096
+ description: "GPT-5.1 Codex mini: A version of GPT-5.1 optimized for agentic coding in Codex"
1133
1097
  },
1134
- "anthropic.claude-3-5-haiku-20241022-v1:0": {
1135
- maxTokens: 8192,
1136
- contextWindow: 2e5,
1137
- supportsImages: false,
1098
+ "gpt-5.2": {
1099
+ maxTokens: 128e3,
1100
+ contextWindow: 4e5,
1101
+ includedTools: ["apply_patch"],
1102
+ excludedTools: ["apply_diff", "write_to_file"],
1103
+ supportsImages: true,
1138
1104
  supportsPromptCache: true,
1139
- inputPrice: 0.8,
1140
- outputPrice: 4,
1141
- cacheWritesPrice: 1,
1142
- cacheReadsPrice: 0.08,
1143
- minTokensPerCachePoint: 2048,
1144
- maxCachePoints: 4,
1145
- cachableFields: ["system", "messages", "tools"]
1105
+ promptCacheRetention: "24h",
1106
+ supportsReasoningEffort: ["none", "low", "medium", "high", "xhigh"],
1107
+ reasoningEffort: "medium",
1108
+ inputPrice: 1.75,
1109
+ outputPrice: 14,
1110
+ cacheReadsPrice: 0.125,
1111
+ supportsVerbosity: true,
1112
+ supportsTemperature: false,
1113
+ description: "GPT-5.2: Our flagship model for coding and agentic tasks across industries"
1146
1114
  },
1147
- "anthropic.claude-haiku-4-5-20251001-v1:0": {
1148
- maxTokens: 8192,
1149
- contextWindow: 2e5,
1115
+ "gpt-5.2-chat": {
1116
+ maxTokens: 16384,
1117
+ contextWindow: 128e3,
1118
+ includedTools: ["apply_patch"],
1119
+ excludedTools: ["apply_diff", "write_to_file"],
1150
1120
  supportsImages: true,
1151
1121
  supportsPromptCache: true,
1152
- supportsReasoningBudget: true,
1153
- inputPrice: 1,
1154
- outputPrice: 5,
1155
- cacheWritesPrice: 1.25,
1156
- // 5m cache writes
1157
- cacheReadsPrice: 0.1,
1158
- // cache hits / refreshes
1159
- minTokensPerCachePoint: 2048,
1160
- maxCachePoints: 4,
1161
- cachableFields: ["system", "messages", "tools"]
1122
+ inputPrice: 1.75,
1123
+ outputPrice: 14,
1124
+ cacheReadsPrice: 0.175,
1125
+ supportsTemperature: false,
1126
+ description: "GPT-5.2 Chat: Optimized for conversational AI and chat use cases"
1162
1127
  },
1163
- "anthropic.claude-3-5-sonnet-20240620-v1:0": {
1164
- maxTokens: 8192,
1165
- contextWindow: 2e5,
1128
+ "gpt-5.2-codex": {
1129
+ maxTokens: 128e3,
1130
+ contextWindow: 4e5,
1131
+ includedTools: ["apply_patch"],
1132
+ excludedTools: ["apply_diff", "write_to_file"],
1166
1133
  supportsImages: true,
1167
- supportsPromptCache: false,
1168
- inputPrice: 3,
1169
- outputPrice: 15
1134
+ supportsPromptCache: true,
1135
+ promptCacheRetention: "24h",
1136
+ supportsReasoningEffort: ["low", "medium", "high", "xhigh"],
1137
+ reasoningEffort: "medium",
1138
+ inputPrice: 1.75,
1139
+ outputPrice: 14,
1140
+ cacheReadsPrice: 0.175,
1141
+ supportsTemperature: false,
1142
+ description: "GPT-5.2 Codex: Our most intelligent coding model optimized for long-horizon, agentic coding tasks"
1170
1143
  },
1171
- "anthropic.claude-3-opus-20240229-v1:0": {
1172
- maxTokens: 4096,
1144
+ o1: {
1145
+ maxTokens: 1e5,
1173
1146
  contextWindow: 2e5,
1174
1147
  supportsImages: true,
1175
- supportsPromptCache: false,
1148
+ supportsPromptCache: true,
1176
1149
  inputPrice: 15,
1177
- outputPrice: 75
1150
+ outputPrice: 60,
1151
+ cacheReadsPrice: 7.5,
1152
+ supportsTemperature: false,
1153
+ description: "o1"
1178
1154
  },
1179
- "anthropic.claude-3-sonnet-20240229-v1:0": {
1180
- maxTokens: 4096,
1155
+ "o1-mini": {
1156
+ maxTokens: 65536,
1157
+ contextWindow: 128e3,
1158
+ supportsImages: true,
1159
+ supportsPromptCache: true,
1160
+ inputPrice: 1.1,
1161
+ outputPrice: 4.4,
1162
+ cacheReadsPrice: 0.55,
1163
+ supportsTemperature: false,
1164
+ description: "o1-mini"
1165
+ },
1166
+ "o1-preview": {
1167
+ maxTokens: 32768,
1168
+ contextWindow: 128e3,
1169
+ supportsImages: true,
1170
+ supportsPromptCache: true,
1171
+ inputPrice: 16.5,
1172
+ outputPrice: 66,
1173
+ cacheReadsPrice: 8.25,
1174
+ supportsTemperature: false,
1175
+ description: "o1-preview"
1176
+ },
1177
+ o3: {
1178
+ maxTokens: 1e5,
1181
1179
  contextWindow: 2e5,
1182
1180
  supportsImages: true,
1183
- supportsPromptCache: false,
1184
- inputPrice: 3,
1185
- outputPrice: 15
1181
+ supportsPromptCache: true,
1182
+ supportsReasoningEffort: ["low", "medium", "high"],
1183
+ reasoningEffort: "medium",
1184
+ inputPrice: 2,
1185
+ outputPrice: 8,
1186
+ cacheReadsPrice: 0.5,
1187
+ supportsTemperature: false,
1188
+ description: "o3"
1186
1189
  },
1187
- "anthropic.claude-3-haiku-20240307-v1:0": {
1188
- maxTokens: 4096,
1190
+ "o3-mini": {
1191
+ maxTokens: 1e5,
1192
+ contextWindow: 2e5,
1193
+ supportsImages: false,
1194
+ supportsPromptCache: true,
1195
+ supportsReasoningEffort: ["low", "medium", "high"],
1196
+ reasoningEffort: "medium",
1197
+ inputPrice: 1.1,
1198
+ outputPrice: 4.4,
1199
+ cacheReadsPrice: 0.55,
1200
+ supportsTemperature: false,
1201
+ description: "o3-mini"
1202
+ },
1203
+ "o4-mini": {
1204
+ maxTokens: 1e5,
1189
1205
  contextWindow: 2e5,
1190
1206
  supportsImages: true,
1207
+ supportsPromptCache: true,
1208
+ supportsReasoningEffort: ["low", "medium", "high"],
1209
+ reasoningEffort: "medium",
1210
+ inputPrice: 1.1,
1211
+ outputPrice: 4.4,
1212
+ cacheReadsPrice: 0.28,
1213
+ supportsTemperature: false,
1214
+ description: "o4-mini"
1215
+ }
1216
+ };
1217
+ var azureDefaultModelId = "gpt-4o";
1218
+ var azureDefaultModelInfo = azureModels[azureDefaultModelId];
1219
+
1220
+ // src/providers/baseten.ts
1221
+ var basetenModels = {
1222
+ "moonshotai/Kimi-K2-Thinking": {
1223
+ maxTokens: 16384,
1224
+ contextWindow: 262e3,
1225
+ supportsImages: false,
1191
1226
  supportsPromptCache: false,
1192
- inputPrice: 0.25,
1193
- outputPrice: 1.25
1227
+ inputPrice: 0.6,
1228
+ outputPrice: 2.5,
1229
+ cacheWritesPrice: 0,
1230
+ cacheReadsPrice: 0,
1231
+ description: "Kimi K2 Thinking - A model with enhanced reasoning capabilities from Kimi K2"
1194
1232
  },
1195
- "deepseek.r1-v1:0": {
1196
- maxTokens: 32768,
1197
- contextWindow: 128e3,
1233
+ "zai-org/GLM-4.6": {
1234
+ maxTokens: 16384,
1235
+ contextWindow: 2e5,
1198
1236
  supportsImages: false,
1199
1237
  supportsPromptCache: false,
1200
- inputPrice: 1.35,
1201
- outputPrice: 5.4
1238
+ inputPrice: 0.6,
1239
+ outputPrice: 2.2,
1240
+ cacheWritesPrice: 0,
1241
+ cacheReadsPrice: 0,
1242
+ description: "Frontier open model with advanced agentic, reasoning and coding capabilities"
1202
1243
  },
1203
- "openai.gpt-oss-20b-1:0": {
1204
- maxTokens: 8192,
1205
- contextWindow: 128e3,
1244
+ "deepseek-ai/DeepSeek-R1": {
1245
+ maxTokens: 16384,
1246
+ contextWindow: 163840,
1206
1247
  supportsImages: false,
1207
1248
  supportsPromptCache: false,
1208
- inputPrice: 0.5,
1209
- outputPrice: 1.5,
1210
- description: "GPT-OSS 20B - Optimized for low latency and local/specialized use cases"
1249
+ inputPrice: 2.55,
1250
+ outputPrice: 5.95,
1251
+ cacheWritesPrice: 0,
1252
+ cacheReadsPrice: 0,
1253
+ description: "DeepSeek's first-generation reasoning model"
1211
1254
  },
1212
- "openai.gpt-oss-120b-1:0": {
1213
- maxTokens: 8192,
1214
- contextWindow: 128e3,
1255
+ "deepseek-ai/DeepSeek-R1-0528": {
1256
+ maxTokens: 16384,
1257
+ contextWindow: 163840,
1215
1258
  supportsImages: false,
1216
1259
  supportsPromptCache: false,
1217
- inputPrice: 2,
1218
- outputPrice: 6,
1219
- description: "GPT-OSS 120B - Production-ready, general-purpose, high-reasoning model"
1260
+ inputPrice: 2.55,
1261
+ outputPrice: 5.95,
1262
+ cacheWritesPrice: 0,
1263
+ cacheReadsPrice: 0,
1264
+ description: "The latest revision of DeepSeek's first-generation reasoning model"
1220
1265
  },
1221
- "meta.llama3-3-70b-instruct-v1:0": {
1222
- maxTokens: 8192,
1223
- contextWindow: 128e3,
1266
+ "deepseek-ai/DeepSeek-V3-0324": {
1267
+ maxTokens: 16384,
1268
+ contextWindow: 163840,
1224
1269
  supportsImages: false,
1225
1270
  supportsPromptCache: false,
1226
- inputPrice: 0.72,
1227
- outputPrice: 0.72,
1228
- description: "Llama 3.3 Instruct (70B)"
1229
- },
1230
- "meta.llama3-2-90b-instruct-v1:0": {
1231
- maxTokens: 8192,
1232
- contextWindow: 128e3,
1233
- supportsImages: true,
1234
- supportsPromptCache: false,
1235
- inputPrice: 0.72,
1236
- outputPrice: 0.72,
1237
- description: "Llama 3.2 Instruct (90B)"
1271
+ inputPrice: 0.77,
1272
+ outputPrice: 0.77,
1273
+ cacheWritesPrice: 0,
1274
+ cacheReadsPrice: 0,
1275
+ description: "Fast general-purpose LLM with enhanced reasoning capabilities"
1238
1276
  },
1239
- "meta.llama3-2-11b-instruct-v1:0": {
1240
- maxTokens: 8192,
1241
- contextWindow: 128e3,
1242
- supportsImages: true,
1277
+ "deepseek-ai/DeepSeek-V3.1": {
1278
+ maxTokens: 16384,
1279
+ contextWindow: 163840,
1280
+ supportsImages: false,
1243
1281
  supportsPromptCache: false,
1244
- inputPrice: 0.16,
1245
- outputPrice: 0.16,
1246
- description: "Llama 3.2 Instruct (11B)"
1282
+ inputPrice: 0.5,
1283
+ outputPrice: 1.5,
1284
+ cacheWritesPrice: 0,
1285
+ cacheReadsPrice: 0,
1286
+ description: "Extremely capable general-purpose LLM with hybrid reasoning capabilities and advanced tool calling"
1247
1287
  },
1248
- "meta.llama3-2-3b-instruct-v1:0": {
1249
- maxTokens: 8192,
1250
- contextWindow: 128e3,
1288
+ "deepseek-ai/DeepSeek-V3.2": {
1289
+ maxTokens: 16384,
1290
+ contextWindow: 163840,
1251
1291
  supportsImages: false,
1252
1292
  supportsPromptCache: false,
1253
- inputPrice: 0.15,
1254
- outputPrice: 0.15,
1255
- description: "Llama 3.2 Instruct (3B)"
1293
+ inputPrice: 0.3,
1294
+ outputPrice: 0.45,
1295
+ cacheWritesPrice: 0,
1296
+ cacheReadsPrice: 0,
1297
+ description: "DeepSeek's hybrid reasoning model with efficient long context scaling with GPT-5 level performance"
1256
1298
  },
1257
- "meta.llama3-2-1b-instruct-v1:0": {
1258
- maxTokens: 8192,
1259
- contextWindow: 128e3,
1299
+ "openai/gpt-oss-120b": {
1300
+ maxTokens: 16384,
1301
+ contextWindow: 128072,
1260
1302
  supportsImages: false,
1261
1303
  supportsPromptCache: false,
1262
1304
  inputPrice: 0.1,
1263
- outputPrice: 0.1,
1264
- description: "Llama 3.2 Instruct (1B)"
1305
+ outputPrice: 0.5,
1306
+ cacheWritesPrice: 0,
1307
+ cacheReadsPrice: 0,
1308
+ description: "Extremely capable general-purpose LLM with strong, controllable reasoning capabilities"
1265
1309
  },
1266
- "meta.llama3-1-405b-instruct-v1:0": {
1267
- maxTokens: 8192,
1268
- contextWindow: 128e3,
1310
+ "Qwen/Qwen3-235B-A22B-Instruct-2507": {
1311
+ maxTokens: 16384,
1312
+ contextWindow: 262144,
1269
1313
  supportsImages: false,
1270
1314
  supportsPromptCache: false,
1271
- inputPrice: 2.4,
1272
- outputPrice: 2.4,
1273
- description: "Llama 3.1 Instruct (405B)"
1315
+ inputPrice: 0.22,
1316
+ outputPrice: 0.8,
1317
+ cacheWritesPrice: 0,
1318
+ cacheReadsPrice: 0,
1319
+ description: "Mixture-of-experts LLM with math and reasoning capabilities"
1274
1320
  },
1275
- "meta.llama3-1-70b-instruct-v1:0": {
1276
- maxTokens: 8192,
1277
- contextWindow: 128e3,
1321
+ "Qwen/Qwen3-Coder-480B-A35B-Instruct": {
1322
+ maxTokens: 16384,
1323
+ contextWindow: 262144,
1278
1324
  supportsImages: false,
1279
1325
  supportsPromptCache: false,
1280
- inputPrice: 0.72,
1281
- outputPrice: 0.72,
1282
- description: "Llama 3.1 Instruct (70B)"
1326
+ inputPrice: 0.38,
1327
+ outputPrice: 1.53,
1328
+ cacheWritesPrice: 0,
1329
+ cacheReadsPrice: 0,
1330
+ description: "Mixture-of-experts LLM with advanced coding and reasoning capabilities"
1283
1331
  },
1284
- "meta.llama3-1-70b-instruct-latency-optimized-v1:0": {
1285
- maxTokens: 8192,
1286
- contextWindow: 128e3,
1332
+ "moonshotai/Kimi-K2-Instruct-0905": {
1333
+ maxTokens: 16384,
1334
+ contextWindow: 262e3,
1287
1335
  supportsImages: false,
1288
1336
  supportsPromptCache: false,
1289
- inputPrice: 0.9,
1290
- outputPrice: 0.9,
1291
- description: "Llama 3.1 Instruct (70B) (w/ latency optimized inference)"
1292
- },
1293
- "meta.llama3-1-8b-instruct-v1:0": {
1337
+ inputPrice: 0.6,
1338
+ outputPrice: 2.5,
1339
+ cacheWritesPrice: 0,
1340
+ cacheReadsPrice: 0,
1341
+ description: "State of the art language model for agentic and coding tasks. September Update."
1342
+ }
1343
+ };
1344
+ var basetenDefaultModelId = "zai-org/GLM-4.6";
1345
+
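The tables above store prices in USD per million tokens (the same convention the Bedrock comments below spell out). As a minimal sketch of how a consumer might turn that metadata into a request-cost estimate — assuming the Baseten table is exported as basetenModels like the other provider tables, and with estimateCost being a hypothetical helper, not part of the package API:

// Sketch: estimating a request's cost from a model's pricing metadata.
// Assumes the export name "basetenModels"; prices are USD per million tokens.
import { basetenModels } from "@roo-code/types";

function estimateCost(modelId, inputTokens, outputTokens) {
  const info = basetenModels[modelId];
  if (!info) throw new Error(`Unknown model: ${modelId}`);
  const input = ((info.inputPrice ?? 0) * inputTokens) / 1e6;
  const output = ((info.outputPrice ?? 0) * outputTokens) / 1e6;
  return input + output;
}

// e.g. 100K input + 10K output on DeepSeek-V3.2:
// 0.1 * 0.3 + 0.01 * 0.45 ≈ $0.0345
estimateCost("deepseek-ai/DeepSeek-V3.2", 100_000, 10_000);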
1346
+ // src/providers/bedrock.ts
1347
+ var bedrockDefaultModelId = "anthropic.claude-sonnet-4-5-20250929-v1:0";
1348
+ var bedrockDefaultPromptRouterModelId = "anthropic.claude-3-sonnet-20240229-v1:0";
1349
+ var bedrockModels = {
1350
+ "anthropic.claude-sonnet-4-5-20250929-v1:0": {
1294
1351
  maxTokens: 8192,
1295
- contextWindow: 8e3,
1296
- supportsImages: false,
1297
- supportsPromptCache: false,
1298
- inputPrice: 0.22,
1299
- outputPrice: 0.22,
1300
- description: "Llama 3.1 Instruct (8B)"
1301
- },
1302
- "meta.llama3-70b-instruct-v1:0": {
1303
- maxTokens: 2048,
1304
- contextWindow: 8e3,
1305
- supportsImages: false,
1306
- supportsPromptCache: false,
1307
- inputPrice: 2.65,
1308
- outputPrice: 3.5
1352
+ contextWindow: 2e5,
1353
+ supportsImages: true,
1354
+ supportsPromptCache: true,
1355
+ supportsReasoningBudget: true,
1356
+ inputPrice: 3,
1357
+ outputPrice: 15,
1358
+ cacheWritesPrice: 3.75,
1359
+ cacheReadsPrice: 0.3,
1360
+ minTokensPerCachePoint: 1024,
1361
+ maxCachePoints: 4,
1362
+ cachableFields: ["system", "messages", "tools"]
1309
1363
  },
1310
- "meta.llama3-8b-instruct-v1:0": {
1311
- maxTokens: 2048,
1312
- contextWindow: 4e3,
1313
- supportsImages: false,
1314
- supportsPromptCache: false,
1315
- inputPrice: 0.3,
1316
- outputPrice: 0.6
1364
+ "amazon.nova-pro-v1:0": {
1365
+ maxTokens: 5e3,
1366
+ contextWindow: 3e5,
1367
+ supportsImages: true,
1368
+ supportsPromptCache: true,
1369
+ inputPrice: 0.8,
1370
+ outputPrice: 3.2,
1371
+ cacheWritesPrice: 0.8,
1372
+ // per million tokens
1373
+ cacheReadsPrice: 0.2,
1374
+ // per million tokens
1375
+ minTokensPerCachePoint: 1,
1376
+ maxCachePoints: 1,
1377
+ cachableFields: ["system"]
1317
1378
  },
1318
- "amazon.titan-text-lite-v1:0": {
1319
- maxTokens: 4096,
1320
- contextWindow: 8e3,
1321
- supportsImages: false,
1379
+ "amazon.nova-pro-latency-optimized-v1:0": {
1380
+ maxTokens: 5e3,
1381
+ contextWindow: 3e5,
1382
+ supportsImages: true,
1322
1383
  supportsPromptCache: false,
1323
- inputPrice: 0.15,
1324
- outputPrice: 0.2,
1325
- description: "Amazon Titan Text Lite"
1384
+ inputPrice: 1,
1385
+ outputPrice: 4,
1386
+ cacheWritesPrice: 1,
1387
+ // per million tokens
1388
+ cacheReadsPrice: 0.25,
1389
+ // per million tokens
1390
+ description: "Amazon Nova Pro with latency optimized inference"
1326
1391
  },
1327
- "amazon.titan-text-express-v1:0": {
1328
- maxTokens: 4096,
1329
- contextWindow: 8e3,
1330
- supportsImages: false,
1331
- supportsPromptCache: false,
1332
- inputPrice: 0.2,
1333
- outputPrice: 0.6,
1334
- description: "Amazon Titan Text Express"
1392
+ "amazon.nova-lite-v1:0": {
1393
+ maxTokens: 5e3,
1394
+ contextWindow: 3e5,
1395
+ supportsImages: true,
1396
+ supportsPromptCache: true,
1397
+ inputPrice: 0.06,
1398
+ outputPrice: 0.24,
1399
+ cacheWritesPrice: 0.06,
1400
+ // per million tokens
1401
+ cacheReadsPrice: 0.015,
1402
+ // per million tokens
1403
+ minTokensPerCachePoint: 1,
1404
+ maxCachePoints: 1,
1405
+ cachableFields: ["system"]
1335
1406
  },
1336
- "moonshot.kimi-k2-thinking": {
1337
- maxTokens: 32e3,
1338
- contextWindow: 262144,
1339
- supportsImages: false,
1340
- supportsPromptCache: false,
1341
- preserveReasoning: true,
1342
- inputPrice: 0.6,
1343
- outputPrice: 2.5,
1344
- description: "Kimi K2 Thinking (1T parameter MoE model with 32B active parameters)"
1407
+ "amazon.nova-2-lite-v1:0": {
1408
+ maxTokens: 65535,
1409
+ contextWindow: 1e6,
1410
+ supportsImages: true,
1411
+ supportsPromptCache: true,
1412
+ inputPrice: 0.33,
1413
+ outputPrice: 2.75,
1414
+ cacheWritesPrice: 0,
1415
+ cacheReadsPrice: 0.0825,
1416
+ // 75% less than input price
1417
+ minTokensPerCachePoint: 1,
1418
+ maxCachePoints: 1,
1419
+ cachableFields: ["system"],
1420
+ description: "Amazon Nova 2 Lite - Comparable to Claude Haiku 4.5"
1345
1421
  },
1346
- "minimax.minimax-m2": {
1347
- maxTokens: 16384,
1348
- contextWindow: 196608,
1422
+ "amazon.nova-micro-v1:0": {
1423
+ maxTokens: 5e3,
1424
+ contextWindow: 128e3,
1349
1425
  supportsImages: false,
1350
- supportsPromptCache: false,
1351
- preserveReasoning: true,
1352
- inputPrice: 0.3,
1353
- outputPrice: 1.2,
1354
- description: "MiniMax M2 (230B parameter MoE model with 10B active parameters)"
1426
+ supportsPromptCache: true,
1427
+ inputPrice: 0.035,
1428
+ outputPrice: 0.14,
1429
+ cacheWritesPrice: 0.035,
1430
+ // per million tokens
1431
+ cacheReadsPrice: 875e-5,
1432
+ // per million tokens
1433
+ minTokensPerCachePoint: 1,
1434
+ maxCachePoints: 1,
1435
+ cachableFields: ["system"]
1355
1436
  },
1356
- "qwen.qwen3-next-80b-a3b": {
1437
+ "anthropic.claude-sonnet-4-20250514-v1:0": {
1357
1438
  maxTokens: 8192,
1358
- contextWindow: 262144,
1359
- supportsImages: false,
1360
- supportsPromptCache: false,
1361
- inputPrice: 0.15,
1362
- outputPrice: 1.2,
1363
- description: "Qwen3 Next 80B (MoE model with 3B active parameters)"
1439
+ contextWindow: 2e5,
1440
+ supportsImages: true,
1441
+ supportsPromptCache: true,
1442
+ supportsReasoningBudget: true,
1443
+ inputPrice: 3,
1444
+ outputPrice: 15,
1445
+ cacheWritesPrice: 3.75,
1446
+ cacheReadsPrice: 0.3,
1447
+ minTokensPerCachePoint: 1024,
1448
+ maxCachePoints: 4,
1449
+ cachableFields: ["system", "messages", "tools"]
1364
1450
  },
1365
- "qwen.qwen3-coder-480b-a35b-v1:0": {
1451
+ "anthropic.claude-opus-4-1-20250805-v1:0": {
1366
1452
  maxTokens: 8192,
1367
- contextWindow: 262144,
1368
- supportsImages: false,
1369
- supportsPromptCache: false,
1370
- inputPrice: 0.45,
1371
- outputPrice: 1.8,
1372
- description: "Qwen3 Coder 480B (MoE model with 35B active parameters)"
1373
- }
1374
- };
1375
- var BEDROCK_DEFAULT_TEMPERATURE = 0.3;
1376
- var BEDROCK_MAX_TOKENS = 4096;
1377
- var BEDROCK_DEFAULT_CONTEXT = 128e3;
1378
- var AWS_INFERENCE_PROFILE_MAPPING = [
1379
- // Australia regions (Sydney and Melbourne) → au. inference profile (most specific - 14 chars)
1380
- ["ap-southeast-2", "au."],
1381
- ["ap-southeast-4", "au."],
1382
- // Japan regions (Tokyo and Osaka) → jp. inference profile (13 chars)
1383
- ["ap-northeast-", "jp."],
1384
- // US Government Cloud → ug. inference profile (7 chars)
1385
- ["us-gov-", "ug."],
1386
- // Americas regions → us. inference profile (3 chars)
1387
- ["us-", "us."],
1388
- // Europe regions → eu. inference profile (3 chars)
1389
- ["eu-", "eu."],
1390
- // Asia Pacific regions → apac. inference profile (3 chars)
1391
- ["ap-", "apac."],
1392
- // Canada regions → ca. inference profile (3 chars)
1393
- ["ca-", "ca."],
1394
- // South America regions → sa. inference profile (3 chars)
1395
- ["sa-", "sa."]
1396
- ];
1397
- var BEDROCK_REGIONS = [
1398
- { value: "us-east-1", label: "us-east-1" },
1399
- { value: "us-east-2", label: "us-east-2" },
1400
- { value: "us-west-1", label: "us-west-1" },
1401
- { value: "us-west-2", label: "us-west-2" },
1402
- { value: "ap-northeast-1", label: "ap-northeast-1" },
1403
- { value: "ap-northeast-2", label: "ap-northeast-2" },
1404
- { value: "ap-northeast-3", label: "ap-northeast-3" },
1405
- { value: "ap-south-1", label: "ap-south-1" },
1406
- { value: "ap-south-2", label: "ap-south-2" },
1407
- { value: "ap-southeast-1", label: "ap-southeast-1" },
1408
- { value: "ap-southeast-2", label: "ap-southeast-2" },
1409
- { value: "ap-east-1", label: "ap-east-1" },
1410
- { value: "eu-central-1", label: "eu-central-1" },
1411
- { value: "eu-central-2", label: "eu-central-2" },
1412
- { value: "eu-west-1", label: "eu-west-1" },
1413
- { value: "eu-west-2", label: "eu-west-2" },
1414
- { value: "eu-west-3", label: "eu-west-3" },
1415
- { value: "eu-north-1", label: "eu-north-1" },
1416
- { value: "eu-south-1", label: "eu-south-1" },
1417
- { value: "eu-south-2", label: "eu-south-2" },
1418
- { value: "ca-central-1", label: "ca-central-1" },
1419
- { value: "sa-east-1", label: "sa-east-1" },
1420
- { value: "us-gov-east-1", label: "us-gov-east-1" },
1421
- { value: "us-gov-west-1", label: "us-gov-west-1" }
1422
- ].sort((a, b) => a.value.localeCompare(b.value));
1423
- var BEDROCK_1M_CONTEXT_MODEL_IDS = [
1424
- "anthropic.claude-sonnet-4-20250514-v1:0",
1425
- "anthropic.claude-sonnet-4-5-20250929-v1:0",
1426
- "anthropic.claude-opus-4-6-v1"
1427
- ];
1428
- var BEDROCK_GLOBAL_INFERENCE_MODEL_IDS = [
1429
- "anthropic.claude-sonnet-4-20250514-v1:0",
1430
- "anthropic.claude-sonnet-4-5-20250929-v1:0",
1431
- "anthropic.claude-haiku-4-5-20251001-v1:0",
1432
- "anthropic.claude-opus-4-5-20251101-v1:0",
1433
- "anthropic.claude-opus-4-6-v1"
1434
- ];
1435
- var BEDROCK_SERVICE_TIER_MODEL_IDS = [
1436
- // Amazon Nova models
1437
- "amazon.nova-lite-v1:0",
1438
- "amazon.nova-2-lite-v1:0",
1439
- "amazon.nova-pro-v1:0",
1440
- "amazon.nova-pro-latency-optimized-v1:0",
1441
- // DeepSeek models
1442
- "deepseek.r1-v1:0",
1443
- // Qwen models
1444
- "qwen.qwen3-next-80b-a3b",
1445
- "qwen.qwen3-coder-480b-a35b-v1:0",
1446
- // OpenAI GPT-OSS models
1447
- "openai.gpt-oss-20b-1:0",
1448
- "openai.gpt-oss-120b-1:0"
1449
- ];
1450
- var BEDROCK_SERVICE_TIER_PRICING = {
1451
- STANDARD: 1,
1452
- // Base price
1453
- FLEX: 0.5,
1454
- // 50% discount from standard
1455
- PRIORITY: 1.75
1456
- // 75% premium over standard
1457
- };
1458
-
1459
- // src/providers/cerebras.ts
1460
- var cerebrasDefaultModelId = "gpt-oss-120b";
1461
- var cerebrasModels = {
1462
- "zai-glm-4.7": {
1463
- maxTokens: 16384,
1464
- // Conservative default to avoid premature rate limiting (Cerebras reserves quota upfront)
1465
- contextWindow: 131072,
1466
- supportsImages: false,
1453
+ contextWindow: 2e5,
1454
+ supportsImages: true,
1467
1455
  supportsPromptCache: true,
1468
- supportsTemperature: true,
1469
- defaultTemperature: 1,
1470
- inputPrice: 0,
1471
- outputPrice: 0,
1472
- description: "Highly capable general-purpose model on Cerebras (up to 1,000 tokens/s), competitive with leading proprietary models on coding tasks."
1473
- },
1474
- "qwen-3-235b-a22b-instruct-2507": {
1475
- maxTokens: 16384,
1476
- // Conservative default to avoid premature rate limiting
1477
- contextWindow: 64e3,
1478
- supportsImages: false,
1479
- supportsPromptCache: false,
1480
- inputPrice: 0,
1481
- outputPrice: 0,
1482
- description: "Intelligent model with ~1400 tokens/s"
1483
- },
1484
- "llama-3.3-70b": {
1485
- maxTokens: 16384,
1486
- // Conservative default to avoid premature rate limiting
1487
- contextWindow: 64e3,
1488
- supportsImages: false,
1489
- supportsPromptCache: false,
1490
- inputPrice: 0,
1491
- outputPrice: 0,
1492
- description: "Powerful model with ~2600 tokens/s"
1493
- },
1494
- "qwen-3-32b": {
1495
- maxTokens: 16384,
1496
- // Conservative default to avoid premature rate limiting
1497
- contextWindow: 64e3,
1498
- supportsImages: false,
1499
- supportsPromptCache: false,
1500
- inputPrice: 0,
1501
- outputPrice: 0,
1502
- description: "SOTA coding performance with ~2500 tokens/s"
1503
- },
1504
- "gpt-oss-120b": {
1505
- maxTokens: 16384,
1506
- // Conservative default to avoid premature rate limiting
1507
- contextWindow: 64e3,
1508
- supportsImages: false,
1509
- supportsPromptCache: false,
1510
- inputPrice: 0,
1511
- outputPrice: 0,
1512
- description: "OpenAI GPT OSS model with ~2800 tokens/s\n\n\u2022 64K context window\n\u2022 Excels at efficient reasoning across science, math, and coding"
1513
- }
1514
- };
1515
-
1516
- // src/providers/chutes.ts
1517
- var chutesDefaultModelId = "deepseek-ai/DeepSeek-R1-0528";
1518
- var chutesModels = {
1519
- "deepseek-ai/DeepSeek-R1-0528": {
1520
- maxTokens: 32768,
1521
- contextWindow: 163840,
1522
- supportsImages: false,
1523
- supportsPromptCache: false,
1524
- inputPrice: 0,
1525
- outputPrice: 0,
1526
- description: "DeepSeek R1 0528 model."
1527
- },
1528
- "deepseek-ai/DeepSeek-R1": {
1529
- maxTokens: 32768,
1530
- contextWindow: 163840,
1531
- supportsImages: false,
1532
- supportsPromptCache: false,
1533
- inputPrice: 0,
1534
- outputPrice: 0,
1535
- description: "DeepSeek R1 model."
1536
- },
1537
- "deepseek-ai/DeepSeek-V3": {
1538
- maxTokens: 32768,
1539
- contextWindow: 163840,
1540
- supportsImages: false,
1541
- supportsPromptCache: false,
1542
- inputPrice: 0,
1543
- outputPrice: 0,
1544
- description: "DeepSeek V3 model."
1545
- },
1546
- "deepseek-ai/DeepSeek-V3.1": {
1547
- maxTokens: 32768,
1548
- contextWindow: 163840,
1549
- supportsImages: false,
1550
- supportsPromptCache: false,
1551
- inputPrice: 0,
1552
- outputPrice: 0,
1553
- description: "DeepSeek V3.1 model."
1554
- },
1555
- "deepseek-ai/DeepSeek-V3.1-Terminus": {
1556
- maxTokens: 163840,
1557
- contextWindow: 163840,
1558
- supportsImages: false,
1559
- supportsPromptCache: false,
1560
- inputPrice: 0.23,
1561
- outputPrice: 0.9,
1562
- description: "DeepSeek\u2011V3.1\u2011Terminus is an update to V3.1 that improves language consistency by reducing CN/EN mix\u2011ups and eliminating random characters, while strengthening agent capabilities with notably better Code Agent and Search Agent performance."
1563
- },
1564
- "deepseek-ai/DeepSeek-V3.1-turbo": {
1565
- maxTokens: 32768,
1566
- contextWindow: 163840,
1567
- supportsImages: false,
1568
- supportsPromptCache: false,
1569
- inputPrice: 1,
1570
- outputPrice: 3,
1571
- description: "DeepSeek-V3.1-turbo is an FP8, speculative-decoding turbo variant optimized for ultra-fast single-shot queries (~200 TPS), with outputs close to the originals and solid function calling/reasoning/structured output, priced at $1/M input and $3/M output tokens, using 2\xD7 quota per request and not intended for bulk workloads."
1456
+ supportsReasoningBudget: true,
1457
+ inputPrice: 15,
1458
+ outputPrice: 75,
1459
+ cacheWritesPrice: 18.75,
1460
+ cacheReadsPrice: 1.5,
1461
+ minTokensPerCachePoint: 1024,
1462
+ maxCachePoints: 4,
1463
+ cachableFields: ["system", "messages", "tools"]
1572
1464
  },
1573
- "deepseek-ai/DeepSeek-V3.2-Exp": {
1574
- maxTokens: 163840,
1575
- contextWindow: 163840,
1576
- supportsImages: false,
1577
- supportsPromptCache: false,
1578
- inputPrice: 0.25,
1579
- outputPrice: 0.35,
1580
- description: "DeepSeek-V3.2-Exp is an experimental LLM that introduces DeepSeek Sparse Attention to improve long\u2011context training and inference efficiency while maintaining performance comparable to V3.1\u2011Terminus."
1465
+ "anthropic.claude-opus-4-6-v1": {
1466
+ maxTokens: 8192,
1467
+ contextWindow: 2e5,
1468
+ // Default 200K, extendable to 1M with beta flag 'context-1m-2025-08-07'
1469
+ supportsImages: true,
1470
+ supportsPromptCache: true,
1471
+ supportsReasoningBudget: true,
1472
+ inputPrice: 5,
1473
+ // $5 per million input tokens (≤200K context)
1474
+ outputPrice: 25,
1475
+ // $25 per million output tokens (≤200K context)
1476
+ cacheWritesPrice: 6.25,
1477
+ // $6.25 per million tokens
1478
+ cacheReadsPrice: 0.5,
1479
+ // $0.50 per million tokens
1480
+ minTokensPerCachePoint: 1024,
1481
+ maxCachePoints: 4,
1482
+ cachableFields: ["system", "messages", "tools"],
1483
+ // Tiered pricing for extended context (requires beta flag 'context-1m-2025-08-07')
1484
+ tiers: [
1485
+ {
1486
+ contextWindow: 1e6,
1487
+ // 1M tokens with beta flag
1488
+ inputPrice: 10,
1489
+ // $10 per million input tokens (>200K context)
1490
+ outputPrice: 37.5,
1491
+ // $37.50 per million output tokens (>200K context)
1492
+ cacheWritesPrice: 12.5,
1493
+ // $12.50 per million tokens (>200K context)
1494
+ cacheReadsPrice: 1
1495
+ // $1.00 per million tokens (>200K context)
1496
+ }
1497
+ ]
1581
1498
  },
1582
- "unsloth/Llama-3.3-70B-Instruct": {
1583
- maxTokens: 32768,
1584
- // From Groq
1585
- contextWindow: 131072,
1586
- // From Groq
1587
- supportsImages: false,
1588
- supportsPromptCache: false,
1589
- inputPrice: 0,
1590
- outputPrice: 0,
1591
- description: "Unsloth Llama 3.3 70B Instruct model."
1499
+ "anthropic.claude-opus-4-5-20251101-v1:0": {
1500
+ maxTokens: 8192,
1501
+ contextWindow: 2e5,
1502
+ supportsImages: true,
1503
+ supportsPromptCache: true,
1504
+ supportsReasoningBudget: true,
1505
+ inputPrice: 5,
1506
+ outputPrice: 25,
1507
+ cacheWritesPrice: 6.25,
1508
+ cacheReadsPrice: 0.5,
1509
+ minTokensPerCachePoint: 1024,
1510
+ maxCachePoints: 4,
1511
+ cachableFields: ["system", "messages", "tools"]
1592
1512
  },
1593
- "chutesai/Llama-4-Scout-17B-16E-Instruct": {
1594
- maxTokens: 32768,
1595
- contextWindow: 512e3,
1596
- supportsImages: false,
1597
- supportsPromptCache: false,
1598
- inputPrice: 0,
1599
- outputPrice: 0,
1600
- description: "ChutesAI Llama 4 Scout 17B Instruct model, 512K context."
1513
+ "anthropic.claude-opus-4-20250514-v1:0": {
1514
+ maxTokens: 8192,
1515
+ contextWindow: 2e5,
1516
+ supportsImages: true,
1517
+ supportsPromptCache: true,
1518
+ supportsReasoningBudget: true,
1519
+ inputPrice: 15,
1520
+ outputPrice: 75,
1521
+ cacheWritesPrice: 18.75,
1522
+ cacheReadsPrice: 1.5,
1523
+ minTokensPerCachePoint: 1024,
1524
+ maxCachePoints: 4,
1525
+ cachableFields: ["system", "messages", "tools"]
1601
1526
  },
1602
- "unsloth/Mistral-Nemo-Instruct-2407": {
1603
- maxTokens: 32768,
1604
- contextWindow: 128e3,
1605
- supportsImages: false,
1606
- supportsPromptCache: false,
1607
- inputPrice: 0,
1608
- outputPrice: 0,
1609
- description: "Unsloth Mistral Nemo Instruct model."
1527
+ "anthropic.claude-3-7-sonnet-20250219-v1:0": {
1528
+ maxTokens: 8192,
1529
+ contextWindow: 2e5,
1530
+ supportsImages: true,
1531
+ supportsPromptCache: true,
1532
+ supportsReasoningBudget: true,
1533
+ inputPrice: 3,
1534
+ outputPrice: 15,
1535
+ cacheWritesPrice: 3.75,
1536
+ cacheReadsPrice: 0.3,
1537
+ minTokensPerCachePoint: 1024,
1538
+ maxCachePoints: 4,
1539
+ cachableFields: ["system", "messages", "tools"]
1610
1540
  },
1611
- "unsloth/gemma-3-12b-it": {
1612
- maxTokens: 32768,
1613
- contextWindow: 131072,
1614
- supportsImages: false,
1615
- supportsPromptCache: false,
1616
- inputPrice: 0,
1617
- outputPrice: 0,
1618
- description: "Unsloth Gemma 3 12B IT model."
1541
+ "anthropic.claude-3-5-sonnet-20241022-v2:0": {
1542
+ maxTokens: 8192,
1543
+ contextWindow: 2e5,
1544
+ supportsImages: true,
1545
+ supportsPromptCache: true,
1546
+ inputPrice: 3,
1547
+ outputPrice: 15,
1548
+ cacheWritesPrice: 3.75,
1549
+ cacheReadsPrice: 0.3,
1550
+ minTokensPerCachePoint: 1024,
1551
+ maxCachePoints: 4,
1552
+ cachableFields: ["system", "messages", "tools"]
1619
1553
  },
1620
- "NousResearch/DeepHermes-3-Llama-3-8B-Preview": {
1621
- maxTokens: 32768,
1622
- contextWindow: 131072,
1554
+ "anthropic.claude-3-5-haiku-20241022-v1:0": {
1555
+ maxTokens: 8192,
1556
+ contextWindow: 2e5,
1623
1557
  supportsImages: false,
1624
- supportsPromptCache: false,
1625
- inputPrice: 0,
1626
- outputPrice: 0,
1627
- description: "Nous DeepHermes 3 Llama 3 8B Preview model."
1558
+ supportsPromptCache: true,
1559
+ inputPrice: 0.8,
1560
+ outputPrice: 4,
1561
+ cacheWritesPrice: 1,
1562
+ cacheReadsPrice: 0.08,
1563
+ minTokensPerCachePoint: 2048,
1564
+ maxCachePoints: 4,
1565
+ cachableFields: ["system", "messages", "tools"]
1628
1566
  },
1629
- "unsloth/gemma-3-4b-it": {
1630
- maxTokens: 32768,
1631
- contextWindow: 131072,
1632
- supportsImages: false,
1633
- supportsPromptCache: false,
1634
- inputPrice: 0,
1635
- outputPrice: 0,
1636
- description: "Unsloth Gemma 3 4B IT model."
1567
+ "anthropic.claude-haiku-4-5-20251001-v1:0": {
1568
+ maxTokens: 8192,
1569
+ contextWindow: 2e5,
1570
+ supportsImages: true,
1571
+ supportsPromptCache: true,
1572
+ supportsReasoningBudget: true,
1573
+ inputPrice: 1,
1574
+ outputPrice: 5,
1575
+ cacheWritesPrice: 1.25,
1576
+ // 5m cache writes
1577
+ cacheReadsPrice: 0.1,
1578
+ // cache hits / refreshes
1579
+ minTokensPerCachePoint: 2048,
1580
+ maxCachePoints: 4,
1581
+ cachableFields: ["system", "messages", "tools"]
1637
1582
  },
1638
- "nvidia/Llama-3_3-Nemotron-Super-49B-v1": {
1639
- maxTokens: 32768,
1640
- contextWindow: 131072,
1641
- supportsImages: false,
1583
+ "anthropic.claude-3-5-sonnet-20240620-v1:0": {
1584
+ maxTokens: 8192,
1585
+ contextWindow: 2e5,
1586
+ supportsImages: true,
1642
1587
  supportsPromptCache: false,
1643
- inputPrice: 0,
1644
- outputPrice: 0,
1645
- description: "Nvidia Llama 3.3 Nemotron Super 49B model."
1588
+ inputPrice: 3,
1589
+ outputPrice: 15
1646
1590
  },
1647
- "nvidia/Llama-3_1-Nemotron-Ultra-253B-v1": {
1648
- maxTokens: 32768,
1649
- contextWindow: 131072,
1650
- supportsImages: false,
1591
+ "anthropic.claude-3-opus-20240229-v1:0": {
1592
+ maxTokens: 4096,
1593
+ contextWindow: 2e5,
1594
+ supportsImages: true,
1651
1595
  supportsPromptCache: false,
1652
- inputPrice: 0,
1653
- outputPrice: 0,
1654
- description: "Nvidia Llama 3.1 Nemotron Ultra 253B model."
1596
+ inputPrice: 15,
1597
+ outputPrice: 75
1655
1598
  },
1656
- "chutesai/Llama-4-Maverick-17B-128E-Instruct-FP8": {
1657
- maxTokens: 32768,
1658
- contextWindow: 256e3,
1659
- supportsImages: false,
1599
+ "anthropic.claude-3-sonnet-20240229-v1:0": {
1600
+ maxTokens: 4096,
1601
+ contextWindow: 2e5,
1602
+ supportsImages: true,
1660
1603
  supportsPromptCache: false,
1661
- inputPrice: 0,
1662
- outputPrice: 0,
1663
- description: "ChutesAI Llama 4 Maverick 17B Instruct FP8 model."
1604
+ inputPrice: 3,
1605
+ outputPrice: 15
1664
1606
  },
1665
- "deepseek-ai/DeepSeek-V3-Base": {
1666
- maxTokens: 32768,
1667
- contextWindow: 163840,
1668
- supportsImages: false,
1607
+ "anthropic.claude-3-haiku-20240307-v1:0": {
1608
+ maxTokens: 4096,
1609
+ contextWindow: 2e5,
1610
+ supportsImages: true,
1669
1611
  supportsPromptCache: false,
1670
- inputPrice: 0,
1671
- outputPrice: 0,
1672
- description: "DeepSeek V3 Base model."
1612
+ inputPrice: 0.25,
1613
+ outputPrice: 1.25
1673
1614
  },
1674
- "deepseek-ai/DeepSeek-R1-Zero": {
1615
+ "deepseek.r1-v1:0": {
1675
1616
  maxTokens: 32768,
1676
- contextWindow: 163840,
1617
+ contextWindow: 128e3,
1677
1618
  supportsImages: false,
1678
1619
  supportsPromptCache: false,
1679
- inputPrice: 0,
1680
- outputPrice: 0,
1681
- description: "DeepSeek R1 Zero model."
1620
+ inputPrice: 1.35,
1621
+ outputPrice: 5.4
1682
1622
  },
1683
- "deepseek-ai/DeepSeek-V3-0324": {
1684
- maxTokens: 32768,
1685
- contextWindow: 163840,
1623
+ "openai.gpt-oss-20b-1:0": {
1624
+ maxTokens: 8192,
1625
+ contextWindow: 128e3,
1686
1626
  supportsImages: false,
1687
1627
  supportsPromptCache: false,
1688
- inputPrice: 0,
1689
- outputPrice: 0,
1690
- description: "DeepSeek V3 (0324) model."
1628
+ inputPrice: 0.5,
1629
+ outputPrice: 1.5,
1630
+ description: "GPT-OSS 20B - Optimized for low latency and local/specialized use cases"
1691
1631
  },
1692
- "Qwen/Qwen3-235B-A22B-Instruct-2507": {
1693
- maxTokens: 32768,
1694
- contextWindow: 262144,
1632
+ "openai.gpt-oss-120b-1:0": {
1633
+ maxTokens: 8192,
1634
+ contextWindow: 128e3,
1695
1635
  supportsImages: false,
1696
1636
  supportsPromptCache: false,
1697
- inputPrice: 0,
1698
- outputPrice: 0,
1699
- description: "Qwen3 235B A22B Instruct 2507 model with 262K context window."
1637
+ inputPrice: 2,
1638
+ outputPrice: 6,
1639
+ description: "GPT-OSS 120B - Production-ready, general-purpose, high-reasoning model"
1700
1640
  },
1701
- "Qwen/Qwen3-235B-A22B": {
1702
- maxTokens: 32768,
1703
- contextWindow: 40960,
1641
+ "meta.llama3-3-70b-instruct-v1:0": {
1642
+ maxTokens: 8192,
1643
+ contextWindow: 128e3,
1704
1644
  supportsImages: false,
1705
1645
  supportsPromptCache: false,
1706
- inputPrice: 0,
1707
- outputPrice: 0,
1708
- description: "Qwen3 235B A22B model."
1646
+ inputPrice: 0.72,
1647
+ outputPrice: 0.72,
1648
+ description: "Llama 3.3 Instruct (70B)"
1709
1649
  },
1710
- "Qwen/Qwen3-32B": {
1711
- maxTokens: 32768,
1712
- contextWindow: 40960,
1713
- supportsImages: false,
1650
+ "meta.llama3-2-90b-instruct-v1:0": {
1651
+ maxTokens: 8192,
1652
+ contextWindow: 128e3,
1653
+ supportsImages: true,
1714
1654
  supportsPromptCache: false,
1715
- inputPrice: 0,
1716
- outputPrice: 0,
1717
- description: "Qwen3 32B model."
1655
+ inputPrice: 0.72,
1656
+ outputPrice: 0.72,
1657
+ description: "Llama 3.2 Instruct (90B)"
1718
1658
  },
1719
- "Qwen/Qwen3-30B-A3B": {
1720
- maxTokens: 32768,
1721
- contextWindow: 40960,
1722
- supportsImages: false,
1659
+ "meta.llama3-2-11b-instruct-v1:0": {
1660
+ maxTokens: 8192,
1661
+ contextWindow: 128e3,
1662
+ supportsImages: true,
1723
1663
  supportsPromptCache: false,
1724
- inputPrice: 0,
1725
- outputPrice: 0,
1726
- description: "Qwen3 30B A3B model."
1664
+ inputPrice: 0.16,
1665
+ outputPrice: 0.16,
1666
+ description: "Llama 3.2 Instruct (11B)"
1727
1667
  },
1728
- "Qwen/Qwen3-14B": {
1729
- maxTokens: 32768,
1730
- contextWindow: 40960,
1668
+ "meta.llama3-2-3b-instruct-v1:0": {
1669
+ maxTokens: 8192,
1670
+ contextWindow: 128e3,
1731
1671
  supportsImages: false,
1732
1672
  supportsPromptCache: false,
1733
- inputPrice: 0,
1734
- outputPrice: 0,
1735
- description: "Qwen3 14B model."
1673
+ inputPrice: 0.15,
1674
+ outputPrice: 0.15,
1675
+ description: "Llama 3.2 Instruct (3B)"
1736
1676
  },
1737
- "Qwen/Qwen3-8B": {
1738
- maxTokens: 32768,
1739
- contextWindow: 40960,
1677
+ "meta.llama3-2-1b-instruct-v1:0": {
1678
+ maxTokens: 8192,
1679
+ contextWindow: 128e3,
1740
1680
  supportsImages: false,
1741
1681
  supportsPromptCache: false,
1742
- inputPrice: 0,
1743
- outputPrice: 0,
1744
- description: "Qwen3 8B model."
1682
+ inputPrice: 0.1,
1683
+ outputPrice: 0.1,
1684
+ description: "Llama 3.2 Instruct (1B)"
1745
1685
  },
1746
- "microsoft/MAI-DS-R1-FP8": {
1747
- maxTokens: 32768,
1748
- contextWindow: 163840,
1686
+ "meta.llama3-1-405b-instruct-v1:0": {
1687
+ maxTokens: 8192,
1688
+ contextWindow: 128e3,
1749
1689
  supportsImages: false,
1750
1690
  supportsPromptCache: false,
1751
- inputPrice: 0,
1752
- outputPrice: 0,
1753
- description: "Microsoft MAI-DS-R1 FP8 model."
1691
+ inputPrice: 2.4,
1692
+ outputPrice: 2.4,
1693
+ description: "Llama 3.1 Instruct (405B)"
1754
1694
  },
1755
- "tngtech/DeepSeek-R1T-Chimera": {
1756
- maxTokens: 32768,
1757
- contextWindow: 163840,
1695
+ "meta.llama3-1-70b-instruct-v1:0": {
1696
+ maxTokens: 8192,
1697
+ contextWindow: 128e3,
1758
1698
  supportsImages: false,
1759
1699
  supportsPromptCache: false,
1760
- inputPrice: 0,
1761
- outputPrice: 0,
1762
- description: "TNGTech DeepSeek R1T Chimera model."
1700
+ inputPrice: 0.72,
1701
+ outputPrice: 0.72,
1702
+ description: "Llama 3.1 Instruct (70B)"
1763
1703
  },
1764
- "zai-org/GLM-4.5-Air": {
1765
- maxTokens: 32768,
1766
- contextWindow: 151329,
1704
+ "meta.llama3-1-70b-instruct-latency-optimized-v1:0": {
1705
+ maxTokens: 8192,
1706
+ contextWindow: 128e3,
1767
1707
  supportsImages: false,
1768
1708
  supportsPromptCache: false,
1769
- inputPrice: 0,
1770
- outputPrice: 0,
1771
- description: "GLM-4.5-Air model with 151,329 token context window and 106B total parameters with 12B activated."
1709
+ inputPrice: 0.9,
1710
+ outputPrice: 0.9,
1711
+ description: "Llama 3.1 Instruct (70B) (w/ latency optimized inference)"
1772
1712
  },
1773
- "zai-org/GLM-4.5-FP8": {
1774
- maxTokens: 32768,
1775
- contextWindow: 131072,
1713
+ "meta.llama3-1-8b-instruct-v1:0": {
1714
+ maxTokens: 8192,
1715
+ contextWindow: 8e3,
1776
1716
  supportsImages: false,
1777
1717
  supportsPromptCache: false,
1778
- inputPrice: 0,
1779
- outputPrice: 0,
1780
- description: "GLM-4.5-FP8 model with 128k token context window, optimized for agent-based applications with MoE architecture."
1718
+ inputPrice: 0.22,
1719
+ outputPrice: 0.22,
1720
+ description: "Llama 3.1 Instruct (8B)"
1781
1721
  },
1782
- "zai-org/GLM-4.5-turbo": {
1783
- maxTokens: 32768,
1784
- contextWindow: 131072,
1722
+ "meta.llama3-70b-instruct-v1:0": {
1723
+ maxTokens: 2048,
1724
+ contextWindow: 8e3,
1785
1725
  supportsImages: false,
1786
1726
  supportsPromptCache: false,
1787
- inputPrice: 1,
1788
- outputPrice: 3,
1789
- description: "GLM-4.5-turbo model with 128K token context window, optimized for fast inference."
1727
+ inputPrice: 2.65,
1728
+ outputPrice: 3.5
1790
1729
  },
1791
- "zai-org/GLM-4.6-FP8": {
1792
- maxTokens: 32768,
1793
- contextWindow: 202752,
1730
+ "meta.llama3-8b-instruct-v1:0": {
1731
+ maxTokens: 2048,
1732
+ contextWindow: 4e3,
1794
1733
  supportsImages: false,
1795
1734
  supportsPromptCache: false,
1796
- inputPrice: 0,
1797
- outputPrice: 0,
1798
- description: "GLM-4.6 introduces major upgrades over GLM-4.5, including a longer 200K-token context window for complex tasks, stronger coding performance in benchmarks and real-world tools (such as Claude Code, Cline, Roo Code, and Kilo Code), improved reasoning with tool use during inference, more capable and efficient agent integration, and refined writing that better matches human style, readability, and natural role-play scenarios."
1735
+ inputPrice: 0.3,
1736
+ outputPrice: 0.6
1799
1737
  },
1800
- "zai-org/GLM-4.6-turbo": {
1801
- maxTokens: 202752,
1802
- // From Chutes /v1/models: max_output_length
1803
- contextWindow: 202752,
1738
+ "amazon.titan-text-lite-v1:0": {
1739
+ maxTokens: 4096,
1740
+ contextWindow: 8e3,
1804
1741
  supportsImages: false,
1805
1742
  supportsPromptCache: false,
1806
- inputPrice: 1.15,
1807
- outputPrice: 3.25,
1808
- description: "GLM-4.6-turbo model with 200K-token context window, optimized for fast inference."
1743
+ inputPrice: 0.15,
1744
+ outputPrice: 0.2,
1745
+ description: "Amazon Titan Text Lite"
1809
1746
  },
1810
- "meituan-longcat/LongCat-Flash-Thinking-FP8": {
1811
- maxTokens: 32768,
1812
- contextWindow: 128e3,
1747
+ "amazon.titan-text-express-v1:0": {
1748
+ maxTokens: 4096,
1749
+ contextWindow: 8e3,
1813
1750
  supportsImages: false,
1814
1751
  supportsPromptCache: false,
1815
- inputPrice: 0,
1816
- outputPrice: 0,
1817
- description: "LongCat Flash Thinking FP8 model with 128K context window, optimized for complex reasoning and coding tasks."
1752
+ inputPrice: 0.2,
1753
+ outputPrice: 0.6,
1754
+ description: "Amazon Titan Text Express"
1818
1755
  },
1819
- "Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8": {
1820
- maxTokens: 32768,
1756
+ "moonshot.kimi-k2-thinking": {
1757
+ maxTokens: 32e3,
1821
1758
  contextWindow: 262144,
1822
1759
  supportsImages: false,
1823
1760
  supportsPromptCache: false,
1824
- inputPrice: 0,
1825
- outputPrice: 0,
1826
- description: "Qwen3 Coder 480B A35B Instruct FP8 model, optimized for coding tasks."
1761
+ preserveReasoning: true,
1762
+ inputPrice: 0.6,
1763
+ outputPrice: 2.5,
1764
+ description: "Kimi K2 Thinking (1T parameter MoE model with 32B active parameters)"
1827
1765
  },
1828
- "moonshotai/Kimi-K2-Instruct-75k": {
1829
- maxTokens: 32768,
1830
- contextWindow: 75e3,
1766
+ "minimax.minimax-m2": {
1767
+ maxTokens: 16384,
1768
+ contextWindow: 196608,
1831
1769
  supportsImages: false,
1832
1770
  supportsPromptCache: false,
1833
- inputPrice: 0.1481,
1834
- outputPrice: 0.5926,
1835
- description: "Moonshot AI Kimi K2 Instruct model with 75k context window."
1771
+ preserveReasoning: true,
1772
+ inputPrice: 0.3,
1773
+ outputPrice: 1.2,
1774
+ description: "MiniMax M2 (230B parameter MoE model with 10B active parameters)"
1836
1775
  },
1837
- "moonshotai/Kimi-K2-Instruct-0905": {
1838
- maxTokens: 32768,
1776
+ "qwen.qwen3-next-80b-a3b": {
1777
+ maxTokens: 8192,
1839
1778
  contextWindow: 262144,
1840
1779
  supportsImages: false,
1841
1780
  supportsPromptCache: false,
1842
- inputPrice: 0.1999,
1843
- outputPrice: 0.8001,
1844
- description: "Moonshot AI Kimi K2 Instruct 0905 model with 256k context window."
1781
+ inputPrice: 0.15,
1782
+ outputPrice: 1.2,
1783
+ description: "Qwen3 Next 80B (MoE model with 3B active parameters)"
1845
1784
  },
1846
- "Qwen/Qwen3-235B-A22B-Thinking-2507": {
1847
- maxTokens: 32768,
1785
+ "qwen.qwen3-coder-480b-a35b-v1:0": {
1786
+ maxTokens: 8192,
1848
1787
  contextWindow: 262144,
1849
1788
  supportsImages: false,
1850
1789
  supportsPromptCache: false,
1851
- inputPrice: 0.077968332,
1852
- outputPrice: 0.31202496,
1853
- description: "Qwen3 235B A22B Thinking 2507 model with 262K context window."
1854
- },
1855
- "Qwen/Qwen3-Next-80B-A3B-Instruct": {
1856
- maxTokens: 32768,
1857
- contextWindow: 131072,
1858
- supportsImages: false,
1859
- supportsPromptCache: false,
1860
- inputPrice: 0,
1861
- outputPrice: 0,
1862
- description: "Fast, stable instruction-tuned model optimized for complex tasks, RAG, and tool use without thinking traces."
1863
- },
1864
- "Qwen/Qwen3-Next-80B-A3B-Thinking": {
1865
- maxTokens: 32768,
1866
- contextWindow: 131072,
1867
- supportsImages: false,
1868
- supportsPromptCache: false,
1869
- inputPrice: 0,
1870
- outputPrice: 0,
1871
- description: "Reasoning-first model with structured thinking traces for multi-step problems, math proofs, and code synthesis."
1872
- },
1873
- "Qwen/Qwen3-VL-235B-A22B-Thinking": {
1874
- maxTokens: 262144,
1875
- contextWindow: 262144,
1876
- supportsImages: true,
1877
- supportsPromptCache: false,
1878
- inputPrice: 0.16,
1879
- outputPrice: 0.65,
1880
- description: "Qwen3\u2011VL\u2011235B\u2011A22B\u2011Thinking is an open\u2011weight MoE vision\u2011language model (235B total, ~22B activated) optimized for deliberate multi\u2011step reasoning with strong text\u2011image\u2011video understanding and long\u2011context capabilities."
1790
+ inputPrice: 0.45,
1791
+ outputPrice: 1.8,
1792
+ description: "Qwen3 Coder 480B (MoE model with 35B active parameters)"
1881
1793
  }
1882
1794
  };
1883
- var chutesDefaultModelInfo = chutesModels[chutesDefaultModelId];
1795
+ var BEDROCK_DEFAULT_TEMPERATURE = 0.3;
1796
+ var BEDROCK_MAX_TOKENS = 4096;
1797
+ var BEDROCK_DEFAULT_CONTEXT = 128e3;
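These defaults suggest a fallback path for model ids that are not in the table (for example a custom ARN). A sketch of that behavior under that assumption — bedrockInfoFor is a hypothetical helper; the package itself only exports the constants:

// Sketch: fall back to the package defaults for an unrecognized Bedrock
// model id. "bedrockInfoFor" is illustrative, not a package export.
function bedrockInfoFor(modelId) {
  return (
    bedrockModels[modelId] ?? {
      maxTokens: BEDROCK_MAX_TOKENS,          // 4096
      contextWindow: BEDROCK_DEFAULT_CONTEXT, // 128K
      supportsImages: false,
      supportsPromptCache: false,
    }
  );
}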
1798
+ var AWS_INFERENCE_PROFILE_MAPPING = [
1799
+ // Australia regions (Sydney and Melbourne) → au. inference profile (most specific - 14 chars)
1800
+ ["ap-southeast-2", "au."],
1801
+ ["ap-southeast-4", "au."],
1802
+ // Japan regions (Tokyo and Osaka) → jp. inference profile (13 chars)
1803
+ ["ap-northeast-", "jp."],
1804
+ // US Government Cloud → ug. inference profile (7 chars)
1805
+ ["us-gov-", "ug."],
1806
+ // Americas regions → us. inference profile (3 chars)
1807
+ ["us-", "us."],
1808
+ // Europe regions → eu. inference profile (3 chars)
1809
+ ["eu-", "eu."],
1810
+ // Asia Pacific regions → apac. inference profile (3 chars)
1811
+ ["ap-", "apac."],
1812
+ // Canada regions → ca. inference profile (3 chars)
1813
+ ["ca-", "ca."],
1814
+ // South America regions → sa. inference profile (3 chars)
1815
+ ["sa-", "sa."]
1816
+ ];
1817
+ var BEDROCK_REGIONS = [
1818
+ { value: "us-east-1", label: "us-east-1" },
1819
+ { value: "us-east-2", label: "us-east-2" },
1820
+ { value: "us-west-1", label: "us-west-1" },
1821
+ { value: "us-west-2", label: "us-west-2" },
1822
+ { value: "ap-northeast-1", label: "ap-northeast-1" },
1823
+ { value: "ap-northeast-2", label: "ap-northeast-2" },
1824
+ { value: "ap-northeast-3", label: "ap-northeast-3" },
1825
+ { value: "ap-south-1", label: "ap-south-1" },
1826
+ { value: "ap-south-2", label: "ap-south-2" },
1827
+ { value: "ap-southeast-1", label: "ap-southeast-1" },
1828
+ { value: "ap-southeast-2", label: "ap-southeast-2" },
1829
+ { value: "ap-east-1", label: "ap-east-1" },
1830
+ { value: "eu-central-1", label: "eu-central-1" },
1831
+ { value: "eu-central-2", label: "eu-central-2" },
1832
+ { value: "eu-west-1", label: "eu-west-1" },
1833
+ { value: "eu-west-2", label: "eu-west-2" },
1834
+ { value: "eu-west-3", label: "eu-west-3" },
1835
+ { value: "eu-north-1", label: "eu-north-1" },
1836
+ { value: "eu-south-1", label: "eu-south-1" },
1837
+ { value: "eu-south-2", label: "eu-south-2" },
1838
+ { value: "ca-central-1", label: "ca-central-1" },
1839
+ { value: "sa-east-1", label: "sa-east-1" },
1840
+ { value: "us-gov-east-1", label: "us-gov-east-1" },
1841
+ { value: "us-gov-west-1", label: "us-gov-west-1" }
1842
+ ].sort((a, b) => a.value.localeCompare(b.value));
1843
+ var BEDROCK_1M_CONTEXT_MODEL_IDS = [
1844
+ "anthropic.claude-sonnet-4-20250514-v1:0",
1845
+ "anthropic.claude-sonnet-4-5-20250929-v1:0",
1846
+ "anthropic.claude-opus-4-6-v1"
1847
+ ];
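For these 1M-context models, the top-level prices of an entry apply within its base contextWindow, while the "tiers" array (see anthropic.claude-opus-4-6-v1 above) carries the extended-context rates behind the beta flag. A sketch of selecting the effective pricing, under that reading — effectivePricing is a hypothetical helper:

// Sketch: pick the tier whose contextWindow covers the prompt once the base
// window is exceeded; otherwise use the entry's own prices.
function effectivePricing(info, promptTokens) {
  const tier = (info.tiers ?? []).find((t) => promptTokens <= t.contextWindow);
  return promptTokens > info.contextWindow && tier ? tier : info;
}

const opus = bedrockModels["anthropic.claude-opus-4-6-v1"];
effectivePricing(opus, 150_000).inputPrice; // 5  — within the 200K base window
effectivePricing(opus, 500_000).inputPrice; // 10 — extended-context tier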
1848
+ var BEDROCK_GLOBAL_INFERENCE_MODEL_IDS = [
1849
+ "anthropic.claude-sonnet-4-20250514-v1:0",
1850
+ "anthropic.claude-sonnet-4-5-20250929-v1:0",
1851
+ "anthropic.claude-haiku-4-5-20251001-v1:0",
1852
+ "anthropic.claude-opus-4-5-20251101-v1:0",
1853
+ "anthropic.claude-opus-4-6-v1"
1854
+ ];
1855
+ var BEDROCK_SERVICE_TIER_MODEL_IDS = [
1856
+ // Amazon Nova models
1857
+ "amazon.nova-lite-v1:0",
1858
+ "amazon.nova-2-lite-v1:0",
1859
+ "amazon.nova-pro-v1:0",
1860
+ "amazon.nova-pro-latency-optimized-v1:0",
1861
+ // DeepSeek models
1862
+ "deepseek.r1-v1:0",
1863
+ // Qwen models
1864
+ "qwen.qwen3-next-80b-a3b",
1865
+ "qwen.qwen3-coder-480b-a35b-v1:0",
1866
+ // OpenAI GPT-OSS models
1867
+ "openai.gpt-oss-20b-1:0",
1868
+ "openai.gpt-oss-120b-1:0"
1869
+ ];
1870
+ var BEDROCK_SERVICE_TIER_PRICING = {
1871
+ STANDARD: 1,
1872
+ // Base price
1873
+ FLEX: 0.5,
1874
+ // 50% discount from standard
1875
+ PRIORITY: 1.75
1876
+ // 75% premium over standard
1877
+ };
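The tier table holds multipliers relative to standard pricing rather than absolute prices, so applying a tier is a single multiplication. A sketch, with priceForTier being a hypothetical helper:

// Sketch: scale a model's base per-million-token price by the service tier.
function priceForTier(basePricePerMTok, tier) {
  const multiplier = BEDROCK_SERVICE_TIER_PRICING[tier] ?? BEDROCK_SERVICE_TIER_PRICING.STANDARD;
  return basePricePerMTok * multiplier;
}

priceForTier(0.8, "FLEX");     // 0.4  — Nova Pro input at the flex tier
priceForTier(0.8, "PRIORITY"); // 1.4  — 0.8 * 1.75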
1884
1878
 
1885
1879
  // src/providers/deepseek.ts
1886
1880
  var deepSeekDefaultModelId = "deepseek-chat";
@@ -1921,109 +1915,6 @@ var deepSeekModels = {
1921
1915
  };
1922
1916
  var DEEP_SEEK_DEFAULT_TEMPERATURE = 0.3;
1923
1917
 
1924
- // src/providers/doubao.ts
1925
- var doubaoDefaultModelId = "doubao-seed-1-6-250615";
1926
- var doubaoModels = {
1927
- "doubao-seed-1-6-250615": {
1928
- maxTokens: 32768,
1929
- contextWindow: 128e3,
1930
- supportsImages: true,
1931
- supportsPromptCache: true,
1932
- inputPrice: 1e-4,
1933
- // $0.0001 per million tokens (cache miss)
1934
- outputPrice: 4e-4,
1935
- // $0.0004 per million tokens
1936
- cacheWritesPrice: 1e-4,
1937
- // $0.0001 per million tokens (cache miss)
1938
- cacheReadsPrice: 2e-5,
1939
- // $0.00002 per million tokens (cache hit)
1940
- description: `Doubao Seed 1.6 is a powerful model designed for high-performance tasks with extensive context handling.`
1941
- },
1942
- "doubao-seed-1-6-thinking-250715": {
1943
- maxTokens: 32768,
1944
- contextWindow: 128e3,
1945
- supportsImages: true,
1946
- supportsPromptCache: true,
1947
- inputPrice: 2e-4,
1948
- // $0.0002 per million tokens
1949
- outputPrice: 8e-4,
1950
- // $0.0008 per million tokens
1951
- cacheWritesPrice: 2e-4,
1952
- // $0.0002 per million
1953
- cacheReadsPrice: 4e-5,
1954
- // $0.00004 per million tokens (cache hit)
1955
- description: `Doubao Seed 1.6 Thinking is optimized for reasoning tasks, providing enhanced performance in complex problem-solving scenarios.`
1956
- },
1957
- "doubao-seed-1-6-flash-250715": {
1958
- maxTokens: 32768,
1959
- contextWindow: 128e3,
1960
- supportsImages: true,
1961
- supportsPromptCache: true,
1962
- inputPrice: 15e-5,
1963
- // $0.00015 per million tokens
1964
- outputPrice: 6e-4,
1965
- // $0.0006 per million tokens
1966
- cacheWritesPrice: 15e-5,
1967
- // $0.00015 per million
1968
- cacheReadsPrice: 3e-5,
1969
- // $0.00003 per million tokens (cache hit)
1970
- description: `Doubao Seed 1.6 Flash is tailored for speed and efficiency, making it ideal for applications requiring rapid responses.`
1971
- }
1972
- };
1973
- var doubaoDefaultModelInfo = doubaoModels[doubaoDefaultModelId];
1974
- var DOUBAO_API_BASE_URL = "https://ark.cn-beijing.volces.com/api/v3";
1975
- var DOUBAO_API_CHAT_PATH = "/chat/completions";
1976
-
1977
- // src/providers/featherless.ts
1978
- var featherlessModels = {
1979
- "deepseek-ai/DeepSeek-V3-0324": {
1980
- maxTokens: 4096,
1981
- contextWindow: 32678,
1982
- supportsImages: false,
1983
- supportsPromptCache: false,
1984
- inputPrice: 0,
1985
- outputPrice: 0,
1986
- description: "DeepSeek V3 0324 model."
1987
- },
1988
- "deepseek-ai/DeepSeek-R1-0528": {
1989
- maxTokens: 4096,
1990
- contextWindow: 32678,
1991
- supportsImages: false,
1992
- supportsPromptCache: false,
1993
- inputPrice: 0,
1994
- outputPrice: 0,
1995
- description: "DeepSeek R1 0528 model."
1996
- },
1997
- "moonshotai/Kimi-K2-Instruct": {
1998
- maxTokens: 4096,
1999
- contextWindow: 32678,
2000
- supportsImages: false,
2001
- supportsPromptCache: false,
2002
- inputPrice: 0,
2003
- outputPrice: 0,
2004
- description: "Kimi K2 Instruct model."
2005
- },
2006
- "openai/gpt-oss-120b": {
2007
- maxTokens: 4096,
2008
- contextWindow: 32678,
2009
- supportsImages: false,
2010
- supportsPromptCache: false,
2011
- inputPrice: 0,
2012
- outputPrice: 0,
2013
- description: "GPT-OSS 120B model."
2014
- },
2015
- "Qwen/Qwen3-Coder-480B-A35B-Instruct": {
2016
- maxTokens: 4096,
2017
- contextWindow: 32678,
2018
- supportsImages: false,
2019
- supportsPromptCache: false,
2020
- inputPrice: 0,
2021
- outputPrice: 0,
2022
- description: "Qwen3 Coder 480B A35B Instruct model."
2023
- }
2024
- };
2025
- var featherlessDefaultModelId = "moonshotai/Kimi-K2-Instruct";
2026
-
2027
1918
  // src/providers/fireworks.ts
2028
1919
  var fireworksDefaultModelId = "accounts/fireworks/models/kimi-k2-instruct-0905";
2029
1920
  var fireworksModels = {
@@ -2439,121 +2330,6 @@ var geminiModels = {
2439
2330
  }
2440
2331
  };
2441
2332
 
2442
- // src/providers/groq.ts
2443
- var groqDefaultModelId = "moonshotai/kimi-k2-instruct-0905";
2444
- var groqModels = {
2445
- // Models based on API response: https://api.groq.com/openai/v1/models
2446
- "llama-3.1-8b-instant": {
2447
- maxTokens: 8192,
2448
- contextWindow: 131072,
2449
- supportsImages: false,
2450
- supportsPromptCache: false,
2451
- inputPrice: 0.05,
2452
- outputPrice: 0.08,
2453
- description: "Meta Llama 3.1 8B Instant model, 128K context."
2454
- },
2455
- "llama-3.3-70b-versatile": {
2456
- maxTokens: 8192,
2457
- contextWindow: 131072,
2458
- supportsImages: false,
2459
- supportsPromptCache: false,
2460
- inputPrice: 0.59,
2461
- outputPrice: 0.79,
2462
- description: "Meta Llama 3.3 70B Versatile model, 128K context."
2463
- },
2464
- "meta-llama/llama-4-scout-17b-16e-instruct": {
2465
- maxTokens: 8192,
2466
- contextWindow: 131072,
2467
- supportsImages: false,
2468
- supportsPromptCache: false,
2469
- inputPrice: 0.11,
2470
- outputPrice: 0.34,
2471
- description: "Meta Llama 4 Scout 17B Instruct model, 128K context."
2472
- },
2473
- "qwen/qwen3-32b": {
2474
- maxTokens: 8192,
2475
- contextWindow: 131072,
2476
- supportsImages: false,
2477
- supportsPromptCache: false,
2478
- inputPrice: 0.29,
2479
- outputPrice: 0.59,
2480
- description: "Alibaba Qwen 3 32B model, 128K context."
2481
- },
2482
- "moonshotai/kimi-k2-instruct-0905": {
2483
- maxTokens: 16384,
2484
- contextWindow: 262144,
2485
- supportsImages: false,
2486
- supportsPromptCache: true,
2487
- inputPrice: 0.6,
2488
- outputPrice: 2.5,
2489
- cacheReadsPrice: 0.15,
2490
- description: "Kimi K2 model gets a new version update: Agentic coding: more accurate, better generalization across scaffolds. Frontend coding: improved aesthetics and functionalities on web, 3d, and other tasks. Context length: extended from 128k to 256k, providing better long-horizon support."
2491
- },
2492
- "openai/gpt-oss-120b": {
2493
- maxTokens: 32766,
2494
- contextWindow: 131072,
2495
- supportsImages: false,
2496
- supportsPromptCache: false,
2497
- inputPrice: 0.15,
2498
- outputPrice: 0.75,
2499
- description: "GPT-OSS 120B is OpenAI's flagship open source model, built on a Mixture-of-Experts (MoE) architecture with 20 billion parameters and 128 experts."
2500
- },
2501
- "openai/gpt-oss-20b": {
2502
- maxTokens: 32768,
2503
- contextWindow: 131072,
2504
- supportsImages: false,
2505
- supportsPromptCache: false,
2506
- inputPrice: 0.1,
2507
- outputPrice: 0.5,
2508
- description: "GPT-OSS 20B is OpenAI's flagship open source model, built on a Mixture-of-Experts (MoE) architecture with 20 billion parameters and 32 experts."
2509
- }
2510
- };
2511
-
2512
- // src/providers/huggingface.ts
2513
- var HUGGINGFACE_DEFAULT_MAX_TOKENS = 2048;
2514
- var HUGGINGFACE_MAX_TOKENS_FALLBACK = 8192;
2515
- var HUGGINGFACE_DEFAULT_CONTEXT_WINDOW = 128e3;
2516
- var HUGGINGFACE_SLIDER_STEP = 256;
2517
- var HUGGINGFACE_SLIDER_MIN = 1;
2518
- var HUGGINGFACE_TEMPERATURE_MAX_VALUE = 2;
2519
- var HUGGINGFACE_API_URL = "https://router.huggingface.co/v1/models?collection=roocode";
2520
- var HUGGINGFACE_CACHE_DURATION = 1e3 * 60 * 60;
2521
-
2522
- // src/providers/io-intelligence.ts
2523
- var ioIntelligenceDefaultModelId = "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8";
2524
- var ioIntelligenceDefaultBaseUrl = "https://api.intelligence.io.solutions/api/v1";
2525
- var IO_INTELLIGENCE_CACHE_DURATION = 1e3 * 60 * 60;
2526
- var ioIntelligenceModels = {
2527
- "deepseek-ai/DeepSeek-R1-0528": {
2528
- maxTokens: 8192,
2529
- contextWindow: 128e3,
2530
- supportsImages: false,
2531
- supportsPromptCache: false,
2532
- description: "DeepSeek R1 reasoning model"
2533
- },
2534
- "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8": {
2535
- maxTokens: 8192,
2536
- contextWindow: 43e4,
2537
- supportsImages: true,
2538
- supportsPromptCache: false,
2539
- description: "Llama 4 Maverick 17B model"
2540
- },
2541
- "Intel/Qwen3-Coder-480B-A35B-Instruct-int4-mixed-ar": {
2542
- maxTokens: 8192,
2543
- contextWindow: 106e3,
2544
- supportsImages: false,
2545
- supportsPromptCache: false,
2546
- description: "Qwen3 Coder 480B specialized for coding"
2547
- },
2548
- "openai/gpt-oss-120b": {
2549
- maxTokens: 8192,
2550
- contextWindow: 131072,
2551
- supportsImages: false,
2552
- supportsPromptCache: false,
2553
- description: "OpenAI GPT-OSS 120B model"
2554
- }
2555
- };
2556
-
2557
2333
  // src/providers/lite-llm.ts
2558
2334
  var litellmDefaultModelId = "claude-3-7-sonnet-20250219";
2559
2335
  var litellmDefaultModelInfo = {
@@ -3255,7 +3031,7 @@ var openAiModelInfoSaneDefaults = {
3255
3031
  inputPrice: 0,
3256
3032
  outputPrice: 0
3257
3033
  };
3258
- var azureOpenAiDefaultApiVersion = "2024-08-01-preview";
3034
+ var azureOpenAiDefaultApiVersion = "2025-04-01-preview";
3259
3035
  var OPENAI_NATIVE_DEFAULT_TEMPERATURE = 0;
3260
3036
  var OPENAI_AZURE_AI_INFERENCE_PATH = "/models/chat/completions";
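How these two constants compose into a request URL, as a hedged sketch: the host below is a placeholder, and whether the client pairs exactly this path with the default api-version is an assumption rather than something the diff shows.

// Sketch: Azure passes the API version as a query parameter; this path
// targets the Azure AI (Models-as-a-Service) inference route.
const baseUrl = "https://my-resource.services.ai.azure.com"; // placeholder host
const url = `${baseUrl}${OPENAI_AZURE_AI_INFERENCE_PATH}?api-version=${azureOpenAiDefaultApiVersion}`;
// => https://my-resource.services.ai.azure.com/models/chat/completions?api-version=2025-04-01-preview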
3261
3037
 
@@ -3645,19 +3421,6 @@ var sambaNovaModels = {
3645
3421
  }
3646
3422
  };
3647
3423
 
3648
- // src/providers/unbound.ts
3649
- var unboundDefaultModelId = "anthropic/claude-sonnet-4-5";
3650
- var unboundDefaultModelInfo = {
3651
- maxTokens: 8192,
3652
- contextWindow: 2e5,
3653
- supportsImages: true,
3654
- supportsPromptCache: true,
3655
- inputPrice: 3,
3656
- outputPrice: 15,
3657
- cacheWritesPrice: 3.75,
3658
- cacheReadsPrice: 0.3
3659
- };
3660
-
3661
3424
  // src/providers/vertex.ts
3662
3425
  var vertexDefaultModelId = "claude-sonnet-4-5@20250929";
3663
3426
  var vertexModels = {
@@ -4924,18 +4687,6 @@ var zaiApiLineConfigs = {
4924
4687
  }
4925
4688
  };
4926
4689
 
4927
- // src/providers/deepinfra.ts
4928
- var deepInfraDefaultModelId = "Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo";
4929
- var deepInfraDefaultModelInfo = {
4930
- maxTokens: 16384,
4931
- contextWindow: 262144,
4932
- supportsImages: false,
4933
- supportsPromptCache: false,
4934
- inputPrice: 0.3,
4935
- outputPrice: 1.2,
4936
- description: "Qwen 3 Coder 480B A35B Instruct Turbo model, 256K context."
4937
- };
4938
-
4939
4690
  // src/providers/minimax.ts
4940
4691
  var minimaxDefaultModelId = "MiniMax-M2";
4941
4692
  var minimaxModels = {
@@ -4993,18 +4744,10 @@ function getProviderDefaultModelId(provider, options = { isChina: false }) {
4993
4744
  return openRouterDefaultModelId;
4994
4745
  case "requesty":
4995
4746
  return requestyDefaultModelId;
4996
- case "unbound":
4997
- return unboundDefaultModelId;
4998
4747
  case "litellm":
4999
4748
  return litellmDefaultModelId;
5000
4749
  case "xai":
5001
4750
  return xaiDefaultModelId;
5002
- case "groq":
5003
- return groqDefaultModelId;
5004
- case "huggingface":
5005
- return "meta-llama/Llama-3.3-70B-Instruct";
5006
- case "chutes":
5007
- return chutesDefaultModelId;
5008
4751
  case "baseten":
5009
4752
  return basetenDefaultModelId;
5010
4753
  case "bedrock":
@@ -5015,8 +4758,6 @@ function getProviderDefaultModelId(provider, options = { isChina: false }) {
5015
4758
  return geminiDefaultModelId;
5016
4759
  case "deepseek":
5017
4760
  return deepSeekDefaultModelId;
5018
- case "doubao":
5019
- return doubaoDefaultModelId;
5020
4761
  case "moonshot":
5021
4762
  return moonshotDefaultModelId;
5022
4763
  case "minimax":
@@ -5039,26 +4780,20 @@ function getProviderDefaultModelId(provider, options = { isChina: false }) {
5039
4780
  case "lmstudio":
5040
4781
  return "";
5041
4782
  // LMStudio uses dynamic model selection
5042
- case "deepinfra":
5043
- return deepInfraDefaultModelId;
5044
4783
  case "vscode-lm":
5045
4784
  return vscodeLlmDefaultModelId;
5046
- case "cerebras":
5047
- return cerebrasDefaultModelId;
5048
4785
  case "sambanova":
5049
4786
  return sambaNovaDefaultModelId;
5050
4787
  case "fireworks":
5051
4788
  return fireworksDefaultModelId;
5052
- case "featherless":
5053
- return featherlessDefaultModelId;
5054
- case "io-intelligence":
5055
- return ioIntelligenceDefaultModelId;
5056
4789
  case "roo":
5057
4790
  return rooDefaultModelId;
5058
4791
  case "qwen-code":
5059
4792
  return qwenCodeDefaultModelId;
5060
4793
  case "vercel-ai-gateway":
5061
4794
  return vercelAiGatewayDefaultModelId;
4795
+ case "azure":
4796
+ return azureDefaultModelId;
5062
4797
  case "anthropic":
5063
4798
  case "gemini-cli":
5064
4799
  case "fake-ai":
@@ -5069,18 +4804,7 @@ function getProviderDefaultModelId(provider, options = { isChina: false }) {
5069
4804
 
5070
4805
  // src/provider-settings.ts
5071
4806
  var DEFAULT_CONSECUTIVE_MISTAKE_LIMIT = 3;
5072
- var dynamicProviders = [
5073
- "openrouter",
5074
- "vercel-ai-gateway",
5075
- "huggingface",
5076
- "litellm",
5077
- "deepinfra",
5078
- "io-intelligence",
5079
- "requesty",
5080
- "unbound",
5081
- "roo",
5082
- "chutes"
5083
- ];
4807
+ var dynamicProviders = ["openrouter", "vercel-ai-gateway", "litellm", "requesty", "roo"];
5084
4808
  var isDynamicProvider = (key) => dynamicProviders.includes(key);
5085
4809
  var localProviders = ["ollama", "lmstudio"];
5086
4810
  var isLocalProvider = (key) => localProviders.includes(key);
@@ -5097,16 +4821,13 @@ var providerNames = [
5097
4821
  ...customProviders,
5098
4822
  ...fauxProviders,
5099
4823
  "anthropic",
4824
+ "azure",
5100
4825
  "bedrock",
5101
4826
  "baseten",
5102
- "cerebras",
5103
- "doubao",
5104
4827
  "deepseek",
5105
- "featherless",
5106
4828
  "fireworks",
5107
4829
  "gemini",
5108
4830
  "gemini-cli",
5109
- "groq",
5110
4831
  "mistral",
5111
4832
  "moonshot",
5112
4833
  "minimax",
@@ -5121,10 +4842,24 @@ var providerNames = [
5121
4842
  ];
5122
4843
  var providerNamesSchema = z8.enum(providerNames);
5123
4844
  var isProviderName = (key) => typeof key === "string" && providerNames.includes(key);
4845
+ var retiredProviderNames = [
4846
+ "cerebras",
4847
+ "chutes",
4848
+ "deepinfra",
4849
+ "doubao",
4850
+ "featherless",
4851
+ "groq",
4852
+ "huggingface",
4853
+ "io-intelligence",
4854
+ "unbound"
4855
+ ];
4856
+ var retiredProviderNamesSchema = z8.enum(retiredProviderNames);
4857
+ var isRetiredProvider = (value) => retiredProviderNames.includes(value);
4858
+ var providerNamesWithRetiredSchema = z8.union([providerNamesSchema, retiredProviderNamesSchema]);
5124
4859
  var providerSettingsEntrySchema = z8.object({
5125
4860
  id: z8.string(),
5126
4861
  name: z8.string(),
5127
- apiProvider: providerNamesSchema.optional(),
4862
+ apiProvider: providerNamesWithRetiredSchema.optional(),
5128
4863
  modelId: z8.string().optional()
5129
4864
  });
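The retired-provider union means persisted profiles that still name a removed provider parse instead of failing validation, and isRetiredProvider can then gate a migration path. An illustrative sketch (the id and name are placeholders):

// Sketch: an old Groq profile still loads, then gets flagged for migration.
const entry = providerSettingsEntrySchema.parse({
  id: "abc123",
  name: "Legacy Groq profile",
  apiProvider: "groq", // retired, but accepted via providerNamesWithRetiredSchema
});

if (isRetiredProvider(entry.apiProvider)) {
  console.warn(`${entry.apiProvider} is no longer supported; choose a new provider.`);
}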
5130
4865
  var baseProviderSettingsSchema = z8.object({
@@ -5184,8 +4919,6 @@ var vertexSchema = apiModelIdProviderModelSchema.extend({
5184
4919
  vertexJsonCredentials: z8.string().optional(),
5185
4920
  vertexProjectId: z8.string().optional(),
5186
4921
  vertexRegion: z8.string().optional(),
5187
- enableUrlContext: z8.boolean().optional(),
5188
- enableGrounding: z8.boolean().optional(),
5189
4922
  vertex1MContext: z8.boolean().optional()
5190
4923
  // Enable 'context-1m-2025-08-07' beta for 1M context window.
5191
4924
  });
@@ -5224,9 +4957,7 @@ var lmStudioSchema = baseProviderSettingsSchema.extend({
5224
4957
  });
5225
4958
  var geminiSchema = apiModelIdProviderModelSchema.extend({
5226
4959
  geminiApiKey: z8.string().optional(),
5227
- googleGeminiBaseUrl: z8.string().optional(),
5228
- enableUrlContext: z8.boolean().optional(),
5229
- enableGrounding: z8.boolean().optional()
4960
+ googleGeminiBaseUrl: z8.string().optional()
5230
4961
  });
5231
4962
  var geminiCliSchema = apiModelIdProviderModelSchema.extend({
5232
4963
  geminiCliOAuthPath: z8.string().optional(),
@@ -5250,15 +4981,6 @@ var deepSeekSchema = apiModelIdProviderModelSchema.extend({
  deepSeekBaseUrl: z8.string().optional(),
  deepSeekApiKey: z8.string().optional()
  });
- var deepInfraSchema = apiModelIdProviderModelSchema.extend({
- deepInfraBaseUrl: z8.string().optional(),
- deepInfraApiKey: z8.string().optional(),
- deepInfraModelId: z8.string().optional()
- });
- var doubaoSchema = apiModelIdProviderModelSchema.extend({
- doubaoBaseUrl: z8.string().optional(),
- doubaoApiKey: z8.string().optional()
- });
  var moonshotSchema = apiModelIdProviderModelSchema.extend({
  moonshotBaseUrl: z8.union([z8.literal("https://api.moonshot.ai/v1"), z8.literal("https://api.moonshot.cn/v1")]).optional(),
  moonshotApiKey: z8.string().optional()
@@ -5267,10 +4989,6 @@ var minimaxSchema = apiModelIdProviderModelSchema.extend({
  minimaxBaseUrl: z8.union([z8.literal("https://api.minimax.io/v1"), z8.literal("https://api.minimaxi.com/v1")]).optional(),
  minimaxApiKey: z8.string().optional()
  });
- var unboundSchema = baseProviderSettingsSchema.extend({
- unboundApiKey: z8.string().optional(),
- unboundModelId: z8.string().optional()
- });
  var requestySchema = baseProviderSettingsSchema.extend({
  requestyBaseUrl: z8.string().optional(),
  requestyApiKey: z8.string().optional(),
@@ -5282,26 +5000,12 @@ var fakeAiSchema = baseProviderSettingsSchema.extend({
  var xaiSchema = apiModelIdProviderModelSchema.extend({
  xaiApiKey: z8.string().optional()
  });
- var groqSchema = apiModelIdProviderModelSchema.extend({
- groqApiKey: z8.string().optional()
- });
- var huggingFaceSchema = baseProviderSettingsSchema.extend({
- huggingFaceApiKey: z8.string().optional(),
- huggingFaceModelId: z8.string().optional(),
- huggingFaceInferenceProvider: z8.string().optional()
- });
- var chutesSchema = apiModelIdProviderModelSchema.extend({
- chutesApiKey: z8.string().optional()
- });
  var litellmSchema = baseProviderSettingsSchema.extend({
  litellmBaseUrl: z8.string().optional(),
  litellmApiKey: z8.string().optional(),
  litellmModelId: z8.string().optional(),
  litellmUsePromptCache: z8.boolean().optional()
  });
- var cerebrasSchema = apiModelIdProviderModelSchema.extend({
- cerebrasApiKey: z8.string().optional()
- });
  var sambaNovaSchema = apiModelIdProviderModelSchema.extend({
  sambaNovaApiKey: z8.string().optional()
  });
@@ -5313,13 +5017,6 @@ var zaiSchema = apiModelIdProviderModelSchema.extend({
  var fireworksSchema = apiModelIdProviderModelSchema.extend({
  fireworksApiKey: z8.string().optional()
  });
- var featherlessSchema = apiModelIdProviderModelSchema.extend({
- featherlessApiKey: z8.string().optional()
- });
- var ioIntelligenceSchema = apiModelIdProviderModelSchema.extend({
- ioIntelligenceModelId: z8.string().optional(),
- ioIntelligenceApiKey: z8.string().optional()
- });
  var qwenCodeSchema = apiModelIdProviderModelSchema.extend({
  qwenCodeOauthPath: z8.string().optional()
  });
@@ -5334,11 +5031,18 @@ var vercelAiGatewaySchema = baseProviderSettingsSchema.extend({
  var basetenSchema = apiModelIdProviderModelSchema.extend({
  basetenApiKey: z8.string().optional()
  });
+ var azureSchema = apiModelIdProviderModelSchema.extend({
+ azureApiKey: z8.string().optional(),
+ azureResourceName: z8.string().optional(),
+ azureDeploymentName: z8.string().optional(),
+ azureApiVersion: z8.string().optional()
+ });
  var defaultSchema = z8.object({
  apiProvider: z8.undefined()
  });
  var providerSettingsSchemaDiscriminated = z8.discriminatedUnion("apiProvider", [
  anthropicSchema.merge(z8.object({ apiProvider: z8.literal("anthropic") })),
+ azureSchema.merge(z8.object({ apiProvider: z8.literal("azure") })),
  openRouterSchema.merge(z8.object({ apiProvider: z8.literal("openrouter") })),
  bedrockSchema.merge(z8.object({ apiProvider: z8.literal("bedrock") })),
  vertexSchema.merge(z8.object({ apiProvider: z8.literal("vertex") })),
@@ -5352,33 +5056,25 @@ var providerSettingsSchemaDiscriminated = z8.discriminatedUnion("apiProvider", [
  openAiNativeSchema.merge(z8.object({ apiProvider: z8.literal("openai-native") })),
  mistralSchema.merge(z8.object({ apiProvider: z8.literal("mistral") })),
  deepSeekSchema.merge(z8.object({ apiProvider: z8.literal("deepseek") })),
- deepInfraSchema.merge(z8.object({ apiProvider: z8.literal("deepinfra") })),
- doubaoSchema.merge(z8.object({ apiProvider: z8.literal("doubao") })),
  moonshotSchema.merge(z8.object({ apiProvider: z8.literal("moonshot") })),
  minimaxSchema.merge(z8.object({ apiProvider: z8.literal("minimax") })),
- unboundSchema.merge(z8.object({ apiProvider: z8.literal("unbound") })),
  requestySchema.merge(z8.object({ apiProvider: z8.literal("requesty") })),
  fakeAiSchema.merge(z8.object({ apiProvider: z8.literal("fake-ai") })),
  xaiSchema.merge(z8.object({ apiProvider: z8.literal("xai") })),
- groqSchema.merge(z8.object({ apiProvider: z8.literal("groq") })),
  basetenSchema.merge(z8.object({ apiProvider: z8.literal("baseten") })),
- huggingFaceSchema.merge(z8.object({ apiProvider: z8.literal("huggingface") })),
- chutesSchema.merge(z8.object({ apiProvider: z8.literal("chutes") })),
  litellmSchema.merge(z8.object({ apiProvider: z8.literal("litellm") })),
- cerebrasSchema.merge(z8.object({ apiProvider: z8.literal("cerebras") })),
  sambaNovaSchema.merge(z8.object({ apiProvider: z8.literal("sambanova") })),
  zaiSchema.merge(z8.object({ apiProvider: z8.literal("zai") })),
  fireworksSchema.merge(z8.object({ apiProvider: z8.literal("fireworks") })),
- featherlessSchema.merge(z8.object({ apiProvider: z8.literal("featherless") })),
- ioIntelligenceSchema.merge(z8.object({ apiProvider: z8.literal("io-intelligence") })),
  qwenCodeSchema.merge(z8.object({ apiProvider: z8.literal("qwen-code") })),
  rooSchema.merge(z8.object({ apiProvider: z8.literal("roo") })),
  vercelAiGatewaySchema.merge(z8.object({ apiProvider: z8.literal("vercel-ai-gateway") })),
  defaultSchema
  ]);
  var providerSettingsSchema = z8.object({
- apiProvider: providerNamesSchema.optional(),
+ apiProvider: providerNamesWithRetiredSchema.optional(),
  ...anthropicSchema.shape,
+ ...azureSchema.shape,
  ...openRouterSchema.shape,
  ...bedrockSchema.shape,
  ...vertexSchema.shape,
@@ -5392,25 +5088,16 @@ var providerSettingsSchema = z8.object({
  ...openAiNativeSchema.shape,
  ...mistralSchema.shape,
  ...deepSeekSchema.shape,
- ...deepInfraSchema.shape,
- ...doubaoSchema.shape,
  ...moonshotSchema.shape,
  ...minimaxSchema.shape,
- ...unboundSchema.shape,
  ...requestySchema.shape,
  ...fakeAiSchema.shape,
  ...xaiSchema.shape,
- ...groqSchema.shape,
  ...basetenSchema.shape,
- ...huggingFaceSchema.shape,
- ...chutesSchema.shape,
  ...litellmSchema.shape,
- ...cerebrasSchema.shape,
  ...sambaNovaSchema.shape,
  ...zaiSchema.shape,
  ...fireworksSchema.shape,
- ...featherlessSchema.shape,
- ...ioIntelligenceSchema.shape,
  ...qwenCodeSchema.shape,
  ...rooSchema.shape,
  ...vercelAiGatewaySchema.shape,
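
The hunks above register the new "azure" provider in both the discriminated union and the flat settings schema. A minimal TypeScript sketch of a settings object the updated union should accept; the field names come from the azureSchema added above, while every concrete value is invented for illustration:

```ts
import { providerSettingsSchemaDiscriminated } from "@roo-code/types";

// All azure* fields are declared optional, so this candidate should validate.
const result = providerSettingsSchemaDiscriminated.safeParse({
  apiProvider: "azure",
  azureApiKey: "example-key",               // stored under the new "azureApiKey" secret key
  azureResourceName: "my-resource",         // illustrative resource name
  azureDeploymentName: "my-gpt-deployment", // a deployment name, not an upstream model ID
  azureApiVersion: "2024-06-01",            // illustrative API version string
});

console.log(result.success); // expected: true
```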
@@ -5428,13 +5115,9 @@ var modelIdKeys = [
  "ollamaModelId",
  "lmStudioModelId",
  "lmStudioDraftModelId",
- "unboundModelId",
  "requestyModelId",
  "litellmModelId",
- "huggingFaceModelId",
- "ioIntelligenceModelId",
- "vercelAiGatewayModelId",
- "deepInfraModelId"
+ "vercelAiGatewayModelId"
  ];
  var getModelId = (settings) => {
  const modelIdKey = modelIdKeys.find((key) => settings[key]);
@@ -5443,6 +5126,7 @@ var getModelId = (settings) => {
  var isTypicalProvider = (key) => isProviderName(key) && !isInternalProvider(key) && !isCustomProvider(key) && !isFauxProvider(key);
  var modelIdKeysByProvider = {
  anthropic: "apiModelId",
+ azure: "apiModelId",
  openrouter: "openRouterModelId",
  bedrock: "apiModelId",
  vertex: "apiModelId",
@@ -5456,23 +5140,14 @@ var modelIdKeysByProvider = {
  moonshot: "apiModelId",
  minimax: "apiModelId",
  deepseek: "apiModelId",
- deepinfra: "deepInfraModelId",
- doubao: "apiModelId",
  "qwen-code": "apiModelId",
- unbound: "unboundModelId",
  requesty: "requestyModelId",
  xai: "apiModelId",
- groq: "apiModelId",
  baseten: "apiModelId",
- chutes: "apiModelId",
  litellm: "litellmModelId",
- huggingface: "huggingFaceModelId",
- cerebras: "apiModelId",
  sambanova: "apiModelId",
  zai: "apiModelId",
  fireworks: "apiModelId",
- featherless: "apiModelId",
- "io-intelligence": "ioIntelligenceModelId",
  roo: "apiModelId",
  "vercel-ai-gateway": "vercelAiGatewayModelId"
  };
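
A standalone TypeScript sketch of the resolution rule shown in getModelId above: the first modelIdKeys entry that is populated on the settings object wins. Only the keys visible in these hunks are listed here; the array's leading entries fall outside the diff context:

```ts
// Mirrors the lookup in getModelId using just the keys visible in this diff.
const visibleModelIdKeys = [
  "ollamaModelId",
  "lmStudioModelId",
  "lmStudioDraftModelId",
  "requestyModelId",
  "litellmModelId",
  "vercelAiGatewayModelId",
] as const;

const resolveModelIdKey = (settings: Record<string, string | undefined>) =>
  visibleModelIdKeys.find((key) => settings[key]);

console.log(resolveModelIdKey({ litellmModelId: "my-model" }));  // "litellmModelId"
console.log(resolveModelIdKey({ unboundModelId: "my-model" })); // undefined (key retired in this release)
```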
@@ -5495,27 +5170,22 @@ var MODELS_BY_PROVIDER = {
  label: "Anthropic",
  models: Object.keys(anthropicModels)
  },
+ azure: {
+ id: "azure",
+ label: "Azure AI Foundry",
+ // Azure uses deployment names configured by the user (not a fixed upstream model ID list)
+ models: []
+ },
  bedrock: {
  id: "bedrock",
  label: "Amazon Bedrock",
  models: Object.keys(bedrockModels)
  },
- cerebras: {
- id: "cerebras",
- label: "Cerebras",
- models: Object.keys(cerebrasModels)
- },
  deepseek: {
  id: "deepseek",
  label: "DeepSeek",
  models: Object.keys(deepSeekModels)
  },
- doubao: { id: "doubao", label: "Doubao", models: Object.keys(doubaoModels) },
- featherless: {
- id: "featherless",
- label: "Featherless",
- models: Object.keys(featherlessModels)
- },
  fireworks: {
  id: "fireworks",
  label: "Fireworks",
@@ -5526,12 +5196,6 @@ var MODELS_BY_PROVIDER = {
  label: "Google Gemini",
  models: Object.keys(geminiModels)
  },
- groq: { id: "groq", label: "Groq", models: Object.keys(groqModels) },
- "io-intelligence": {
- id: "io-intelligence",
- label: "IO Intelligence",
- models: Object.keys(ioIntelligenceModels)
- },
  mistral: {
  id: "mistral",
  label: "Mistral",
@@ -5578,14 +5242,10 @@ var MODELS_BY_PROVIDER = {
  zai: { id: "zai", label: "Z.ai", models: Object.keys(internationalZAiModels) },
  baseten: { id: "baseten", label: "Baseten", models: Object.keys(basetenModels) },
  // Dynamic providers; models pulled from remote APIs.
- huggingface: { id: "huggingface", label: "Hugging Face", models: [] },
  litellm: { id: "litellm", label: "LiteLLM", models: [] },
  openrouter: { id: "openrouter", label: "OpenRouter", models: [] },
  requesty: { id: "requesty", label: "Requesty", models: [] },
- unbound: { id: "unbound", label: "Unbound", models: [] },
- deepinfra: { id: "deepinfra", label: "DeepInfra", models: [] },
  "vercel-ai-gateway": { id: "vercel-ai-gateway", label: "Vercel AI Gateway", models: [] },
- chutes: { id: "chutes", label: "Chutes AI", models: [] },
  // Local providers; models discovered from localhost endpoints.
  lmstudio: { id: "lmstudio", label: "LM Studio", models: [] },
  ollama: { id: "ollama", label: "Ollama", models: [] }
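
The shape of a MODELS_BY_PROVIDER entry, reconstructed in TypeScript from the literals above; the interface name is ours, not the package's:

```ts
interface ProviderModelsEntry {
  id: string;
  label: string;
  models: string[]; // empty for dynamic and local providers, whose model lists are discovered at runtime
}

// The new azure entry also ships an empty list, per its inline comment:
// deployments are user-configured rather than a fixed upstream model ID list.
const azureEntry: ProviderModelsEntry = {
  id: "azure",
  label: "Azure AI Foundry",
  models: [],
};
```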
@@ -6228,7 +5888,12 @@ var globalSettingsSchema = z14.object({
  * Whether to show the worktree selector in the home screen.
  * @default true
  */
- showWorktreesInHomeScreen: z14.boolean().optional()
+ showWorktreesInHomeScreen: z14.boolean().optional(),
+ /**
+ * List of native tool names to globally disable.
+ * Tools in this list will be excluded from prompt generation and rejected at execution time.
+ */
+ disabledTools: z14.array(toolNamesSchema).optional()
  });
  var GLOBAL_SETTINGS_KEYS = globalSettingsSchema.keyof().options;
  var rooCodeSettingsSchema = providerSettingsSchema.merge(globalSettingsSchema);
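
A short TypeScript sketch of the new disabledTools setting being validated through the exported globalSettingsSchema; the tool name used here is a placeholder, since valid entries are whatever toolNamesSchema enumerates:

```ts
import { globalSettingsSchema } from "@roo-code/types";

const result = globalSettingsSchema.safeParse({
  showWorktreesInHomeScreen: true,
  disabledTools: ["browser_action"], // placeholder; must match an entry in toolNamesSchema
});

// Succeeds only if the placeholder is a real tool name; otherwise the
// zod issues pinpoint the rejected entry.
console.log(result.success ? "ok" : result.error.issues);
```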
@@ -6243,19 +5908,13 @@ var SECRET_STATE_KEYS = [
  "ollamaApiKey",
  "geminiApiKey",
  "openAiNativeApiKey",
- "cerebrasApiKey",
  "deepSeekApiKey",
- "doubaoApiKey",
  "moonshotApiKey",
  "mistralApiKey",
  "minimaxApiKey",
- "unboundApiKey",
  "requestyApiKey",
  "xaiApiKey",
- "groqApiKey",
- "chutesApiKey",
  "litellmApiKey",
- "deepInfraApiKey",
  "codeIndexOpenAiKey",
  "codeIndexQdrantApiKey",
  "codebaseIndexOpenAiCompatibleApiKey",
@@ -6263,14 +5922,12 @@ var SECRET_STATE_KEYS = [
  "codebaseIndexMistralApiKey",
  "codebaseIndexVercelAiGatewayApiKey",
  "codebaseIndexOpenRouterApiKey",
- "huggingFaceApiKey",
  "sambaNovaApiKey",
  "zaiApiKey",
  "fireworksApiKey",
- "featherlessApiKey",
- "ioIntelligenceApiKey",
  "vercelAiGatewayApiKey",
- "basetenApiKey"
+ "basetenApiKey",
+ "azureApiKey"
  ];
  var GLOBAL_SECRET_KEYS = [
  "openRouterImageApiKey"
@@ -6406,7 +6063,8 @@ var organizationDefaultSettingsSchema = globalSettingsSchema.pick({
  terminalCommandDelay: true,
  terminalShellIntegrationDisabled: true,
  terminalShellIntegrationTimeout: true,
- terminalZshClearEolMark: true
+ terminalZshClearEolMark: true,
+ disabledTools: true
  }).merge(
  z16.object({
  maxOpenTabsContext: z16.number().int().nonnegative().optional(),
@@ -6873,6 +6531,9 @@ var TaskCommandName = /* @__PURE__ */ ((TaskCommandName2) => {
  TaskCommandName2["CloseTask"] = "CloseTask";
  TaskCommandName2["ResumeTask"] = "ResumeTask";
  TaskCommandName2["SendMessage"] = "SendMessage";
+ TaskCommandName2["GetCommands"] = "GetCommands";
+ TaskCommandName2["GetModes"] = "GetModes";
+ TaskCommandName2["GetModels"] = "GetModels";
  return TaskCommandName2;
  })(TaskCommandName || {});
  var taskCommandSchema = z19.discriminatedUnion("commandName", [
@@ -6901,6 +6562,15 @@ var taskCommandSchema = z19.discriminatedUnion("commandName", [
  text: z19.string().optional(),
  images: z19.array(z19.string()).optional()
  })
+ }),
+ z19.object({
+ commandName: z19.literal("GetCommands" /* GetCommands */)
+ }),
+ z19.object({
+ commandName: z19.literal("GetModes" /* GetModes */)
+ }),
+ z19.object({
+ commandName: z19.literal("GetModels" /* GetModels */)
  })
  ]);
  var ipcMessageSchema = z19.discriminatedUnion("type", [
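
The three new IPC task commands are payload-free: each union member carries only commandName. A TypeScript sketch that assumes taskCommandSchema is importable from the package (this diff's export hunks only confirm ipcMessageSchema, so treat the import as an assumption):

```ts
import { taskCommandSchema } from "@roo-code/types";

// Each new variant should validate with just the discriminator field.
for (const commandName of ["GetCommands", "GetModes", "GetModels"]) {
  console.log(commandName, taskCommandSchema.safeParse({ commandName }).success); // expected: true
}
```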
@@ -7087,8 +6757,6 @@ export {
  DEFAULT_MODES,
  DEFAULT_TERMINAL_OUTPUT_PREVIEW_SIZE,
  DEFAULT_WRITE_DELAY_MS,
- DOUBAO_API_BASE_URL,
- DOUBAO_API_CHAT_PATH,
  EVALS_SETTINGS,
  EVALS_TIMEOUT,
  EXPECTED_API_ERROR_CODES,
@@ -7099,18 +6767,9 @@ export {
  GLOBAL_SETTINGS_KEYS,
  GLOBAL_STATE_KEYS,
  HEARTBEAT_INTERVAL_MS,
- HUGGINGFACE_API_URL,
- HUGGINGFACE_CACHE_DURATION,
- HUGGINGFACE_DEFAULT_CONTEXT_WINDOW,
- HUGGINGFACE_DEFAULT_MAX_TOKENS,
- HUGGINGFACE_MAX_TOKENS_FALLBACK,
- HUGGINGFACE_SLIDER_MIN,
- HUGGINGFACE_SLIDER_STEP,
- HUGGINGFACE_TEMPERATURE_MAX_VALUE,
  IMAGE_GENERATION_MODELS,
  IMAGE_GENERATION_MODEL_IDS,
  INSTANCE_TTL_SECONDS,
- IO_INTELLIGENCE_CACHE_DURATION,
  IpcMessageType,
  IpcOrigin,
  LMSTUDIO_DEFAULT_TEMPERATURE,
@@ -7158,6 +6817,9 @@ export {
  anthropicDefaultModelId,
  anthropicModels,
  appPropertiesSchema,
+ azureDefaultModelId,
+ azureDefaultModelInfo,
+ azureModels,
  azureOpenAiDefaultApiVersion,
  basetenDefaultModelId,
  basetenModels,
@@ -7165,13 +6827,8 @@ export {
  bedrockDefaultPromptRouterModelId,
  bedrockModels,
  browserActions,
- cerebrasDefaultModelId,
- cerebrasModels,
  checkoutDiffPayloadSchema,
  checkoutRestorePayloadSchema,
- chutesDefaultModelId,
- chutesDefaultModelInfo,
- chutesModels,
  clineAskSchema,
  clineAsks,
  clineMessageSchema,
@@ -7191,15 +6848,10 @@ export {
  customModesSettingsSchema,
  customProviders,
  customSupportPromptsSchema,
- deepInfraDefaultModelId,
- deepInfraDefaultModelInfo,
  deepSeekDefaultModelId,
  deepSeekModels,
  defineCustomTool,
  discriminatedProviderSettingsWithIdSchema,
- doubaoDefaultModelId,
- doubaoDefaultModelInfo,
- doubaoModels,
  dynamicAppPropertiesSchema,
  dynamicProviders,
  experimentIds,
@@ -7212,8 +6864,6 @@ export {
  extractConsecutiveMistakeErrorProperties,
  extractMessageFromJsonPayload,
  fauxProviders,
- featherlessDefaultModelId,
- featherlessModels,
  fireworksDefaultModelId,
  fireworksModels,
  followUpDataSchema,
@@ -7227,8 +6877,6 @@ export {
  getProviderDefaultModelId,
  gitPropertiesSchema,
  globalSettingsSchema,
- groqDefaultModelId,
- groqModels,
  groupEntrySchema,
  groupOptionsSchema,
  historyItemSchema,
@@ -7239,9 +6887,6 @@ export {
  internalProviders,
  internationalZAiDefaultModelId,
  internationalZAiModels,
- ioIntelligenceDefaultBaseUrl,
- ioIntelligenceDefaultModelId,
- ioIntelligenceModels,
  ipcMessageSchema,
  isApiProviderError,
  isConsecutiveMistakeError,
@@ -7260,6 +6905,7 @@ export {
  isNonBlockingAsk,
  isProviderName,
  isResumableAsk,
+ isRetiredProvider,
  isSecretStateKey,
  isTypicalProvider,
  lMStudioDefaultModelId,
@@ -7310,6 +6956,7 @@ export {
  promptComponentSchema,
  providerNames,
  providerNamesSchema,
+ providerNamesWithRetiredSchema,
  providerSettingsEntrySchema,
  providerSettingsSchema,
  providerSettingsSchemaDiscriminated,
@@ -7327,6 +6974,8 @@ export {
  requestyDefaultModelId,
  requestyDefaultModelInfo,
  resumableAsks,
+ retiredProviderNames,
+ retiredProviderNamesSchema,
  rooCodeEventsSchema,
  rooCodeSettingsSchema,
  rooCodeTelemetryEventSchema,
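
Taken together with the earlier switch of apiProvider to providerNamesWithRetiredSchema, the new exports (retiredProviderNames, retiredProviderNamesSchema, isRetiredProvider) suggest the removed providers are reclassified as retired rather than made unparseable. A TypeScript sketch under that assumption; whether "groq" is in the retired list, and isRetiredProvider's exact signature, are inferred from this diff rather than confirmed:

```ts
import { isRetiredProvider, providerSettingsSchema } from "@roo-code/types";

// A persisted config naming a removed provider should still parse, because
// apiProvider now accepts providerNamesWithRetiredSchema.
const legacy = providerSettingsSchema.safeParse({ apiProvider: "groq" });

if (legacy.success && legacy.data.apiProvider && isRetiredProvider(legacy.data.apiProvider)) {
  console.warn(`Provider "${legacy.data.apiProvider}" has been retired; prompt the user to migrate.`);
}
```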
@@ -7359,8 +7008,6 @@ export {
  toolNamesSchema,
  toolProgressStatusSchema,
  toolUsageSchema,
- unboundDefaultModelId,
- unboundDefaultModelInfo,
  usageStatsSchema,
  userFeaturesSchema,
  userSettingsConfigSchema,