@roo-code/types 1.109.0 → 1.111.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -44,8 +44,6 @@ __export(index_exports, {
44
44
  DEFAULT_MODES: () => DEFAULT_MODES,
45
45
  DEFAULT_TERMINAL_OUTPUT_PREVIEW_SIZE: () => DEFAULT_TERMINAL_OUTPUT_PREVIEW_SIZE,
46
46
  DEFAULT_WRITE_DELAY_MS: () => DEFAULT_WRITE_DELAY_MS,
47
- DOUBAO_API_BASE_URL: () => DOUBAO_API_BASE_URL,
48
- DOUBAO_API_CHAT_PATH: () => DOUBAO_API_CHAT_PATH,
49
47
  EVALS_SETTINGS: () => EVALS_SETTINGS,
50
48
  EVALS_TIMEOUT: () => EVALS_TIMEOUT,
51
49
  EXPECTED_API_ERROR_CODES: () => EXPECTED_API_ERROR_CODES,
@@ -56,18 +54,9 @@ __export(index_exports, {
56
54
  GLOBAL_SETTINGS_KEYS: () => GLOBAL_SETTINGS_KEYS,
57
55
  GLOBAL_STATE_KEYS: () => GLOBAL_STATE_KEYS,
58
56
  HEARTBEAT_INTERVAL_MS: () => HEARTBEAT_INTERVAL_MS,
59
- HUGGINGFACE_API_URL: () => HUGGINGFACE_API_URL,
60
- HUGGINGFACE_CACHE_DURATION: () => HUGGINGFACE_CACHE_DURATION,
61
- HUGGINGFACE_DEFAULT_CONTEXT_WINDOW: () => HUGGINGFACE_DEFAULT_CONTEXT_WINDOW,
62
- HUGGINGFACE_DEFAULT_MAX_TOKENS: () => HUGGINGFACE_DEFAULT_MAX_TOKENS,
63
- HUGGINGFACE_MAX_TOKENS_FALLBACK: () => HUGGINGFACE_MAX_TOKENS_FALLBACK,
64
- HUGGINGFACE_SLIDER_MIN: () => HUGGINGFACE_SLIDER_MIN,
65
- HUGGINGFACE_SLIDER_STEP: () => HUGGINGFACE_SLIDER_STEP,
66
- HUGGINGFACE_TEMPERATURE_MAX_VALUE: () => HUGGINGFACE_TEMPERATURE_MAX_VALUE,
67
57
  IMAGE_GENERATION_MODELS: () => IMAGE_GENERATION_MODELS,
68
58
  IMAGE_GENERATION_MODEL_IDS: () => IMAGE_GENERATION_MODEL_IDS,
69
59
  INSTANCE_TTL_SECONDS: () => INSTANCE_TTL_SECONDS,
70
- IO_INTELLIGENCE_CACHE_DURATION: () => IO_INTELLIGENCE_CACHE_DURATION,
71
60
  IpcMessageType: () => IpcMessageType,
72
61
  IpcOrigin: () => IpcOrigin,
73
62
  LMSTUDIO_DEFAULT_TEMPERATURE: () => LMSTUDIO_DEFAULT_TEMPERATURE,
@@ -115,6 +104,9 @@ __export(index_exports, {
115
104
  anthropicDefaultModelId: () => anthropicDefaultModelId,
116
105
  anthropicModels: () => anthropicModels,
117
106
  appPropertiesSchema: () => appPropertiesSchema,
107
+ azureDefaultModelId: () => azureDefaultModelId,
108
+ azureDefaultModelInfo: () => azureDefaultModelInfo,
109
+ azureModels: () => azureModels,
118
110
  azureOpenAiDefaultApiVersion: () => azureOpenAiDefaultApiVersion,
119
111
  basetenDefaultModelId: () => basetenDefaultModelId,
120
112
  basetenModels: () => basetenModels,
@@ -122,13 +114,8 @@ __export(index_exports, {
122
114
  bedrockDefaultPromptRouterModelId: () => bedrockDefaultPromptRouterModelId,
123
115
  bedrockModels: () => bedrockModels,
124
116
  browserActions: () => browserActions,
125
- cerebrasDefaultModelId: () => cerebrasDefaultModelId,
126
- cerebrasModels: () => cerebrasModels,
127
117
  checkoutDiffPayloadSchema: () => checkoutDiffPayloadSchema,
128
118
  checkoutRestorePayloadSchema: () => checkoutRestorePayloadSchema,
129
- chutesDefaultModelId: () => chutesDefaultModelId,
130
- chutesDefaultModelInfo: () => chutesDefaultModelInfo,
131
- chutesModels: () => chutesModels,
132
119
  clineAskSchema: () => clineAskSchema,
133
120
  clineAsks: () => clineAsks,
134
121
  clineMessageSchema: () => clineMessageSchema,
@@ -148,15 +135,10 @@ __export(index_exports, {
148
135
  customModesSettingsSchema: () => customModesSettingsSchema,
149
136
  customProviders: () => customProviders,
150
137
  customSupportPromptsSchema: () => customSupportPromptsSchema,
151
- deepInfraDefaultModelId: () => deepInfraDefaultModelId,
152
- deepInfraDefaultModelInfo: () => deepInfraDefaultModelInfo,
153
138
  deepSeekDefaultModelId: () => deepSeekDefaultModelId,
154
139
  deepSeekModels: () => deepSeekModels,
155
140
  defineCustomTool: () => defineCustomTool,
156
141
  discriminatedProviderSettingsWithIdSchema: () => discriminatedProviderSettingsWithIdSchema,
157
- doubaoDefaultModelId: () => doubaoDefaultModelId,
158
- doubaoDefaultModelInfo: () => doubaoDefaultModelInfo,
159
- doubaoModels: () => doubaoModels,
160
142
  dynamicAppPropertiesSchema: () => dynamicAppPropertiesSchema,
161
143
  dynamicProviders: () => dynamicProviders,
162
144
  experimentIds: () => experimentIds,
@@ -169,8 +151,6 @@ __export(index_exports, {
169
151
  extractConsecutiveMistakeErrorProperties: () => extractConsecutiveMistakeErrorProperties,
170
152
  extractMessageFromJsonPayload: () => extractMessageFromJsonPayload,
171
153
  fauxProviders: () => fauxProviders,
172
- featherlessDefaultModelId: () => featherlessDefaultModelId,
173
- featherlessModels: () => featherlessModels,
174
154
  fireworksDefaultModelId: () => fireworksDefaultModelId,
175
155
  fireworksModels: () => fireworksModels,
176
156
  followUpDataSchema: () => followUpDataSchema,
@@ -184,8 +164,6 @@ __export(index_exports, {
184
164
  getProviderDefaultModelId: () => getProviderDefaultModelId,
185
165
  gitPropertiesSchema: () => gitPropertiesSchema,
186
166
  globalSettingsSchema: () => globalSettingsSchema,
187
- groqDefaultModelId: () => groqDefaultModelId,
188
- groqModels: () => groqModels,
189
167
  groupEntrySchema: () => groupEntrySchema,
190
168
  groupOptionsSchema: () => groupOptionsSchema,
191
169
  historyItemSchema: () => historyItemSchema,
@@ -196,9 +174,6 @@ __export(index_exports, {
196
174
  internalProviders: () => internalProviders,
197
175
  internationalZAiDefaultModelId: () => internationalZAiDefaultModelId,
198
176
  internationalZAiModels: () => internationalZAiModels,
199
- ioIntelligenceDefaultBaseUrl: () => ioIntelligenceDefaultBaseUrl,
200
- ioIntelligenceDefaultModelId: () => ioIntelligenceDefaultModelId,
201
- ioIntelligenceModels: () => ioIntelligenceModels,
202
177
  ipcMessageSchema: () => ipcMessageSchema,
203
178
  isApiProviderError: () => isApiProviderError,
204
179
  isConsecutiveMistakeError: () => isConsecutiveMistakeError,
@@ -217,6 +192,7 @@ __export(index_exports, {
217
192
  isNonBlockingAsk: () => isNonBlockingAsk,
218
193
  isProviderName: () => isProviderName,
219
194
  isResumableAsk: () => isResumableAsk,
195
+ isRetiredProvider: () => isRetiredProvider,
220
196
  isSecretStateKey: () => isSecretStateKey,
221
197
  isTypicalProvider: () => isTypicalProvider,
222
198
  lMStudioDefaultModelId: () => lMStudioDefaultModelId,
@@ -267,6 +243,7 @@ __export(index_exports, {
267
243
  promptComponentSchema: () => promptComponentSchema,
268
244
  providerNames: () => providerNames,
269
245
  providerNamesSchema: () => providerNamesSchema,
246
+ providerNamesWithRetiredSchema: () => providerNamesWithRetiredSchema,
270
247
  providerSettingsEntrySchema: () => providerSettingsEntrySchema,
271
248
  providerSettingsSchema: () => providerSettingsSchema,
272
249
  providerSettingsSchemaDiscriminated: () => providerSettingsSchemaDiscriminated,
@@ -284,6 +261,8 @@ __export(index_exports, {
284
261
  requestyDefaultModelId: () => requestyDefaultModelId,
285
262
  requestyDefaultModelInfo: () => requestyDefaultModelInfo,
286
263
  resumableAsks: () => resumableAsks,
264
+ retiredProviderNames: () => retiredProviderNames,
265
+ retiredProviderNamesSchema: () => retiredProviderNamesSchema,
287
266
  rooCodeEventsSchema: () => rooCodeEventsSchema,
288
267
  rooCodeSettingsSchema: () => rooCodeSettingsSchema,
289
268
  rooCodeTelemetryEventSchema: () => rooCodeTelemetryEventSchema,
@@ -316,8 +295,6 @@ __export(index_exports, {
316
295
  toolNamesSchema: () => toolNamesSchema,
317
296
  toolProgressStatusSchema: () => toolProgressStatusSchema,
318
297
  toolUsageSchema: () => toolUsageSchema,
319
- unboundDefaultModelId: () => unboundDefaultModelId,
320
- unboundDefaultModelInfo: () => unboundDefaultModelInfo,
321
298
  usageStatsSchema: () => usageStatsSchema,
322
299
  userFeaturesSchema: () => userFeaturesSchema,
323
300
  userSettingsConfigSchema: () => userSettingsConfigSchema,
@@ -342,7 +319,7 @@ module.exports = __toCommonJS(index_exports);
342
319
  var import_zod16 = require("zod");
343
320
 
344
321
  // src/events.ts
345
- var import_zod3 = require("zod");
322
+ var import_zod4 = require("zod");
346
323
 
347
324
  // src/message.ts
348
325
  var import_zod = require("zod");
@@ -481,16 +458,97 @@ var queuedMessageSchema = import_zod.z.object({
481
458
  images: import_zod.z.array(import_zod.z.string()).optional()
482
459
  });
483
460
 
484
- // src/tool.ts
461
+ // src/model.ts
485
462
  var import_zod2 = require("zod");
463
+ var reasoningEfforts = ["low", "medium", "high"];
464
+ var reasoningEffortsSchema = import_zod2.z.enum(reasoningEfforts);
465
+ var reasoningEffortWithMinimalSchema = import_zod2.z.union([reasoningEffortsSchema, import_zod2.z.literal("minimal")]);
466
+ var reasoningEffortsExtended = ["none", "minimal", "low", "medium", "high", "xhigh"];
467
+ var reasoningEffortExtendedSchema = import_zod2.z.enum(reasoningEffortsExtended);
468
+ var reasoningEffortSettingValues = ["disable", "none", "minimal", "low", "medium", "high", "xhigh"];
469
+ var reasoningEffortSettingSchema = import_zod2.z.enum(reasoningEffortSettingValues);
470
+ var verbosityLevels = ["low", "medium", "high"];
471
+ var verbosityLevelsSchema = import_zod2.z.enum(verbosityLevels);
472
+ var serviceTiers = ["default", "flex", "priority"];
473
+ var serviceTierSchema = import_zod2.z.enum(serviceTiers);
474
+ var modelParameters = ["max_tokens", "temperature", "reasoning", "include_reasoning"];
475
+ var modelParametersSchema = import_zod2.z.enum(modelParameters);
476
+ var isModelParameter = (value) => modelParameters.includes(value);
477
+ var modelInfoSchema = import_zod2.z.object({
478
+ maxTokens: import_zod2.z.number().nullish(),
479
+ maxThinkingTokens: import_zod2.z.number().nullish(),
480
+ contextWindow: import_zod2.z.number(),
481
+ supportsImages: import_zod2.z.boolean().optional(),
482
+ supportsPromptCache: import_zod2.z.boolean(),
483
+ // Optional default prompt cache retention policy for providers that support it.
484
+ // When set to "24h", extended prompt caching will be requested; when omitted
485
+ // or set to "in_memory", the default in‑memory cache is used.
486
+ promptCacheRetention: import_zod2.z.enum(["in_memory", "24h"]).optional(),
487
+ // Capability flag to indicate whether the model supports an output verbosity parameter
488
+ supportsVerbosity: import_zod2.z.boolean().optional(),
489
+ supportsReasoningBudget: import_zod2.z.boolean().optional(),
490
+ // Capability flag to indicate whether the model supports simple on/off binary reasoning
491
+ supportsReasoningBinary: import_zod2.z.boolean().optional(),
492
+ // Capability flag to indicate whether the model supports temperature parameter
493
+ supportsTemperature: import_zod2.z.boolean().optional(),
494
+ defaultTemperature: import_zod2.z.number().optional(),
495
+ requiredReasoningBudget: import_zod2.z.boolean().optional(),
496
+ supportsReasoningEffort: import_zod2.z.union([import_zod2.z.boolean(), import_zod2.z.array(import_zod2.z.enum(["disable", "none", "minimal", "low", "medium", "high", "xhigh"]))]).optional(),
497
+ requiredReasoningEffort: import_zod2.z.boolean().optional(),
498
+ preserveReasoning: import_zod2.z.boolean().optional(),
499
+ supportedParameters: import_zod2.z.array(modelParametersSchema).optional(),
500
+ inputPrice: import_zod2.z.number().optional(),
501
+ outputPrice: import_zod2.z.number().optional(),
502
+ cacheWritesPrice: import_zod2.z.number().optional(),
503
+ cacheReadsPrice: import_zod2.z.number().optional(),
504
+ description: import_zod2.z.string().optional(),
505
+ // Default effort value for models that support reasoning effort
506
+ reasoningEffort: reasoningEffortExtendedSchema.optional(),
507
+ minTokensPerCachePoint: import_zod2.z.number().optional(),
508
+ maxCachePoints: import_zod2.z.number().optional(),
509
+ cachableFields: import_zod2.z.array(import_zod2.z.string()).optional(),
510
+ // Flag to indicate if the model is deprecated and should not be used
511
+ deprecated: import_zod2.z.boolean().optional(),
512
+ // Flag to indicate if the model should hide vendor/company identity in responses
513
+ isStealthModel: import_zod2.z.boolean().optional(),
514
+ // Flag to indicate if the model is free (no cost)
515
+ isFree: import_zod2.z.boolean().optional(),
516
+ // Exclude specific native tools from being available (only applies to native protocol)
517
+ // These tools will be removed from the set of tools available to the model
518
+ excludedTools: import_zod2.z.array(import_zod2.z.string()).optional(),
519
+ // Include specific native tools (only applies to native protocol)
520
+ // These tools will be added if they belong to an allowed group in the current mode
521
+ // Cannot force-add tools from groups the mode doesn't allow
522
+ includedTools: import_zod2.z.array(import_zod2.z.string()).optional(),
523
+ /**
524
+ * Service tiers with pricing information.
525
+ * Each tier can have a name (for OpenAI service tiers) and pricing overrides.
526
+ * The top-level input/output/cache* fields represent the default/standard tier.
527
+ */
528
+ tiers: import_zod2.z.array(
529
+ import_zod2.z.object({
530
+ name: serviceTierSchema.optional(),
531
+ // Service tier name (flex, priority, etc.)
532
+ contextWindow: import_zod2.z.number(),
533
+ inputPrice: import_zod2.z.number().optional(),
534
+ outputPrice: import_zod2.z.number().optional(),
535
+ cacheWritesPrice: import_zod2.z.number().optional(),
536
+ cacheReadsPrice: import_zod2.z.number().optional()
537
+ })
538
+ ).optional()
539
+ });
540
+
541
+ // src/tool.ts
542
+ var import_zod3 = require("zod");
486
543
  var toolGroups = ["read", "edit", "browser", "command", "mcp", "modes"];
487
- var toolGroupsSchema = import_zod2.z.enum(toolGroups);
544
+ var toolGroupsSchema = import_zod3.z.enum(toolGroups);
488
545
  var toolNames = [
489
546
  "execute_command",
490
547
  "read_file",
491
548
  "read_command_output",
492
549
  "write_to_file",
493
550
  "apply_diff",
551
+ "edit",
494
552
  "search_and_replace",
495
553
  "search_replace",
496
554
  "edit_file",
@@ -511,12 +569,12 @@ var toolNames = [
511
569
  "generate_image",
512
570
  "custom_tool"
513
571
  ];
514
- var toolNamesSchema = import_zod2.z.enum(toolNames);
515
- var toolUsageSchema = import_zod2.z.record(
572
+ var toolNamesSchema = import_zod3.z.enum(toolNames);
573
+ var toolUsageSchema = import_zod3.z.record(
516
574
  toolNamesSchema,
517
- import_zod2.z.object({
518
- attempts: import_zod2.z.number(),
519
- failures: import_zod2.z.number()
575
+ import_zod3.z.object({
576
+ attempts: import_zod3.z.number(),
577
+ failures: import_zod3.z.number()
520
578
  })
521
579
  );
522
580
 
@@ -547,198 +605,230 @@ var RooCodeEventName = /* @__PURE__ */ ((RooCodeEventName2) => {
547
605
  RooCodeEventName2["TaskToolFailed"] = "taskToolFailed";
548
606
  RooCodeEventName2["ModeChanged"] = "modeChanged";
549
607
  RooCodeEventName2["ProviderProfileChanged"] = "providerProfileChanged";
608
+ RooCodeEventName2["CommandsResponse"] = "commandsResponse";
609
+ RooCodeEventName2["ModesResponse"] = "modesResponse";
610
+ RooCodeEventName2["ModelsResponse"] = "modelsResponse";
550
611
  RooCodeEventName2["EvalPass"] = "evalPass";
551
612
  RooCodeEventName2["EvalFail"] = "evalFail";
552
613
  return RooCodeEventName2;
553
614
  })(RooCodeEventName || {});
554
- var rooCodeEventsSchema = import_zod3.z.object({
555
- ["taskCreated" /* TaskCreated */]: import_zod3.z.tuple([import_zod3.z.string()]),
556
- ["taskStarted" /* TaskStarted */]: import_zod3.z.tuple([import_zod3.z.string()]),
557
- ["taskCompleted" /* TaskCompleted */]: import_zod3.z.tuple([
558
- import_zod3.z.string(),
615
+ var rooCodeEventsSchema = import_zod4.z.object({
616
+ ["taskCreated" /* TaskCreated */]: import_zod4.z.tuple([import_zod4.z.string()]),
617
+ ["taskStarted" /* TaskStarted */]: import_zod4.z.tuple([import_zod4.z.string()]),
618
+ ["taskCompleted" /* TaskCompleted */]: import_zod4.z.tuple([
619
+ import_zod4.z.string(),
559
620
  tokenUsageSchema,
560
621
  toolUsageSchema,
561
- import_zod3.z.object({
562
- isSubtask: import_zod3.z.boolean()
622
+ import_zod4.z.object({
623
+ isSubtask: import_zod4.z.boolean()
563
624
  })
564
625
  ]),
565
- ["taskAborted" /* TaskAborted */]: import_zod3.z.tuple([import_zod3.z.string()]),
566
- ["taskFocused" /* TaskFocused */]: import_zod3.z.tuple([import_zod3.z.string()]),
567
- ["taskUnfocused" /* TaskUnfocused */]: import_zod3.z.tuple([import_zod3.z.string()]),
568
- ["taskActive" /* TaskActive */]: import_zod3.z.tuple([import_zod3.z.string()]),
569
- ["taskInteractive" /* TaskInteractive */]: import_zod3.z.tuple([import_zod3.z.string()]),
570
- ["taskResumable" /* TaskResumable */]: import_zod3.z.tuple([import_zod3.z.string()]),
571
- ["taskIdle" /* TaskIdle */]: import_zod3.z.tuple([import_zod3.z.string()]),
572
- ["taskPaused" /* TaskPaused */]: import_zod3.z.tuple([import_zod3.z.string()]),
573
- ["taskUnpaused" /* TaskUnpaused */]: import_zod3.z.tuple([import_zod3.z.string()]),
574
- ["taskSpawned" /* TaskSpawned */]: import_zod3.z.tuple([import_zod3.z.string(), import_zod3.z.string()]),
575
- ["taskDelegated" /* TaskDelegated */]: import_zod3.z.tuple([
576
- import_zod3.z.string(),
626
+ ["taskAborted" /* TaskAborted */]: import_zod4.z.tuple([import_zod4.z.string()]),
627
+ ["taskFocused" /* TaskFocused */]: import_zod4.z.tuple([import_zod4.z.string()]),
628
+ ["taskUnfocused" /* TaskUnfocused */]: import_zod4.z.tuple([import_zod4.z.string()]),
629
+ ["taskActive" /* TaskActive */]: import_zod4.z.tuple([import_zod4.z.string()]),
630
+ ["taskInteractive" /* TaskInteractive */]: import_zod4.z.tuple([import_zod4.z.string()]),
631
+ ["taskResumable" /* TaskResumable */]: import_zod4.z.tuple([import_zod4.z.string()]),
632
+ ["taskIdle" /* TaskIdle */]: import_zod4.z.tuple([import_zod4.z.string()]),
633
+ ["taskPaused" /* TaskPaused */]: import_zod4.z.tuple([import_zod4.z.string()]),
634
+ ["taskUnpaused" /* TaskUnpaused */]: import_zod4.z.tuple([import_zod4.z.string()]),
635
+ ["taskSpawned" /* TaskSpawned */]: import_zod4.z.tuple([import_zod4.z.string(), import_zod4.z.string()]),
636
+ ["taskDelegated" /* TaskDelegated */]: import_zod4.z.tuple([
637
+ import_zod4.z.string(),
577
638
  // parentTaskId
578
- import_zod3.z.string()
639
+ import_zod4.z.string()
579
640
  // childTaskId
580
641
  ]),
581
- ["taskDelegationCompleted" /* TaskDelegationCompleted */]: import_zod3.z.tuple([
582
- import_zod3.z.string(),
642
+ ["taskDelegationCompleted" /* TaskDelegationCompleted */]: import_zod4.z.tuple([
643
+ import_zod4.z.string(),
583
644
  // parentTaskId
584
- import_zod3.z.string(),
645
+ import_zod4.z.string(),
585
646
  // childTaskId
586
- import_zod3.z.string()
647
+ import_zod4.z.string()
587
648
  // completionResultSummary
588
649
  ]),
589
- ["taskDelegationResumed" /* TaskDelegationResumed */]: import_zod3.z.tuple([
590
- import_zod3.z.string(),
650
+ ["taskDelegationResumed" /* TaskDelegationResumed */]: import_zod4.z.tuple([
651
+ import_zod4.z.string(),
591
652
  // parentTaskId
592
- import_zod3.z.string()
653
+ import_zod4.z.string()
593
654
  // childTaskId
594
655
  ]),
595
- ["message" /* Message */]: import_zod3.z.tuple([
596
- import_zod3.z.object({
597
- taskId: import_zod3.z.string(),
598
- action: import_zod3.z.union([import_zod3.z.literal("created"), import_zod3.z.literal("updated")]),
656
+ ["message" /* Message */]: import_zod4.z.tuple([
657
+ import_zod4.z.object({
658
+ taskId: import_zod4.z.string(),
659
+ action: import_zod4.z.union([import_zod4.z.literal("created"), import_zod4.z.literal("updated")]),
599
660
  message: clineMessageSchema
600
661
  })
601
662
  ]),
602
- ["taskModeSwitched" /* TaskModeSwitched */]: import_zod3.z.tuple([import_zod3.z.string(), import_zod3.z.string()]),
603
- ["taskAskResponded" /* TaskAskResponded */]: import_zod3.z.tuple([import_zod3.z.string()]),
604
- ["taskUserMessage" /* TaskUserMessage */]: import_zod3.z.tuple([import_zod3.z.string()]),
605
- ["queuedMessagesUpdated" /* QueuedMessagesUpdated */]: import_zod3.z.tuple([import_zod3.z.string(), import_zod3.z.array(queuedMessageSchema)]),
606
- ["taskToolFailed" /* TaskToolFailed */]: import_zod3.z.tuple([import_zod3.z.string(), toolNamesSchema, import_zod3.z.string()]),
607
- ["taskTokenUsageUpdated" /* TaskTokenUsageUpdated */]: import_zod3.z.tuple([import_zod3.z.string(), tokenUsageSchema, toolUsageSchema]),
608
- ["modeChanged" /* ModeChanged */]: import_zod3.z.tuple([import_zod3.z.string()]),
609
- ["providerProfileChanged" /* ProviderProfileChanged */]: import_zod3.z.tuple([import_zod3.z.object({ name: import_zod3.z.string(), provider: import_zod3.z.string() })])
663
+ ["taskModeSwitched" /* TaskModeSwitched */]: import_zod4.z.tuple([import_zod4.z.string(), import_zod4.z.string()]),
664
+ ["taskAskResponded" /* TaskAskResponded */]: import_zod4.z.tuple([import_zod4.z.string()]),
665
+ ["taskUserMessage" /* TaskUserMessage */]: import_zod4.z.tuple([import_zod4.z.string()]),
666
+ ["queuedMessagesUpdated" /* QueuedMessagesUpdated */]: import_zod4.z.tuple([import_zod4.z.string(), import_zod4.z.array(queuedMessageSchema)]),
667
+ ["taskToolFailed" /* TaskToolFailed */]: import_zod4.z.tuple([import_zod4.z.string(), toolNamesSchema, import_zod4.z.string()]),
668
+ ["taskTokenUsageUpdated" /* TaskTokenUsageUpdated */]: import_zod4.z.tuple([import_zod4.z.string(), tokenUsageSchema, toolUsageSchema]),
669
+ ["modeChanged" /* ModeChanged */]: import_zod4.z.tuple([import_zod4.z.string()]),
670
+ ["providerProfileChanged" /* ProviderProfileChanged */]: import_zod4.z.tuple([import_zod4.z.object({ name: import_zod4.z.string(), provider: import_zod4.z.string() })]),
671
+ ["commandsResponse" /* CommandsResponse */]: import_zod4.z.tuple([
672
+ import_zod4.z.array(
673
+ import_zod4.z.object({
674
+ name: import_zod4.z.string(),
675
+ source: import_zod4.z.enum(["global", "project", "built-in"]),
676
+ filePath: import_zod4.z.string().optional(),
677
+ description: import_zod4.z.string().optional(),
678
+ argumentHint: import_zod4.z.string().optional()
679
+ })
680
+ )
681
+ ]),
682
+ ["modesResponse" /* ModesResponse */]: import_zod4.z.tuple([import_zod4.z.array(import_zod4.z.object({ slug: import_zod4.z.string(), name: import_zod4.z.string() }))]),
683
+ ["modelsResponse" /* ModelsResponse */]: import_zod4.z.tuple([import_zod4.z.record(import_zod4.z.string(), modelInfoSchema)])
610
684
  });
611
- var taskEventSchema = import_zod3.z.discriminatedUnion("eventName", [
685
+ var taskEventSchema = import_zod4.z.discriminatedUnion("eventName", [
612
686
  // Task Provider Lifecycle
613
- import_zod3.z.object({
614
- eventName: import_zod3.z.literal("taskCreated" /* TaskCreated */),
687
+ import_zod4.z.object({
688
+ eventName: import_zod4.z.literal("taskCreated" /* TaskCreated */),
615
689
  payload: rooCodeEventsSchema.shape["taskCreated" /* TaskCreated */],
616
- taskId: import_zod3.z.number().optional()
690
+ taskId: import_zod4.z.number().optional()
617
691
  }),
618
692
  // Task Lifecycle
619
- import_zod3.z.object({
620
- eventName: import_zod3.z.literal("taskStarted" /* TaskStarted */),
693
+ import_zod4.z.object({
694
+ eventName: import_zod4.z.literal("taskStarted" /* TaskStarted */),
621
695
  payload: rooCodeEventsSchema.shape["taskStarted" /* TaskStarted */],
622
- taskId: import_zod3.z.number().optional()
696
+ taskId: import_zod4.z.number().optional()
623
697
  }),
624
- import_zod3.z.object({
625
- eventName: import_zod3.z.literal("taskCompleted" /* TaskCompleted */),
698
+ import_zod4.z.object({
699
+ eventName: import_zod4.z.literal("taskCompleted" /* TaskCompleted */),
626
700
  payload: rooCodeEventsSchema.shape["taskCompleted" /* TaskCompleted */],
627
- taskId: import_zod3.z.number().optional()
701
+ taskId: import_zod4.z.number().optional()
628
702
  }),
629
- import_zod3.z.object({
630
- eventName: import_zod3.z.literal("taskAborted" /* TaskAborted */),
703
+ import_zod4.z.object({
704
+ eventName: import_zod4.z.literal("taskAborted" /* TaskAborted */),
631
705
  payload: rooCodeEventsSchema.shape["taskAborted" /* TaskAborted */],
632
- taskId: import_zod3.z.number().optional()
706
+ taskId: import_zod4.z.number().optional()
633
707
  }),
634
- import_zod3.z.object({
635
- eventName: import_zod3.z.literal("taskFocused" /* TaskFocused */),
708
+ import_zod4.z.object({
709
+ eventName: import_zod4.z.literal("taskFocused" /* TaskFocused */),
636
710
  payload: rooCodeEventsSchema.shape["taskFocused" /* TaskFocused */],
637
- taskId: import_zod3.z.number().optional()
711
+ taskId: import_zod4.z.number().optional()
638
712
  }),
639
- import_zod3.z.object({
640
- eventName: import_zod3.z.literal("taskUnfocused" /* TaskUnfocused */),
713
+ import_zod4.z.object({
714
+ eventName: import_zod4.z.literal("taskUnfocused" /* TaskUnfocused */),
641
715
  payload: rooCodeEventsSchema.shape["taskUnfocused" /* TaskUnfocused */],
642
- taskId: import_zod3.z.number().optional()
716
+ taskId: import_zod4.z.number().optional()
643
717
  }),
644
- import_zod3.z.object({
645
- eventName: import_zod3.z.literal("taskActive" /* TaskActive */),
718
+ import_zod4.z.object({
719
+ eventName: import_zod4.z.literal("taskActive" /* TaskActive */),
646
720
  payload: rooCodeEventsSchema.shape["taskActive" /* TaskActive */],
647
- taskId: import_zod3.z.number().optional()
721
+ taskId: import_zod4.z.number().optional()
648
722
  }),
649
- import_zod3.z.object({
650
- eventName: import_zod3.z.literal("taskInteractive" /* TaskInteractive */),
723
+ import_zod4.z.object({
724
+ eventName: import_zod4.z.literal("taskInteractive" /* TaskInteractive */),
651
725
  payload: rooCodeEventsSchema.shape["taskInteractive" /* TaskInteractive */],
652
- taskId: import_zod3.z.number().optional()
726
+ taskId: import_zod4.z.number().optional()
653
727
  }),
654
- import_zod3.z.object({
655
- eventName: import_zod3.z.literal("taskResumable" /* TaskResumable */),
728
+ import_zod4.z.object({
729
+ eventName: import_zod4.z.literal("taskResumable" /* TaskResumable */),
656
730
  payload: rooCodeEventsSchema.shape["taskResumable" /* TaskResumable */],
657
- taskId: import_zod3.z.number().optional()
731
+ taskId: import_zod4.z.number().optional()
658
732
  }),
659
- import_zod3.z.object({
660
- eventName: import_zod3.z.literal("taskIdle" /* TaskIdle */),
733
+ import_zod4.z.object({
734
+ eventName: import_zod4.z.literal("taskIdle" /* TaskIdle */),
661
735
  payload: rooCodeEventsSchema.shape["taskIdle" /* TaskIdle */],
662
- taskId: import_zod3.z.number().optional()
736
+ taskId: import_zod4.z.number().optional()
663
737
  }),
664
738
  // Subtask Lifecycle
665
- import_zod3.z.object({
666
- eventName: import_zod3.z.literal("taskPaused" /* TaskPaused */),
739
+ import_zod4.z.object({
740
+ eventName: import_zod4.z.literal("taskPaused" /* TaskPaused */),
667
741
  payload: rooCodeEventsSchema.shape["taskPaused" /* TaskPaused */],
668
- taskId: import_zod3.z.number().optional()
742
+ taskId: import_zod4.z.number().optional()
669
743
  }),
670
- import_zod3.z.object({
671
- eventName: import_zod3.z.literal("taskUnpaused" /* TaskUnpaused */),
744
+ import_zod4.z.object({
745
+ eventName: import_zod4.z.literal("taskUnpaused" /* TaskUnpaused */),
672
746
  payload: rooCodeEventsSchema.shape["taskUnpaused" /* TaskUnpaused */],
673
- taskId: import_zod3.z.number().optional()
747
+ taskId: import_zod4.z.number().optional()
674
748
  }),
675
- import_zod3.z.object({
676
- eventName: import_zod3.z.literal("taskSpawned" /* TaskSpawned */),
749
+ import_zod4.z.object({
750
+ eventName: import_zod4.z.literal("taskSpawned" /* TaskSpawned */),
677
751
  payload: rooCodeEventsSchema.shape["taskSpawned" /* TaskSpawned */],
678
- taskId: import_zod3.z.number().optional()
752
+ taskId: import_zod4.z.number().optional()
679
753
  }),
680
- import_zod3.z.object({
681
- eventName: import_zod3.z.literal("taskDelegated" /* TaskDelegated */),
754
+ import_zod4.z.object({
755
+ eventName: import_zod4.z.literal("taskDelegated" /* TaskDelegated */),
682
756
  payload: rooCodeEventsSchema.shape["taskDelegated" /* TaskDelegated */],
683
- taskId: import_zod3.z.number().optional()
757
+ taskId: import_zod4.z.number().optional()
684
758
  }),
685
- import_zod3.z.object({
686
- eventName: import_zod3.z.literal("taskDelegationCompleted" /* TaskDelegationCompleted */),
759
+ import_zod4.z.object({
760
+ eventName: import_zod4.z.literal("taskDelegationCompleted" /* TaskDelegationCompleted */),
687
761
  payload: rooCodeEventsSchema.shape["taskDelegationCompleted" /* TaskDelegationCompleted */],
688
- taskId: import_zod3.z.number().optional()
762
+ taskId: import_zod4.z.number().optional()
689
763
  }),
690
- import_zod3.z.object({
691
- eventName: import_zod3.z.literal("taskDelegationResumed" /* TaskDelegationResumed */),
764
+ import_zod4.z.object({
765
+ eventName: import_zod4.z.literal("taskDelegationResumed" /* TaskDelegationResumed */),
692
766
  payload: rooCodeEventsSchema.shape["taskDelegationResumed" /* TaskDelegationResumed */],
693
- taskId: import_zod3.z.number().optional()
767
+ taskId: import_zod4.z.number().optional()
694
768
  }),
695
769
  // Task Execution
696
- import_zod3.z.object({
697
- eventName: import_zod3.z.literal("message" /* Message */),
770
+ import_zod4.z.object({
771
+ eventName: import_zod4.z.literal("message" /* Message */),
698
772
  payload: rooCodeEventsSchema.shape["message" /* Message */],
699
- taskId: import_zod3.z.number().optional()
773
+ taskId: import_zod4.z.number().optional()
700
774
  }),
701
- import_zod3.z.object({
702
- eventName: import_zod3.z.literal("taskModeSwitched" /* TaskModeSwitched */),
775
+ import_zod4.z.object({
776
+ eventName: import_zod4.z.literal("taskModeSwitched" /* TaskModeSwitched */),
703
777
  payload: rooCodeEventsSchema.shape["taskModeSwitched" /* TaskModeSwitched */],
704
- taskId: import_zod3.z.number().optional()
778
+ taskId: import_zod4.z.number().optional()
705
779
  }),
706
- import_zod3.z.object({
707
- eventName: import_zod3.z.literal("taskAskResponded" /* TaskAskResponded */),
780
+ import_zod4.z.object({
781
+ eventName: import_zod4.z.literal("taskAskResponded" /* TaskAskResponded */),
708
782
  payload: rooCodeEventsSchema.shape["taskAskResponded" /* TaskAskResponded */],
709
- taskId: import_zod3.z.number().optional()
783
+ taskId: import_zod4.z.number().optional()
710
784
  }),
711
- import_zod3.z.object({
712
- eventName: import_zod3.z.literal("queuedMessagesUpdated" /* QueuedMessagesUpdated */),
785
+ import_zod4.z.object({
786
+ eventName: import_zod4.z.literal("queuedMessagesUpdated" /* QueuedMessagesUpdated */),
713
787
  payload: rooCodeEventsSchema.shape["queuedMessagesUpdated" /* QueuedMessagesUpdated */],
714
- taskId: import_zod3.z.number().optional()
788
+ taskId: import_zod4.z.number().optional()
715
789
  }),
716
790
  // Task Analytics
717
- import_zod3.z.object({
718
- eventName: import_zod3.z.literal("taskToolFailed" /* TaskToolFailed */),
791
+ import_zod4.z.object({
792
+ eventName: import_zod4.z.literal("taskToolFailed" /* TaskToolFailed */),
719
793
  payload: rooCodeEventsSchema.shape["taskToolFailed" /* TaskToolFailed */],
720
- taskId: import_zod3.z.number().optional()
794
+ taskId: import_zod4.z.number().optional()
721
795
  }),
722
- import_zod3.z.object({
723
- eventName: import_zod3.z.literal("taskTokenUsageUpdated" /* TaskTokenUsageUpdated */),
796
+ import_zod4.z.object({
797
+ eventName: import_zod4.z.literal("taskTokenUsageUpdated" /* TaskTokenUsageUpdated */),
724
798
  payload: rooCodeEventsSchema.shape["taskTokenUsageUpdated" /* TaskTokenUsageUpdated */],
725
- taskId: import_zod3.z.number().optional()
799
+ taskId: import_zod4.z.number().optional()
800
+ }),
801
+ // Query Responses
802
+ import_zod4.z.object({
803
+ eventName: import_zod4.z.literal("commandsResponse" /* CommandsResponse */),
804
+ payload: rooCodeEventsSchema.shape["commandsResponse" /* CommandsResponse */],
805
+ taskId: import_zod4.z.number().optional()
806
+ }),
807
+ import_zod4.z.object({
808
+ eventName: import_zod4.z.literal("modesResponse" /* ModesResponse */),
809
+ payload: rooCodeEventsSchema.shape["modesResponse" /* ModesResponse */],
810
+ taskId: import_zod4.z.number().optional()
811
+ }),
812
+ import_zod4.z.object({
813
+ eventName: import_zod4.z.literal("modelsResponse" /* ModelsResponse */),
814
+ payload: rooCodeEventsSchema.shape["modelsResponse" /* ModelsResponse */],
815
+ taskId: import_zod4.z.number().optional()
726
816
  }),
727
817
  // Evals
728
- import_zod3.z.object({
729
- eventName: import_zod3.z.literal("evalPass" /* EvalPass */),
730
- payload: import_zod3.z.undefined(),
731
- taskId: import_zod3.z.number()
818
+ import_zod4.z.object({
819
+ eventName: import_zod4.z.literal("evalPass" /* EvalPass */),
820
+ payload: import_zod4.z.undefined(),
821
+ taskId: import_zod4.z.number()
732
822
  }),
733
- import_zod3.z.object({
734
- eventName: import_zod3.z.literal("evalFail" /* EvalFail */),
735
- payload: import_zod3.z.undefined(),
736
- taskId: import_zod3.z.number()
823
+ import_zod4.z.object({
824
+ eventName: import_zod4.z.literal("evalFail" /* EvalFail */),
825
+ payload: import_zod4.z.undefined(),
826
+ taskId: import_zod4.z.number()
737
827
  })
738
828
  ]);
739
829
 
740
830
  // src/task.ts
741
- var import_zod4 = require("zod");
831
+ var import_zod5 = require("zod");
742
832
  var TaskStatus = /* @__PURE__ */ ((TaskStatus2) => {
743
833
  TaskStatus2["Running"] = "running";
744
834
  TaskStatus2["Interactive"] = "interactive";
@@ -747,9 +837,9 @@ var TaskStatus = /* @__PURE__ */ ((TaskStatus2) => {
747
837
  TaskStatus2["None"] = "none";
748
838
  return TaskStatus2;
749
839
  })(TaskStatus || {});
750
- var taskMetadataSchema = import_zod4.z.object({
751
- task: import_zod4.z.string().optional(),
752
- images: import_zod4.z.array(import_zod4.z.string()).optional()
840
+ var taskMetadataSchema = import_zod5.z.object({
841
+ task: import_zod5.z.string().optional(),
842
+ images: import_zod5.z.array(import_zod5.z.string()).optional()
753
843
  });
754
844
 
755
845
  // src/global-settings.ts
@@ -758,86 +848,6 @@ var import_zod14 = require("zod");
758
848
  // src/provider-settings.ts
759
849
  var import_zod8 = require("zod");
760
850
 
761
- // src/model.ts
762
- var import_zod5 = require("zod");
763
- var reasoningEfforts = ["low", "medium", "high"];
764
- var reasoningEffortsSchema = import_zod5.z.enum(reasoningEfforts);
765
- var reasoningEffortWithMinimalSchema = import_zod5.z.union([reasoningEffortsSchema, import_zod5.z.literal("minimal")]);
766
- var reasoningEffortsExtended = ["none", "minimal", "low", "medium", "high", "xhigh"];
767
- var reasoningEffortExtendedSchema = import_zod5.z.enum(reasoningEffortsExtended);
768
- var reasoningEffortSettingValues = ["disable", "none", "minimal", "low", "medium", "high", "xhigh"];
769
- var reasoningEffortSettingSchema = import_zod5.z.enum(reasoningEffortSettingValues);
770
- var verbosityLevels = ["low", "medium", "high"];
771
- var verbosityLevelsSchema = import_zod5.z.enum(verbosityLevels);
772
- var serviceTiers = ["default", "flex", "priority"];
773
- var serviceTierSchema = import_zod5.z.enum(serviceTiers);
774
- var modelParameters = ["max_tokens", "temperature", "reasoning", "include_reasoning"];
775
- var modelParametersSchema = import_zod5.z.enum(modelParameters);
776
- var isModelParameter = (value) => modelParameters.includes(value);
777
- var modelInfoSchema = import_zod5.z.object({
778
- maxTokens: import_zod5.z.number().nullish(),
779
- maxThinkingTokens: import_zod5.z.number().nullish(),
780
- contextWindow: import_zod5.z.number(),
781
- supportsImages: import_zod5.z.boolean().optional(),
782
- supportsPromptCache: import_zod5.z.boolean(),
783
- // Optional default prompt cache retention policy for providers that support it.
784
- // When set to "24h", extended prompt caching will be requested; when omitted
785
- // or set to "in_memory", the default in‑memory cache is used.
786
- promptCacheRetention: import_zod5.z.enum(["in_memory", "24h"]).optional(),
787
- // Capability flag to indicate whether the model supports an output verbosity parameter
788
- supportsVerbosity: import_zod5.z.boolean().optional(),
789
- supportsReasoningBudget: import_zod5.z.boolean().optional(),
790
- // Capability flag to indicate whether the model supports simple on/off binary reasoning
791
- supportsReasoningBinary: import_zod5.z.boolean().optional(),
792
- // Capability flag to indicate whether the model supports temperature parameter
793
- supportsTemperature: import_zod5.z.boolean().optional(),
794
- defaultTemperature: import_zod5.z.number().optional(),
795
- requiredReasoningBudget: import_zod5.z.boolean().optional(),
796
- supportsReasoningEffort: import_zod5.z.union([import_zod5.z.boolean(), import_zod5.z.array(import_zod5.z.enum(["disable", "none", "minimal", "low", "medium", "high", "xhigh"]))]).optional(),
797
- requiredReasoningEffort: import_zod5.z.boolean().optional(),
798
- preserveReasoning: import_zod5.z.boolean().optional(),
799
- supportedParameters: import_zod5.z.array(modelParametersSchema).optional(),
800
- inputPrice: import_zod5.z.number().optional(),
801
- outputPrice: import_zod5.z.number().optional(),
802
- cacheWritesPrice: import_zod5.z.number().optional(),
803
- cacheReadsPrice: import_zod5.z.number().optional(),
804
- description: import_zod5.z.string().optional(),
805
- // Default effort value for models that support reasoning effort
806
- reasoningEffort: reasoningEffortExtendedSchema.optional(),
807
- minTokensPerCachePoint: import_zod5.z.number().optional(),
808
- maxCachePoints: import_zod5.z.number().optional(),
809
- cachableFields: import_zod5.z.array(import_zod5.z.string()).optional(),
810
- // Flag to indicate if the model is deprecated and should not be used
811
- deprecated: import_zod5.z.boolean().optional(),
812
- // Flag to indicate if the model should hide vendor/company identity in responses
813
- isStealthModel: import_zod5.z.boolean().optional(),
814
- // Flag to indicate if the model is free (no cost)
815
- isFree: import_zod5.z.boolean().optional(),
816
- // Exclude specific native tools from being available (only applies to native protocol)
817
- // These tools will be removed from the set of tools available to the model
818
- excludedTools: import_zod5.z.array(import_zod5.z.string()).optional(),
819
- // Include specific native tools (only applies to native protocol)
820
- // These tools will be added if they belong to an allowed group in the current mode
821
- // Cannot force-add tools from groups the mode doesn't allow
822
- includedTools: import_zod5.z.array(import_zod5.z.string()).optional(),
823
- /**
824
- * Service tiers with pricing information.
825
- * Each tier can have a name (for OpenAI service tiers) and pricing overrides.
826
- * The top-level input/output/cache* fields represent the default/standard tier.
827
- */
828
- tiers: import_zod5.z.array(
829
- import_zod5.z.object({
830
- name: serviceTierSchema.optional(),
831
- // Service tier name (flex, priority, etc.)
832
- contextWindow: import_zod5.z.number(),
833
- inputPrice: import_zod5.z.number().optional(),
834
- outputPrice: import_zod5.z.number().optional(),
835
- cacheWritesPrice: import_zod5.z.number().optional(),
836
- cacheReadsPrice: import_zod5.z.number().optional()
837
- })
838
- ).optional()
839
- });
840
-
841
851
  // src/codebase-index.ts
842
852
  var import_zod6 = require("zod");
843
853
  var CODEBASE_INDEX_DEFAULTS = {
@@ -1137,1090 +1147,1051 @@ var anthropicModels = {
1137
1147
  };
1138
1148
  var ANTHROPIC_DEFAULT_MAX_TOKENS = 8192;
1139
1149
 
1140
- // src/providers/baseten.ts
1141
- var basetenModels = {
1142
- "moonshotai/Kimi-K2-Thinking": {
1143
- maxTokens: 16384,
1144
- contextWindow: 262e3,
1145
- supportsImages: false,
1146
- supportsPromptCache: false,
1147
- inputPrice: 0.6,
1148
- outputPrice: 2.5,
1149
- cacheWritesPrice: 0,
1150
- cacheReadsPrice: 0,
1151
- description: "Kimi K2 Thinking - A model with enhanced reasoning capabilities from Kimi K2"
1152
- },
1153
- "zai-org/GLM-4.6": {
1154
- maxTokens: 16384,
1150
+ // src/providers/azure.ts
1151
+ var azureModels = {
1152
+ "codex-mini": {
1153
+ maxTokens: 1e5,
1155
1154
  contextWindow: 2e5,
1156
1155
  supportsImages: false,
1157
- supportsPromptCache: false,
1158
- inputPrice: 0.6,
1159
- outputPrice: 2.2,
1160
- cacheWritesPrice: 0,
1161
- cacheReadsPrice: 0,
1162
- description: "Frontier open model with advanced agentic, reasoning and coding capabilities"
1156
+ supportsPromptCache: true,
1157
+ inputPrice: 1.5,
1158
+ outputPrice: 6,
1159
+ cacheReadsPrice: 0.375,
1160
+ supportsTemperature: false,
1161
+ description: "Codex Mini: Cloud-based software engineering agent powered by codex-1, a version of o3 optimized for coding tasks"
1163
1162
  },
1164
- "deepseek-ai/DeepSeek-R1": {
1165
- maxTokens: 16384,
1166
- contextWindow: 163840,
1163
+ "gpt-4": {
1164
+ maxTokens: 8192,
1165
+ contextWindow: 8192,
1167
1166
  supportsImages: false,
1168
1167
  supportsPromptCache: false,
1169
- inputPrice: 2.55,
1170
- outputPrice: 5.95,
1171
- cacheWritesPrice: 0,
1172
- cacheReadsPrice: 0,
1173
- description: "DeepSeek's first-generation reasoning model"
1168
+ inputPrice: 60,
1169
+ outputPrice: 120,
1170
+ supportsTemperature: true,
1171
+ description: "GPT-4"
1174
1172
  },
1175
- "deepseek-ai/DeepSeek-R1-0528": {
1176
- maxTokens: 16384,
1177
- contextWindow: 163840,
1173
+ "gpt-4-32k": {
1174
+ maxTokens: 32768,
1175
+ contextWindow: 32768,
1178
1176
  supportsImages: false,
1179
1177
  supportsPromptCache: false,
1180
- inputPrice: 2.55,
1181
- outputPrice: 5.95,
1182
- cacheWritesPrice: 0,
1183
- cacheReadsPrice: 0,
1184
- description: "The latest revision of DeepSeek's first-generation reasoning model"
1178
+ inputPrice: 60,
1179
+ outputPrice: 120,
1180
+ supportsTemperature: true,
1181
+ description: "GPT-4 32K"
1185
1182
  },
1186
- "deepseek-ai/DeepSeek-V3-0324": {
1187
- maxTokens: 16384,
1188
- contextWindow: 163840,
1189
- supportsImages: false,
1183
+ "gpt-4-turbo": {
1184
+ maxTokens: 4096,
1185
+ contextWindow: 128e3,
1186
+ supportsImages: true,
1190
1187
  supportsPromptCache: false,
1191
- inputPrice: 0.77,
1192
- outputPrice: 0.77,
1193
- cacheWritesPrice: 0,
1194
- cacheReadsPrice: 0,
1195
- description: "Fast general-purpose LLM with enhanced reasoning capabilities"
1188
+ inputPrice: 10,
1189
+ outputPrice: 30,
1190
+ supportsTemperature: true,
1191
+ description: "GPT-4 Turbo"
1196
1192
  },
1197
- "deepseek-ai/DeepSeek-V3.1": {
1198
- maxTokens: 16384,
1199
- contextWindow: 163840,
1200
- supportsImages: false,
1193
+ "gpt-4-turbo-vision": {
1194
+ maxTokens: 4096,
1195
+ contextWindow: 128e3,
1196
+ supportsImages: true,
1201
1197
  supportsPromptCache: false,
1202
- inputPrice: 0.5,
1203
- outputPrice: 1.5,
1204
- cacheWritesPrice: 0,
1205
- cacheReadsPrice: 0,
1206
- description: "Extremely capable general-purpose LLM with hybrid reasoning capabilities and advanced tool calling"
1198
+ inputPrice: 10,
1199
+ outputPrice: 30,
1200
+ supportsTemperature: true,
1201
+ description: "GPT-4 Turbo Vision"
1207
1202
  },
1208
- "deepseek-ai/DeepSeek-V3.2": {
1209
- maxTokens: 16384,
1210
- contextWindow: 163840,
1211
- supportsImages: false,
1212
- supportsPromptCache: false,
1213
- inputPrice: 0.3,
1214
- outputPrice: 0.45,
1215
- cacheWritesPrice: 0,
1216
- cacheReadsPrice: 0,
1217
- description: "DeepSeek's hybrid reasoning model with efficient long context scaling with GPT-5 level performance"
1203
+ "gpt-4.1": {
1204
+ maxTokens: 32768,
1205
+ contextWindow: 1047576,
1206
+ supportsImages: true,
1207
+ supportsPromptCache: true,
1208
+ inputPrice: 2,
1209
+ outputPrice: 8,
1210
+ cacheReadsPrice: 0.5,
1211
+ supportsTemperature: true,
1212
+ description: "GPT-4.1"
1218
1213
  },
1219
- "openai/gpt-oss-120b": {
1220
- maxTokens: 16384,
1221
- contextWindow: 128072,
1222
- supportsImages: false,
1223
- supportsPromptCache: false,
1214
+ "gpt-4.1-mini": {
1215
+ maxTokens: 32768,
1216
+ contextWindow: 1047576,
1217
+ supportsImages: true,
1218
+ supportsPromptCache: true,
1219
+ inputPrice: 0.4,
1220
+ outputPrice: 1.6,
1221
+ cacheReadsPrice: 0.1,
1222
+ supportsTemperature: true,
1223
+ description: "GPT-4.1 mini"
1224
+ },
1225
+ "gpt-4.1-nano": {
1226
+ maxTokens: 32768,
1227
+ contextWindow: 1047576,
1228
+ supportsImages: true,
1229
+ supportsPromptCache: true,
1224
1230
  inputPrice: 0.1,
1225
- outputPrice: 0.5,
1226
- cacheWritesPrice: 0,
1227
- cacheReadsPrice: 0,
1228
- description: "Extremely capable general-purpose LLM with strong, controllable reasoning capabilities"
1231
+ outputPrice: 0.4,
1232
+ cacheReadsPrice: 0.03,
1233
+ supportsTemperature: true,
1234
+ description: "GPT-4.1 nano"
1229
1235
  },
1230
- "Qwen/Qwen3-235B-A22B-Instruct-2507": {
1236
+ "gpt-4o": {
1231
1237
  maxTokens: 16384,
1232
- contextWindow: 262144,
1233
- supportsImages: false,
1234
- supportsPromptCache: false,
1235
- inputPrice: 0.22,
1236
- outputPrice: 0.8,
1237
- cacheWritesPrice: 0,
1238
- cacheReadsPrice: 0,
1239
- description: "Mixture-of-experts LLM with math and reasoning capabilities"
1238
+ contextWindow: 128e3,
1239
+ supportsImages: true,
1240
+ supportsPromptCache: true,
1241
+ inputPrice: 2.5,
1242
+ outputPrice: 10,
1243
+ cacheReadsPrice: 1.25,
1244
+ supportsTemperature: true,
1245
+ description: "GPT-4o"
1240
1246
  },
1241
- "Qwen/Qwen3-Coder-480B-A35B-Instruct": {
1247
+ "gpt-4o-mini": {
1242
1248
  maxTokens: 16384,
1243
- contextWindow: 262144,
1244
- supportsImages: false,
1245
- supportsPromptCache: false,
1246
- inputPrice: 0.38,
1247
- outputPrice: 1.53,
1248
- cacheWritesPrice: 0,
1249
- cacheReadsPrice: 0,
1250
- description: "Mixture-of-experts LLM with advanced coding and reasoning capabilities"
1251
- },
1252
- "moonshotai/Kimi-K2-Instruct-0905": {
1253
- maxTokens: 16384,
1254
- contextWindow: 262e3,
1255
- supportsImages: false,
1256
- supportsPromptCache: false,
1257
- inputPrice: 0.6,
1258
- outputPrice: 2.5,
1259
- cacheWritesPrice: 0,
1260
- cacheReadsPrice: 0,
1261
- description: "State of the art language model for agentic and coding tasks. September Update."
1262
- }
1263
- };
1264
- var basetenDefaultModelId = "zai-org/GLM-4.6";
1265
-
1266
- // src/providers/bedrock.ts
1267
- var bedrockDefaultModelId = "anthropic.claude-sonnet-4-5-20250929-v1:0";
1268
- var bedrockDefaultPromptRouterModelId = "anthropic.claude-3-sonnet-20240229-v1:0";
1269
- var bedrockModels = {
1270
- "anthropic.claude-sonnet-4-5-20250929-v1:0": {
1271
- maxTokens: 8192,
1272
- contextWindow: 2e5,
1249
+ contextWindow: 128e3,
1273
1250
  supportsImages: true,
1274
1251
  supportsPromptCache: true,
1275
- supportsReasoningBudget: true,
1276
- inputPrice: 3,
1277
- outputPrice: 15,
1278
- cacheWritesPrice: 3.75,
1279
- cacheReadsPrice: 0.3,
1280
- minTokensPerCachePoint: 1024,
1281
- maxCachePoints: 4,
1282
- cachableFields: ["system", "messages", "tools"]
1252
+ inputPrice: 0.15,
1253
+ outputPrice: 0.6,
1254
+ cacheReadsPrice: 0.08,
1255
+ supportsTemperature: true,
1256
+ description: "GPT-4o mini"
1283
1257
  },
1284
- "amazon.nova-pro-v1:0": {
1285
- maxTokens: 5e3,
1286
- contextWindow: 3e5,
1258
+ "gpt-5": {
1259
+ maxTokens: 128e3,
1260
+ contextWindow: 272e3,
1261
+ includedTools: ["apply_patch"],
1262
+ excludedTools: ["apply_diff", "write_to_file"],
1287
1263
  supportsImages: true,
1288
1264
  supportsPromptCache: true,
1289
- inputPrice: 0.8,
1290
- outputPrice: 3.2,
1291
- cacheWritesPrice: 0.8,
1292
- // per million tokens
1293
- cacheReadsPrice: 0.2,
1294
- // per million tokens
1295
- minTokensPerCachePoint: 1,
1296
- maxCachePoints: 1,
1297
- cachableFields: ["system"]
1298
- },
1299
- "amazon.nova-pro-latency-optimized-v1:0": {
1300
- maxTokens: 5e3,
1301
- contextWindow: 3e5,
1302
- supportsImages: true,
1303
- supportsPromptCache: false,
1304
- inputPrice: 1,
1305
- outputPrice: 4,
1306
- cacheWritesPrice: 1,
1307
- // per million tokens
1308
- cacheReadsPrice: 0.25,
1309
- // per million tokens
1310
- description: "Amazon Nova Pro with latency optimized inference"
1265
+ supportsReasoningEffort: ["minimal", "low", "medium", "high"],
1266
+ reasoningEffort: "medium",
1267
+ inputPrice: 1.25,
1268
+ outputPrice: 10,
1269
+ cacheReadsPrice: 0.13,
1270
+ supportsVerbosity: true,
1271
+ supportsTemperature: false,
1272
+ description: "GPT-5: The best model for coding and agentic tasks across domains"
1311
1273
  },
1312
- "amazon.nova-lite-v1:0": {
1313
- maxTokens: 5e3,
1314
- contextWindow: 3e5,
1274
+ "gpt-5-codex": {
1275
+ maxTokens: 128e3,
1276
+ contextWindow: 4e5,
1277
+ includedTools: ["apply_patch"],
1278
+ excludedTools: ["apply_diff", "write_to_file"],
1315
1279
  supportsImages: true,
1316
1280
  supportsPromptCache: true,
1317
- inputPrice: 0.06,
1318
- outputPrice: 0.24,
1319
- cacheWritesPrice: 0.06,
1320
- // per million tokens
1321
- cacheReadsPrice: 0.015,
1322
- // per million tokens
1323
- minTokensPerCachePoint: 1,
1324
- maxCachePoints: 1,
1325
- cachableFields: ["system"]
1281
+ supportsReasoningEffort: ["low", "medium", "high"],
1282
+ reasoningEffort: "medium",
1283
+ inputPrice: 1.25,
1284
+ outputPrice: 10,
1285
+ cacheReadsPrice: 0.13,
1286
+ supportsTemperature: false,
1287
+ description: "GPT-5-Codex: A version of GPT-5 optimized for agentic coding in Codex"
1326
1288
  },
1327
- "amazon.nova-2-lite-v1:0": {
1328
- maxTokens: 65535,
1329
- contextWindow: 1e6,
1289
+ "gpt-5-mini": {
1290
+ maxTokens: 128e3,
1291
+ contextWindow: 272e3,
1292
+ includedTools: ["apply_patch"],
1293
+ excludedTools: ["apply_diff", "write_to_file"],
1330
1294
  supportsImages: true,
1331
1295
  supportsPromptCache: true,
1332
- inputPrice: 0.33,
1333
- outputPrice: 2.75,
1334
- cacheWritesPrice: 0,
1335
- cacheReadsPrice: 0.0825,
1336
- // 75% less than input price
1337
- minTokensPerCachePoint: 1,
1338
- maxCachePoints: 1,
1339
- cachableFields: ["system"],
1340
- description: "Amazon Nova 2 Lite - Comparable to Claude Haiku 4.5"
1341
- },
1342
- "amazon.nova-micro-v1:0": {
1343
- maxTokens: 5e3,
1344
- contextWindow: 128e3,
1345
- supportsImages: false,
1346
- supportsPromptCache: true,
1347
- inputPrice: 0.035,
1348
- outputPrice: 0.14,
1349
- cacheWritesPrice: 0.035,
1350
- // per million tokens
1351
- cacheReadsPrice: 875e-5,
1352
- // per million tokens
1353
- minTokensPerCachePoint: 1,
1354
- maxCachePoints: 1,
1355
- cachableFields: ["system"]
1296
+ supportsReasoningEffort: ["minimal", "low", "medium", "high"],
1297
+ reasoningEffort: "medium",
1298
+ inputPrice: 0.25,
1299
+ outputPrice: 2,
1300
+ cacheReadsPrice: 0.03,
1301
+ supportsVerbosity: true,
1302
+ supportsTemperature: false,
1303
+ description: "GPT-5 Mini: A faster, more cost-efficient version of GPT-5 for well-defined tasks"
1356
1304
  },
1357
- "anthropic.claude-sonnet-4-20250514-v1:0": {
1358
- maxTokens: 8192,
1359
- contextWindow: 2e5,
1305
+ "gpt-5-nano": {
1306
+ maxTokens: 128e3,
1307
+ contextWindow: 272e3,
1308
+ includedTools: ["apply_patch"],
1309
+ excludedTools: ["apply_diff", "write_to_file"],
1360
1310
  supportsImages: true,
1361
1311
  supportsPromptCache: true,
1362
- supportsReasoningBudget: true,
1363
- inputPrice: 3,
1364
- outputPrice: 15,
1365
- cacheWritesPrice: 3.75,
1366
- cacheReadsPrice: 0.3,
1367
- minTokensPerCachePoint: 1024,
1368
- maxCachePoints: 4,
1369
- cachableFields: ["system", "messages", "tools"]
1312
+ supportsReasoningEffort: ["minimal", "low", "medium", "high"],
1313
+ reasoningEffort: "medium",
1314
+ inputPrice: 0.05,
1315
+ outputPrice: 0.4,
1316
+ cacheReadsPrice: 0.01,
1317
+ supportsVerbosity: true,
1318
+ supportsTemperature: false,
1319
+ description: "GPT-5 Nano: Fastest, most cost-efficient version of GPT-5"
1370
1320
  },
1371
- "anthropic.claude-opus-4-1-20250805-v1:0": {
1372
- maxTokens: 8192,
1373
- contextWindow: 2e5,
1321
+ "gpt-5-pro": {
1322
+ maxTokens: 272e3,
1323
+ contextWindow: 4e5,
1324
+ includedTools: ["apply_patch"],
1325
+ excludedTools: ["apply_diff", "write_to_file"],
1374
1326
  supportsImages: true,
1375
- supportsPromptCache: true,
1376
- supportsReasoningBudget: true,
1327
+ supportsPromptCache: false,
1328
+ supportsReasoningEffort: ["minimal", "low", "medium", "high"],
1329
+ reasoningEffort: "medium",
1377
1330
  inputPrice: 15,
1378
- outputPrice: 75,
1379
- cacheWritesPrice: 18.75,
1380
- cacheReadsPrice: 1.5,
1381
- minTokensPerCachePoint: 1024,
1382
- maxCachePoints: 4,
1383
- cachableFields: ["system", "messages", "tools"]
1331
+ outputPrice: 120,
1332
+ supportsVerbosity: true,
1333
+ supportsTemperature: false,
1334
+ description: "GPT-5 Pro"
1384
1335
  },
1385
- "anthropic.claude-opus-4-6-v1": {
1386
- maxTokens: 8192,
1387
- contextWindow: 2e5,
1388
- // Default 200K, extendable to 1M with beta flag 'context-1m-2025-08-07'
1336
+ "gpt-5.1": {
1337
+ maxTokens: 128e3,
1338
+ contextWindow: 272e3,
1339
+ includedTools: ["apply_patch"],
1340
+ excludedTools: ["apply_diff", "write_to_file"],
1389
1341
  supportsImages: true,
1390
1342
  supportsPromptCache: true,
1391
- supportsReasoningBudget: true,
1392
- inputPrice: 5,
1393
- // $5 per million input tokens (≤200K context)
1394
- outputPrice: 25,
1395
- // $25 per million output tokens (≤200K context)
1396
- cacheWritesPrice: 6.25,
1397
- // $6.25 per million tokens
1398
- cacheReadsPrice: 0.5,
1399
- // $0.50 per million tokens
1400
- minTokensPerCachePoint: 1024,
1401
- maxCachePoints: 4,
1402
- cachableFields: ["system", "messages", "tools"],
1403
- // Tiered pricing for extended context (requires beta flag 'context-1m-2025-08-07')
1404
- tiers: [
1405
- {
1406
- contextWindow: 1e6,
1407
- // 1M tokens with beta flag
1408
- inputPrice: 10,
1409
- // $10 per million input tokens (>200K context)
1410
- outputPrice: 37.5,
1411
- // $37.50 per million output tokens (>200K context)
1412
- cacheWritesPrice: 12.5,
1413
- // $12.50 per million tokens (>200K context)
1414
- cacheReadsPrice: 1
1415
- // $1.00 per million tokens (>200K context)
1416
- }
1417
- ]
1343
+ promptCacheRetention: "24h",
1344
+ supportsReasoningEffort: ["none", "low", "medium", "high"],
1345
+ reasoningEffort: "medium",
1346
+ inputPrice: 1.25,
1347
+ outputPrice: 10,
1348
+ cacheReadsPrice: 0.125,
1349
+ supportsVerbosity: true,
1350
+ supportsTemperature: false,
1351
+ description: "GPT-5.1: The best model for coding and agentic tasks across domains"
1418
1352
  },
1419
- "anthropic.claude-opus-4-5-20251101-v1:0": {
1420
- maxTokens: 8192,
1421
- contextWindow: 2e5,
1353
+ "gpt-5.1-chat": {
1354
+ maxTokens: 16384,
1355
+ contextWindow: 128e3,
1356
+ includedTools: ["apply_patch"],
1357
+ excludedTools: ["apply_diff", "write_to_file"],
1422
1358
  supportsImages: true,
1423
1359
  supportsPromptCache: true,
1424
- supportsReasoningBudget: true,
1425
- inputPrice: 5,
1426
- outputPrice: 25,
1427
- cacheWritesPrice: 6.25,
1428
- cacheReadsPrice: 0.5,
1429
- minTokensPerCachePoint: 1024,
1430
- maxCachePoints: 4,
1431
- cachableFields: ["system", "messages", "tools"]
1360
+ promptCacheRetention: "24h",
1361
+ inputPrice: 1.25,
1362
+ outputPrice: 10,
1363
+ cacheReadsPrice: 0.125,
1364
+ supportsTemperature: false,
1365
+ description: "GPT-5.1 Chat: Optimized for conversational AI and chat use cases"
1432
1366
  },
1433
- "anthropic.claude-opus-4-20250514-v1:0": {
1434
- maxTokens: 8192,
1435
- contextWindow: 2e5,
1367
+ "gpt-5.1-codex": {
1368
+ maxTokens: 128e3,
1369
+ contextWindow: 4e5,
1370
+ includedTools: ["apply_patch"],
1371
+ excludedTools: ["apply_diff", "write_to_file"],
1436
1372
  supportsImages: true,
1437
1373
  supportsPromptCache: true,
1438
- supportsReasoningBudget: true,
1439
- inputPrice: 15,
1440
- outputPrice: 75,
1441
- cacheWritesPrice: 18.75,
1442
- cacheReadsPrice: 1.5,
1443
- minTokensPerCachePoint: 1024,
1444
- maxCachePoints: 4,
1445
- cachableFields: ["system", "messages", "tools"]
1446
- },
1447
- "anthropic.claude-3-7-sonnet-20250219-v1:0": {
1448
- maxTokens: 8192,
1449
- contextWindow: 2e5,
1374
+ promptCacheRetention: "24h",
1375
+ supportsReasoningEffort: ["low", "medium", "high"],
1376
+ reasoningEffort: "medium",
1377
+ inputPrice: 1.25,
1378
+ outputPrice: 10,
1379
+ cacheReadsPrice: 0.125,
1380
+ supportsTemperature: false,
1381
+ description: "GPT-5.1 Codex: A version of GPT-5.1 optimized for agentic coding in Codex"
1382
+ },
1383
+ "gpt-5.1-codex-max": {
1384
+ maxTokens: 128e3,
1385
+ contextWindow: 4e5,
1386
+ includedTools: ["apply_patch"],
1387
+ excludedTools: ["apply_diff", "write_to_file"],
1450
1388
  supportsImages: true,
1451
1389
  supportsPromptCache: true,
1452
- supportsReasoningBudget: true,
1453
- inputPrice: 3,
1454
- outputPrice: 15,
1455
- cacheWritesPrice: 3.75,
1456
- cacheReadsPrice: 0.3,
1457
- minTokensPerCachePoint: 1024,
1458
- maxCachePoints: 4,
1459
- cachableFields: ["system", "messages", "tools"]
1390
+ promptCacheRetention: "24h",
1391
+ supportsReasoningEffort: ["low", "medium", "high", "xhigh"],
1392
+ reasoningEffort: "medium",
1393
+ inputPrice: 1.25,
1394
+ outputPrice: 10,
1395
+ cacheReadsPrice: 0.125,
1396
+ supportsTemperature: false,
1397
+ description: "GPT-5.1 Codex Max: Our most intelligent coding model optimized for long-horizon, agentic coding tasks"
1460
1398
  },
1461
- "anthropic.claude-3-5-sonnet-20241022-v2:0": {
1462
- maxTokens: 8192,
1463
- contextWindow: 2e5,
1399
+ "gpt-5.1-codex-mini": {
1400
+ maxTokens: 128e3,
1401
+ contextWindow: 4e5,
1402
+ includedTools: ["apply_patch"],
1403
+ excludedTools: ["apply_diff", "write_to_file"],
1464
1404
  supportsImages: true,
1465
1405
  supportsPromptCache: true,
1466
- inputPrice: 3,
1467
- outputPrice: 15,
1468
- cacheWritesPrice: 3.75,
1469
- cacheReadsPrice: 0.3,
1470
- minTokensPerCachePoint: 1024,
1471
- maxCachePoints: 4,
1472
- cachableFields: ["system", "messages", "tools"]
1406
+ promptCacheRetention: "24h",
1407
+ supportsReasoningEffort: ["low", "medium", "high"],
1408
+ reasoningEffort: "medium",
1409
+ inputPrice: 0.25,
1410
+ outputPrice: 2,
1411
+ cacheReadsPrice: 0.025,
1412
+ supportsTemperature: false,
1413
+ description: "GPT-5.1 Codex mini: A version of GPT-5.1 optimized for agentic coding in Codex"
1473
1414
  },
1474
- "anthropic.claude-3-5-haiku-20241022-v1:0": {
1475
- maxTokens: 8192,
1476
- contextWindow: 2e5,
1477
- supportsImages: false,
1415
+ "gpt-5.2": {
1416
+ maxTokens: 128e3,
1417
+ contextWindow: 4e5,
1418
+ includedTools: ["apply_patch"],
1419
+ excludedTools: ["apply_diff", "write_to_file"],
1420
+ supportsImages: true,
1478
1421
  supportsPromptCache: true,
1479
- inputPrice: 0.8,
1480
- outputPrice: 4,
1481
- cacheWritesPrice: 1,
1482
- cacheReadsPrice: 0.08,
1483
- minTokensPerCachePoint: 2048,
1484
- maxCachePoints: 4,
1485
- cachableFields: ["system", "messages", "tools"]
1422
+ promptCacheRetention: "24h",
1423
+ supportsReasoningEffort: ["none", "low", "medium", "high", "xhigh"],
1424
+ reasoningEffort: "medium",
1425
+ inputPrice: 1.75,
1426
+ outputPrice: 14,
1427
+ cacheReadsPrice: 0.125,
1428
+ supportsVerbosity: true,
1429
+ supportsTemperature: false,
1430
+ description: "GPT-5.2: Our flagship model for coding and agentic tasks across industries"
1486
1431
  },
1487
- "anthropic.claude-haiku-4-5-20251001-v1:0": {
1488
- maxTokens: 8192,
1489
- contextWindow: 2e5,
1432
+ "gpt-5.2-chat": {
1433
+ maxTokens: 16384,
1434
+ contextWindow: 128e3,
1435
+ includedTools: ["apply_patch"],
1436
+ excludedTools: ["apply_diff", "write_to_file"],
1490
1437
  supportsImages: true,
1491
1438
  supportsPromptCache: true,
1492
- supportsReasoningBudget: true,
1493
- inputPrice: 1,
1494
- outputPrice: 5,
1495
- cacheWritesPrice: 1.25,
1496
- // 5m cache writes
1497
- cacheReadsPrice: 0.1,
1498
- // cache hits / refreshes
1499
- minTokensPerCachePoint: 2048,
1500
- maxCachePoints: 4,
1501
- cachableFields: ["system", "messages", "tools"]
1439
+ inputPrice: 1.75,
1440
+ outputPrice: 14,
1441
+ cacheReadsPrice: 0.175,
1442
+ supportsTemperature: false,
1443
+ description: "GPT-5.2 Chat: Optimized for conversational AI and chat use cases"
1502
1444
  },
1503
- "anthropic.claude-3-5-sonnet-20240620-v1:0": {
1504
- maxTokens: 8192,
1505
- contextWindow: 2e5,
1445
+ "gpt-5.2-codex": {
1446
+ maxTokens: 128e3,
1447
+ contextWindow: 4e5,
1448
+ includedTools: ["apply_patch"],
1449
+ excludedTools: ["apply_diff", "write_to_file"],
1506
1450
  supportsImages: true,
1507
- supportsPromptCache: false,
1508
- inputPrice: 3,
1509
- outputPrice: 15
1451
+ supportsPromptCache: true,
1452
+ promptCacheRetention: "24h",
1453
+ supportsReasoningEffort: ["low", "medium", "high", "xhigh"],
1454
+ reasoningEffort: "medium",
1455
+ inputPrice: 1.75,
1456
+ outputPrice: 14,
1457
+ cacheReadsPrice: 0.175,
1458
+ supportsTemperature: false,
1459
+ description: "GPT-5.2 Codex: Our most intelligent coding model optimized for long-horizon, agentic coding tasks"
1510
1460
  },
1511
- "anthropic.claude-3-opus-20240229-v1:0": {
1512
- maxTokens: 4096,
1461
+ o1: {
1462
+ maxTokens: 1e5,
1513
1463
  contextWindow: 2e5,
1514
1464
  supportsImages: true,
1515
- supportsPromptCache: false,
1465
+ supportsPromptCache: true,
1516
1466
  inputPrice: 15,
1517
- outputPrice: 75
1467
+ outputPrice: 60,
1468
+ cacheReadsPrice: 7.5,
1469
+ supportsTemperature: false,
1470
+ description: "o1"
1518
1471
  },
1519
- "anthropic.claude-3-sonnet-20240229-v1:0": {
1520
- maxTokens: 4096,
1472
+ "o1-mini": {
1473
+ maxTokens: 65536,
1474
+ contextWindow: 128e3,
1475
+ supportsImages: true,
1476
+ supportsPromptCache: true,
1477
+ inputPrice: 1.1,
1478
+ outputPrice: 4.4,
1479
+ cacheReadsPrice: 0.55,
1480
+ supportsTemperature: false,
1481
+ description: "o1-mini"
1482
+ },
1483
+ "o1-preview": {
1484
+ maxTokens: 32768,
1485
+ contextWindow: 128e3,
1486
+ supportsImages: true,
1487
+ supportsPromptCache: true,
1488
+ inputPrice: 16.5,
1489
+ outputPrice: 66,
1490
+ cacheReadsPrice: 8.25,
1491
+ supportsTemperature: false,
1492
+ description: "o1-preview"
1493
+ },
1494
+ o3: {
1495
+ maxTokens: 1e5,
1521
1496
  contextWindow: 2e5,
1522
1497
  supportsImages: true,
1523
- supportsPromptCache: false,
1524
- inputPrice: 3,
1525
- outputPrice: 15
1498
+ supportsPromptCache: true,
1499
+ supportsReasoningEffort: ["low", "medium", "high"],
1500
+ reasoningEffort: "medium",
1501
+ inputPrice: 2,
1502
+ outputPrice: 8,
1503
+ cacheReadsPrice: 0.5,
1504
+ supportsTemperature: false,
1505
+ description: "o3"
1526
1506
  },
1527
- "anthropic.claude-3-haiku-20240307-v1:0": {
1528
- maxTokens: 4096,
1507
+ "o3-mini": {
1508
+ maxTokens: 1e5,
1509
+ contextWindow: 2e5,
1510
+ supportsImages: false,
1511
+ supportsPromptCache: true,
1512
+ supportsReasoningEffort: ["low", "medium", "high"],
1513
+ reasoningEffort: "medium",
1514
+ inputPrice: 1.1,
1515
+ outputPrice: 4.4,
1516
+ cacheReadsPrice: 0.55,
1517
+ supportsTemperature: false,
1518
+ description: "o3-mini"
1519
+ },
1520
+ "o4-mini": {
1521
+ maxTokens: 1e5,
1529
1522
  contextWindow: 2e5,
1530
1523
  supportsImages: true,
1524
+ supportsPromptCache: true,
1525
+ supportsReasoningEffort: ["low", "medium", "high"],
1526
+ reasoningEffort: "medium",
1527
+ inputPrice: 1.1,
1528
+ outputPrice: 4.4,
1529
+ cacheReadsPrice: 0.28,
1530
+ supportsTemperature: false,
1531
+ description: "o4-mini"
1532
+ }
1533
+ };
1534
+ var azureDefaultModelId = "gpt-4o";
1535
+ var azureDefaultModelInfo = azureModels[azureDefaultModelId];
1536
+
1537
+ // src/providers/baseten.ts
1538
+ var basetenModels = {
1539
+ "moonshotai/Kimi-K2-Thinking": {
1540
+ maxTokens: 16384,
1541
+ contextWindow: 262e3,
1542
+ supportsImages: false,
1531
1543
  supportsPromptCache: false,
1532
- inputPrice: 0.25,
1533
- outputPrice: 1.25
1544
+ inputPrice: 0.6,
1545
+ outputPrice: 2.5,
1546
+ cacheWritesPrice: 0,
1547
+ cacheReadsPrice: 0,
1548
+ description: "Kimi K2 Thinking - A model with enhanced reasoning capabilities from Kimi K2"
1534
1549
  },
1535
- "deepseek.r1-v1:0": {
1536
- maxTokens: 32768,
1537
- contextWindow: 128e3,
1550
+ "zai-org/GLM-4.6": {
1551
+ maxTokens: 16384,
1552
+ contextWindow: 2e5,
1538
1553
  supportsImages: false,
1539
1554
  supportsPromptCache: false,
1540
- inputPrice: 1.35,
1541
- outputPrice: 5.4
1555
+ inputPrice: 0.6,
1556
+ outputPrice: 2.2,
1557
+ cacheWritesPrice: 0,
1558
+ cacheReadsPrice: 0,
1559
+ description: "Frontier open model with advanced agentic, reasoning and coding capabilities"
1542
1560
  },
1543
- "openai.gpt-oss-20b-1:0": {
1544
- maxTokens: 8192,
1545
- contextWindow: 128e3,
1561
+ "deepseek-ai/DeepSeek-R1": {
1562
+ maxTokens: 16384,
1563
+ contextWindow: 163840,
1546
1564
  supportsImages: false,
1547
1565
  supportsPromptCache: false,
1548
- inputPrice: 0.5,
1549
- outputPrice: 1.5,
1550
- description: "GPT-OSS 20B - Optimized for low latency and local/specialized use cases"
1566
+ inputPrice: 2.55,
1567
+ outputPrice: 5.95,
1568
+ cacheWritesPrice: 0,
1569
+ cacheReadsPrice: 0,
1570
+ description: "DeepSeek's first-generation reasoning model"
1551
1571
  },
1552
- "openai.gpt-oss-120b-1:0": {
1553
- maxTokens: 8192,
1554
- contextWindow: 128e3,
1572
+ "deepseek-ai/DeepSeek-R1-0528": {
1573
+ maxTokens: 16384,
1574
+ contextWindow: 163840,
1555
1575
  supportsImages: false,
1556
1576
  supportsPromptCache: false,
1557
- inputPrice: 2,
1558
- outputPrice: 6,
1559
- description: "GPT-OSS 120B - Production-ready, general-purpose, high-reasoning model"
1577
+ inputPrice: 2.55,
1578
+ outputPrice: 5.95,
1579
+ cacheWritesPrice: 0,
1580
+ cacheReadsPrice: 0,
1581
+ description: "The latest revision of DeepSeek's first-generation reasoning model"
1560
1582
  },
1561
- "meta.llama3-3-70b-instruct-v1:0": {
1562
- maxTokens: 8192,
1563
- contextWindow: 128e3,
1583
+ "deepseek-ai/DeepSeek-V3-0324": {
1584
+ maxTokens: 16384,
1585
+ contextWindow: 163840,
1564
1586
  supportsImages: false,
1565
1587
  supportsPromptCache: false,
1566
- inputPrice: 0.72,
1567
- outputPrice: 0.72,
1568
- description: "Llama 3.3 Instruct (70B)"
1569
- },
1570
- "meta.llama3-2-90b-instruct-v1:0": {
1571
- maxTokens: 8192,
1572
- contextWindow: 128e3,
1573
- supportsImages: true,
1574
- supportsPromptCache: false,
1575
- inputPrice: 0.72,
1576
- outputPrice: 0.72,
1577
- description: "Llama 3.2 Instruct (90B)"
1588
+ inputPrice: 0.77,
1589
+ outputPrice: 0.77,
1590
+ cacheWritesPrice: 0,
1591
+ cacheReadsPrice: 0,
1592
+ description: "Fast general-purpose LLM with enhanced reasoning capabilities"
1578
1593
  },
1579
- "meta.llama3-2-11b-instruct-v1:0": {
1580
- maxTokens: 8192,
1581
- contextWindow: 128e3,
1582
- supportsImages: true,
1594
+ "deepseek-ai/DeepSeek-V3.1": {
1595
+ maxTokens: 16384,
1596
+ contextWindow: 163840,
1597
+ supportsImages: false,
1583
1598
  supportsPromptCache: false,
1584
- inputPrice: 0.16,
1585
- outputPrice: 0.16,
1586
- description: "Llama 3.2 Instruct (11B)"
1599
+ inputPrice: 0.5,
1600
+ outputPrice: 1.5,
1601
+ cacheWritesPrice: 0,
1602
+ cacheReadsPrice: 0,
1603
+ description: "Extremely capable general-purpose LLM with hybrid reasoning capabilities and advanced tool calling"
1587
1604
  },
1588
- "meta.llama3-2-3b-instruct-v1:0": {
1589
- maxTokens: 8192,
1590
- contextWindow: 128e3,
1605
+ "deepseek-ai/DeepSeek-V3.2": {
1606
+ maxTokens: 16384,
1607
+ contextWindow: 163840,
1591
1608
  supportsImages: false,
1592
1609
  supportsPromptCache: false,
1593
- inputPrice: 0.15,
1594
- outputPrice: 0.15,
1595
- description: "Llama 3.2 Instruct (3B)"
1610
+ inputPrice: 0.3,
1611
+ outputPrice: 0.45,
1612
+ cacheWritesPrice: 0,
1613
+ cacheReadsPrice: 0,
1614
+ description: "DeepSeek's hybrid reasoning model with efficient long context scaling with GPT-5 level performance"
1596
1615
  },
1597
- "meta.llama3-2-1b-instruct-v1:0": {
1598
- maxTokens: 8192,
1599
- contextWindow: 128e3,
1616
+ "openai/gpt-oss-120b": {
1617
+ maxTokens: 16384,
1618
+ contextWindow: 128072,
1600
1619
  supportsImages: false,
1601
1620
  supportsPromptCache: false,
1602
1621
  inputPrice: 0.1,
1603
- outputPrice: 0.1,
1604
- description: "Llama 3.2 Instruct (1B)"
1622
+ outputPrice: 0.5,
1623
+ cacheWritesPrice: 0,
1624
+ cacheReadsPrice: 0,
1625
+ description: "Extremely capable general-purpose LLM with strong, controllable reasoning capabilities"
1605
1626
  },
1606
- "meta.llama3-1-405b-instruct-v1:0": {
1607
- maxTokens: 8192,
1608
- contextWindow: 128e3,
1627
+ "Qwen/Qwen3-235B-A22B-Instruct-2507": {
1628
+ maxTokens: 16384,
1629
+ contextWindow: 262144,
1609
1630
  supportsImages: false,
1610
1631
  supportsPromptCache: false,
1611
- inputPrice: 2.4,
1612
- outputPrice: 2.4,
1613
- description: "Llama 3.1 Instruct (405B)"
1632
+ inputPrice: 0.22,
1633
+ outputPrice: 0.8,
1634
+ cacheWritesPrice: 0,
1635
+ cacheReadsPrice: 0,
1636
+ description: "Mixture-of-experts LLM with math and reasoning capabilities"
1614
1637
  },
1615
- "meta.llama3-1-70b-instruct-v1:0": {
1616
- maxTokens: 8192,
1617
- contextWindow: 128e3,
1638
+ "Qwen/Qwen3-Coder-480B-A35B-Instruct": {
1639
+ maxTokens: 16384,
1640
+ contextWindow: 262144,
1618
1641
  supportsImages: false,
1619
1642
  supportsPromptCache: false,
1620
- inputPrice: 0.72,
1621
- outputPrice: 0.72,
1622
- description: "Llama 3.1 Instruct (70B)"
1643
+ inputPrice: 0.38,
1644
+ outputPrice: 1.53,
1645
+ cacheWritesPrice: 0,
1646
+ cacheReadsPrice: 0,
1647
+ description: "Mixture-of-experts LLM with advanced coding and reasoning capabilities"
1623
1648
  },
1624
- "meta.llama3-1-70b-instruct-latency-optimized-v1:0": {
1625
- maxTokens: 8192,
1626
- contextWindow: 128e3,
1649
+ "moonshotai/Kimi-K2-Instruct-0905": {
1650
+ maxTokens: 16384,
1651
+ contextWindow: 262e3,
1627
1652
  supportsImages: false,
1628
1653
  supportsPromptCache: false,
1629
- inputPrice: 0.9,
1630
- outputPrice: 0.9,
1631
- description: "Llama 3.1 Instruct (70B) (w/ latency optimized inference)"
1632
- },
1633
- "meta.llama3-1-8b-instruct-v1:0": {
1654
+ inputPrice: 0.6,
1655
+ outputPrice: 2.5,
1656
+ cacheWritesPrice: 0,
1657
+ cacheReadsPrice: 0,
1658
+ description: "State of the art language model for agentic and coding tasks. September Update."
1659
+ }
1660
+ };
1661
+ var basetenDefaultModelId = "zai-org/GLM-4.6";
1662
+
1663
+ // src/providers/bedrock.ts
1664
+ var bedrockDefaultModelId = "anthropic.claude-sonnet-4-5-20250929-v1:0";
1665
+ var bedrockDefaultPromptRouterModelId = "anthropic.claude-3-sonnet-20240229-v1:0";
1666
+ var bedrockModels = {
1667
+ "anthropic.claude-sonnet-4-5-20250929-v1:0": {
1634
1668
  maxTokens: 8192,
1635
- contextWindow: 8e3,
1636
- supportsImages: false,
1637
- supportsPromptCache: false,
1638
- inputPrice: 0.22,
1639
- outputPrice: 0.22,
1640
- description: "Llama 3.1 Instruct (8B)"
1641
- },
1642
- "meta.llama3-70b-instruct-v1:0": {
1643
- maxTokens: 2048,
1644
- contextWindow: 8e3,
1645
- supportsImages: false,
1646
- supportsPromptCache: false,
1647
- inputPrice: 2.65,
1648
- outputPrice: 3.5
1669
+ contextWindow: 2e5,
1670
+ supportsImages: true,
1671
+ supportsPromptCache: true,
1672
+ supportsReasoningBudget: true,
1673
+ inputPrice: 3,
1674
+ outputPrice: 15,
1675
+ cacheWritesPrice: 3.75,
1676
+ cacheReadsPrice: 0.3,
1677
+ minTokensPerCachePoint: 1024,
1678
+ maxCachePoints: 4,
1679
+ cachableFields: ["system", "messages", "tools"]
1649
1680
  },
1650
- "meta.llama3-8b-instruct-v1:0": {
1651
- maxTokens: 2048,
1652
- contextWindow: 4e3,
1653
- supportsImages: false,
1654
- supportsPromptCache: false,
1655
- inputPrice: 0.3,
1656
- outputPrice: 0.6
1681
+ "amazon.nova-pro-v1:0": {
1682
+ maxTokens: 5e3,
1683
+ contextWindow: 3e5,
1684
+ supportsImages: true,
1685
+ supportsPromptCache: true,
1686
+ inputPrice: 0.8,
1687
+ outputPrice: 3.2,
1688
+ cacheWritesPrice: 0.8,
1689
+ // per million tokens
1690
+ cacheReadsPrice: 0.2,
1691
+ // per million tokens
1692
+ minTokensPerCachePoint: 1,
1693
+ maxCachePoints: 1,
1694
+ cachableFields: ["system"]
1657
1695
  },
1658
- "amazon.titan-text-lite-v1:0": {
1659
- maxTokens: 4096,
1660
- contextWindow: 8e3,
1661
- supportsImages: false,
1696
+ "amazon.nova-pro-latency-optimized-v1:0": {
1697
+ maxTokens: 5e3,
1698
+ contextWindow: 3e5,
1699
+ supportsImages: true,
1662
1700
  supportsPromptCache: false,
1663
- inputPrice: 0.15,
1664
- outputPrice: 0.2,
1665
- description: "Amazon Titan Text Lite"
1701
+ inputPrice: 1,
1702
+ outputPrice: 4,
1703
+ cacheWritesPrice: 1,
1704
+ // per million tokens
1705
+ cacheReadsPrice: 0.25,
1706
+ // per million tokens
1707
+ description: "Amazon Nova Pro with latency optimized inference"
1666
1708
  },
1667
- "amazon.titan-text-express-v1:0": {
1668
- maxTokens: 4096,
1669
- contextWindow: 8e3,
1670
- supportsImages: false,
1671
- supportsPromptCache: false,
1672
- inputPrice: 0.2,
1673
- outputPrice: 0.6,
1674
- description: "Amazon Titan Text Express"
1709
+ "amazon.nova-lite-v1:0": {
1710
+ maxTokens: 5e3,
1711
+ contextWindow: 3e5,
1712
+ supportsImages: true,
1713
+ supportsPromptCache: true,
1714
+ inputPrice: 0.06,
1715
+ outputPrice: 0.24,
1716
+ cacheWritesPrice: 0.06,
1717
+ // per million tokens
1718
+ cacheReadsPrice: 0.015,
1719
+ // per million tokens
1720
+ minTokensPerCachePoint: 1,
1721
+ maxCachePoints: 1,
1722
+ cachableFields: ["system"]
1675
1723
  },
1676
- "moonshot.kimi-k2-thinking": {
1677
- maxTokens: 32e3,
1678
- contextWindow: 262144,
1679
- supportsImages: false,
1680
- supportsPromptCache: false,
1681
- preserveReasoning: true,
1682
- inputPrice: 0.6,
1683
- outputPrice: 2.5,
1684
- description: "Kimi K2 Thinking (1T parameter MoE model with 32B active parameters)"
1724
+ "amazon.nova-2-lite-v1:0": {
1725
+ maxTokens: 65535,
1726
+ contextWindow: 1e6,
1727
+ supportsImages: true,
1728
+ supportsPromptCache: true,
1729
+ inputPrice: 0.33,
1730
+ outputPrice: 2.75,
1731
+ cacheWritesPrice: 0,
1732
+ cacheReadsPrice: 0.0825,
1733
+ // 75% less than input price
1734
+ minTokensPerCachePoint: 1,
1735
+ maxCachePoints: 1,
1736
+ cachableFields: ["system"],
1737
+ description: "Amazon Nova 2 Lite - Comparable to Claude Haiku 4.5"
1685
1738
  },
1686
- "minimax.minimax-m2": {
1687
- maxTokens: 16384,
1688
- contextWindow: 196608,
1739
+ "amazon.nova-micro-v1:0": {
1740
+ maxTokens: 5e3,
1741
+ contextWindow: 128e3,
1689
1742
  supportsImages: false,
1690
- supportsPromptCache: false,
1691
- preserveReasoning: true,
1692
- inputPrice: 0.3,
1693
- outputPrice: 1.2,
1694
- description: "MiniMax M2 (230B parameter MoE model with 10B active parameters)"
1743
+ supportsPromptCache: true,
1744
+ inputPrice: 0.035,
1745
+ outputPrice: 0.14,
1746
+ cacheWritesPrice: 0.035,
1747
+ // per million tokens
1748
+ cacheReadsPrice: 875e-5,
1749
+ // per million tokens
1750
+ minTokensPerCachePoint: 1,
1751
+ maxCachePoints: 1,
1752
+ cachableFields: ["system"]
1695
1753
  },
1696
- "qwen.qwen3-next-80b-a3b": {
1754
+ "anthropic.claude-sonnet-4-20250514-v1:0": {
1697
1755
  maxTokens: 8192,
1698
- contextWindow: 262144,
1699
- supportsImages: false,
1700
- supportsPromptCache: false,
1701
- inputPrice: 0.15,
1702
- outputPrice: 1.2,
1703
- description: "Qwen3 Next 80B (MoE model with 3B active parameters)"
1756
+ contextWindow: 2e5,
1757
+ supportsImages: true,
1758
+ supportsPromptCache: true,
1759
+ supportsReasoningBudget: true,
1760
+ inputPrice: 3,
1761
+ outputPrice: 15,
1762
+ cacheWritesPrice: 3.75,
1763
+ cacheReadsPrice: 0.3,
1764
+ minTokensPerCachePoint: 1024,
1765
+ maxCachePoints: 4,
1766
+ cachableFields: ["system", "messages", "tools"]
1704
1767
  },
1705
- "qwen.qwen3-coder-480b-a35b-v1:0": {
1768
+ "anthropic.claude-opus-4-1-20250805-v1:0": {
1706
1769
  maxTokens: 8192,
1707
- contextWindow: 262144,
1708
- supportsImages: false,
1709
- supportsPromptCache: false,
1710
- inputPrice: 0.45,
1711
- outputPrice: 1.8,
1712
- description: "Qwen3 Coder 480B (MoE model with 35B active parameters)"
1713
- }
1714
- };
1715
- var BEDROCK_DEFAULT_TEMPERATURE = 0.3;
1716
- var BEDROCK_MAX_TOKENS = 4096;
1717
- var BEDROCK_DEFAULT_CONTEXT = 128e3;
1718
- var AWS_INFERENCE_PROFILE_MAPPING = [
1719
- // Australia regions (Sydney and Melbourne) → au. inference profile (most specific - 14 chars)
1720
- ["ap-southeast-2", "au."],
1721
- ["ap-southeast-4", "au."],
1722
- // Japan regions (Tokyo and Osaka) → jp. inference profile (13 chars)
1723
- ["ap-northeast-", "jp."],
1724
- // US Government Cloud → ug. inference profile (7 chars)
1725
- ["us-gov-", "ug."],
1726
- // Americas regions → us. inference profile (3 chars)
1727
- ["us-", "us."],
1728
- // Europe regions → eu. inference profile (3 chars)
1729
- ["eu-", "eu."],
1730
- // Asia Pacific regions → apac. inference profile (3 chars)
1731
- ["ap-", "apac."],
1732
- // Canada regions → ca. inference profile (3 chars)
1733
- ["ca-", "ca."],
1734
- // South America regions → sa. inference profile (3 chars)
1735
- ["sa-", "sa."]
1736
- ];
1737
- var BEDROCK_REGIONS = [
1738
- { value: "us-east-1", label: "us-east-1" },
1739
- { value: "us-east-2", label: "us-east-2" },
1740
- { value: "us-west-1", label: "us-west-1" },
1741
- { value: "us-west-2", label: "us-west-2" },
1742
- { value: "ap-northeast-1", label: "ap-northeast-1" },
1743
- { value: "ap-northeast-2", label: "ap-northeast-2" },
1744
- { value: "ap-northeast-3", label: "ap-northeast-3" },
1745
- { value: "ap-south-1", label: "ap-south-1" },
1746
- { value: "ap-south-2", label: "ap-south-2" },
1747
- { value: "ap-southeast-1", label: "ap-southeast-1" },
1748
- { value: "ap-southeast-2", label: "ap-southeast-2" },
1749
- { value: "ap-east-1", label: "ap-east-1" },
1750
- { value: "eu-central-1", label: "eu-central-1" },
1751
- { value: "eu-central-2", label: "eu-central-2" },
1752
- { value: "eu-west-1", label: "eu-west-1" },
1753
- { value: "eu-west-2", label: "eu-west-2" },
1754
- { value: "eu-west-3", label: "eu-west-3" },
1755
- { value: "eu-north-1", label: "eu-north-1" },
1756
- { value: "eu-south-1", label: "eu-south-1" },
1757
- { value: "eu-south-2", label: "eu-south-2" },
1758
- { value: "ca-central-1", label: "ca-central-1" },
1759
- { value: "sa-east-1", label: "sa-east-1" },
1760
- { value: "us-gov-east-1", label: "us-gov-east-1" },
1761
- { value: "us-gov-west-1", label: "us-gov-west-1" }
1762
- ].sort((a, b) => a.value.localeCompare(b.value));
1763
- var BEDROCK_1M_CONTEXT_MODEL_IDS = [
1764
- "anthropic.claude-sonnet-4-20250514-v1:0",
1765
- "anthropic.claude-sonnet-4-5-20250929-v1:0",
1766
- "anthropic.claude-opus-4-6-v1"
1767
- ];
1768
- var BEDROCK_GLOBAL_INFERENCE_MODEL_IDS = [
1769
- "anthropic.claude-sonnet-4-20250514-v1:0",
1770
- "anthropic.claude-sonnet-4-5-20250929-v1:0",
1771
- "anthropic.claude-haiku-4-5-20251001-v1:0",
1772
- "anthropic.claude-opus-4-5-20251101-v1:0",
1773
- "anthropic.claude-opus-4-6-v1"
1774
- ];
1775
- var BEDROCK_SERVICE_TIER_MODEL_IDS = [
1776
- // Amazon Nova models
1777
- "amazon.nova-lite-v1:0",
1778
- "amazon.nova-2-lite-v1:0",
1779
- "amazon.nova-pro-v1:0",
1780
- "amazon.nova-pro-latency-optimized-v1:0",
1781
- // DeepSeek models
1782
- "deepseek.r1-v1:0",
1783
- // Qwen models
1784
- "qwen.qwen3-next-80b-a3b",
1785
- "qwen.qwen3-coder-480b-a35b-v1:0",
1786
- // OpenAI GPT-OSS models
1787
- "openai.gpt-oss-20b-1:0",
1788
- "openai.gpt-oss-120b-1:0"
1789
- ];
1790
- var BEDROCK_SERVICE_TIER_PRICING = {
1791
- STANDARD: 1,
1792
- // Base price
1793
- FLEX: 0.5,
1794
- // 50% discount from standard
1795
- PRIORITY: 1.75
1796
- // 75% premium over standard
1797
- };
1798
-
1799
- // src/providers/cerebras.ts
1800
- var cerebrasDefaultModelId = "gpt-oss-120b";
1801
- var cerebrasModels = {
1802
- "zai-glm-4.7": {
1803
- maxTokens: 16384,
1804
- // Conservative default to avoid premature rate limiting (Cerebras reserves quota upfront)
1805
- contextWindow: 131072,
1806
- supportsImages: false,
1770
+ contextWindow: 2e5,
1771
+ supportsImages: true,
1807
1772
  supportsPromptCache: true,
1808
- supportsTemperature: true,
1809
- defaultTemperature: 1,
1810
- inputPrice: 0,
1811
- outputPrice: 0,
1812
- description: "Highly capable general-purpose model on Cerebras (up to 1,000 tokens/s), competitive with leading proprietary models on coding tasks."
1813
- },
1814
- "qwen-3-235b-a22b-instruct-2507": {
1815
- maxTokens: 16384,
1816
- // Conservative default to avoid premature rate limiting
1817
- contextWindow: 64e3,
1818
- supportsImages: false,
1819
- supportsPromptCache: false,
1820
- inputPrice: 0,
1821
- outputPrice: 0,
1822
- description: "Intelligent model with ~1400 tokens/s"
1823
- },
1824
- "llama-3.3-70b": {
1825
- maxTokens: 16384,
1826
- // Conservative default to avoid premature rate limiting
1827
- contextWindow: 64e3,
1828
- supportsImages: false,
1829
- supportsPromptCache: false,
1830
- inputPrice: 0,
1831
- outputPrice: 0,
1832
- description: "Powerful model with ~2600 tokens/s"
1833
- },
1834
- "qwen-3-32b": {
1835
- maxTokens: 16384,
1836
- // Conservative default to avoid premature rate limiting
1837
- contextWindow: 64e3,
1838
- supportsImages: false,
1839
- supportsPromptCache: false,
1840
- inputPrice: 0,
1841
- outputPrice: 0,
1842
- description: "SOTA coding performance with ~2500 tokens/s"
1843
- },
1844
- "gpt-oss-120b": {
1845
- maxTokens: 16384,
1846
- // Conservative default to avoid premature rate limiting
1847
- contextWindow: 64e3,
1848
- supportsImages: false,
1849
- supportsPromptCache: false,
1850
- inputPrice: 0,
1851
- outputPrice: 0,
1852
- description: "OpenAI GPT OSS model with ~2800 tokens/s\n\n\u2022 64K context window\n\u2022 Excels at efficient reasoning across science, math, and coding"
1853
- }
1854
- };
1855
-
1856
- // src/providers/chutes.ts
1857
- var chutesDefaultModelId = "deepseek-ai/DeepSeek-R1-0528";
1858
- var chutesModels = {
1859
- "deepseek-ai/DeepSeek-R1-0528": {
1860
- maxTokens: 32768,
1861
- contextWindow: 163840,
1862
- supportsImages: false,
1863
- supportsPromptCache: false,
1864
- inputPrice: 0,
1865
- outputPrice: 0,
1866
- description: "DeepSeek R1 0528 model."
1867
- },
1868
- "deepseek-ai/DeepSeek-R1": {
1869
- maxTokens: 32768,
1870
- contextWindow: 163840,
1871
- supportsImages: false,
1872
- supportsPromptCache: false,
1873
- inputPrice: 0,
1874
- outputPrice: 0,
1875
- description: "DeepSeek R1 model."
1876
- },
1877
- "deepseek-ai/DeepSeek-V3": {
1878
- maxTokens: 32768,
1879
- contextWindow: 163840,
1880
- supportsImages: false,
1881
- supportsPromptCache: false,
1882
- inputPrice: 0,
1883
- outputPrice: 0,
1884
- description: "DeepSeek V3 model."
1885
- },
1886
- "deepseek-ai/DeepSeek-V3.1": {
1887
- maxTokens: 32768,
1888
- contextWindow: 163840,
1889
- supportsImages: false,
1890
- supportsPromptCache: false,
1891
- inputPrice: 0,
1892
- outputPrice: 0,
1893
- description: "DeepSeek V3.1 model."
1894
- },
1895
- "deepseek-ai/DeepSeek-V3.1-Terminus": {
1896
- maxTokens: 163840,
1897
- contextWindow: 163840,
1898
- supportsImages: false,
1899
- supportsPromptCache: false,
1900
- inputPrice: 0.23,
1901
- outputPrice: 0.9,
1902
- description: "DeepSeek\u2011V3.1\u2011Terminus is an update to V3.1 that improves language consistency by reducing CN/EN mix\u2011ups and eliminating random characters, while strengthening agent capabilities with notably better Code Agent and Search Agent performance."
1903
- },
1904
- "deepseek-ai/DeepSeek-V3.1-turbo": {
1905
- maxTokens: 32768,
1906
- contextWindow: 163840,
1907
- supportsImages: false,
1908
- supportsPromptCache: false,
1909
- inputPrice: 1,
1910
- outputPrice: 3,
1911
- description: "DeepSeek-V3.1-turbo is an FP8, speculative-decoding turbo variant optimized for ultra-fast single-shot queries (~200 TPS), with outputs close to the originals and solid function calling/reasoning/structured output, priced at $1/M input and $3/M output tokens, using 2\xD7 quota per request and not intended for bulk workloads."
1773
+ supportsReasoningBudget: true,
1774
+ inputPrice: 15,
1775
+ outputPrice: 75,
1776
+ cacheWritesPrice: 18.75,
1777
+ cacheReadsPrice: 1.5,
1778
+ minTokensPerCachePoint: 1024,
1779
+ maxCachePoints: 4,
1780
+ cachableFields: ["system", "messages", "tools"]
1912
1781
  },
1913
- "deepseek-ai/DeepSeek-V3.2-Exp": {
1914
- maxTokens: 163840,
1915
- contextWindow: 163840,
1916
- supportsImages: false,
1917
- supportsPromptCache: false,
1918
- inputPrice: 0.25,
1919
- outputPrice: 0.35,
1920
- description: "DeepSeek-V3.2-Exp is an experimental LLM that introduces DeepSeek Sparse Attention to improve long\u2011context training and inference efficiency while maintaining performance comparable to V3.1\u2011Terminus."
1782
+ "anthropic.claude-opus-4-6-v1": {
1783
+ maxTokens: 8192,
1784
+ contextWindow: 2e5,
1785
+ // Default 200K, extendable to 1M with beta flag 'context-1m-2025-08-07'
1786
+ supportsImages: true,
1787
+ supportsPromptCache: true,
1788
+ supportsReasoningBudget: true,
1789
+ inputPrice: 5,
1790
+ // $5 per million input tokens (≤200K context)
1791
+ outputPrice: 25,
1792
+ // $25 per million output tokens (≤200K context)
1793
+ cacheWritesPrice: 6.25,
1794
+ // $6.25 per million tokens
1795
+ cacheReadsPrice: 0.5,
1796
+ // $0.50 per million tokens
1797
+ minTokensPerCachePoint: 1024,
1798
+ maxCachePoints: 4,
1799
+ cachableFields: ["system", "messages", "tools"],
1800
+ // Tiered pricing for extended context (requires beta flag 'context-1m-2025-08-07')
1801
+ tiers: [
1802
+ {
1803
+ contextWindow: 1e6,
1804
+ // 1M tokens with beta flag
1805
+ inputPrice: 10,
1806
+ // $10 per million input tokens (>200K context)
1807
+ outputPrice: 37.5,
1808
+ // $37.50 per million output tokens (>200K context)
1809
+ cacheWritesPrice: 12.5,
1810
+ // $12.50 per million tokens (>200K context)
1811
+ cacheReadsPrice: 1
1812
+ // $1.00 per million tokens (>200K context)
1813
+ }
1814
+ ]
1921
1815
  },
1922
- "unsloth/Llama-3.3-70B-Instruct": {
1923
- maxTokens: 32768,
1924
- // From Groq
1925
- contextWindow: 131072,
1926
- // From Groq
1927
- supportsImages: false,
1928
- supportsPromptCache: false,
1929
- inputPrice: 0,
1930
- outputPrice: 0,
1931
- description: "Unsloth Llama 3.3 70B Instruct model."
1816
+ "anthropic.claude-opus-4-5-20251101-v1:0": {
1817
+ maxTokens: 8192,
1818
+ contextWindow: 2e5,
1819
+ supportsImages: true,
1820
+ supportsPromptCache: true,
1821
+ supportsReasoningBudget: true,
1822
+ inputPrice: 5,
1823
+ outputPrice: 25,
1824
+ cacheWritesPrice: 6.25,
1825
+ cacheReadsPrice: 0.5,
1826
+ minTokensPerCachePoint: 1024,
1827
+ maxCachePoints: 4,
1828
+ cachableFields: ["system", "messages", "tools"]
1932
1829
  },
1933
- "chutesai/Llama-4-Scout-17B-16E-Instruct": {
1934
- maxTokens: 32768,
1935
- contextWindow: 512e3,
1936
- supportsImages: false,
1937
- supportsPromptCache: false,
1938
- inputPrice: 0,
1939
- outputPrice: 0,
1940
- description: "ChutesAI Llama 4 Scout 17B Instruct model, 512K context."
1830
+ "anthropic.claude-opus-4-20250514-v1:0": {
1831
+ maxTokens: 8192,
1832
+ contextWindow: 2e5,
1833
+ supportsImages: true,
1834
+ supportsPromptCache: true,
1835
+ supportsReasoningBudget: true,
1836
+ inputPrice: 15,
1837
+ outputPrice: 75,
1838
+ cacheWritesPrice: 18.75,
1839
+ cacheReadsPrice: 1.5,
1840
+ minTokensPerCachePoint: 1024,
1841
+ maxCachePoints: 4,
1842
+ cachableFields: ["system", "messages", "tools"]
1941
1843
  },
1942
- "unsloth/Mistral-Nemo-Instruct-2407": {
1943
- maxTokens: 32768,
1944
- contextWindow: 128e3,
1945
- supportsImages: false,
1946
- supportsPromptCache: false,
1947
- inputPrice: 0,
1948
- outputPrice: 0,
1949
- description: "Unsloth Mistral Nemo Instruct model."
1844
+ "anthropic.claude-3-7-sonnet-20250219-v1:0": {
1845
+ maxTokens: 8192,
1846
+ contextWindow: 2e5,
1847
+ supportsImages: true,
1848
+ supportsPromptCache: true,
1849
+ supportsReasoningBudget: true,
1850
+ inputPrice: 3,
1851
+ outputPrice: 15,
1852
+ cacheWritesPrice: 3.75,
1853
+ cacheReadsPrice: 0.3,
1854
+ minTokensPerCachePoint: 1024,
1855
+ maxCachePoints: 4,
1856
+ cachableFields: ["system", "messages", "tools"]
1950
1857
  },
1951
- "unsloth/gemma-3-12b-it": {
1952
- maxTokens: 32768,
1953
- contextWindow: 131072,
1954
- supportsImages: false,
1955
- supportsPromptCache: false,
1956
- inputPrice: 0,
1957
- outputPrice: 0,
1958
- description: "Unsloth Gemma 3 12B IT model."
1858
+ "anthropic.claude-3-5-sonnet-20241022-v2:0": {
1859
+ maxTokens: 8192,
1860
+ contextWindow: 2e5,
1861
+ supportsImages: true,
1862
+ supportsPromptCache: true,
1863
+ inputPrice: 3,
1864
+ outputPrice: 15,
1865
+ cacheWritesPrice: 3.75,
1866
+ cacheReadsPrice: 0.3,
1867
+ minTokensPerCachePoint: 1024,
1868
+ maxCachePoints: 4,
1869
+ cachableFields: ["system", "messages", "tools"]
1959
1870
  },
1960
- "NousResearch/DeepHermes-3-Llama-3-8B-Preview": {
1961
- maxTokens: 32768,
1962
- contextWindow: 131072,
1871
+ "anthropic.claude-3-5-haiku-20241022-v1:0": {
1872
+ maxTokens: 8192,
1873
+ contextWindow: 2e5,
1963
1874
  supportsImages: false,
1964
- supportsPromptCache: false,
1965
- inputPrice: 0,
1966
- outputPrice: 0,
1967
- description: "Nous DeepHermes 3 Llama 3 8B Preview model."
1875
+ supportsPromptCache: true,
1876
+ inputPrice: 0.8,
1877
+ outputPrice: 4,
1878
+ cacheWritesPrice: 1,
1879
+ cacheReadsPrice: 0.08,
1880
+ minTokensPerCachePoint: 2048,
1881
+ maxCachePoints: 4,
1882
+ cachableFields: ["system", "messages", "tools"]
1968
1883
  },
1969
- "unsloth/gemma-3-4b-it": {
1970
- maxTokens: 32768,
1971
- contextWindow: 131072,
1972
- supportsImages: false,
1973
- supportsPromptCache: false,
1974
- inputPrice: 0,
1975
- outputPrice: 0,
1976
- description: "Unsloth Gemma 3 4B IT model."
1884
+ "anthropic.claude-haiku-4-5-20251001-v1:0": {
1885
+ maxTokens: 8192,
1886
+ contextWindow: 2e5,
1887
+ supportsImages: true,
1888
+ supportsPromptCache: true,
1889
+ supportsReasoningBudget: true,
1890
+ inputPrice: 1,
1891
+ outputPrice: 5,
1892
+ cacheWritesPrice: 1.25,
1893
+ // 5m cache writes
1894
+ cacheReadsPrice: 0.1,
1895
+ // cache hits / refreshes
1896
+ minTokensPerCachePoint: 2048,
1897
+ maxCachePoints: 4,
1898
+ cachableFields: ["system", "messages", "tools"]
1977
1899
  },
1978
- "nvidia/Llama-3_3-Nemotron-Super-49B-v1": {
1979
- maxTokens: 32768,
1980
- contextWindow: 131072,
1981
- supportsImages: false,
1900
+ "anthropic.claude-3-5-sonnet-20240620-v1:0": {
1901
+ maxTokens: 8192,
1902
+ contextWindow: 2e5,
1903
+ supportsImages: true,
1982
1904
  supportsPromptCache: false,
1983
- inputPrice: 0,
1984
- outputPrice: 0,
1985
- description: "Nvidia Llama 3.3 Nemotron Super 49B model."
1905
+ inputPrice: 3,
1906
+ outputPrice: 15
1986
1907
  },
1987
- "nvidia/Llama-3_1-Nemotron-Ultra-253B-v1": {
1988
- maxTokens: 32768,
1989
- contextWindow: 131072,
1990
- supportsImages: false,
1908
+ "anthropic.claude-3-opus-20240229-v1:0": {
1909
+ maxTokens: 4096,
1910
+ contextWindow: 2e5,
1911
+ supportsImages: true,
1991
1912
  supportsPromptCache: false,
1992
- inputPrice: 0,
1993
- outputPrice: 0,
1994
- description: "Nvidia Llama 3.1 Nemotron Ultra 253B model."
1913
+ inputPrice: 15,
1914
+ outputPrice: 75
1995
1915
  },
1996
- "chutesai/Llama-4-Maverick-17B-128E-Instruct-FP8": {
1997
- maxTokens: 32768,
1998
- contextWindow: 256e3,
1999
- supportsImages: false,
1916
+ "anthropic.claude-3-sonnet-20240229-v1:0": {
1917
+ maxTokens: 4096,
1918
+ contextWindow: 2e5,
1919
+ supportsImages: true,
2000
1920
  supportsPromptCache: false,
2001
- inputPrice: 0,
2002
- outputPrice: 0,
2003
- description: "ChutesAI Llama 4 Maverick 17B Instruct FP8 model."
1921
+ inputPrice: 3,
1922
+ outputPrice: 15
2004
1923
  },
2005
- "deepseek-ai/DeepSeek-V3-Base": {
2006
- maxTokens: 32768,
2007
- contextWindow: 163840,
2008
- supportsImages: false,
1924
+ "anthropic.claude-3-haiku-20240307-v1:0": {
1925
+ maxTokens: 4096,
1926
+ contextWindow: 2e5,
1927
+ supportsImages: true,
2009
1928
  supportsPromptCache: false,
2010
- inputPrice: 0,
2011
- outputPrice: 0,
2012
- description: "DeepSeek V3 Base model."
1929
+ inputPrice: 0.25,
1930
+ outputPrice: 1.25
2013
1931
  },
2014
- "deepseek-ai/DeepSeek-R1-Zero": {
1932
+ "deepseek.r1-v1:0": {
2015
1933
  maxTokens: 32768,
2016
- contextWindow: 163840,
1934
+ contextWindow: 128e3,
2017
1935
  supportsImages: false,
2018
1936
  supportsPromptCache: false,
2019
- inputPrice: 0,
2020
- outputPrice: 0,
2021
- description: "DeepSeek R1 Zero model."
1937
+ inputPrice: 1.35,
1938
+ outputPrice: 5.4
2022
1939
  },
2023
- "deepseek-ai/DeepSeek-V3-0324": {
2024
- maxTokens: 32768,
2025
- contextWindow: 163840,
1940
+ "openai.gpt-oss-20b-1:0": {
1941
+ maxTokens: 8192,
1942
+ contextWindow: 128e3,
2026
1943
  supportsImages: false,
2027
1944
  supportsPromptCache: false,
2028
- inputPrice: 0,
2029
- outputPrice: 0,
2030
- description: "DeepSeek V3 (0324) model."
1945
+ inputPrice: 0.5,
1946
+ outputPrice: 1.5,
1947
+ description: "GPT-OSS 20B - Optimized for low latency and local/specialized use cases"
2031
1948
  },
2032
- "Qwen/Qwen3-235B-A22B-Instruct-2507": {
2033
- maxTokens: 32768,
2034
- contextWindow: 262144,
1949
+ "openai.gpt-oss-120b-1:0": {
1950
+ maxTokens: 8192,
1951
+ contextWindow: 128e3,
2035
1952
  supportsImages: false,
2036
1953
  supportsPromptCache: false,
2037
- inputPrice: 0,
2038
- outputPrice: 0,
2039
- description: "Qwen3 235B A22B Instruct 2507 model with 262K context window."
1954
+ inputPrice: 2,
1955
+ outputPrice: 6,
1956
+ description: "GPT-OSS 120B - Production-ready, general-purpose, high-reasoning model"
2040
1957
  },
2041
- "Qwen/Qwen3-235B-A22B": {
2042
- maxTokens: 32768,
2043
- contextWindow: 40960,
1958
+ "meta.llama3-3-70b-instruct-v1:0": {
1959
+ maxTokens: 8192,
1960
+ contextWindow: 128e3,
2044
1961
  supportsImages: false,
2045
1962
  supportsPromptCache: false,
2046
- inputPrice: 0,
2047
- outputPrice: 0,
2048
- description: "Qwen3 235B A22B model."
1963
+ inputPrice: 0.72,
1964
+ outputPrice: 0.72,
1965
+ description: "Llama 3.3 Instruct (70B)"
2049
1966
  },
2050
- "Qwen/Qwen3-32B": {
2051
- maxTokens: 32768,
2052
- contextWindow: 40960,
2053
- supportsImages: false,
1967
+ "meta.llama3-2-90b-instruct-v1:0": {
1968
+ maxTokens: 8192,
1969
+ contextWindow: 128e3,
1970
+ supportsImages: true,
2054
1971
  supportsPromptCache: false,
2055
- inputPrice: 0,
2056
- outputPrice: 0,
2057
- description: "Qwen3 32B model."
1972
+ inputPrice: 0.72,
1973
+ outputPrice: 0.72,
1974
+ description: "Llama 3.2 Instruct (90B)"
2058
1975
  },
2059
- "Qwen/Qwen3-30B-A3B": {
2060
- maxTokens: 32768,
2061
- contextWindow: 40960,
2062
- supportsImages: false,
1976
+ "meta.llama3-2-11b-instruct-v1:0": {
1977
+ maxTokens: 8192,
1978
+ contextWindow: 128e3,
1979
+ supportsImages: true,
2063
1980
  supportsPromptCache: false,
2064
- inputPrice: 0,
2065
- outputPrice: 0,
2066
- description: "Qwen3 30B A3B model."
1981
+ inputPrice: 0.16,
1982
+ outputPrice: 0.16,
1983
+ description: "Llama 3.2 Instruct (11B)"
2067
1984
  },
2068
- "Qwen/Qwen3-14B": {
2069
- maxTokens: 32768,
2070
- contextWindow: 40960,
1985
+ "meta.llama3-2-3b-instruct-v1:0": {
1986
+ maxTokens: 8192,
1987
+ contextWindow: 128e3,
2071
1988
  supportsImages: false,
2072
1989
  supportsPromptCache: false,
2073
- inputPrice: 0,
2074
- outputPrice: 0,
2075
- description: "Qwen3 14B model."
1990
+ inputPrice: 0.15,
1991
+ outputPrice: 0.15,
1992
+ description: "Llama 3.2 Instruct (3B)"
2076
1993
  },
2077
- "Qwen/Qwen3-8B": {
2078
- maxTokens: 32768,
2079
- contextWindow: 40960,
1994
+ "meta.llama3-2-1b-instruct-v1:0": {
1995
+ maxTokens: 8192,
1996
+ contextWindow: 128e3,
2080
1997
  supportsImages: false,
2081
1998
  supportsPromptCache: false,
2082
- inputPrice: 0,
2083
- outputPrice: 0,
2084
- description: "Qwen3 8B model."
1999
+ inputPrice: 0.1,
2000
+ outputPrice: 0.1,
2001
+ description: "Llama 3.2 Instruct (1B)"
2085
2002
  },
2086
- "microsoft/MAI-DS-R1-FP8": {
2087
- maxTokens: 32768,
2088
- contextWindow: 163840,
2003
+ "meta.llama3-1-405b-instruct-v1:0": {
2004
+ maxTokens: 8192,
2005
+ contextWindow: 128e3,
2089
2006
  supportsImages: false,
2090
2007
  supportsPromptCache: false,
2091
- inputPrice: 0,
2092
- outputPrice: 0,
2093
- description: "Microsoft MAI-DS-R1 FP8 model."
2008
+ inputPrice: 2.4,
2009
+ outputPrice: 2.4,
2010
+ description: "Llama 3.1 Instruct (405B)"
2094
2011
  },
2095
- "tngtech/DeepSeek-R1T-Chimera": {
2096
- maxTokens: 32768,
2097
- contextWindow: 163840,
2012
+ "meta.llama3-1-70b-instruct-v1:0": {
2013
+ maxTokens: 8192,
2014
+ contextWindow: 128e3,
2098
2015
  supportsImages: false,
2099
2016
  supportsPromptCache: false,
2100
- inputPrice: 0,
2101
- outputPrice: 0,
2102
- description: "TNGTech DeepSeek R1T Chimera model."
2017
+ inputPrice: 0.72,
2018
+ outputPrice: 0.72,
2019
+ description: "Llama 3.1 Instruct (70B)"
2103
2020
  },
2104
- "zai-org/GLM-4.5-Air": {
2105
- maxTokens: 32768,
2106
- contextWindow: 151329,
2021
+ "meta.llama3-1-70b-instruct-latency-optimized-v1:0": {
2022
+ maxTokens: 8192,
2023
+ contextWindow: 128e3,
2107
2024
  supportsImages: false,
2108
2025
  supportsPromptCache: false,
2109
- inputPrice: 0,
2110
- outputPrice: 0,
2111
- description: "GLM-4.5-Air model with 151,329 token context window and 106B total parameters with 12B activated."
2026
+ inputPrice: 0.9,
2027
+ outputPrice: 0.9,
2028
+ description: "Llama 3.1 Instruct (70B) (w/ latency optimized inference)"
2112
2029
  },
2113
- "zai-org/GLM-4.5-FP8": {
2114
- maxTokens: 32768,
2115
- contextWindow: 131072,
2030
+ "meta.llama3-1-8b-instruct-v1:0": {
2031
+ maxTokens: 8192,
2032
+ contextWindow: 8e3,
2116
2033
  supportsImages: false,
2117
2034
  supportsPromptCache: false,
2118
- inputPrice: 0,
2119
- outputPrice: 0,
2120
- description: "GLM-4.5-FP8 model with 128k token context window, optimized for agent-based applications with MoE architecture."
2035
+ inputPrice: 0.22,
2036
+ outputPrice: 0.22,
2037
+ description: "Llama 3.1 Instruct (8B)"
2121
2038
  },
2122
- "zai-org/GLM-4.5-turbo": {
2123
- maxTokens: 32768,
2124
- contextWindow: 131072,
2039
+ "meta.llama3-70b-instruct-v1:0": {
2040
+ maxTokens: 2048,
2041
+ contextWindow: 8e3,
2125
2042
  supportsImages: false,
2126
2043
  supportsPromptCache: false,
2127
- inputPrice: 1,
2128
- outputPrice: 3,
2129
- description: "GLM-4.5-turbo model with 128K token context window, optimized for fast inference."
2044
+ inputPrice: 2.65,
2045
+ outputPrice: 3.5
2130
2046
  },
2131
- "zai-org/GLM-4.6-FP8": {
2132
- maxTokens: 32768,
2133
- contextWindow: 202752,
2047
+ "meta.llama3-8b-instruct-v1:0": {
2048
+ maxTokens: 2048,
2049
+ contextWindow: 4e3,
2134
2050
  supportsImages: false,
2135
2051
  supportsPromptCache: false,
2136
- inputPrice: 0,
2137
- outputPrice: 0,
2138
- description: "GLM-4.6 introduces major upgrades over GLM-4.5, including a longer 200K-token context window for complex tasks, stronger coding performance in benchmarks and real-world tools (such as Claude Code, Cline, Roo Code, and Kilo Code), improved reasoning with tool use during inference, more capable and efficient agent integration, and refined writing that better matches human style, readability, and natural role-play scenarios."
2052
+ inputPrice: 0.3,
2053
+ outputPrice: 0.6
2139
2054
  },
2140
- "zai-org/GLM-4.6-turbo": {
2141
- maxTokens: 202752,
2142
- // From Chutes /v1/models: max_output_length
2143
- contextWindow: 202752,
2055
+ "amazon.titan-text-lite-v1:0": {
2056
+ maxTokens: 4096,
2057
+ contextWindow: 8e3,
2144
2058
  supportsImages: false,
2145
2059
  supportsPromptCache: false,
2146
- inputPrice: 1.15,
2147
- outputPrice: 3.25,
2148
- description: "GLM-4.6-turbo model with 200K-token context window, optimized for fast inference."
2060
+ inputPrice: 0.15,
2061
+ outputPrice: 0.2,
2062
+ description: "Amazon Titan Text Lite"
2149
2063
  },
2150
- "meituan-longcat/LongCat-Flash-Thinking-FP8": {
2151
- maxTokens: 32768,
2152
- contextWindow: 128e3,
2064
+ "amazon.titan-text-express-v1:0": {
2065
+ maxTokens: 4096,
2066
+ contextWindow: 8e3,
2153
2067
  supportsImages: false,
2154
2068
  supportsPromptCache: false,
2155
- inputPrice: 0,
2156
- outputPrice: 0,
2157
- description: "LongCat Flash Thinking FP8 model with 128K context window, optimized for complex reasoning and coding tasks."
2069
+ inputPrice: 0.2,
2070
+ outputPrice: 0.6,
2071
+ description: "Amazon Titan Text Express"
2158
2072
  },
2159
- "Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8": {
2160
- maxTokens: 32768,
2073
+ "moonshot.kimi-k2-thinking": {
2074
+ maxTokens: 32e3,
2161
2075
  contextWindow: 262144,
2162
2076
  supportsImages: false,
2163
2077
  supportsPromptCache: false,
2164
- inputPrice: 0,
2165
- outputPrice: 0,
2166
- description: "Qwen3 Coder 480B A35B Instruct FP8 model, optimized for coding tasks."
2078
+ preserveReasoning: true,
2079
+ inputPrice: 0.6,
2080
+ outputPrice: 2.5,
2081
+ description: "Kimi K2 Thinking (1T parameter MoE model with 32B active parameters)"
2167
2082
  },
2168
- "moonshotai/Kimi-K2-Instruct-75k": {
2169
- maxTokens: 32768,
2170
- contextWindow: 75e3,
2083
+ "minimax.minimax-m2": {
2084
+ maxTokens: 16384,
2085
+ contextWindow: 196608,
2171
2086
  supportsImages: false,
2172
2087
  supportsPromptCache: false,
2173
- inputPrice: 0.1481,
2174
- outputPrice: 0.5926,
2175
- description: "Moonshot AI Kimi K2 Instruct model with 75k context window."
2088
+ preserveReasoning: true,
2089
+ inputPrice: 0.3,
2090
+ outputPrice: 1.2,
2091
+ description: "MiniMax M2 (230B parameter MoE model with 10B active parameters)"
2176
2092
  },
2177
- "moonshotai/Kimi-K2-Instruct-0905": {
2178
- maxTokens: 32768,
2093
+ "qwen.qwen3-next-80b-a3b": {
2094
+ maxTokens: 8192,
2179
2095
  contextWindow: 262144,
2180
2096
  supportsImages: false,
2181
2097
  supportsPromptCache: false,
2182
- inputPrice: 0.1999,
2183
- outputPrice: 0.8001,
2184
- description: "Moonshot AI Kimi K2 Instruct 0905 model with 256k context window."
2098
+ inputPrice: 0.15,
2099
+ outputPrice: 1.2,
2100
+ description: "Qwen3 Next 80B (MoE model with 3B active parameters)"
2185
2101
  },
2186
- "Qwen/Qwen3-235B-A22B-Thinking-2507": {
2187
- maxTokens: 32768,
2102
+ "qwen.qwen3-coder-480b-a35b-v1:0": {
2103
+ maxTokens: 8192,
2188
2104
  contextWindow: 262144,
2189
2105
  supportsImages: false,
2190
2106
  supportsPromptCache: false,
2191
- inputPrice: 0.077968332,
2192
- outputPrice: 0.31202496,
2193
- description: "Qwen3 235B A22B Thinking 2507 model with 262K context window."
2194
- },
2195
- "Qwen/Qwen3-Next-80B-A3B-Instruct": {
2196
- maxTokens: 32768,
2197
- contextWindow: 131072,
2198
- supportsImages: false,
2199
- supportsPromptCache: false,
2200
- inputPrice: 0,
2201
- outputPrice: 0,
2202
- description: "Fast, stable instruction-tuned model optimized for complex tasks, RAG, and tool use without thinking traces."
2203
- },
2204
- "Qwen/Qwen3-Next-80B-A3B-Thinking": {
2205
- maxTokens: 32768,
2206
- contextWindow: 131072,
2207
- supportsImages: false,
2208
- supportsPromptCache: false,
2209
- inputPrice: 0,
2210
- outputPrice: 0,
2211
- description: "Reasoning-first model with structured thinking traces for multi-step problems, math proofs, and code synthesis."
2212
- },
2213
- "Qwen/Qwen3-VL-235B-A22B-Thinking": {
2214
- maxTokens: 262144,
2215
- contextWindow: 262144,
2216
- supportsImages: true,
2217
- supportsPromptCache: false,
2218
- inputPrice: 0.16,
2219
- outputPrice: 0.65,
2220
- description: "Qwen3\u2011VL\u2011235B\u2011A22B\u2011Thinking is an open\u2011weight MoE vision\u2011language model (235B total, ~22B activated) optimized for deliberate multi\u2011step reasoning with strong text\u2011image\u2011video understanding and long\u2011context capabilities."
2107
+ inputPrice: 0.45,
2108
+ outputPrice: 1.8,
2109
+ description: "Qwen3 Coder 480B (MoE model with 35B active parameters)"
2221
2110
  }
2222
2111
  };
2223
- var chutesDefaultModelInfo = chutesModels[chutesDefaultModelId];
2112
+ var BEDROCK_DEFAULT_TEMPERATURE = 0.3;
2113
+ var BEDROCK_MAX_TOKENS = 4096;
2114
+ var BEDROCK_DEFAULT_CONTEXT = 128e3;
2115
+ var AWS_INFERENCE_PROFILE_MAPPING = [
2116
+ // Australia regions (Sydney and Melbourne) → au. inference profile (most specific - 14 chars)
2117
+ ["ap-southeast-2", "au."],
2118
+ ["ap-southeast-4", "au."],
2119
+ // Japan regions (Tokyo and Osaka) → jp. inference profile (13 chars)
2120
+ ["ap-northeast-", "jp."],
2121
+ // US Government Cloud → ug. inference profile (7 chars)
2122
+ ["us-gov-", "ug."],
2123
+ // Americas regions → us. inference profile (3 chars)
2124
+ ["us-", "us."],
2125
+ // Europe regions → eu. inference profile (3 chars)
2126
+ ["eu-", "eu."],
2127
+ // Asia Pacific regions → apac. inference profile (3 chars)
2128
+ ["ap-", "apac."],
2129
+ // Canada regions → ca. inference profile (3 chars)
2130
+ ["ca-", "ca."],
2131
+ // South America regions → sa. inference profile (3 chars)
2132
+ ["sa-", "sa."]
2133
+ ];
2134
+ var BEDROCK_REGIONS = [
2135
+ { value: "us-east-1", label: "us-east-1" },
2136
+ { value: "us-east-2", label: "us-east-2" },
2137
+ { value: "us-west-1", label: "us-west-1" },
2138
+ { value: "us-west-2", label: "us-west-2" },
2139
+ { value: "ap-northeast-1", label: "ap-northeast-1" },
2140
+ { value: "ap-northeast-2", label: "ap-northeast-2" },
2141
+ { value: "ap-northeast-3", label: "ap-northeast-3" },
2142
+ { value: "ap-south-1", label: "ap-south-1" },
2143
+ { value: "ap-south-2", label: "ap-south-2" },
2144
+ { value: "ap-southeast-1", label: "ap-southeast-1" },
2145
+ { value: "ap-southeast-2", label: "ap-southeast-2" },
2146
+ { value: "ap-east-1", label: "ap-east-1" },
2147
+ { value: "eu-central-1", label: "eu-central-1" },
2148
+ { value: "eu-central-2", label: "eu-central-2" },
2149
+ { value: "eu-west-1", label: "eu-west-1" },
2150
+ { value: "eu-west-2", label: "eu-west-2" },
2151
+ { value: "eu-west-3", label: "eu-west-3" },
2152
+ { value: "eu-north-1", label: "eu-north-1" },
2153
+ { value: "eu-south-1", label: "eu-south-1" },
2154
+ { value: "eu-south-2", label: "eu-south-2" },
2155
+ { value: "ca-central-1", label: "ca-central-1" },
2156
+ { value: "sa-east-1", label: "sa-east-1" },
2157
+ { value: "us-gov-east-1", label: "us-gov-east-1" },
2158
+ { value: "us-gov-west-1", label: "us-gov-west-1" }
2159
+ ].sort((a, b) => a.value.localeCompare(b.value));
2160
+ var BEDROCK_1M_CONTEXT_MODEL_IDS = [
2161
+ "anthropic.claude-sonnet-4-20250514-v1:0",
2162
+ "anthropic.claude-sonnet-4-5-20250929-v1:0",
2163
+ "anthropic.claude-opus-4-6-v1"
2164
+ ];
2165
+ var BEDROCK_GLOBAL_INFERENCE_MODEL_IDS = [
2166
+ "anthropic.claude-sonnet-4-20250514-v1:0",
2167
+ "anthropic.claude-sonnet-4-5-20250929-v1:0",
2168
+ "anthropic.claude-haiku-4-5-20251001-v1:0",
2169
+ "anthropic.claude-opus-4-5-20251101-v1:0",
2170
+ "anthropic.claude-opus-4-6-v1"
2171
+ ];
2172
+ var BEDROCK_SERVICE_TIER_MODEL_IDS = [
2173
+ // Amazon Nova models
2174
+ "amazon.nova-lite-v1:0",
2175
+ "amazon.nova-2-lite-v1:0",
2176
+ "amazon.nova-pro-v1:0",
2177
+ "amazon.nova-pro-latency-optimized-v1:0",
2178
+ // DeepSeek models
2179
+ "deepseek.r1-v1:0",
2180
+ // Qwen models
2181
+ "qwen.qwen3-next-80b-a3b",
2182
+ "qwen.qwen3-coder-480b-a35b-v1:0",
2183
+ // OpenAI GPT-OSS models
2184
+ "openai.gpt-oss-20b-1:0",
2185
+ "openai.gpt-oss-120b-1:0"
2186
+ ];
2187
+ var BEDROCK_SERVICE_TIER_PRICING = {
2188
+ STANDARD: 1,
2189
+ // Base price
2190
+ FLEX: 0.5,
2191
+ // 50% discount from standard
2192
+ PRIORITY: 1.75
2193
+ // 75% premium over standard
2194
+ };
2224
2195
 
2225
2196
  // src/providers/deepseek.ts
2226
2197
  var deepSeekDefaultModelId = "deepseek-chat";
@@ -2261,109 +2232,6 @@ var deepSeekModels = {
2261
2232
  };
2262
2233
  var DEEP_SEEK_DEFAULT_TEMPERATURE = 0.3;
2263
2234
 
2264
- // src/providers/doubao.ts
2265
- var doubaoDefaultModelId = "doubao-seed-1-6-250615";
2266
- var doubaoModels = {
2267
- "doubao-seed-1-6-250615": {
2268
- maxTokens: 32768,
2269
- contextWindow: 128e3,
2270
- supportsImages: true,
2271
- supportsPromptCache: true,
2272
- inputPrice: 1e-4,
2273
- // $0.0001 per million tokens (cache miss)
2274
- outputPrice: 4e-4,
2275
- // $0.0004 per million tokens
2276
- cacheWritesPrice: 1e-4,
2277
- // $0.0001 per million tokens (cache miss)
2278
- cacheReadsPrice: 2e-5,
2279
- // $0.00002 per million tokens (cache hit)
2280
- description: `Doubao Seed 1.6 is a powerful model designed for high-performance tasks with extensive context handling.`
2281
- },
2282
- "doubao-seed-1-6-thinking-250715": {
2283
- maxTokens: 32768,
2284
- contextWindow: 128e3,
2285
- supportsImages: true,
2286
- supportsPromptCache: true,
2287
- inputPrice: 2e-4,
2288
- // $0.0002 per million tokens
2289
- outputPrice: 8e-4,
2290
- // $0.0008 per million tokens
2291
- cacheWritesPrice: 2e-4,
2292
- // $0.0002 per million
2293
- cacheReadsPrice: 4e-5,
2294
- // $0.00004 per million tokens (cache hit)
2295
- description: `Doubao Seed 1.6 Thinking is optimized for reasoning tasks, providing enhanced performance in complex problem-solving scenarios.`
2296
- },
2297
- "doubao-seed-1-6-flash-250715": {
2298
- maxTokens: 32768,
2299
- contextWindow: 128e3,
2300
- supportsImages: true,
2301
- supportsPromptCache: true,
2302
- inputPrice: 15e-5,
2303
- // $0.00015 per million tokens
2304
- outputPrice: 6e-4,
2305
- // $0.0006 per million tokens
2306
- cacheWritesPrice: 15e-5,
2307
- // $0.00015 per million
2308
- cacheReadsPrice: 3e-5,
2309
- // $0.00003 per million tokens (cache hit)
2310
- description: `Doubao Seed 1.6 Flash is tailored for speed and efficiency, making it ideal for applications requiring rapid responses.`
2311
- }
2312
- };
2313
- var doubaoDefaultModelInfo = doubaoModels[doubaoDefaultModelId];
2314
- var DOUBAO_API_BASE_URL = "https://ark.cn-beijing.volces.com/api/v3";
2315
- var DOUBAO_API_CHAT_PATH = "/chat/completions";
2316
-
2317
- // src/providers/featherless.ts
2318
- var featherlessModels = {
2319
- "deepseek-ai/DeepSeek-V3-0324": {
2320
- maxTokens: 4096,
2321
- contextWindow: 32678,
2322
- supportsImages: false,
2323
- supportsPromptCache: false,
2324
- inputPrice: 0,
2325
- outputPrice: 0,
2326
- description: "DeepSeek V3 0324 model."
2327
- },
2328
- "deepseek-ai/DeepSeek-R1-0528": {
2329
- maxTokens: 4096,
2330
- contextWindow: 32678,
2331
- supportsImages: false,
2332
- supportsPromptCache: false,
2333
- inputPrice: 0,
2334
- outputPrice: 0,
2335
- description: "DeepSeek R1 0528 model."
2336
- },
2337
- "moonshotai/Kimi-K2-Instruct": {
2338
- maxTokens: 4096,
2339
- contextWindow: 32678,
2340
- supportsImages: false,
2341
- supportsPromptCache: false,
2342
- inputPrice: 0,
2343
- outputPrice: 0,
2344
- description: "Kimi K2 Instruct model."
2345
- },
2346
- "openai/gpt-oss-120b": {
2347
- maxTokens: 4096,
2348
- contextWindow: 32678,
2349
- supportsImages: false,
2350
- supportsPromptCache: false,
2351
- inputPrice: 0,
2352
- outputPrice: 0,
2353
- description: "GPT-OSS 120B model."
2354
- },
2355
- "Qwen/Qwen3-Coder-480B-A35B-Instruct": {
2356
- maxTokens: 4096,
2357
- contextWindow: 32678,
2358
- supportsImages: false,
2359
- supportsPromptCache: false,
2360
- inputPrice: 0,
2361
- outputPrice: 0,
2362
- description: "Qwen3 Coder 480B A35B Instruct model."
2363
- }
2364
- };
2365
- var featherlessDefaultModelId = "moonshotai/Kimi-K2-Instruct";
2366
-
2367
2235
  // src/providers/fireworks.ts
2368
2236
  var fireworksDefaultModelId = "accounts/fireworks/models/kimi-k2-instruct-0905";
2369
2237
  var fireworksModels = {
@@ -2779,121 +2647,6 @@ var geminiModels = {
2779
2647
  }
2780
2648
  };
2781
2649
 
2782
- // src/providers/groq.ts
2783
- var groqDefaultModelId = "moonshotai/kimi-k2-instruct-0905";
2784
- var groqModels = {
2785
- // Models based on API response: https://api.groq.com/openai/v1/models
2786
- "llama-3.1-8b-instant": {
2787
- maxTokens: 8192,
2788
- contextWindow: 131072,
2789
- supportsImages: false,
2790
- supportsPromptCache: false,
2791
- inputPrice: 0.05,
2792
- outputPrice: 0.08,
2793
- description: "Meta Llama 3.1 8B Instant model, 128K context."
2794
- },
2795
- "llama-3.3-70b-versatile": {
2796
- maxTokens: 8192,
2797
- contextWindow: 131072,
2798
- supportsImages: false,
2799
- supportsPromptCache: false,
2800
- inputPrice: 0.59,
2801
- outputPrice: 0.79,
2802
- description: "Meta Llama 3.3 70B Versatile model, 128K context."
2803
- },
2804
- "meta-llama/llama-4-scout-17b-16e-instruct": {
2805
- maxTokens: 8192,
2806
- contextWindow: 131072,
2807
- supportsImages: false,
2808
- supportsPromptCache: false,
2809
- inputPrice: 0.11,
2810
- outputPrice: 0.34,
2811
- description: "Meta Llama 4 Scout 17B Instruct model, 128K context."
2812
- },
2813
- "qwen/qwen3-32b": {
2814
- maxTokens: 8192,
2815
- contextWindow: 131072,
2816
- supportsImages: false,
2817
- supportsPromptCache: false,
2818
- inputPrice: 0.29,
2819
- outputPrice: 0.59,
2820
- description: "Alibaba Qwen 3 32B model, 128K context."
2821
- },
2822
- "moonshotai/kimi-k2-instruct-0905": {
2823
- maxTokens: 16384,
2824
- contextWindow: 262144,
2825
- supportsImages: false,
2826
- supportsPromptCache: true,
2827
- inputPrice: 0.6,
2828
- outputPrice: 2.5,
2829
- cacheReadsPrice: 0.15,
2830
- description: "Kimi K2 model gets a new version update: Agentic coding: more accurate, better generalization across scaffolds. Frontend coding: improved aesthetics and functionalities on web, 3d, and other tasks. Context length: extended from 128k to 256k, providing better long-horizon support."
2831
- },
2832
- "openai/gpt-oss-120b": {
2833
- maxTokens: 32766,
2834
- contextWindow: 131072,
2835
- supportsImages: false,
2836
- supportsPromptCache: false,
2837
- inputPrice: 0.15,
2838
- outputPrice: 0.75,
2839
- description: "GPT-OSS 120B is OpenAI's flagship open source model, built on a Mixture-of-Experts (MoE) architecture with 20 billion parameters and 128 experts."
2840
- },
2841
- "openai/gpt-oss-20b": {
2842
- maxTokens: 32768,
2843
- contextWindow: 131072,
2844
- supportsImages: false,
2845
- supportsPromptCache: false,
2846
- inputPrice: 0.1,
2847
- outputPrice: 0.5,
2848
- description: "GPT-OSS 20B is OpenAI's flagship open source model, built on a Mixture-of-Experts (MoE) architecture with 20 billion parameters and 32 experts."
2849
- }
2850
- };
2851
-
2852
- // src/providers/huggingface.ts
2853
- var HUGGINGFACE_DEFAULT_MAX_TOKENS = 2048;
2854
- var HUGGINGFACE_MAX_TOKENS_FALLBACK = 8192;
2855
- var HUGGINGFACE_DEFAULT_CONTEXT_WINDOW = 128e3;
2856
- var HUGGINGFACE_SLIDER_STEP = 256;
2857
- var HUGGINGFACE_SLIDER_MIN = 1;
2858
- var HUGGINGFACE_TEMPERATURE_MAX_VALUE = 2;
2859
- var HUGGINGFACE_API_URL = "https://router.huggingface.co/v1/models?collection=roocode";
2860
- var HUGGINGFACE_CACHE_DURATION = 1e3 * 60 * 60;
2861
-
2862
- // src/providers/io-intelligence.ts
2863
- var ioIntelligenceDefaultModelId = "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8";
2864
- var ioIntelligenceDefaultBaseUrl = "https://api.intelligence.io.solutions/api/v1";
2865
- var IO_INTELLIGENCE_CACHE_DURATION = 1e3 * 60 * 60;
2866
- var ioIntelligenceModels = {
2867
- "deepseek-ai/DeepSeek-R1-0528": {
2868
- maxTokens: 8192,
2869
- contextWindow: 128e3,
2870
- supportsImages: false,
2871
- supportsPromptCache: false,
2872
- description: "DeepSeek R1 reasoning model"
2873
- },
2874
- "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8": {
2875
- maxTokens: 8192,
2876
- contextWindow: 43e4,
2877
- supportsImages: true,
2878
- supportsPromptCache: false,
2879
- description: "Llama 4 Maverick 17B model"
2880
- },
2881
- "Intel/Qwen3-Coder-480B-A35B-Instruct-int4-mixed-ar": {
2882
- maxTokens: 8192,
2883
- contextWindow: 106e3,
2884
- supportsImages: false,
2885
- supportsPromptCache: false,
2886
- description: "Qwen3 Coder 480B specialized for coding"
2887
- },
2888
- "openai/gpt-oss-120b": {
2889
- maxTokens: 8192,
2890
- contextWindow: 131072,
2891
- supportsImages: false,
2892
- supportsPromptCache: false,
2893
- description: "OpenAI GPT-OSS 120B model"
2894
- }
2895
- };
2896
-
2897
2650
  // src/providers/lite-llm.ts
2898
2651
  var litellmDefaultModelId = "claude-3-7-sonnet-20250219";
2899
2652
  var litellmDefaultModelInfo = {
@@ -3595,7 +3348,7 @@ var openAiModelInfoSaneDefaults = {
3595
3348
  inputPrice: 0,
3596
3349
  outputPrice: 0
3597
3350
  };
3598
- var azureOpenAiDefaultApiVersion = "2024-08-01-preview";
3351
+ var azureOpenAiDefaultApiVersion = "2025-04-01-preview";
3599
3352
  var OPENAI_NATIVE_DEFAULT_TEMPERATURE = 0;
3600
3353
  var OPENAI_AZURE_AI_INFERENCE_PATH = "/models/chat/completions";
3601
3354
 
@@ -3985,19 +3738,6 @@ var sambaNovaModels = {
3985
3738
  }
3986
3739
  };
3987
3740
 
3988
- // src/providers/unbound.ts
3989
- var unboundDefaultModelId = "anthropic/claude-sonnet-4-5";
3990
- var unboundDefaultModelInfo = {
3991
- maxTokens: 8192,
3992
- contextWindow: 2e5,
3993
- supportsImages: true,
3994
- supportsPromptCache: true,
3995
- inputPrice: 3,
3996
- outputPrice: 15,
3997
- cacheWritesPrice: 3.75,
3998
- cacheReadsPrice: 0.3
3999
- };
4000
-
4001
3741
  // src/providers/vertex.ts
4002
3742
  var vertexDefaultModelId = "claude-sonnet-4-5@20250929";
4003
3743
  var vertexModels = {
@@ -5264,18 +5004,6 @@ var zaiApiLineConfigs = {
5264
5004
  }
5265
5005
  };
5266
5006
 
5267
- // src/providers/deepinfra.ts
5268
- var deepInfraDefaultModelId = "Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo";
5269
- var deepInfraDefaultModelInfo = {
5270
- maxTokens: 16384,
5271
- contextWindow: 262144,
5272
- supportsImages: false,
5273
- supportsPromptCache: false,
5274
- inputPrice: 0.3,
5275
- outputPrice: 1.2,
5276
- description: "Qwen 3 Coder 480B A35B Instruct Turbo model, 256K context."
5277
- };
5278
-
5279
5007
  // src/providers/minimax.ts
5280
5008
  var minimaxDefaultModelId = "MiniMax-M2";
5281
5009
  var minimaxModels = {
@@ -5333,18 +5061,10 @@ function getProviderDefaultModelId(provider, options = { isChina: false }) {
5333
5061
  return openRouterDefaultModelId;
5334
5062
  case "requesty":
5335
5063
  return requestyDefaultModelId;
5336
- case "unbound":
5337
- return unboundDefaultModelId;
5338
5064
  case "litellm":
5339
5065
  return litellmDefaultModelId;
5340
5066
  case "xai":
5341
5067
  return xaiDefaultModelId;
5342
- case "groq":
5343
- return groqDefaultModelId;
5344
- case "huggingface":
5345
- return "meta-llama/Llama-3.3-70B-Instruct";
5346
- case "chutes":
5347
- return chutesDefaultModelId;
5348
5068
  case "baseten":
5349
5069
  return basetenDefaultModelId;
5350
5070
  case "bedrock":
@@ -5355,8 +5075,6 @@ function getProviderDefaultModelId(provider, options = { isChina: false }) {
5355
5075
  return geminiDefaultModelId;
5356
5076
  case "deepseek":
5357
5077
  return deepSeekDefaultModelId;
5358
- case "doubao":
5359
- return doubaoDefaultModelId;
5360
5078
  case "moonshot":
5361
5079
  return moonshotDefaultModelId;
5362
5080
  case "minimax":
@@ -5379,26 +5097,20 @@ function getProviderDefaultModelId(provider, options = { isChina: false }) {
5379
5097
  case "lmstudio":
5380
5098
  return "";
5381
5099
  // LMStudio uses dynamic model selection
5382
- case "deepinfra":
5383
- return deepInfraDefaultModelId;
5384
5100
  case "vscode-lm":
5385
5101
  return vscodeLlmDefaultModelId;
5386
- case "cerebras":
5387
- return cerebrasDefaultModelId;
5388
5102
  case "sambanova":
5389
5103
  return sambaNovaDefaultModelId;
5390
5104
  case "fireworks":
5391
5105
  return fireworksDefaultModelId;
5392
- case "featherless":
5393
- return featherlessDefaultModelId;
5394
- case "io-intelligence":
5395
- return ioIntelligenceDefaultModelId;
5396
5106
  case "roo":
5397
5107
  return rooDefaultModelId;
5398
5108
  case "qwen-code":
5399
5109
  return qwenCodeDefaultModelId;
5400
5110
  case "vercel-ai-gateway":
5401
5111
  return vercelAiGatewayDefaultModelId;
5112
+ case "azure":
5113
+ return azureDefaultModelId;
5402
5114
  case "anthropic":
5403
5115
  case "gemini-cli":
5404
5116
  case "fake-ai":
@@ -5409,18 +5121,7 @@ function getProviderDefaultModelId(provider, options = { isChina: false }) {
5409
5121
 
5410
5122
  // src/provider-settings.ts
5411
5123
  var DEFAULT_CONSECUTIVE_MISTAKE_LIMIT = 3;
5412
- var dynamicProviders = [
5413
- "openrouter",
5414
- "vercel-ai-gateway",
5415
- "huggingface",
5416
- "litellm",
5417
- "deepinfra",
5418
- "io-intelligence",
5419
- "requesty",
5420
- "unbound",
5421
- "roo",
5422
- "chutes"
5423
- ];
5124
+ var dynamicProviders = ["openrouter", "vercel-ai-gateway", "litellm", "requesty", "roo"];
5424
5125
  var isDynamicProvider = (key) => dynamicProviders.includes(key);
5425
5126
  var localProviders = ["ollama", "lmstudio"];
5426
5127
  var isLocalProvider = (key) => localProviders.includes(key);
@@ -5437,16 +5138,13 @@ var providerNames = [
5437
5138
  ...customProviders,
5438
5139
  ...fauxProviders,
5439
5140
  "anthropic",
5141
+ "azure",
5440
5142
  "bedrock",
5441
5143
  "baseten",
5442
- "cerebras",
5443
- "doubao",
5444
5144
  "deepseek",
5445
- "featherless",
5446
5145
  "fireworks",
5447
5146
  "gemini",
5448
5147
  "gemini-cli",
5449
- "groq",
5450
5148
  "mistral",
5451
5149
  "moonshot",
5452
5150
  "minimax",
@@ -5461,10 +5159,24 @@ var providerNames = [
5461
5159
  ];
5462
5160
  var providerNamesSchema = import_zod8.z.enum(providerNames);
5463
5161
  var isProviderName = (key) => typeof key === "string" && providerNames.includes(key);
5162
+ var retiredProviderNames = [
5163
+ "cerebras",
5164
+ "chutes",
5165
+ "deepinfra",
5166
+ "doubao",
5167
+ "featherless",
5168
+ "groq",
5169
+ "huggingface",
5170
+ "io-intelligence",
5171
+ "unbound"
5172
+ ];
5173
+ var retiredProviderNamesSchema = import_zod8.z.enum(retiredProviderNames);
5174
+ var isRetiredProvider = (value) => retiredProviderNames.includes(value);
5175
+ var providerNamesWithRetiredSchema = import_zod8.z.union([providerNamesSchema, retiredProviderNamesSchema]);
5464
5176
  var providerSettingsEntrySchema = import_zod8.z.object({
5465
5177
  id: import_zod8.z.string(),
5466
5178
  name: import_zod8.z.string(),
5467
- apiProvider: providerNamesSchema.optional(),
5179
+ apiProvider: providerNamesWithRetiredSchema.optional(),
5468
5180
  modelId: import_zod8.z.string().optional()
5469
5181
  });
5470
5182
  var baseProviderSettingsSchema = import_zod8.z.object({
@@ -5524,8 +5236,6 @@ var vertexSchema = apiModelIdProviderModelSchema.extend({
5524
5236
  vertexJsonCredentials: import_zod8.z.string().optional(),
5525
5237
  vertexProjectId: import_zod8.z.string().optional(),
5526
5238
  vertexRegion: import_zod8.z.string().optional(),
5527
- enableUrlContext: import_zod8.z.boolean().optional(),
5528
- enableGrounding: import_zod8.z.boolean().optional(),
5529
5239
  vertex1MContext: import_zod8.z.boolean().optional()
5530
5240
  // Enable 'context-1m-2025-08-07' beta for 1M context window.
5531
5241
  });
@@ -5564,9 +5274,7 @@ var lmStudioSchema = baseProviderSettingsSchema.extend({
5564
5274
  });
5565
5275
  var geminiSchema = apiModelIdProviderModelSchema.extend({
5566
5276
  geminiApiKey: import_zod8.z.string().optional(),
5567
- googleGeminiBaseUrl: import_zod8.z.string().optional(),
5568
- enableUrlContext: import_zod8.z.boolean().optional(),
5569
- enableGrounding: import_zod8.z.boolean().optional()
5277
+ googleGeminiBaseUrl: import_zod8.z.string().optional()
5570
5278
  });
5571
5279
  var geminiCliSchema = apiModelIdProviderModelSchema.extend({
5572
5280
  geminiCliOAuthPath: import_zod8.z.string().optional(),
@@ -5590,15 +5298,6 @@ var deepSeekSchema = apiModelIdProviderModelSchema.extend({
5590
5298
  deepSeekBaseUrl: import_zod8.z.string().optional(),
5591
5299
  deepSeekApiKey: import_zod8.z.string().optional()
5592
5300
  });
5593
- var deepInfraSchema = apiModelIdProviderModelSchema.extend({
5594
- deepInfraBaseUrl: import_zod8.z.string().optional(),
5595
- deepInfraApiKey: import_zod8.z.string().optional(),
5596
- deepInfraModelId: import_zod8.z.string().optional()
5597
- });
5598
- var doubaoSchema = apiModelIdProviderModelSchema.extend({
5599
- doubaoBaseUrl: import_zod8.z.string().optional(),
5600
- doubaoApiKey: import_zod8.z.string().optional()
5601
- });
5602
5301
  var moonshotSchema = apiModelIdProviderModelSchema.extend({
5603
5302
  moonshotBaseUrl: import_zod8.z.union([import_zod8.z.literal("https://api.moonshot.ai/v1"), import_zod8.z.literal("https://api.moonshot.cn/v1")]).optional(),
5604
5303
  moonshotApiKey: import_zod8.z.string().optional()
@@ -5607,10 +5306,6 @@ var minimaxSchema = apiModelIdProviderModelSchema.extend({
5607
5306
  minimaxBaseUrl: import_zod8.z.union([import_zod8.z.literal("https://api.minimax.io/v1"), import_zod8.z.literal("https://api.minimaxi.com/v1")]).optional(),
5608
5307
  minimaxApiKey: import_zod8.z.string().optional()
5609
5308
  });
5610
- var unboundSchema = baseProviderSettingsSchema.extend({
5611
- unboundApiKey: import_zod8.z.string().optional(),
5612
- unboundModelId: import_zod8.z.string().optional()
5613
- });
5614
5309
  var requestySchema = baseProviderSettingsSchema.extend({
5615
5310
  requestyBaseUrl: import_zod8.z.string().optional(),
5616
5311
  requestyApiKey: import_zod8.z.string().optional(),
@@ -5622,26 +5317,12 @@ var fakeAiSchema = baseProviderSettingsSchema.extend({
5622
5317
  var xaiSchema = apiModelIdProviderModelSchema.extend({
5623
5318
  xaiApiKey: import_zod8.z.string().optional()
5624
5319
  });
5625
- var groqSchema = apiModelIdProviderModelSchema.extend({
5626
- groqApiKey: import_zod8.z.string().optional()
5627
- });
5628
- var huggingFaceSchema = baseProviderSettingsSchema.extend({
5629
- huggingFaceApiKey: import_zod8.z.string().optional(),
5630
- huggingFaceModelId: import_zod8.z.string().optional(),
5631
- huggingFaceInferenceProvider: import_zod8.z.string().optional()
5632
- });
5633
- var chutesSchema = apiModelIdProviderModelSchema.extend({
5634
- chutesApiKey: import_zod8.z.string().optional()
5635
- });
5636
5320
  var litellmSchema = baseProviderSettingsSchema.extend({
5637
5321
  litellmBaseUrl: import_zod8.z.string().optional(),
5638
5322
  litellmApiKey: import_zod8.z.string().optional(),
5639
5323
  litellmModelId: import_zod8.z.string().optional(),
5640
5324
  litellmUsePromptCache: import_zod8.z.boolean().optional()
5641
5325
  });
5642
- var cerebrasSchema = apiModelIdProviderModelSchema.extend({
5643
- cerebrasApiKey: import_zod8.z.string().optional()
5644
- });
5645
5326
  var sambaNovaSchema = apiModelIdProviderModelSchema.extend({
5646
5327
  sambaNovaApiKey: import_zod8.z.string().optional()
5647
5328
  });
@@ -5653,13 +5334,6 @@ var zaiSchema = apiModelIdProviderModelSchema.extend({
5653
5334
  var fireworksSchema = apiModelIdProviderModelSchema.extend({
5654
5335
  fireworksApiKey: import_zod8.z.string().optional()
5655
5336
  });
5656
- var featherlessSchema = apiModelIdProviderModelSchema.extend({
5657
- featherlessApiKey: import_zod8.z.string().optional()
5658
- });
5659
- var ioIntelligenceSchema = apiModelIdProviderModelSchema.extend({
5660
- ioIntelligenceModelId: import_zod8.z.string().optional(),
5661
- ioIntelligenceApiKey: import_zod8.z.string().optional()
5662
- });
5663
5337
  var qwenCodeSchema = apiModelIdProviderModelSchema.extend({
5664
5338
  qwenCodeOauthPath: import_zod8.z.string().optional()
5665
5339
  });
@@ -5674,11 +5348,18 @@ var vercelAiGatewaySchema = baseProviderSettingsSchema.extend({
5674
5348
  var basetenSchema = apiModelIdProviderModelSchema.extend({
5675
5349
  basetenApiKey: import_zod8.z.string().optional()
5676
5350
  });
5351
+ var azureSchema = apiModelIdProviderModelSchema.extend({
5352
+ azureApiKey: import_zod8.z.string().optional(),
5353
+ azureResourceName: import_zod8.z.string().optional(),
5354
+ azureDeploymentName: import_zod8.z.string().optional(),
5355
+ azureApiVersion: import_zod8.z.string().optional()
5356
+ });
5677
5357
  var defaultSchema = import_zod8.z.object({
5678
5358
  apiProvider: import_zod8.z.undefined()
5679
5359
  });
5680
5360
  var providerSettingsSchemaDiscriminated = import_zod8.z.discriminatedUnion("apiProvider", [
5681
5361
  anthropicSchema.merge(import_zod8.z.object({ apiProvider: import_zod8.z.literal("anthropic") })),
5362
+ azureSchema.merge(import_zod8.z.object({ apiProvider: import_zod8.z.literal("azure") })),
5682
5363
  openRouterSchema.merge(import_zod8.z.object({ apiProvider: import_zod8.z.literal("openrouter") })),
5683
5364
  bedrockSchema.merge(import_zod8.z.object({ apiProvider: import_zod8.z.literal("bedrock") })),
5684
5365
  vertexSchema.merge(import_zod8.z.object({ apiProvider: import_zod8.z.literal("vertex") })),
@@ -5692,33 +5373,25 @@ var providerSettingsSchemaDiscriminated = import_zod8.z.discriminatedUnion("apiP
5692
5373
  openAiNativeSchema.merge(import_zod8.z.object({ apiProvider: import_zod8.z.literal("openai-native") })),
5693
5374
  mistralSchema.merge(import_zod8.z.object({ apiProvider: import_zod8.z.literal("mistral") })),
5694
5375
  deepSeekSchema.merge(import_zod8.z.object({ apiProvider: import_zod8.z.literal("deepseek") })),
5695
- deepInfraSchema.merge(import_zod8.z.object({ apiProvider: import_zod8.z.literal("deepinfra") })),
5696
- doubaoSchema.merge(import_zod8.z.object({ apiProvider: import_zod8.z.literal("doubao") })),
5697
5376
  moonshotSchema.merge(import_zod8.z.object({ apiProvider: import_zod8.z.literal("moonshot") })),
5698
5377
  minimaxSchema.merge(import_zod8.z.object({ apiProvider: import_zod8.z.literal("minimax") })),
5699
- unboundSchema.merge(import_zod8.z.object({ apiProvider: import_zod8.z.literal("unbound") })),
5700
5378
  requestySchema.merge(import_zod8.z.object({ apiProvider: import_zod8.z.literal("requesty") })),
5701
5379
  fakeAiSchema.merge(import_zod8.z.object({ apiProvider: import_zod8.z.literal("fake-ai") })),
5702
5380
  xaiSchema.merge(import_zod8.z.object({ apiProvider: import_zod8.z.literal("xai") })),
5703
- groqSchema.merge(import_zod8.z.object({ apiProvider: import_zod8.z.literal("groq") })),
5704
5381
  basetenSchema.merge(import_zod8.z.object({ apiProvider: import_zod8.z.literal("baseten") })),
5705
- huggingFaceSchema.merge(import_zod8.z.object({ apiProvider: import_zod8.z.literal("huggingface") })),
5706
- chutesSchema.merge(import_zod8.z.object({ apiProvider: import_zod8.z.literal("chutes") })),
5707
5382
  litellmSchema.merge(import_zod8.z.object({ apiProvider: import_zod8.z.literal("litellm") })),
5708
- cerebrasSchema.merge(import_zod8.z.object({ apiProvider: import_zod8.z.literal("cerebras") })),
5709
5383
  sambaNovaSchema.merge(import_zod8.z.object({ apiProvider: import_zod8.z.literal("sambanova") })),
5710
5384
  zaiSchema.merge(import_zod8.z.object({ apiProvider: import_zod8.z.literal("zai") })),
5711
5385
  fireworksSchema.merge(import_zod8.z.object({ apiProvider: import_zod8.z.literal("fireworks") })),
5712
- featherlessSchema.merge(import_zod8.z.object({ apiProvider: import_zod8.z.literal("featherless") })),
5713
- ioIntelligenceSchema.merge(import_zod8.z.object({ apiProvider: import_zod8.z.literal("io-intelligence") })),
5714
5386
  qwenCodeSchema.merge(import_zod8.z.object({ apiProvider: import_zod8.z.literal("qwen-code") })),
5715
5387
  rooSchema.merge(import_zod8.z.object({ apiProvider: import_zod8.z.literal("roo") })),
5716
5388
  vercelAiGatewaySchema.merge(import_zod8.z.object({ apiProvider: import_zod8.z.literal("vercel-ai-gateway") })),
5717
5389
  defaultSchema
5718
5390
  ]);
5719
5391
  var providerSettingsSchema = import_zod8.z.object({
5720
- apiProvider: providerNamesSchema.optional(),
5392
+ apiProvider: providerNamesWithRetiredSchema.optional(),
5721
5393
  ...anthropicSchema.shape,
5394
+ ...azureSchema.shape,
5722
5395
  ...openRouterSchema.shape,
5723
5396
  ...bedrockSchema.shape,
5724
5397
  ...vertexSchema.shape,
@@ -5732,25 +5405,16 @@ var providerSettingsSchema = import_zod8.z.object({
5732
5405
  ...openAiNativeSchema.shape,
5733
5406
  ...mistralSchema.shape,
5734
5407
  ...deepSeekSchema.shape,
5735
- ...deepInfraSchema.shape,
5736
- ...doubaoSchema.shape,
5737
5408
  ...moonshotSchema.shape,
5738
5409
  ...minimaxSchema.shape,
5739
- ...unboundSchema.shape,
5740
5410
  ...requestySchema.shape,
5741
5411
  ...fakeAiSchema.shape,
5742
5412
  ...xaiSchema.shape,
5743
- ...groqSchema.shape,
5744
5413
  ...basetenSchema.shape,
5745
- ...huggingFaceSchema.shape,
5746
- ...chutesSchema.shape,
5747
5414
  ...litellmSchema.shape,
5748
- ...cerebrasSchema.shape,
5749
5415
  ...sambaNovaSchema.shape,
5750
5416
  ...zaiSchema.shape,
5751
5417
  ...fireworksSchema.shape,
5752
- ...featherlessSchema.shape,
5753
- ...ioIntelligenceSchema.shape,
5754
5418
  ...qwenCodeSchema.shape,
5755
5419
  ...rooSchema.shape,
5756
5420
  ...vercelAiGatewaySchema.shape,
@@ -5768,13 +5432,9 @@ var modelIdKeys = [
5768
5432
  "ollamaModelId",
5769
5433
  "lmStudioModelId",
5770
5434
  "lmStudioDraftModelId",
5771
- "unboundModelId",
5772
5435
  "requestyModelId",
5773
5436
  "litellmModelId",
5774
- "huggingFaceModelId",
5775
- "ioIntelligenceModelId",
5776
- "vercelAiGatewayModelId",
5777
- "deepInfraModelId"
5437
+ "vercelAiGatewayModelId"
5778
5438
  ];
5779
5439
  var getModelId = (settings) => {
5780
5440
  const modelIdKey = modelIdKeys.find((key) => settings[key]);
@@ -5783,6 +5443,7 @@ var getModelId = (settings) => {
5783
5443
  var isTypicalProvider = (key) => isProviderName(key) && !isInternalProvider(key) && !isCustomProvider(key) && !isFauxProvider(key);
5784
5444
  var modelIdKeysByProvider = {
5785
5445
  anthropic: "apiModelId",
5446
+ azure: "apiModelId",
5786
5447
  openrouter: "openRouterModelId",
5787
5448
  bedrock: "apiModelId",
5788
5449
  vertex: "apiModelId",
@@ -5796,23 +5457,14 @@ var modelIdKeysByProvider = {
5796
5457
  moonshot: "apiModelId",
5797
5458
  minimax: "apiModelId",
5798
5459
  deepseek: "apiModelId",
5799
- deepinfra: "deepInfraModelId",
5800
- doubao: "apiModelId",
5801
5460
  "qwen-code": "apiModelId",
5802
- unbound: "unboundModelId",
5803
5461
  requesty: "requestyModelId",
5804
5462
  xai: "apiModelId",
5805
- groq: "apiModelId",
5806
5463
  baseten: "apiModelId",
5807
- chutes: "apiModelId",
5808
5464
  litellm: "litellmModelId",
5809
- huggingface: "huggingFaceModelId",
5810
- cerebras: "apiModelId",
5811
5465
  sambanova: "apiModelId",
5812
5466
  zai: "apiModelId",
5813
5467
  fireworks: "apiModelId",
5814
- featherless: "apiModelId",
5815
- "io-intelligence": "ioIntelligenceModelId",
5816
5468
  roo: "apiModelId",
5817
5469
  "vercel-ai-gateway": "vercelAiGatewayModelId"
5818
5470
  };
@@ -5835,27 +5487,22 @@ var MODELS_BY_PROVIDER = {
5835
5487
  label: "Anthropic",
5836
5488
  models: Object.keys(anthropicModels)
5837
5489
  },
5490
+ azure: {
5491
+ id: "azure",
5492
+ label: "Azure AI Foundry",
5493
+ // Azure uses deployment names configured by the user (not a fixed upstream model ID list)
5494
+ models: []
5495
+ },
5838
5496
  bedrock: {
5839
5497
  id: "bedrock",
5840
5498
  label: "Amazon Bedrock",
5841
5499
  models: Object.keys(bedrockModels)
5842
5500
  },
5843
- cerebras: {
5844
- id: "cerebras",
5845
- label: "Cerebras",
5846
- models: Object.keys(cerebrasModels)
5847
- },
5848
5501
  deepseek: {
5849
5502
  id: "deepseek",
5850
5503
  label: "DeepSeek",
5851
5504
  models: Object.keys(deepSeekModels)
5852
5505
  },
5853
- doubao: { id: "doubao", label: "Doubao", models: Object.keys(doubaoModels) },
5854
- featherless: {
5855
- id: "featherless",
5856
- label: "Featherless",
5857
- models: Object.keys(featherlessModels)
5858
- },
5859
5506
  fireworks: {
5860
5507
  id: "fireworks",
5861
5508
  label: "Fireworks",
@@ -5866,12 +5513,6 @@ var MODELS_BY_PROVIDER = {
5866
5513
  label: "Google Gemini",
5867
5514
  models: Object.keys(geminiModels)
5868
5515
  },
5869
- groq: { id: "groq", label: "Groq", models: Object.keys(groqModels) },
5870
- "io-intelligence": {
5871
- id: "io-intelligence",
5872
- label: "IO Intelligence",
5873
- models: Object.keys(ioIntelligenceModels)
5874
- },
5875
5516
  mistral: {
5876
5517
  id: "mistral",
5877
5518
  label: "Mistral",
@@ -5918,14 +5559,10 @@ var MODELS_BY_PROVIDER = {
5918
5559
  zai: { id: "zai", label: "Z.ai", models: Object.keys(internationalZAiModels) },
5919
5560
  baseten: { id: "baseten", label: "Baseten", models: Object.keys(basetenModels) },
5920
5561
  // Dynamic providers; models pulled from remote APIs.
5921
- huggingface: { id: "huggingface", label: "Hugging Face", models: [] },
5922
5562
  litellm: { id: "litellm", label: "LiteLLM", models: [] },
5923
5563
  openrouter: { id: "openrouter", label: "OpenRouter", models: [] },
5924
5564
  requesty: { id: "requesty", label: "Requesty", models: [] },
5925
- unbound: { id: "unbound", label: "Unbound", models: [] },
5926
- deepinfra: { id: "deepinfra", label: "DeepInfra", models: [] },
5927
5565
  "vercel-ai-gateway": { id: "vercel-ai-gateway", label: "Vercel AI Gateway", models: [] },
5928
- chutes: { id: "chutes", label: "Chutes AI", models: [] },
5929
5566
  // Local providers; models discovered from localhost endpoints.
5930
5567
  lmstudio: { id: "lmstudio", label: "LM Studio", models: [] },
5931
5568
  ollama: { id: "ollama", label: "Ollama", models: [] }
@@ -6568,7 +6205,12 @@ var globalSettingsSchema = import_zod14.z.object({
6568
6205
  * Whether to show the worktree selector in the home screen.
6569
6206
  * @default true
6570
6207
  */
6571
- showWorktreesInHomeScreen: import_zod14.z.boolean().optional()
6208
+ showWorktreesInHomeScreen: import_zod14.z.boolean().optional(),
6209
+ /**
6210
+ * List of native tool names to globally disable.
6211
+ * Tools in this list will be excluded from prompt generation and rejected at execution time.
6212
+ */
6213
+ disabledTools: import_zod14.z.array(toolNamesSchema).optional()
6572
6214
  });
6573
6215
  var GLOBAL_SETTINGS_KEYS = globalSettingsSchema.keyof().options;
6574
6216
  var rooCodeSettingsSchema = providerSettingsSchema.merge(globalSettingsSchema);
@@ -6583,19 +6225,13 @@ var SECRET_STATE_KEYS = [
6583
6225
  "ollamaApiKey",
6584
6226
  "geminiApiKey",
6585
6227
  "openAiNativeApiKey",
6586
- "cerebrasApiKey",
6587
6228
  "deepSeekApiKey",
6588
- "doubaoApiKey",
6589
6229
  "moonshotApiKey",
6590
6230
  "mistralApiKey",
6591
6231
  "minimaxApiKey",
6592
- "unboundApiKey",
6593
6232
  "requestyApiKey",
6594
6233
  "xaiApiKey",
6595
- "groqApiKey",
6596
- "chutesApiKey",
6597
6234
  "litellmApiKey",
6598
- "deepInfraApiKey",
6599
6235
  "codeIndexOpenAiKey",
6600
6236
  "codeIndexQdrantApiKey",
6601
6237
  "codebaseIndexOpenAiCompatibleApiKey",
@@ -6603,14 +6239,12 @@ var SECRET_STATE_KEYS = [
6603
6239
  "codebaseIndexMistralApiKey",
6604
6240
  "codebaseIndexVercelAiGatewayApiKey",
6605
6241
  "codebaseIndexOpenRouterApiKey",
6606
- "huggingFaceApiKey",
6607
6242
  "sambaNovaApiKey",
6608
6243
  "zaiApiKey",
6609
6244
  "fireworksApiKey",
6610
- "featherlessApiKey",
6611
- "ioIntelligenceApiKey",
6612
6245
  "vercelAiGatewayApiKey",
6613
- "basetenApiKey"
6246
+ "basetenApiKey",
6247
+ "azureApiKey"
6614
6248
  ];
6615
6249
  var GLOBAL_SECRET_KEYS = [
6616
6250
  "openRouterImageApiKey"
@@ -6746,7 +6380,8 @@ var organizationDefaultSettingsSchema = globalSettingsSchema.pick({
6746
6380
  terminalCommandDelay: true,
6747
6381
  terminalShellIntegrationDisabled: true,
6748
6382
  terminalShellIntegrationTimeout: true,
6749
- terminalZshClearEolMark: true
6383
+ terminalZshClearEolMark: true,
6384
+ disabledTools: true
6750
6385
  }).merge(
6751
6386
  import_zod16.z.object({
6752
6387
  maxOpenTabsContext: import_zod16.z.number().int().nonnegative().optional(),
@@ -7213,6 +6848,9 @@ var TaskCommandName = /* @__PURE__ */ ((TaskCommandName2) => {
7213
6848
  TaskCommandName2["CloseTask"] = "CloseTask";
7214
6849
  TaskCommandName2["ResumeTask"] = "ResumeTask";
7215
6850
  TaskCommandName2["SendMessage"] = "SendMessage";
6851
+ TaskCommandName2["GetCommands"] = "GetCommands";
6852
+ TaskCommandName2["GetModes"] = "GetModes";
6853
+ TaskCommandName2["GetModels"] = "GetModels";
7216
6854
  return TaskCommandName2;
7217
6855
  })(TaskCommandName || {});
7218
6856
  var taskCommandSchema = import_zod18.z.discriminatedUnion("commandName", [
@@ -7241,6 +6879,15 @@ var taskCommandSchema = import_zod18.z.discriminatedUnion("commandName", [
7241
6879
  text: import_zod18.z.string().optional(),
7242
6880
  images: import_zod18.z.array(import_zod18.z.string()).optional()
7243
6881
  })
6882
+ }),
6883
+ import_zod18.z.object({
6884
+ commandName: import_zod18.z.literal("GetCommands" /* GetCommands */)
6885
+ }),
6886
+ import_zod18.z.object({
6887
+ commandName: import_zod18.z.literal("GetModes" /* GetModes */)
6888
+ }),
6889
+ import_zod18.z.object({
6890
+ commandName: import_zod18.z.literal("GetModels" /* GetModels */)
7244
6891
  })
7245
6892
  ]);
7246
6893
  var ipcMessageSchema = import_zod18.z.discriminatedUnion("type", [
@@ -7428,8 +7075,6 @@ var browserActions = [
7428
7075
  DEFAULT_MODES,
7429
7076
  DEFAULT_TERMINAL_OUTPUT_PREVIEW_SIZE,
7430
7077
  DEFAULT_WRITE_DELAY_MS,
7431
- DOUBAO_API_BASE_URL,
7432
- DOUBAO_API_CHAT_PATH,
7433
7078
  EVALS_SETTINGS,
7434
7079
  EVALS_TIMEOUT,
7435
7080
  EXPECTED_API_ERROR_CODES,
@@ -7440,18 +7085,9 @@ var browserActions = [
7440
7085
  GLOBAL_SETTINGS_KEYS,
7441
7086
  GLOBAL_STATE_KEYS,
7442
7087
  HEARTBEAT_INTERVAL_MS,
7443
- HUGGINGFACE_API_URL,
7444
- HUGGINGFACE_CACHE_DURATION,
7445
- HUGGINGFACE_DEFAULT_CONTEXT_WINDOW,
7446
- HUGGINGFACE_DEFAULT_MAX_TOKENS,
7447
- HUGGINGFACE_MAX_TOKENS_FALLBACK,
7448
- HUGGINGFACE_SLIDER_MIN,
7449
- HUGGINGFACE_SLIDER_STEP,
7450
- HUGGINGFACE_TEMPERATURE_MAX_VALUE,
7451
7088
  IMAGE_GENERATION_MODELS,
7452
7089
  IMAGE_GENERATION_MODEL_IDS,
7453
7090
  INSTANCE_TTL_SECONDS,
7454
- IO_INTELLIGENCE_CACHE_DURATION,
7455
7091
  IpcMessageType,
7456
7092
  IpcOrigin,
7457
7093
  LMSTUDIO_DEFAULT_TEMPERATURE,
@@ -7499,6 +7135,9 @@ var browserActions = [
7499
7135
  anthropicDefaultModelId,
7500
7136
  anthropicModels,
7501
7137
  appPropertiesSchema,
7138
+ azureDefaultModelId,
7139
+ azureDefaultModelInfo,
7140
+ azureModels,
7502
7141
  azureOpenAiDefaultApiVersion,
7503
7142
  basetenDefaultModelId,
7504
7143
  basetenModels,
@@ -7506,13 +7145,8 @@ var browserActions = [
7506
7145
  bedrockDefaultPromptRouterModelId,
7507
7146
  bedrockModels,
7508
7147
  browserActions,
7509
- cerebrasDefaultModelId,
7510
- cerebrasModels,
7511
7148
  checkoutDiffPayloadSchema,
7512
7149
  checkoutRestorePayloadSchema,
7513
- chutesDefaultModelId,
7514
- chutesDefaultModelInfo,
7515
- chutesModels,
7516
7150
  clineAskSchema,
7517
7151
  clineAsks,
7518
7152
  clineMessageSchema,
@@ -7532,15 +7166,10 @@ var browserActions = [
7532
7166
  customModesSettingsSchema,
7533
7167
  customProviders,
7534
7168
  customSupportPromptsSchema,
7535
- deepInfraDefaultModelId,
7536
- deepInfraDefaultModelInfo,
7537
7169
  deepSeekDefaultModelId,
7538
7170
  deepSeekModels,
7539
7171
  defineCustomTool,
7540
7172
  discriminatedProviderSettingsWithIdSchema,
7541
- doubaoDefaultModelId,
7542
- doubaoDefaultModelInfo,
7543
- doubaoModels,
7544
7173
  dynamicAppPropertiesSchema,
7545
7174
  dynamicProviders,
7546
7175
  experimentIds,
@@ -7553,8 +7182,6 @@ var browserActions = [
7553
7182
  extractConsecutiveMistakeErrorProperties,
7554
7183
  extractMessageFromJsonPayload,
7555
7184
  fauxProviders,
7556
- featherlessDefaultModelId,
7557
- featherlessModels,
7558
7185
  fireworksDefaultModelId,
7559
7186
  fireworksModels,
7560
7187
  followUpDataSchema,
@@ -7568,8 +7195,6 @@ var browserActions = [
7568
7195
  getProviderDefaultModelId,
7569
7196
  gitPropertiesSchema,
7570
7197
  globalSettingsSchema,
7571
- groqDefaultModelId,
7572
- groqModels,
7573
7198
  groupEntrySchema,
7574
7199
  groupOptionsSchema,
7575
7200
  historyItemSchema,
@@ -7580,9 +7205,6 @@ var browserActions = [
7580
7205
  internalProviders,
7581
7206
  internationalZAiDefaultModelId,
7582
7207
  internationalZAiModels,
7583
- ioIntelligenceDefaultBaseUrl,
7584
- ioIntelligenceDefaultModelId,
7585
- ioIntelligenceModels,
7586
7208
  ipcMessageSchema,
7587
7209
  isApiProviderError,
7588
7210
  isConsecutiveMistakeError,
@@ -7601,6 +7223,7 @@ var browserActions = [
7601
7223
  isNonBlockingAsk,
7602
7224
  isProviderName,
7603
7225
  isResumableAsk,
7226
+ isRetiredProvider,
7604
7227
  isSecretStateKey,
7605
7228
  isTypicalProvider,
7606
7229
  lMStudioDefaultModelId,
@@ -7651,6 +7274,7 @@ var browserActions = [
7651
7274
  promptComponentSchema,
7652
7275
  providerNames,
7653
7276
  providerNamesSchema,
7277
+ providerNamesWithRetiredSchema,
7654
7278
  providerSettingsEntrySchema,
7655
7279
  providerSettingsSchema,
7656
7280
  providerSettingsSchemaDiscriminated,
@@ -7668,6 +7292,8 @@ var browserActions = [
7668
7292
  requestyDefaultModelId,
7669
7293
  requestyDefaultModelInfo,
7670
7294
  resumableAsks,
7295
+ retiredProviderNames,
7296
+ retiredProviderNamesSchema,
7671
7297
  rooCodeEventsSchema,
7672
7298
  rooCodeSettingsSchema,
7673
7299
  rooCodeTelemetryEventSchema,
@@ -7700,8 +7326,6 @@ var browserActions = [
7700
7326
  toolNamesSchema,
7701
7327
  toolProgressStatusSchema,
7702
7328
  toolUsageSchema,
7703
- unboundDefaultModelId,
7704
- unboundDefaultModelInfo,
7705
7329
  usageStatsSchema,
7706
7330
  userFeaturesSchema,
7707
7331
  userSettingsConfigSchema,