@roo-code/types 1.110.0 → 1.111.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -44,8 +44,6 @@ __export(index_exports, {
44
44
  DEFAULT_MODES: () => DEFAULT_MODES,
45
45
  DEFAULT_TERMINAL_OUTPUT_PREVIEW_SIZE: () => DEFAULT_TERMINAL_OUTPUT_PREVIEW_SIZE,
46
46
  DEFAULT_WRITE_DELAY_MS: () => DEFAULT_WRITE_DELAY_MS,
47
- DOUBAO_API_BASE_URL: () => DOUBAO_API_BASE_URL,
48
- DOUBAO_API_CHAT_PATH: () => DOUBAO_API_CHAT_PATH,
49
47
  EVALS_SETTINGS: () => EVALS_SETTINGS,
50
48
  EVALS_TIMEOUT: () => EVALS_TIMEOUT,
51
49
  EXPECTED_API_ERROR_CODES: () => EXPECTED_API_ERROR_CODES,
@@ -56,18 +54,9 @@ __export(index_exports, {
56
54
  GLOBAL_SETTINGS_KEYS: () => GLOBAL_SETTINGS_KEYS,
57
55
  GLOBAL_STATE_KEYS: () => GLOBAL_STATE_KEYS,
58
56
  HEARTBEAT_INTERVAL_MS: () => HEARTBEAT_INTERVAL_MS,
59
- HUGGINGFACE_API_URL: () => HUGGINGFACE_API_URL,
60
- HUGGINGFACE_CACHE_DURATION: () => HUGGINGFACE_CACHE_DURATION,
61
- HUGGINGFACE_DEFAULT_CONTEXT_WINDOW: () => HUGGINGFACE_DEFAULT_CONTEXT_WINDOW,
62
- HUGGINGFACE_DEFAULT_MAX_TOKENS: () => HUGGINGFACE_DEFAULT_MAX_TOKENS,
63
- HUGGINGFACE_MAX_TOKENS_FALLBACK: () => HUGGINGFACE_MAX_TOKENS_FALLBACK,
64
- HUGGINGFACE_SLIDER_MIN: () => HUGGINGFACE_SLIDER_MIN,
65
- HUGGINGFACE_SLIDER_STEP: () => HUGGINGFACE_SLIDER_STEP,
66
- HUGGINGFACE_TEMPERATURE_MAX_VALUE: () => HUGGINGFACE_TEMPERATURE_MAX_VALUE,
67
57
  IMAGE_GENERATION_MODELS: () => IMAGE_GENERATION_MODELS,
68
58
  IMAGE_GENERATION_MODEL_IDS: () => IMAGE_GENERATION_MODEL_IDS,
69
59
  INSTANCE_TTL_SECONDS: () => INSTANCE_TTL_SECONDS,
70
- IO_INTELLIGENCE_CACHE_DURATION: () => IO_INTELLIGENCE_CACHE_DURATION,
71
60
  IpcMessageType: () => IpcMessageType,
72
61
  IpcOrigin: () => IpcOrigin,
73
62
  LMSTUDIO_DEFAULT_TEMPERATURE: () => LMSTUDIO_DEFAULT_TEMPERATURE,
@@ -115,6 +104,9 @@ __export(index_exports, {
115
104
  anthropicDefaultModelId: () => anthropicDefaultModelId,
116
105
  anthropicModels: () => anthropicModels,
117
106
  appPropertiesSchema: () => appPropertiesSchema,
107
+ azureDefaultModelId: () => azureDefaultModelId,
108
+ azureDefaultModelInfo: () => azureDefaultModelInfo,
109
+ azureModels: () => azureModels,
118
110
  azureOpenAiDefaultApiVersion: () => azureOpenAiDefaultApiVersion,
119
111
  basetenDefaultModelId: () => basetenDefaultModelId,
120
112
  basetenModels: () => basetenModels,
@@ -122,13 +114,8 @@ __export(index_exports, {
122
114
  bedrockDefaultPromptRouterModelId: () => bedrockDefaultPromptRouterModelId,
123
115
  bedrockModels: () => bedrockModels,
124
116
  browserActions: () => browserActions,
125
- cerebrasDefaultModelId: () => cerebrasDefaultModelId,
126
- cerebrasModels: () => cerebrasModels,
127
117
  checkoutDiffPayloadSchema: () => checkoutDiffPayloadSchema,
128
118
  checkoutRestorePayloadSchema: () => checkoutRestorePayloadSchema,
129
- chutesDefaultModelId: () => chutesDefaultModelId,
130
- chutesDefaultModelInfo: () => chutesDefaultModelInfo,
131
- chutesModels: () => chutesModels,
132
119
  clineAskSchema: () => clineAskSchema,
133
120
  clineAsks: () => clineAsks,
134
121
  clineMessageSchema: () => clineMessageSchema,
@@ -148,15 +135,10 @@ __export(index_exports, {
148
135
  customModesSettingsSchema: () => customModesSettingsSchema,
149
136
  customProviders: () => customProviders,
150
137
  customSupportPromptsSchema: () => customSupportPromptsSchema,
151
- deepInfraDefaultModelId: () => deepInfraDefaultModelId,
152
- deepInfraDefaultModelInfo: () => deepInfraDefaultModelInfo,
153
138
  deepSeekDefaultModelId: () => deepSeekDefaultModelId,
154
139
  deepSeekModels: () => deepSeekModels,
155
140
  defineCustomTool: () => defineCustomTool,
156
141
  discriminatedProviderSettingsWithIdSchema: () => discriminatedProviderSettingsWithIdSchema,
157
- doubaoDefaultModelId: () => doubaoDefaultModelId,
158
- doubaoDefaultModelInfo: () => doubaoDefaultModelInfo,
159
- doubaoModels: () => doubaoModels,
160
142
  dynamicAppPropertiesSchema: () => dynamicAppPropertiesSchema,
161
143
  dynamicProviders: () => dynamicProviders,
162
144
  experimentIds: () => experimentIds,
@@ -169,8 +151,6 @@ __export(index_exports, {
169
151
  extractConsecutiveMistakeErrorProperties: () => extractConsecutiveMistakeErrorProperties,
170
152
  extractMessageFromJsonPayload: () => extractMessageFromJsonPayload,
171
153
  fauxProviders: () => fauxProviders,
172
- featherlessDefaultModelId: () => featherlessDefaultModelId,
173
- featherlessModels: () => featherlessModels,
174
154
  fireworksDefaultModelId: () => fireworksDefaultModelId,
175
155
  fireworksModels: () => fireworksModels,
176
156
  followUpDataSchema: () => followUpDataSchema,
@@ -184,8 +164,6 @@ __export(index_exports, {
184
164
  getProviderDefaultModelId: () => getProviderDefaultModelId,
185
165
  gitPropertiesSchema: () => gitPropertiesSchema,
186
166
  globalSettingsSchema: () => globalSettingsSchema,
187
- groqDefaultModelId: () => groqDefaultModelId,
188
- groqModels: () => groqModels,
189
167
  groupEntrySchema: () => groupEntrySchema,
190
168
  groupOptionsSchema: () => groupOptionsSchema,
191
169
  historyItemSchema: () => historyItemSchema,
@@ -196,9 +174,6 @@ __export(index_exports, {
196
174
  internalProviders: () => internalProviders,
197
175
  internationalZAiDefaultModelId: () => internationalZAiDefaultModelId,
198
176
  internationalZAiModels: () => internationalZAiModels,
199
- ioIntelligenceDefaultBaseUrl: () => ioIntelligenceDefaultBaseUrl,
200
- ioIntelligenceDefaultModelId: () => ioIntelligenceDefaultModelId,
201
- ioIntelligenceModels: () => ioIntelligenceModels,
202
177
  ipcMessageSchema: () => ipcMessageSchema,
203
178
  isApiProviderError: () => isApiProviderError,
204
179
  isConsecutiveMistakeError: () => isConsecutiveMistakeError,
@@ -217,6 +192,7 @@ __export(index_exports, {
217
192
  isNonBlockingAsk: () => isNonBlockingAsk,
218
193
  isProviderName: () => isProviderName,
219
194
  isResumableAsk: () => isResumableAsk,
195
+ isRetiredProvider: () => isRetiredProvider,
220
196
  isSecretStateKey: () => isSecretStateKey,
221
197
  isTypicalProvider: () => isTypicalProvider,
222
198
  lMStudioDefaultModelId: () => lMStudioDefaultModelId,
@@ -267,6 +243,7 @@ __export(index_exports, {
267
243
  promptComponentSchema: () => promptComponentSchema,
268
244
  providerNames: () => providerNames,
269
245
  providerNamesSchema: () => providerNamesSchema,
246
+ providerNamesWithRetiredSchema: () => providerNamesWithRetiredSchema,
270
247
  providerSettingsEntrySchema: () => providerSettingsEntrySchema,
271
248
  providerSettingsSchema: () => providerSettingsSchema,
272
249
  providerSettingsSchemaDiscriminated: () => providerSettingsSchemaDiscriminated,
@@ -284,6 +261,8 @@ __export(index_exports, {
284
261
  requestyDefaultModelId: () => requestyDefaultModelId,
285
262
  requestyDefaultModelInfo: () => requestyDefaultModelInfo,
286
263
  resumableAsks: () => resumableAsks,
264
+ retiredProviderNames: () => retiredProviderNames,
265
+ retiredProviderNamesSchema: () => retiredProviderNamesSchema,
287
266
  rooCodeEventsSchema: () => rooCodeEventsSchema,
288
267
  rooCodeSettingsSchema: () => rooCodeSettingsSchema,
289
268
  rooCodeTelemetryEventSchema: () => rooCodeTelemetryEventSchema,
@@ -316,8 +295,6 @@ __export(index_exports, {
316
295
  toolNamesSchema: () => toolNamesSchema,
317
296
  toolProgressStatusSchema: () => toolProgressStatusSchema,
318
297
  toolUsageSchema: () => toolUsageSchema,
319
- unboundDefaultModelId: () => unboundDefaultModelId,
320
- unboundDefaultModelInfo: () => unboundDefaultModelInfo,
321
298
  usageStatsSchema: () => usageStatsSchema,
322
299
  userFeaturesSchema: () => userFeaturesSchema,
323
300
  userSettingsConfigSchema: () => userSettingsConfigSchema,
@@ -342,7 +319,7 @@ module.exports = __toCommonJS(index_exports);
342
319
  var import_zod16 = require("zod");
343
320
 
344
321
  // src/events.ts
345
- var import_zod3 = require("zod");
322
+ var import_zod4 = require("zod");
346
323
 
347
324
  // src/message.ts
348
325
  var import_zod = require("zod");
@@ -481,16 +458,97 @@ var queuedMessageSchema = import_zod.z.object({
481
458
  images: import_zod.z.array(import_zod.z.string()).optional()
482
459
  });
483
460
 
484
- // src/tool.ts
461
+ // src/model.ts
485
462
  var import_zod2 = require("zod");
463
+ var reasoningEfforts = ["low", "medium", "high"];
464
+ var reasoningEffortsSchema = import_zod2.z.enum(reasoningEfforts);
465
+ var reasoningEffortWithMinimalSchema = import_zod2.z.union([reasoningEffortsSchema, import_zod2.z.literal("minimal")]);
466
+ var reasoningEffortsExtended = ["none", "minimal", "low", "medium", "high", "xhigh"];
467
+ var reasoningEffortExtendedSchema = import_zod2.z.enum(reasoningEffortsExtended);
468
+ var reasoningEffortSettingValues = ["disable", "none", "minimal", "low", "medium", "high", "xhigh"];
469
+ var reasoningEffortSettingSchema = import_zod2.z.enum(reasoningEffortSettingValues);
470
+ var verbosityLevels = ["low", "medium", "high"];
471
+ var verbosityLevelsSchema = import_zod2.z.enum(verbosityLevels);
472
+ var serviceTiers = ["default", "flex", "priority"];
473
+ var serviceTierSchema = import_zod2.z.enum(serviceTiers);
474
+ var modelParameters = ["max_tokens", "temperature", "reasoning", "include_reasoning"];
475
+ var modelParametersSchema = import_zod2.z.enum(modelParameters);
476
+ var isModelParameter = (value) => modelParameters.includes(value);
477
+ var modelInfoSchema = import_zod2.z.object({
478
+ maxTokens: import_zod2.z.number().nullish(),
479
+ maxThinkingTokens: import_zod2.z.number().nullish(),
480
+ contextWindow: import_zod2.z.number(),
481
+ supportsImages: import_zod2.z.boolean().optional(),
482
+ supportsPromptCache: import_zod2.z.boolean(),
483
+ // Optional default prompt cache retention policy for providers that support it.
484
+ // When set to "24h", extended prompt caching will be requested; when omitted
485
+ // or set to "in_memory", the default in‑memory cache is used.
486
+ promptCacheRetention: import_zod2.z.enum(["in_memory", "24h"]).optional(),
487
+ // Capability flag to indicate whether the model supports an output verbosity parameter
488
+ supportsVerbosity: import_zod2.z.boolean().optional(),
489
+ supportsReasoningBudget: import_zod2.z.boolean().optional(),
490
+ // Capability flag to indicate whether the model supports simple on/off binary reasoning
491
+ supportsReasoningBinary: import_zod2.z.boolean().optional(),
492
+ // Capability flag to indicate whether the model supports temperature parameter
493
+ supportsTemperature: import_zod2.z.boolean().optional(),
494
+ defaultTemperature: import_zod2.z.number().optional(),
495
+ requiredReasoningBudget: import_zod2.z.boolean().optional(),
496
+ supportsReasoningEffort: import_zod2.z.union([import_zod2.z.boolean(), import_zod2.z.array(import_zod2.z.enum(["disable", "none", "minimal", "low", "medium", "high", "xhigh"]))]).optional(),
497
+ requiredReasoningEffort: import_zod2.z.boolean().optional(),
498
+ preserveReasoning: import_zod2.z.boolean().optional(),
499
+ supportedParameters: import_zod2.z.array(modelParametersSchema).optional(),
500
+ inputPrice: import_zod2.z.number().optional(),
501
+ outputPrice: import_zod2.z.number().optional(),
502
+ cacheWritesPrice: import_zod2.z.number().optional(),
503
+ cacheReadsPrice: import_zod2.z.number().optional(),
504
+ description: import_zod2.z.string().optional(),
505
+ // Default effort value for models that support reasoning effort
506
+ reasoningEffort: reasoningEffortExtendedSchema.optional(),
507
+ minTokensPerCachePoint: import_zod2.z.number().optional(),
508
+ maxCachePoints: import_zod2.z.number().optional(),
509
+ cachableFields: import_zod2.z.array(import_zod2.z.string()).optional(),
510
+ // Flag to indicate if the model is deprecated and should not be used
511
+ deprecated: import_zod2.z.boolean().optional(),
512
+ // Flag to indicate if the model should hide vendor/company identity in responses
513
+ isStealthModel: import_zod2.z.boolean().optional(),
514
+ // Flag to indicate if the model is free (no cost)
515
+ isFree: import_zod2.z.boolean().optional(),
516
+ // Exclude specific native tools from being available (only applies to native protocol)
517
+ // These tools will be removed from the set of tools available to the model
518
+ excludedTools: import_zod2.z.array(import_zod2.z.string()).optional(),
519
+ // Include specific native tools (only applies to native protocol)
520
+ // These tools will be added if they belong to an allowed group in the current mode
521
+ // Cannot force-add tools from groups the mode doesn't allow
522
+ includedTools: import_zod2.z.array(import_zod2.z.string()).optional(),
523
+ /**
524
+ * Service tiers with pricing information.
525
+ * Each tier can have a name (for OpenAI service tiers) and pricing overrides.
526
+ * The top-level input/output/cache* fields represent the default/standard tier.
527
+ */
528
+ tiers: import_zod2.z.array(
529
+ import_zod2.z.object({
530
+ name: serviceTierSchema.optional(),
531
+ // Service tier name (flex, priority, etc.)
532
+ contextWindow: import_zod2.z.number(),
533
+ inputPrice: import_zod2.z.number().optional(),
534
+ outputPrice: import_zod2.z.number().optional(),
535
+ cacheWritesPrice: import_zod2.z.number().optional(),
536
+ cacheReadsPrice: import_zod2.z.number().optional()
537
+ })
538
+ ).optional()
539
+ });
540
+
541
+ // src/tool.ts
542
+ var import_zod3 = require("zod");
486
543
  var toolGroups = ["read", "edit", "browser", "command", "mcp", "modes"];
487
- var toolGroupsSchema = import_zod2.z.enum(toolGroups);
544
+ var toolGroupsSchema = import_zod3.z.enum(toolGroups);
488
545
  var toolNames = [
489
546
  "execute_command",
490
547
  "read_file",
491
548
  "read_command_output",
492
549
  "write_to_file",
493
550
  "apply_diff",
551
+ "edit",
494
552
  "search_and_replace",
495
553
  "search_replace",
496
554
  "edit_file",
@@ -511,12 +569,12 @@ var toolNames = [
511
569
  "generate_image",
512
570
  "custom_tool"
513
571
  ];
514
- var toolNamesSchema = import_zod2.z.enum(toolNames);
515
- var toolUsageSchema = import_zod2.z.record(
572
+ var toolNamesSchema = import_zod3.z.enum(toolNames);
573
+ var toolUsageSchema = import_zod3.z.record(
516
574
  toolNamesSchema,
517
- import_zod2.z.object({
518
- attempts: import_zod2.z.number(),
519
- failures: import_zod2.z.number()
575
+ import_zod3.z.object({
576
+ attempts: import_zod3.z.number(),
577
+ failures: import_zod3.z.number()
520
578
  })
521
579
  );
522
580
 
@@ -547,198 +605,230 @@ var RooCodeEventName = /* @__PURE__ */ ((RooCodeEventName2) => {
547
605
  RooCodeEventName2["TaskToolFailed"] = "taskToolFailed";
548
606
  RooCodeEventName2["ModeChanged"] = "modeChanged";
549
607
  RooCodeEventName2["ProviderProfileChanged"] = "providerProfileChanged";
608
+ RooCodeEventName2["CommandsResponse"] = "commandsResponse";
609
+ RooCodeEventName2["ModesResponse"] = "modesResponse";
610
+ RooCodeEventName2["ModelsResponse"] = "modelsResponse";
550
611
  RooCodeEventName2["EvalPass"] = "evalPass";
551
612
  RooCodeEventName2["EvalFail"] = "evalFail";
552
613
  return RooCodeEventName2;
553
614
  })(RooCodeEventName || {});
554
- var rooCodeEventsSchema = import_zod3.z.object({
555
- ["taskCreated" /* TaskCreated */]: import_zod3.z.tuple([import_zod3.z.string()]),
556
- ["taskStarted" /* TaskStarted */]: import_zod3.z.tuple([import_zod3.z.string()]),
557
- ["taskCompleted" /* TaskCompleted */]: import_zod3.z.tuple([
558
- import_zod3.z.string(),
615
+ var rooCodeEventsSchema = import_zod4.z.object({
616
+ ["taskCreated" /* TaskCreated */]: import_zod4.z.tuple([import_zod4.z.string()]),
617
+ ["taskStarted" /* TaskStarted */]: import_zod4.z.tuple([import_zod4.z.string()]),
618
+ ["taskCompleted" /* TaskCompleted */]: import_zod4.z.tuple([
619
+ import_zod4.z.string(),
559
620
  tokenUsageSchema,
560
621
  toolUsageSchema,
561
- import_zod3.z.object({
562
- isSubtask: import_zod3.z.boolean()
622
+ import_zod4.z.object({
623
+ isSubtask: import_zod4.z.boolean()
563
624
  })
564
625
  ]),
565
- ["taskAborted" /* TaskAborted */]: import_zod3.z.tuple([import_zod3.z.string()]),
566
- ["taskFocused" /* TaskFocused */]: import_zod3.z.tuple([import_zod3.z.string()]),
567
- ["taskUnfocused" /* TaskUnfocused */]: import_zod3.z.tuple([import_zod3.z.string()]),
568
- ["taskActive" /* TaskActive */]: import_zod3.z.tuple([import_zod3.z.string()]),
569
- ["taskInteractive" /* TaskInteractive */]: import_zod3.z.tuple([import_zod3.z.string()]),
570
- ["taskResumable" /* TaskResumable */]: import_zod3.z.tuple([import_zod3.z.string()]),
571
- ["taskIdle" /* TaskIdle */]: import_zod3.z.tuple([import_zod3.z.string()]),
572
- ["taskPaused" /* TaskPaused */]: import_zod3.z.tuple([import_zod3.z.string()]),
573
- ["taskUnpaused" /* TaskUnpaused */]: import_zod3.z.tuple([import_zod3.z.string()]),
574
- ["taskSpawned" /* TaskSpawned */]: import_zod3.z.tuple([import_zod3.z.string(), import_zod3.z.string()]),
575
- ["taskDelegated" /* TaskDelegated */]: import_zod3.z.tuple([
576
- import_zod3.z.string(),
626
+ ["taskAborted" /* TaskAborted */]: import_zod4.z.tuple([import_zod4.z.string()]),
627
+ ["taskFocused" /* TaskFocused */]: import_zod4.z.tuple([import_zod4.z.string()]),
628
+ ["taskUnfocused" /* TaskUnfocused */]: import_zod4.z.tuple([import_zod4.z.string()]),
629
+ ["taskActive" /* TaskActive */]: import_zod4.z.tuple([import_zod4.z.string()]),
630
+ ["taskInteractive" /* TaskInteractive */]: import_zod4.z.tuple([import_zod4.z.string()]),
631
+ ["taskResumable" /* TaskResumable */]: import_zod4.z.tuple([import_zod4.z.string()]),
632
+ ["taskIdle" /* TaskIdle */]: import_zod4.z.tuple([import_zod4.z.string()]),
633
+ ["taskPaused" /* TaskPaused */]: import_zod4.z.tuple([import_zod4.z.string()]),
634
+ ["taskUnpaused" /* TaskUnpaused */]: import_zod4.z.tuple([import_zod4.z.string()]),
635
+ ["taskSpawned" /* TaskSpawned */]: import_zod4.z.tuple([import_zod4.z.string(), import_zod4.z.string()]),
636
+ ["taskDelegated" /* TaskDelegated */]: import_zod4.z.tuple([
637
+ import_zod4.z.string(),
577
638
  // parentTaskId
578
- import_zod3.z.string()
639
+ import_zod4.z.string()
579
640
  // childTaskId
580
641
  ]),
581
- ["taskDelegationCompleted" /* TaskDelegationCompleted */]: import_zod3.z.tuple([
582
- import_zod3.z.string(),
642
+ ["taskDelegationCompleted" /* TaskDelegationCompleted */]: import_zod4.z.tuple([
643
+ import_zod4.z.string(),
583
644
  // parentTaskId
584
- import_zod3.z.string(),
645
+ import_zod4.z.string(),
585
646
  // childTaskId
586
- import_zod3.z.string()
647
+ import_zod4.z.string()
587
648
  // completionResultSummary
588
649
  ]),
589
- ["taskDelegationResumed" /* TaskDelegationResumed */]: import_zod3.z.tuple([
590
- import_zod3.z.string(),
650
+ ["taskDelegationResumed" /* TaskDelegationResumed */]: import_zod4.z.tuple([
651
+ import_zod4.z.string(),
591
652
  // parentTaskId
592
- import_zod3.z.string()
653
+ import_zod4.z.string()
593
654
  // childTaskId
594
655
  ]),
595
- ["message" /* Message */]: import_zod3.z.tuple([
596
- import_zod3.z.object({
597
- taskId: import_zod3.z.string(),
598
- action: import_zod3.z.union([import_zod3.z.literal("created"), import_zod3.z.literal("updated")]),
656
+ ["message" /* Message */]: import_zod4.z.tuple([
657
+ import_zod4.z.object({
658
+ taskId: import_zod4.z.string(),
659
+ action: import_zod4.z.union([import_zod4.z.literal("created"), import_zod4.z.literal("updated")]),
599
660
  message: clineMessageSchema
600
661
  })
601
662
  ]),
602
- ["taskModeSwitched" /* TaskModeSwitched */]: import_zod3.z.tuple([import_zod3.z.string(), import_zod3.z.string()]),
603
- ["taskAskResponded" /* TaskAskResponded */]: import_zod3.z.tuple([import_zod3.z.string()]),
604
- ["taskUserMessage" /* TaskUserMessage */]: import_zod3.z.tuple([import_zod3.z.string()]),
605
- ["queuedMessagesUpdated" /* QueuedMessagesUpdated */]: import_zod3.z.tuple([import_zod3.z.string(), import_zod3.z.array(queuedMessageSchema)]),
606
- ["taskToolFailed" /* TaskToolFailed */]: import_zod3.z.tuple([import_zod3.z.string(), toolNamesSchema, import_zod3.z.string()]),
607
- ["taskTokenUsageUpdated" /* TaskTokenUsageUpdated */]: import_zod3.z.tuple([import_zod3.z.string(), tokenUsageSchema, toolUsageSchema]),
608
- ["modeChanged" /* ModeChanged */]: import_zod3.z.tuple([import_zod3.z.string()]),
609
- ["providerProfileChanged" /* ProviderProfileChanged */]: import_zod3.z.tuple([import_zod3.z.object({ name: import_zod3.z.string(), provider: import_zod3.z.string() })])
663
+ ["taskModeSwitched" /* TaskModeSwitched */]: import_zod4.z.tuple([import_zod4.z.string(), import_zod4.z.string()]),
664
+ ["taskAskResponded" /* TaskAskResponded */]: import_zod4.z.tuple([import_zod4.z.string()]),
665
+ ["taskUserMessage" /* TaskUserMessage */]: import_zod4.z.tuple([import_zod4.z.string()]),
666
+ ["queuedMessagesUpdated" /* QueuedMessagesUpdated */]: import_zod4.z.tuple([import_zod4.z.string(), import_zod4.z.array(queuedMessageSchema)]),
667
+ ["taskToolFailed" /* TaskToolFailed */]: import_zod4.z.tuple([import_zod4.z.string(), toolNamesSchema, import_zod4.z.string()]),
668
+ ["taskTokenUsageUpdated" /* TaskTokenUsageUpdated */]: import_zod4.z.tuple([import_zod4.z.string(), tokenUsageSchema, toolUsageSchema]),
669
+ ["modeChanged" /* ModeChanged */]: import_zod4.z.tuple([import_zod4.z.string()]),
670
+ ["providerProfileChanged" /* ProviderProfileChanged */]: import_zod4.z.tuple([import_zod4.z.object({ name: import_zod4.z.string(), provider: import_zod4.z.string() })]),
671
+ ["commandsResponse" /* CommandsResponse */]: import_zod4.z.tuple([
672
+ import_zod4.z.array(
673
+ import_zod4.z.object({
674
+ name: import_zod4.z.string(),
675
+ source: import_zod4.z.enum(["global", "project", "built-in"]),
676
+ filePath: import_zod4.z.string().optional(),
677
+ description: import_zod4.z.string().optional(),
678
+ argumentHint: import_zod4.z.string().optional()
679
+ })
680
+ )
681
+ ]),
682
+ ["modesResponse" /* ModesResponse */]: import_zod4.z.tuple([import_zod4.z.array(import_zod4.z.object({ slug: import_zod4.z.string(), name: import_zod4.z.string() }))]),
683
+ ["modelsResponse" /* ModelsResponse */]: import_zod4.z.tuple([import_zod4.z.record(import_zod4.z.string(), modelInfoSchema)])
610
684
  });
611
- var taskEventSchema = import_zod3.z.discriminatedUnion("eventName", [
685
+ var taskEventSchema = import_zod4.z.discriminatedUnion("eventName", [
612
686
  // Task Provider Lifecycle
613
- import_zod3.z.object({
614
- eventName: import_zod3.z.literal("taskCreated" /* TaskCreated */),
687
+ import_zod4.z.object({
688
+ eventName: import_zod4.z.literal("taskCreated" /* TaskCreated */),
615
689
  payload: rooCodeEventsSchema.shape["taskCreated" /* TaskCreated */],
616
- taskId: import_zod3.z.number().optional()
690
+ taskId: import_zod4.z.number().optional()
617
691
  }),
618
692
  // Task Lifecycle
619
- import_zod3.z.object({
620
- eventName: import_zod3.z.literal("taskStarted" /* TaskStarted */),
693
+ import_zod4.z.object({
694
+ eventName: import_zod4.z.literal("taskStarted" /* TaskStarted */),
621
695
  payload: rooCodeEventsSchema.shape["taskStarted" /* TaskStarted */],
622
- taskId: import_zod3.z.number().optional()
696
+ taskId: import_zod4.z.number().optional()
623
697
  }),
624
- import_zod3.z.object({
625
- eventName: import_zod3.z.literal("taskCompleted" /* TaskCompleted */),
698
+ import_zod4.z.object({
699
+ eventName: import_zod4.z.literal("taskCompleted" /* TaskCompleted */),
626
700
  payload: rooCodeEventsSchema.shape["taskCompleted" /* TaskCompleted */],
627
- taskId: import_zod3.z.number().optional()
701
+ taskId: import_zod4.z.number().optional()
628
702
  }),
629
- import_zod3.z.object({
630
- eventName: import_zod3.z.literal("taskAborted" /* TaskAborted */),
703
+ import_zod4.z.object({
704
+ eventName: import_zod4.z.literal("taskAborted" /* TaskAborted */),
631
705
  payload: rooCodeEventsSchema.shape["taskAborted" /* TaskAborted */],
632
- taskId: import_zod3.z.number().optional()
706
+ taskId: import_zod4.z.number().optional()
633
707
  }),
634
- import_zod3.z.object({
635
- eventName: import_zod3.z.literal("taskFocused" /* TaskFocused */),
708
+ import_zod4.z.object({
709
+ eventName: import_zod4.z.literal("taskFocused" /* TaskFocused */),
636
710
  payload: rooCodeEventsSchema.shape["taskFocused" /* TaskFocused */],
637
- taskId: import_zod3.z.number().optional()
711
+ taskId: import_zod4.z.number().optional()
638
712
  }),
639
- import_zod3.z.object({
640
- eventName: import_zod3.z.literal("taskUnfocused" /* TaskUnfocused */),
713
+ import_zod4.z.object({
714
+ eventName: import_zod4.z.literal("taskUnfocused" /* TaskUnfocused */),
641
715
  payload: rooCodeEventsSchema.shape["taskUnfocused" /* TaskUnfocused */],
642
- taskId: import_zod3.z.number().optional()
716
+ taskId: import_zod4.z.number().optional()
643
717
  }),
644
- import_zod3.z.object({
645
- eventName: import_zod3.z.literal("taskActive" /* TaskActive */),
718
+ import_zod4.z.object({
719
+ eventName: import_zod4.z.literal("taskActive" /* TaskActive */),
646
720
  payload: rooCodeEventsSchema.shape["taskActive" /* TaskActive */],
647
- taskId: import_zod3.z.number().optional()
721
+ taskId: import_zod4.z.number().optional()
648
722
  }),
649
- import_zod3.z.object({
650
- eventName: import_zod3.z.literal("taskInteractive" /* TaskInteractive */),
723
+ import_zod4.z.object({
724
+ eventName: import_zod4.z.literal("taskInteractive" /* TaskInteractive */),
651
725
  payload: rooCodeEventsSchema.shape["taskInteractive" /* TaskInteractive */],
652
- taskId: import_zod3.z.number().optional()
726
+ taskId: import_zod4.z.number().optional()
653
727
  }),
654
- import_zod3.z.object({
655
- eventName: import_zod3.z.literal("taskResumable" /* TaskResumable */),
728
+ import_zod4.z.object({
729
+ eventName: import_zod4.z.literal("taskResumable" /* TaskResumable */),
656
730
  payload: rooCodeEventsSchema.shape["taskResumable" /* TaskResumable */],
657
- taskId: import_zod3.z.number().optional()
731
+ taskId: import_zod4.z.number().optional()
658
732
  }),
659
- import_zod3.z.object({
660
- eventName: import_zod3.z.literal("taskIdle" /* TaskIdle */),
733
+ import_zod4.z.object({
734
+ eventName: import_zod4.z.literal("taskIdle" /* TaskIdle */),
661
735
  payload: rooCodeEventsSchema.shape["taskIdle" /* TaskIdle */],
662
- taskId: import_zod3.z.number().optional()
736
+ taskId: import_zod4.z.number().optional()
663
737
  }),
664
738
  // Subtask Lifecycle
665
- import_zod3.z.object({
666
- eventName: import_zod3.z.literal("taskPaused" /* TaskPaused */),
739
+ import_zod4.z.object({
740
+ eventName: import_zod4.z.literal("taskPaused" /* TaskPaused */),
667
741
  payload: rooCodeEventsSchema.shape["taskPaused" /* TaskPaused */],
668
- taskId: import_zod3.z.number().optional()
742
+ taskId: import_zod4.z.number().optional()
669
743
  }),
670
- import_zod3.z.object({
671
- eventName: import_zod3.z.literal("taskUnpaused" /* TaskUnpaused */),
744
+ import_zod4.z.object({
745
+ eventName: import_zod4.z.literal("taskUnpaused" /* TaskUnpaused */),
672
746
  payload: rooCodeEventsSchema.shape["taskUnpaused" /* TaskUnpaused */],
673
- taskId: import_zod3.z.number().optional()
747
+ taskId: import_zod4.z.number().optional()
674
748
  }),
675
- import_zod3.z.object({
676
- eventName: import_zod3.z.literal("taskSpawned" /* TaskSpawned */),
749
+ import_zod4.z.object({
750
+ eventName: import_zod4.z.literal("taskSpawned" /* TaskSpawned */),
677
751
  payload: rooCodeEventsSchema.shape["taskSpawned" /* TaskSpawned */],
678
- taskId: import_zod3.z.number().optional()
752
+ taskId: import_zod4.z.number().optional()
679
753
  }),
680
- import_zod3.z.object({
681
- eventName: import_zod3.z.literal("taskDelegated" /* TaskDelegated */),
754
+ import_zod4.z.object({
755
+ eventName: import_zod4.z.literal("taskDelegated" /* TaskDelegated */),
682
756
  payload: rooCodeEventsSchema.shape["taskDelegated" /* TaskDelegated */],
683
- taskId: import_zod3.z.number().optional()
757
+ taskId: import_zod4.z.number().optional()
684
758
  }),
685
- import_zod3.z.object({
686
- eventName: import_zod3.z.literal("taskDelegationCompleted" /* TaskDelegationCompleted */),
759
+ import_zod4.z.object({
760
+ eventName: import_zod4.z.literal("taskDelegationCompleted" /* TaskDelegationCompleted */),
687
761
  payload: rooCodeEventsSchema.shape["taskDelegationCompleted" /* TaskDelegationCompleted */],
688
- taskId: import_zod3.z.number().optional()
762
+ taskId: import_zod4.z.number().optional()
689
763
  }),
690
- import_zod3.z.object({
691
- eventName: import_zod3.z.literal("taskDelegationResumed" /* TaskDelegationResumed */),
764
+ import_zod4.z.object({
765
+ eventName: import_zod4.z.literal("taskDelegationResumed" /* TaskDelegationResumed */),
692
766
  payload: rooCodeEventsSchema.shape["taskDelegationResumed" /* TaskDelegationResumed */],
693
- taskId: import_zod3.z.number().optional()
767
+ taskId: import_zod4.z.number().optional()
694
768
  }),
695
769
  // Task Execution
696
- import_zod3.z.object({
697
- eventName: import_zod3.z.literal("message" /* Message */),
770
+ import_zod4.z.object({
771
+ eventName: import_zod4.z.literal("message" /* Message */),
698
772
  payload: rooCodeEventsSchema.shape["message" /* Message */],
699
- taskId: import_zod3.z.number().optional()
773
+ taskId: import_zod4.z.number().optional()
700
774
  }),
701
- import_zod3.z.object({
702
- eventName: import_zod3.z.literal("taskModeSwitched" /* TaskModeSwitched */),
775
+ import_zod4.z.object({
776
+ eventName: import_zod4.z.literal("taskModeSwitched" /* TaskModeSwitched */),
703
777
  payload: rooCodeEventsSchema.shape["taskModeSwitched" /* TaskModeSwitched */],
704
- taskId: import_zod3.z.number().optional()
778
+ taskId: import_zod4.z.number().optional()
705
779
  }),
706
- import_zod3.z.object({
707
- eventName: import_zod3.z.literal("taskAskResponded" /* TaskAskResponded */),
780
+ import_zod4.z.object({
781
+ eventName: import_zod4.z.literal("taskAskResponded" /* TaskAskResponded */),
708
782
  payload: rooCodeEventsSchema.shape["taskAskResponded" /* TaskAskResponded */],
709
- taskId: import_zod3.z.number().optional()
783
+ taskId: import_zod4.z.number().optional()
710
784
  }),
711
- import_zod3.z.object({
712
- eventName: import_zod3.z.literal("queuedMessagesUpdated" /* QueuedMessagesUpdated */),
785
+ import_zod4.z.object({
786
+ eventName: import_zod4.z.literal("queuedMessagesUpdated" /* QueuedMessagesUpdated */),
713
787
  payload: rooCodeEventsSchema.shape["queuedMessagesUpdated" /* QueuedMessagesUpdated */],
714
- taskId: import_zod3.z.number().optional()
788
+ taskId: import_zod4.z.number().optional()
715
789
  }),
716
790
  // Task Analytics
717
- import_zod3.z.object({
718
- eventName: import_zod3.z.literal("taskToolFailed" /* TaskToolFailed */),
791
+ import_zod4.z.object({
792
+ eventName: import_zod4.z.literal("taskToolFailed" /* TaskToolFailed */),
719
793
  payload: rooCodeEventsSchema.shape["taskToolFailed" /* TaskToolFailed */],
720
- taskId: import_zod3.z.number().optional()
794
+ taskId: import_zod4.z.number().optional()
721
795
  }),
722
- import_zod3.z.object({
723
- eventName: import_zod3.z.literal("taskTokenUsageUpdated" /* TaskTokenUsageUpdated */),
796
+ import_zod4.z.object({
797
+ eventName: import_zod4.z.literal("taskTokenUsageUpdated" /* TaskTokenUsageUpdated */),
724
798
  payload: rooCodeEventsSchema.shape["taskTokenUsageUpdated" /* TaskTokenUsageUpdated */],
725
- taskId: import_zod3.z.number().optional()
799
+ taskId: import_zod4.z.number().optional()
800
+ }),
801
+ // Query Responses
802
+ import_zod4.z.object({
803
+ eventName: import_zod4.z.literal("commandsResponse" /* CommandsResponse */),
804
+ payload: rooCodeEventsSchema.shape["commandsResponse" /* CommandsResponse */],
805
+ taskId: import_zod4.z.number().optional()
806
+ }),
807
+ import_zod4.z.object({
808
+ eventName: import_zod4.z.literal("modesResponse" /* ModesResponse */),
809
+ payload: rooCodeEventsSchema.shape["modesResponse" /* ModesResponse */],
810
+ taskId: import_zod4.z.number().optional()
811
+ }),
812
+ import_zod4.z.object({
813
+ eventName: import_zod4.z.literal("modelsResponse" /* ModelsResponse */),
814
+ payload: rooCodeEventsSchema.shape["modelsResponse" /* ModelsResponse */],
815
+ taskId: import_zod4.z.number().optional()
726
816
  }),
727
817
  // Evals
728
- import_zod3.z.object({
729
- eventName: import_zod3.z.literal("evalPass" /* EvalPass */),
730
- payload: import_zod3.z.undefined(),
731
- taskId: import_zod3.z.number()
818
+ import_zod4.z.object({
819
+ eventName: import_zod4.z.literal("evalPass" /* EvalPass */),
820
+ payload: import_zod4.z.undefined(),
821
+ taskId: import_zod4.z.number()
732
822
  }),
733
- import_zod3.z.object({
734
- eventName: import_zod3.z.literal("evalFail" /* EvalFail */),
735
- payload: import_zod3.z.undefined(),
736
- taskId: import_zod3.z.number()
823
+ import_zod4.z.object({
824
+ eventName: import_zod4.z.literal("evalFail" /* EvalFail */),
825
+ payload: import_zod4.z.undefined(),
826
+ taskId: import_zod4.z.number()
737
827
  })
738
828
  ]);
739
829
 
740
830
  // src/task.ts
741
- var import_zod4 = require("zod");
831
+ var import_zod5 = require("zod");
742
832
  var TaskStatus = /* @__PURE__ */ ((TaskStatus2) => {
743
833
  TaskStatus2["Running"] = "running";
744
834
  TaskStatus2["Interactive"] = "interactive";
@@ -747,9 +837,9 @@ var TaskStatus = /* @__PURE__ */ ((TaskStatus2) => {
747
837
  TaskStatus2["None"] = "none";
748
838
  return TaskStatus2;
749
839
  })(TaskStatus || {});
750
- var taskMetadataSchema = import_zod4.z.object({
751
- task: import_zod4.z.string().optional(),
752
- images: import_zod4.z.array(import_zod4.z.string()).optional()
840
+ var taskMetadataSchema = import_zod5.z.object({
841
+ task: import_zod5.z.string().optional(),
842
+ images: import_zod5.z.array(import_zod5.z.string()).optional()
753
843
  });
754
844
 
755
845
  // src/global-settings.ts
@@ -758,86 +848,6 @@ var import_zod14 = require("zod");
758
848
  // src/provider-settings.ts
759
849
  var import_zod8 = require("zod");
760
850
 
761
- // src/model.ts
762
- var import_zod5 = require("zod");
763
- var reasoningEfforts = ["low", "medium", "high"];
764
- var reasoningEffortsSchema = import_zod5.z.enum(reasoningEfforts);
765
- var reasoningEffortWithMinimalSchema = import_zod5.z.union([reasoningEffortsSchema, import_zod5.z.literal("minimal")]);
766
- var reasoningEffortsExtended = ["none", "minimal", "low", "medium", "high", "xhigh"];
767
- var reasoningEffortExtendedSchema = import_zod5.z.enum(reasoningEffortsExtended);
768
- var reasoningEffortSettingValues = ["disable", "none", "minimal", "low", "medium", "high", "xhigh"];
769
- var reasoningEffortSettingSchema = import_zod5.z.enum(reasoningEffortSettingValues);
770
- var verbosityLevels = ["low", "medium", "high"];
771
- var verbosityLevelsSchema = import_zod5.z.enum(verbosityLevels);
772
- var serviceTiers = ["default", "flex", "priority"];
773
- var serviceTierSchema = import_zod5.z.enum(serviceTiers);
774
- var modelParameters = ["max_tokens", "temperature", "reasoning", "include_reasoning"];
775
- var modelParametersSchema = import_zod5.z.enum(modelParameters);
776
- var isModelParameter = (value) => modelParameters.includes(value);
777
- var modelInfoSchema = import_zod5.z.object({
778
- maxTokens: import_zod5.z.number().nullish(),
779
- maxThinkingTokens: import_zod5.z.number().nullish(),
780
- contextWindow: import_zod5.z.number(),
781
- supportsImages: import_zod5.z.boolean().optional(),
782
- supportsPromptCache: import_zod5.z.boolean(),
783
- // Optional default prompt cache retention policy for providers that support it.
784
- // When set to "24h", extended prompt caching will be requested; when omitted
785
- // or set to "in_memory", the default in‑memory cache is used.
786
- promptCacheRetention: import_zod5.z.enum(["in_memory", "24h"]).optional(),
787
- // Capability flag to indicate whether the model supports an output verbosity parameter
788
- supportsVerbosity: import_zod5.z.boolean().optional(),
789
- supportsReasoningBudget: import_zod5.z.boolean().optional(),
790
- // Capability flag to indicate whether the model supports simple on/off binary reasoning
791
- supportsReasoningBinary: import_zod5.z.boolean().optional(),
792
- // Capability flag to indicate whether the model supports temperature parameter
793
- supportsTemperature: import_zod5.z.boolean().optional(),
794
- defaultTemperature: import_zod5.z.number().optional(),
795
- requiredReasoningBudget: import_zod5.z.boolean().optional(),
796
- supportsReasoningEffort: import_zod5.z.union([import_zod5.z.boolean(), import_zod5.z.array(import_zod5.z.enum(["disable", "none", "minimal", "low", "medium", "high", "xhigh"]))]).optional(),
797
- requiredReasoningEffort: import_zod5.z.boolean().optional(),
798
- preserveReasoning: import_zod5.z.boolean().optional(),
799
- supportedParameters: import_zod5.z.array(modelParametersSchema).optional(),
800
- inputPrice: import_zod5.z.number().optional(),
801
- outputPrice: import_zod5.z.number().optional(),
802
- cacheWritesPrice: import_zod5.z.number().optional(),
803
- cacheReadsPrice: import_zod5.z.number().optional(),
804
- description: import_zod5.z.string().optional(),
805
- // Default effort value for models that support reasoning effort
806
- reasoningEffort: reasoningEffortExtendedSchema.optional(),
807
- minTokensPerCachePoint: import_zod5.z.number().optional(),
808
- maxCachePoints: import_zod5.z.number().optional(),
809
- cachableFields: import_zod5.z.array(import_zod5.z.string()).optional(),
810
- // Flag to indicate if the model is deprecated and should not be used
811
- deprecated: import_zod5.z.boolean().optional(),
812
- // Flag to indicate if the model should hide vendor/company identity in responses
813
- isStealthModel: import_zod5.z.boolean().optional(),
814
- // Flag to indicate if the model is free (no cost)
815
- isFree: import_zod5.z.boolean().optional(),
816
- // Exclude specific native tools from being available (only applies to native protocol)
817
- // These tools will be removed from the set of tools available to the model
818
- excludedTools: import_zod5.z.array(import_zod5.z.string()).optional(),
819
- // Include specific native tools (only applies to native protocol)
820
- // These tools will be added if they belong to an allowed group in the current mode
821
- // Cannot force-add tools from groups the mode doesn't allow
822
- includedTools: import_zod5.z.array(import_zod5.z.string()).optional(),
823
- /**
824
- * Service tiers with pricing information.
825
- * Each tier can have a name (for OpenAI service tiers) and pricing overrides.
826
- * The top-level input/output/cache* fields represent the default/standard tier.
827
- */
828
- tiers: import_zod5.z.array(
829
- import_zod5.z.object({
830
- name: serviceTierSchema.optional(),
831
- // Service tier name (flex, priority, etc.)
832
- contextWindow: import_zod5.z.number(),
833
- inputPrice: import_zod5.z.number().optional(),
834
- outputPrice: import_zod5.z.number().optional(),
835
- cacheWritesPrice: import_zod5.z.number().optional(),
836
- cacheReadsPrice: import_zod5.z.number().optional()
837
- })
838
- ).optional()
839
- });
840
-
841
851
  // src/codebase-index.ts
842
852
  var import_zod6 = require("zod");
843
853
  var CODEBASE_INDEX_DEFAULTS = {
@@ -1137,1090 +1147,1051 @@ var anthropicModels = {
1137
1147
  };
1138
1148
  var ANTHROPIC_DEFAULT_MAX_TOKENS = 8192;
1139
1149
 
1140
- // src/providers/baseten.ts
1141
- var basetenModels = {
1142
- "moonshotai/Kimi-K2-Thinking": {
1143
- maxTokens: 16384,
1144
- contextWindow: 262e3,
1145
- supportsImages: false,
1146
- supportsPromptCache: false,
1147
- inputPrice: 0.6,
1148
- outputPrice: 2.5,
1149
- cacheWritesPrice: 0,
1150
- cacheReadsPrice: 0,
1151
- description: "Kimi K2 Thinking - A model with enhanced reasoning capabilities from Kimi K2"
1152
- },
1153
- "zai-org/GLM-4.6": {
1154
- maxTokens: 16384,
1150
+ // src/providers/azure.ts
1151
+ var azureModels = {
1152
+ "codex-mini": {
1153
+ maxTokens: 1e5,
1155
1154
  contextWindow: 2e5,
1156
1155
  supportsImages: false,
1157
- supportsPromptCache: false,
1158
- inputPrice: 0.6,
1159
- outputPrice: 2.2,
1160
- cacheWritesPrice: 0,
1161
- cacheReadsPrice: 0,
1162
- description: "Frontier open model with advanced agentic, reasoning and coding capabilities"
1156
+ supportsPromptCache: true,
1157
+ inputPrice: 1.5,
1158
+ outputPrice: 6,
1159
+ cacheReadsPrice: 0.375,
1160
+ supportsTemperature: false,
1161
+ description: "Codex Mini: Cloud-based software engineering agent powered by codex-1, a version of o3 optimized for coding tasks"
1163
1162
  },
1164
- "deepseek-ai/DeepSeek-R1": {
1165
- maxTokens: 16384,
1166
- contextWindow: 163840,
1163
+ "gpt-4": {
1164
+ maxTokens: 8192,
1165
+ contextWindow: 8192,
1167
1166
  supportsImages: false,
1168
1167
  supportsPromptCache: false,
1169
- inputPrice: 2.55,
1170
- outputPrice: 5.95,
1171
- cacheWritesPrice: 0,
1172
- cacheReadsPrice: 0,
1173
- description: "DeepSeek's first-generation reasoning model"
1168
+ inputPrice: 60,
1169
+ outputPrice: 120,
1170
+ supportsTemperature: true,
1171
+ description: "GPT-4"
1174
1172
  },
1175
- "deepseek-ai/DeepSeek-R1-0528": {
1176
- maxTokens: 16384,
1177
- contextWindow: 163840,
1173
+ "gpt-4-32k": {
1174
+ maxTokens: 32768,
1175
+ contextWindow: 32768,
1178
1176
  supportsImages: false,
1179
1177
  supportsPromptCache: false,
1180
- inputPrice: 2.55,
1181
- outputPrice: 5.95,
1182
- cacheWritesPrice: 0,
1183
- cacheReadsPrice: 0,
1184
- description: "The latest revision of DeepSeek's first-generation reasoning model"
1178
+ inputPrice: 60,
1179
+ outputPrice: 120,
1180
+ supportsTemperature: true,
1181
+ description: "GPT-4 32K"
1185
1182
  },
1186
- "deepseek-ai/DeepSeek-V3-0324": {
1187
- maxTokens: 16384,
1188
- contextWindow: 163840,
1189
- supportsImages: false,
1183
+ "gpt-4-turbo": {
1184
+ maxTokens: 4096,
1185
+ contextWindow: 128e3,
1186
+ supportsImages: true,
1190
1187
  supportsPromptCache: false,
1191
- inputPrice: 0.77,
1192
- outputPrice: 0.77,
1193
- cacheWritesPrice: 0,
1194
- cacheReadsPrice: 0,
1195
- description: "Fast general-purpose LLM with enhanced reasoning capabilities"
1188
+ inputPrice: 10,
1189
+ outputPrice: 30,
1190
+ supportsTemperature: true,
1191
+ description: "GPT-4 Turbo"
1196
1192
  },
1197
- "deepseek-ai/DeepSeek-V3.1": {
1198
- maxTokens: 16384,
1199
- contextWindow: 163840,
1200
- supportsImages: false,
1193
+ "gpt-4-turbo-vision": {
1194
+ maxTokens: 4096,
1195
+ contextWindow: 128e3,
1196
+ supportsImages: true,
1201
1197
  supportsPromptCache: false,
1202
- inputPrice: 0.5,
1203
- outputPrice: 1.5,
1204
- cacheWritesPrice: 0,
1205
- cacheReadsPrice: 0,
1206
- description: "Extremely capable general-purpose LLM with hybrid reasoning capabilities and advanced tool calling"
1198
+ inputPrice: 10,
1199
+ outputPrice: 30,
1200
+ supportsTemperature: true,
1201
+ description: "GPT-4 Turbo Vision"
1207
1202
  },
1208
- "deepseek-ai/DeepSeek-V3.2": {
1209
- maxTokens: 16384,
1210
- contextWindow: 163840,
1211
- supportsImages: false,
1212
- supportsPromptCache: false,
1213
- inputPrice: 0.3,
1214
- outputPrice: 0.45,
1215
- cacheWritesPrice: 0,
1216
- cacheReadsPrice: 0,
1217
- description: "DeepSeek's hybrid reasoning model with efficient long context scaling with GPT-5 level performance"
1203
+ "gpt-4.1": {
1204
+ maxTokens: 32768,
1205
+ contextWindow: 1047576,
1206
+ supportsImages: true,
1207
+ supportsPromptCache: true,
1208
+ inputPrice: 2,
1209
+ outputPrice: 8,
1210
+ cacheReadsPrice: 0.5,
1211
+ supportsTemperature: true,
1212
+ description: "GPT-4.1"
1218
1213
  },
1219
- "openai/gpt-oss-120b": {
1220
- maxTokens: 16384,
1221
- contextWindow: 128072,
1222
- supportsImages: false,
1223
- supportsPromptCache: false,
1214
+ "gpt-4.1-mini": {
1215
+ maxTokens: 32768,
1216
+ contextWindow: 1047576,
1217
+ supportsImages: true,
1218
+ supportsPromptCache: true,
1219
+ inputPrice: 0.4,
1220
+ outputPrice: 1.6,
1221
+ cacheReadsPrice: 0.1,
1222
+ supportsTemperature: true,
1223
+ description: "GPT-4.1 mini"
1224
+ },
1225
+ "gpt-4.1-nano": {
1226
+ maxTokens: 32768,
1227
+ contextWindow: 1047576,
1228
+ supportsImages: true,
1229
+ supportsPromptCache: true,
1224
1230
  inputPrice: 0.1,
1225
- outputPrice: 0.5,
1226
- cacheWritesPrice: 0,
1227
- cacheReadsPrice: 0,
1228
- description: "Extremely capable general-purpose LLM with strong, controllable reasoning capabilities"
1231
+ outputPrice: 0.4,
1232
+ cacheReadsPrice: 0.03,
1233
+ supportsTemperature: true,
1234
+ description: "GPT-4.1 nano"
1229
1235
  },
1230
- "Qwen/Qwen3-235B-A22B-Instruct-2507": {
1236
+ "gpt-4o": {
1231
1237
  maxTokens: 16384,
1232
- contextWindow: 262144,
1233
- supportsImages: false,
1234
- supportsPromptCache: false,
1235
- inputPrice: 0.22,
1236
- outputPrice: 0.8,
1237
- cacheWritesPrice: 0,
1238
- cacheReadsPrice: 0,
1239
- description: "Mixture-of-experts LLM with math and reasoning capabilities"
1238
+ contextWindow: 128e3,
1239
+ supportsImages: true,
1240
+ supportsPromptCache: true,
1241
+ inputPrice: 2.5,
1242
+ outputPrice: 10,
1243
+ cacheReadsPrice: 1.25,
1244
+ supportsTemperature: true,
1245
+ description: "GPT-4o"
1240
1246
  },
1241
- "Qwen/Qwen3-Coder-480B-A35B-Instruct": {
1247
+ "gpt-4o-mini": {
1242
1248
  maxTokens: 16384,
1243
- contextWindow: 262144,
1244
- supportsImages: false,
1245
- supportsPromptCache: false,
1246
- inputPrice: 0.38,
1247
- outputPrice: 1.53,
1248
- cacheWritesPrice: 0,
1249
- cacheReadsPrice: 0,
1250
- description: "Mixture-of-experts LLM with advanced coding and reasoning capabilities"
1251
- },
1252
- "moonshotai/Kimi-K2-Instruct-0905": {
1253
- maxTokens: 16384,
1254
- contextWindow: 262e3,
1255
- supportsImages: false,
1256
- supportsPromptCache: false,
1257
- inputPrice: 0.6,
1258
- outputPrice: 2.5,
1259
- cacheWritesPrice: 0,
1260
- cacheReadsPrice: 0,
1261
- description: "State of the art language model for agentic and coding tasks. September Update."
1262
- }
1263
- };
1264
- var basetenDefaultModelId = "zai-org/GLM-4.6";
1265
-
1266
- // src/providers/bedrock.ts
1267
- var bedrockDefaultModelId = "anthropic.claude-sonnet-4-5-20250929-v1:0";
1268
- var bedrockDefaultPromptRouterModelId = "anthropic.claude-3-sonnet-20240229-v1:0";
1269
- var bedrockModels = {
1270
- "anthropic.claude-sonnet-4-5-20250929-v1:0": {
1271
- maxTokens: 8192,
1272
- contextWindow: 2e5,
1249
+ contextWindow: 128e3,
1273
1250
  supportsImages: true,
1274
1251
  supportsPromptCache: true,
1275
- supportsReasoningBudget: true,
1276
- inputPrice: 3,
1277
- outputPrice: 15,
1278
- cacheWritesPrice: 3.75,
1279
- cacheReadsPrice: 0.3,
1280
- minTokensPerCachePoint: 1024,
1281
- maxCachePoints: 4,
1282
- cachableFields: ["system", "messages", "tools"]
1252
+ inputPrice: 0.15,
1253
+ outputPrice: 0.6,
1254
+ cacheReadsPrice: 0.08,
1255
+ supportsTemperature: true,
1256
+ description: "GPT-4o mini"
1283
1257
  },
1284
- "amazon.nova-pro-v1:0": {
1285
- maxTokens: 5e3,
1286
- contextWindow: 3e5,
1258
+ "gpt-5": {
1259
+ maxTokens: 128e3,
1260
+ contextWindow: 272e3,
1261
+ includedTools: ["apply_patch"],
1262
+ excludedTools: ["apply_diff", "write_to_file"],
1287
1263
  supportsImages: true,
1288
1264
  supportsPromptCache: true,
1289
- inputPrice: 0.8,
1290
- outputPrice: 3.2,
1291
- cacheWritesPrice: 0.8,
1292
- // per million tokens
1293
- cacheReadsPrice: 0.2,
1294
- // per million tokens
1295
- minTokensPerCachePoint: 1,
1296
- maxCachePoints: 1,
1297
- cachableFields: ["system"]
1298
- },
1299
- "amazon.nova-pro-latency-optimized-v1:0": {
1300
- maxTokens: 5e3,
1301
- contextWindow: 3e5,
1302
- supportsImages: true,
1303
- supportsPromptCache: false,
1304
- inputPrice: 1,
1305
- outputPrice: 4,
1306
- cacheWritesPrice: 1,
1307
- // per million tokens
1308
- cacheReadsPrice: 0.25,
1309
- // per million tokens
1310
- description: "Amazon Nova Pro with latency optimized inference"
1265
+ supportsReasoningEffort: ["minimal", "low", "medium", "high"],
1266
+ reasoningEffort: "medium",
1267
+ inputPrice: 1.25,
1268
+ outputPrice: 10,
1269
+ cacheReadsPrice: 0.13,
1270
+ supportsVerbosity: true,
1271
+ supportsTemperature: false,
1272
+ description: "GPT-5: The best model for coding and agentic tasks across domains"
1311
1273
  },
1312
- "amazon.nova-lite-v1:0": {
1313
- maxTokens: 5e3,
1314
- contextWindow: 3e5,
1274
+ "gpt-5-codex": {
1275
+ maxTokens: 128e3,
1276
+ contextWindow: 4e5,
1277
+ includedTools: ["apply_patch"],
1278
+ excludedTools: ["apply_diff", "write_to_file"],
1315
1279
  supportsImages: true,
1316
1280
  supportsPromptCache: true,
1317
- inputPrice: 0.06,
1318
- outputPrice: 0.24,
1319
- cacheWritesPrice: 0.06,
1320
- // per million tokens
1321
- cacheReadsPrice: 0.015,
1322
- // per million tokens
1323
- minTokensPerCachePoint: 1,
1324
- maxCachePoints: 1,
1325
- cachableFields: ["system"]
1281
+ supportsReasoningEffort: ["low", "medium", "high"],
1282
+ reasoningEffort: "medium",
1283
+ inputPrice: 1.25,
1284
+ outputPrice: 10,
1285
+ cacheReadsPrice: 0.13,
1286
+ supportsTemperature: false,
1287
+ description: "GPT-5-Codex: A version of GPT-5 optimized for agentic coding in Codex"
1326
1288
  },
1327
- "amazon.nova-2-lite-v1:0": {
1328
- maxTokens: 65535,
1329
- contextWindow: 1e6,
1289
+ "gpt-5-mini": {
1290
+ maxTokens: 128e3,
1291
+ contextWindow: 272e3,
1292
+ includedTools: ["apply_patch"],
1293
+ excludedTools: ["apply_diff", "write_to_file"],
1330
1294
  supportsImages: true,
1331
1295
  supportsPromptCache: true,
1332
- inputPrice: 0.33,
1333
- outputPrice: 2.75,
1334
- cacheWritesPrice: 0,
1335
- cacheReadsPrice: 0.0825,
1336
- // 75% less than input price
1337
- minTokensPerCachePoint: 1,
1338
- maxCachePoints: 1,
1339
- cachableFields: ["system"],
1340
- description: "Amazon Nova 2 Lite - Comparable to Claude Haiku 4.5"
1341
- },
1342
- "amazon.nova-micro-v1:0": {
1343
- maxTokens: 5e3,
1344
- contextWindow: 128e3,
1345
- supportsImages: false,
1346
- supportsPromptCache: true,
1347
- inputPrice: 0.035,
1348
- outputPrice: 0.14,
1349
- cacheWritesPrice: 0.035,
1350
- // per million tokens
1351
- cacheReadsPrice: 875e-5,
1352
- // per million tokens
1353
- minTokensPerCachePoint: 1,
1354
- maxCachePoints: 1,
1355
- cachableFields: ["system"]
1296
+ supportsReasoningEffort: ["minimal", "low", "medium", "high"],
1297
+ reasoningEffort: "medium",
1298
+ inputPrice: 0.25,
1299
+ outputPrice: 2,
1300
+ cacheReadsPrice: 0.03,
1301
+ supportsVerbosity: true,
1302
+ supportsTemperature: false,
1303
+ description: "GPT-5 Mini: A faster, more cost-efficient version of GPT-5 for well-defined tasks"
1356
1304
  },
1357
- "anthropic.claude-sonnet-4-20250514-v1:0": {
1358
- maxTokens: 8192,
1359
- contextWindow: 2e5,
1305
+ "gpt-5-nano": {
1306
+ maxTokens: 128e3,
1307
+ contextWindow: 272e3,
1308
+ includedTools: ["apply_patch"],
1309
+ excludedTools: ["apply_diff", "write_to_file"],
1360
1310
  supportsImages: true,
1361
1311
  supportsPromptCache: true,
1362
- supportsReasoningBudget: true,
1363
- inputPrice: 3,
1364
- outputPrice: 15,
1365
- cacheWritesPrice: 3.75,
1366
- cacheReadsPrice: 0.3,
1367
- minTokensPerCachePoint: 1024,
1368
- maxCachePoints: 4,
1369
- cachableFields: ["system", "messages", "tools"]
1312
+ supportsReasoningEffort: ["minimal", "low", "medium", "high"],
1313
+ reasoningEffort: "medium",
1314
+ inputPrice: 0.05,
1315
+ outputPrice: 0.4,
1316
+ cacheReadsPrice: 0.01,
1317
+ supportsVerbosity: true,
1318
+ supportsTemperature: false,
1319
+ description: "GPT-5 Nano: Fastest, most cost-efficient version of GPT-5"
1370
1320
  },
1371
- "anthropic.claude-opus-4-1-20250805-v1:0": {
1372
- maxTokens: 8192,
1373
- contextWindow: 2e5,
1321
+ "gpt-5-pro": {
1322
+ maxTokens: 272e3,
1323
+ contextWindow: 4e5,
1324
+ includedTools: ["apply_patch"],
1325
+ excludedTools: ["apply_diff", "write_to_file"],
1374
1326
  supportsImages: true,
1375
- supportsPromptCache: true,
1376
- supportsReasoningBudget: true,
1327
+ supportsPromptCache: false,
1328
+ supportsReasoningEffort: ["minimal", "low", "medium", "high"],
1329
+ reasoningEffort: "medium",
1377
1330
  inputPrice: 15,
1378
- outputPrice: 75,
1379
- cacheWritesPrice: 18.75,
1380
- cacheReadsPrice: 1.5,
1381
- minTokensPerCachePoint: 1024,
1382
- maxCachePoints: 4,
1383
- cachableFields: ["system", "messages", "tools"]
1331
+ outputPrice: 120,
1332
+ supportsVerbosity: true,
1333
+ supportsTemperature: false,
1334
+ description: "GPT-5 Pro"
1384
1335
  },
1385
- "anthropic.claude-opus-4-6-v1": {
1386
- maxTokens: 8192,
1387
- contextWindow: 2e5,
1388
- // Default 200K, extendable to 1M with beta flag 'context-1m-2025-08-07'
1336
+ "gpt-5.1": {
1337
+ maxTokens: 128e3,
1338
+ contextWindow: 272e3,
1339
+ includedTools: ["apply_patch"],
1340
+ excludedTools: ["apply_diff", "write_to_file"],
1389
1341
  supportsImages: true,
1390
1342
  supportsPromptCache: true,
1391
- supportsReasoningBudget: true,
1392
- inputPrice: 5,
1393
- // $5 per million input tokens (≤200K context)
1394
- outputPrice: 25,
1395
- // $25 per million output tokens (≤200K context)
1396
- cacheWritesPrice: 6.25,
1397
- // $6.25 per million tokens
1398
- cacheReadsPrice: 0.5,
1399
- // $0.50 per million tokens
1400
- minTokensPerCachePoint: 1024,
1401
- maxCachePoints: 4,
1402
- cachableFields: ["system", "messages", "tools"],
1403
- // Tiered pricing for extended context (requires beta flag 'context-1m-2025-08-07')
1404
- tiers: [
1405
- {
1406
- contextWindow: 1e6,
1407
- // 1M tokens with beta flag
1408
- inputPrice: 10,
1409
- // $10 per million input tokens (>200K context)
1410
- outputPrice: 37.5,
1411
- // $37.50 per million output tokens (>200K context)
1412
- cacheWritesPrice: 12.5,
1413
- // $12.50 per million tokens (>200K context)
1414
- cacheReadsPrice: 1
1415
- // $1.00 per million tokens (>200K context)
1416
- }
1417
- ]
1343
+ promptCacheRetention: "24h",
1344
+ supportsReasoningEffort: ["none", "low", "medium", "high"],
1345
+ reasoningEffort: "medium",
1346
+ inputPrice: 1.25,
1347
+ outputPrice: 10,
1348
+ cacheReadsPrice: 0.125,
1349
+ supportsVerbosity: true,
1350
+ supportsTemperature: false,
1351
+ description: "GPT-5.1: The best model for coding and agentic tasks across domains"
1418
1352
  },
1419
- "anthropic.claude-opus-4-5-20251101-v1:0": {
1420
- maxTokens: 8192,
1421
- contextWindow: 2e5,
1353
+ "gpt-5.1-chat": {
1354
+ maxTokens: 16384,
1355
+ contextWindow: 128e3,
1356
+ includedTools: ["apply_patch"],
1357
+ excludedTools: ["apply_diff", "write_to_file"],
1422
1358
  supportsImages: true,
1423
1359
  supportsPromptCache: true,
1424
- supportsReasoningBudget: true,
1425
- inputPrice: 5,
1426
- outputPrice: 25,
1427
- cacheWritesPrice: 6.25,
1428
- cacheReadsPrice: 0.5,
1429
- minTokensPerCachePoint: 1024,
1430
- maxCachePoints: 4,
1431
- cachableFields: ["system", "messages", "tools"]
1360
+ promptCacheRetention: "24h",
1361
+ inputPrice: 1.25,
1362
+ outputPrice: 10,
1363
+ cacheReadsPrice: 0.125,
1364
+ supportsTemperature: false,
1365
+ description: "GPT-5.1 Chat: Optimized for conversational AI and chat use cases"
1432
1366
  },
1433
- "anthropic.claude-opus-4-20250514-v1:0": {
1434
- maxTokens: 8192,
1435
- contextWindow: 2e5,
1367
+ "gpt-5.1-codex": {
1368
+ maxTokens: 128e3,
1369
+ contextWindow: 4e5,
1370
+ includedTools: ["apply_patch"],
1371
+ excludedTools: ["apply_diff", "write_to_file"],
1436
1372
  supportsImages: true,
1437
1373
  supportsPromptCache: true,
1438
- supportsReasoningBudget: true,
1439
- inputPrice: 15,
1440
- outputPrice: 75,
1441
- cacheWritesPrice: 18.75,
1442
- cacheReadsPrice: 1.5,
1443
- minTokensPerCachePoint: 1024,
1444
- maxCachePoints: 4,
1445
- cachableFields: ["system", "messages", "tools"]
1446
- },
1447
- "anthropic.claude-3-7-sonnet-20250219-v1:0": {
1448
- maxTokens: 8192,
1449
- contextWindow: 2e5,
1374
+ promptCacheRetention: "24h",
1375
+ supportsReasoningEffort: ["low", "medium", "high"],
1376
+ reasoningEffort: "medium",
1377
+ inputPrice: 1.25,
1378
+ outputPrice: 10,
1379
+ cacheReadsPrice: 0.125,
1380
+ supportsTemperature: false,
1381
+ description: "GPT-5.1 Codex: A version of GPT-5.1 optimized for agentic coding in Codex"
1382
+ },
1383
+ "gpt-5.1-codex-max": {
1384
+ maxTokens: 128e3,
1385
+ contextWindow: 4e5,
1386
+ includedTools: ["apply_patch"],
1387
+ excludedTools: ["apply_diff", "write_to_file"],
1450
1388
  supportsImages: true,
1451
1389
  supportsPromptCache: true,
1452
- supportsReasoningBudget: true,
1453
- inputPrice: 3,
1454
- outputPrice: 15,
1455
- cacheWritesPrice: 3.75,
1456
- cacheReadsPrice: 0.3,
1457
- minTokensPerCachePoint: 1024,
1458
- maxCachePoints: 4,
1459
- cachableFields: ["system", "messages", "tools"]
1390
+ promptCacheRetention: "24h",
1391
+ supportsReasoningEffort: ["low", "medium", "high", "xhigh"],
1392
+ reasoningEffort: "medium",
1393
+ inputPrice: 1.25,
1394
+ outputPrice: 10,
1395
+ cacheReadsPrice: 0.125,
1396
+ supportsTemperature: false,
1397
+ description: "GPT-5.1 Codex Max: Our most intelligent coding model optimized for long-horizon, agentic coding tasks"
1460
1398
  },
1461
- "anthropic.claude-3-5-sonnet-20241022-v2:0": {
1462
- maxTokens: 8192,
1463
- contextWindow: 2e5,
1399
+ "gpt-5.1-codex-mini": {
1400
+ maxTokens: 128e3,
1401
+ contextWindow: 4e5,
1402
+ includedTools: ["apply_patch"],
1403
+ excludedTools: ["apply_diff", "write_to_file"],
1464
1404
  supportsImages: true,
1465
1405
  supportsPromptCache: true,
1466
- inputPrice: 3,
1467
- outputPrice: 15,
1468
- cacheWritesPrice: 3.75,
1469
- cacheReadsPrice: 0.3,
1470
- minTokensPerCachePoint: 1024,
1471
- maxCachePoints: 4,
1472
- cachableFields: ["system", "messages", "tools"]
1406
+ promptCacheRetention: "24h",
1407
+ supportsReasoningEffort: ["low", "medium", "high"],
1408
+ reasoningEffort: "medium",
1409
+ inputPrice: 0.25,
1410
+ outputPrice: 2,
1411
+ cacheReadsPrice: 0.025,
1412
+ supportsTemperature: false,
1413
+ description: "GPT-5.1 Codex mini: A version of GPT-5.1 optimized for agentic coding in Codex"
1473
1414
  },
1474
- "anthropic.claude-3-5-haiku-20241022-v1:0": {
1475
- maxTokens: 8192,
1476
- contextWindow: 2e5,
1477
- supportsImages: false,
1415
+ "gpt-5.2": {
1416
+ maxTokens: 128e3,
1417
+ contextWindow: 4e5,
1418
+ includedTools: ["apply_patch"],
1419
+ excludedTools: ["apply_diff", "write_to_file"],
1420
+ supportsImages: true,
1478
1421
  supportsPromptCache: true,
1479
- inputPrice: 0.8,
1480
- outputPrice: 4,
1481
- cacheWritesPrice: 1,
1482
- cacheReadsPrice: 0.08,
1483
- minTokensPerCachePoint: 2048,
1484
- maxCachePoints: 4,
1485
- cachableFields: ["system", "messages", "tools"]
1422
+ promptCacheRetention: "24h",
1423
+ supportsReasoningEffort: ["none", "low", "medium", "high", "xhigh"],
1424
+ reasoningEffort: "medium",
1425
+ inputPrice: 1.75,
1426
+ outputPrice: 14,
1427
+ cacheReadsPrice: 0.125,
1428
+ supportsVerbosity: true,
1429
+ supportsTemperature: false,
1430
+ description: "GPT-5.2: Our flagship model for coding and agentic tasks across industries"
1486
1431
  },
1487
- "anthropic.claude-haiku-4-5-20251001-v1:0": {
1488
- maxTokens: 8192,
1489
- contextWindow: 2e5,
1432
+ "gpt-5.2-chat": {
1433
+ maxTokens: 16384,
1434
+ contextWindow: 128e3,
1435
+ includedTools: ["apply_patch"],
1436
+ excludedTools: ["apply_diff", "write_to_file"],
1490
1437
  supportsImages: true,
1491
1438
  supportsPromptCache: true,
1492
- supportsReasoningBudget: true,
1493
- inputPrice: 1,
1494
- outputPrice: 5,
1495
- cacheWritesPrice: 1.25,
1496
- // 5m cache writes
1497
- cacheReadsPrice: 0.1,
1498
- // cache hits / refreshes
1499
- minTokensPerCachePoint: 2048,
1500
- maxCachePoints: 4,
1501
- cachableFields: ["system", "messages", "tools"]
1439
+ inputPrice: 1.75,
1440
+ outputPrice: 14,
1441
+ cacheReadsPrice: 0.175,
1442
+ supportsTemperature: false,
1443
+ description: "GPT-5.2 Chat: Optimized for conversational AI and chat use cases"
1502
1444
  },
1503
- "anthropic.claude-3-5-sonnet-20240620-v1:0": {
1504
- maxTokens: 8192,
1505
- contextWindow: 2e5,
1445
+ "gpt-5.2-codex": {
1446
+ maxTokens: 128e3,
1447
+ contextWindow: 4e5,
1448
+ includedTools: ["apply_patch"],
1449
+ excludedTools: ["apply_diff", "write_to_file"],
1506
1450
  supportsImages: true,
1507
- supportsPromptCache: false,
1508
- inputPrice: 3,
1509
- outputPrice: 15
1451
+ supportsPromptCache: true,
1452
+ promptCacheRetention: "24h",
1453
+ supportsReasoningEffort: ["low", "medium", "high", "xhigh"],
1454
+ reasoningEffort: "medium",
1455
+ inputPrice: 1.75,
1456
+ outputPrice: 14,
1457
+ cacheReadsPrice: 0.175,
1458
+ supportsTemperature: false,
1459
+ description: "GPT-5.2 Codex: Our most intelligent coding model optimized for long-horizon, agentic coding tasks"
1510
1460
  },
1511
- "anthropic.claude-3-opus-20240229-v1:0": {
1512
- maxTokens: 4096,
1461
+ o1: {
1462
+ maxTokens: 1e5,
1513
1463
  contextWindow: 2e5,
1514
1464
  supportsImages: true,
1515
- supportsPromptCache: false,
1465
+ supportsPromptCache: true,
1516
1466
  inputPrice: 15,
1517
- outputPrice: 75
1467
+ outputPrice: 60,
1468
+ cacheReadsPrice: 7.5,
1469
+ supportsTemperature: false,
1470
+ description: "o1"
1518
1471
  },
1519
- "anthropic.claude-3-sonnet-20240229-v1:0": {
1520
- maxTokens: 4096,
1472
+ "o1-mini": {
1473
+ maxTokens: 65536,
1474
+ contextWindow: 128e3,
1475
+ supportsImages: true,
1476
+ supportsPromptCache: true,
1477
+ inputPrice: 1.1,
1478
+ outputPrice: 4.4,
1479
+ cacheReadsPrice: 0.55,
1480
+ supportsTemperature: false,
1481
+ description: "o1-mini"
1482
+ },
1483
+ "o1-preview": {
1484
+ maxTokens: 32768,
1485
+ contextWindow: 128e3,
1486
+ supportsImages: true,
1487
+ supportsPromptCache: true,
1488
+ inputPrice: 16.5,
1489
+ outputPrice: 66,
1490
+ cacheReadsPrice: 8.25,
1491
+ supportsTemperature: false,
1492
+ description: "o1-preview"
1493
+ },
1494
+ o3: {
1495
+ maxTokens: 1e5,
1521
1496
  contextWindow: 2e5,
1522
1497
  supportsImages: true,
1523
- supportsPromptCache: false,
1524
- inputPrice: 3,
1525
- outputPrice: 15
1498
+ supportsPromptCache: true,
1499
+ supportsReasoningEffort: ["low", "medium", "high"],
1500
+ reasoningEffort: "medium",
1501
+ inputPrice: 2,
1502
+ outputPrice: 8,
1503
+ cacheReadsPrice: 0.5,
1504
+ supportsTemperature: false,
1505
+ description: "o3"
1526
1506
  },
1527
- "anthropic.claude-3-haiku-20240307-v1:0": {
1528
- maxTokens: 4096,
1507
+ "o3-mini": {
1508
+ maxTokens: 1e5,
1509
+ contextWindow: 2e5,
1510
+ supportsImages: false,
1511
+ supportsPromptCache: true,
1512
+ supportsReasoningEffort: ["low", "medium", "high"],
1513
+ reasoningEffort: "medium",
1514
+ inputPrice: 1.1,
1515
+ outputPrice: 4.4,
1516
+ cacheReadsPrice: 0.55,
1517
+ supportsTemperature: false,
1518
+ description: "o3-mini"
1519
+ },
1520
+ "o4-mini": {
1521
+ maxTokens: 1e5,
1529
1522
  contextWindow: 2e5,
1530
1523
  supportsImages: true,
1524
+ supportsPromptCache: true,
1525
+ supportsReasoningEffort: ["low", "medium", "high"],
1526
+ reasoningEffort: "medium",
1527
+ inputPrice: 1.1,
1528
+ outputPrice: 4.4,
1529
+ cacheReadsPrice: 0.28,
1530
+ supportsTemperature: false,
1531
+ description: "o4-mini"
1532
+ }
1533
+ };
1534
+ var azureDefaultModelId = "gpt-4o";
1535
+ var azureDefaultModelInfo = azureModels[azureDefaultModelId];
1536
+
1537
+ // src/providers/baseten.ts
1538
+ var basetenModels = {
1539
+ "moonshotai/Kimi-K2-Thinking": {
1540
+ maxTokens: 16384,
1541
+ contextWindow: 262e3,
1542
+ supportsImages: false,
1531
1543
  supportsPromptCache: false,
1532
- inputPrice: 0.25,
1533
- outputPrice: 1.25
1544
+ inputPrice: 0.6,
1545
+ outputPrice: 2.5,
1546
+ cacheWritesPrice: 0,
1547
+ cacheReadsPrice: 0,
1548
+ description: "Kimi K2 Thinking - A model with enhanced reasoning capabilities from Kimi K2"
1534
1549
  },
1535
- "deepseek.r1-v1:0": {
1536
- maxTokens: 32768,
1537
- contextWindow: 128e3,
1550
+ "zai-org/GLM-4.6": {
1551
+ maxTokens: 16384,
1552
+ contextWindow: 2e5,
1538
1553
  supportsImages: false,
1539
1554
  supportsPromptCache: false,
1540
- inputPrice: 1.35,
1541
- outputPrice: 5.4
1555
+ inputPrice: 0.6,
1556
+ outputPrice: 2.2,
1557
+ cacheWritesPrice: 0,
1558
+ cacheReadsPrice: 0,
1559
+ description: "Frontier open model with advanced agentic, reasoning and coding capabilities"
1542
1560
  },
1543
- "openai.gpt-oss-20b-1:0": {
1544
- maxTokens: 8192,
1545
- contextWindow: 128e3,
1561
+ "deepseek-ai/DeepSeek-R1": {
1562
+ maxTokens: 16384,
1563
+ contextWindow: 163840,
1546
1564
  supportsImages: false,
1547
1565
  supportsPromptCache: false,
1548
- inputPrice: 0.5,
1549
- outputPrice: 1.5,
1550
- description: "GPT-OSS 20B - Optimized for low latency and local/specialized use cases"
1566
+ inputPrice: 2.55,
1567
+ outputPrice: 5.95,
1568
+ cacheWritesPrice: 0,
1569
+ cacheReadsPrice: 0,
1570
+ description: "DeepSeek's first-generation reasoning model"
1551
1571
  },
1552
- "openai.gpt-oss-120b-1:0": {
1553
- maxTokens: 8192,
1554
- contextWindow: 128e3,
1572
+ "deepseek-ai/DeepSeek-R1-0528": {
1573
+ maxTokens: 16384,
1574
+ contextWindow: 163840,
1555
1575
  supportsImages: false,
1556
1576
  supportsPromptCache: false,
1557
- inputPrice: 2,
1558
- outputPrice: 6,
1559
- description: "GPT-OSS 120B - Production-ready, general-purpose, high-reasoning model"
1577
+ inputPrice: 2.55,
1578
+ outputPrice: 5.95,
1579
+ cacheWritesPrice: 0,
1580
+ cacheReadsPrice: 0,
1581
+ description: "The latest revision of DeepSeek's first-generation reasoning model"
1560
1582
  },
1561
- "meta.llama3-3-70b-instruct-v1:0": {
1562
- maxTokens: 8192,
1563
- contextWindow: 128e3,
1583
+ "deepseek-ai/DeepSeek-V3-0324": {
1584
+ maxTokens: 16384,
1585
+ contextWindow: 163840,
1564
1586
  supportsImages: false,
1565
1587
  supportsPromptCache: false,
1566
- inputPrice: 0.72,
1567
- outputPrice: 0.72,
1568
- description: "Llama 3.3 Instruct (70B)"
1569
- },
1570
- "meta.llama3-2-90b-instruct-v1:0": {
1571
- maxTokens: 8192,
1572
- contextWindow: 128e3,
1573
- supportsImages: true,
1574
- supportsPromptCache: false,
1575
- inputPrice: 0.72,
1576
- outputPrice: 0.72,
1577
- description: "Llama 3.2 Instruct (90B)"
1588
+ inputPrice: 0.77,
1589
+ outputPrice: 0.77,
1590
+ cacheWritesPrice: 0,
1591
+ cacheReadsPrice: 0,
1592
+ description: "Fast general-purpose LLM with enhanced reasoning capabilities"
1578
1593
  },
1579
- "meta.llama3-2-11b-instruct-v1:0": {
1580
- maxTokens: 8192,
1581
- contextWindow: 128e3,
1582
- supportsImages: true,
1594
+ "deepseek-ai/DeepSeek-V3.1": {
1595
+ maxTokens: 16384,
1596
+ contextWindow: 163840,
1597
+ supportsImages: false,
1583
1598
  supportsPromptCache: false,
1584
- inputPrice: 0.16,
1585
- outputPrice: 0.16,
1586
- description: "Llama 3.2 Instruct (11B)"
1599
+ inputPrice: 0.5,
1600
+ outputPrice: 1.5,
1601
+ cacheWritesPrice: 0,
1602
+ cacheReadsPrice: 0,
1603
+ description: "Extremely capable general-purpose LLM with hybrid reasoning capabilities and advanced tool calling"
1587
1604
  },
1588
- "meta.llama3-2-3b-instruct-v1:0": {
1589
- maxTokens: 8192,
1590
- contextWindow: 128e3,
1605
+ "deepseek-ai/DeepSeek-V3.2": {
1606
+ maxTokens: 16384,
1607
+ contextWindow: 163840,
1591
1608
  supportsImages: false,
1592
1609
  supportsPromptCache: false,
1593
- inputPrice: 0.15,
1594
- outputPrice: 0.15,
1595
- description: "Llama 3.2 Instruct (3B)"
1610
+ inputPrice: 0.3,
1611
+ outputPrice: 0.45,
1612
+ cacheWritesPrice: 0,
1613
+ cacheReadsPrice: 0,
1614
+ description: "DeepSeek's hybrid reasoning model with efficient long context scaling with GPT-5 level performance"
1596
1615
  },
1597
- "meta.llama3-2-1b-instruct-v1:0": {
1598
- maxTokens: 8192,
1599
- contextWindow: 128e3,
1616
+ "openai/gpt-oss-120b": {
1617
+ maxTokens: 16384,
1618
+ contextWindow: 128072,
1600
1619
  supportsImages: false,
1601
1620
  supportsPromptCache: false,
1602
1621
  inputPrice: 0.1,
1603
- outputPrice: 0.1,
1604
- description: "Llama 3.2 Instruct (1B)"
1622
+ outputPrice: 0.5,
1623
+ cacheWritesPrice: 0,
1624
+ cacheReadsPrice: 0,
1625
+ description: "Extremely capable general-purpose LLM with strong, controllable reasoning capabilities"
1605
1626
  },
1606
- "meta.llama3-1-405b-instruct-v1:0": {
1607
- maxTokens: 8192,
1608
- contextWindow: 128e3,
1627
+ "Qwen/Qwen3-235B-A22B-Instruct-2507": {
1628
+ maxTokens: 16384,
1629
+ contextWindow: 262144,
1609
1630
  supportsImages: false,
1610
1631
  supportsPromptCache: false,
1611
- inputPrice: 2.4,
1612
- outputPrice: 2.4,
1613
- description: "Llama 3.1 Instruct (405B)"
1632
+ inputPrice: 0.22,
1633
+ outputPrice: 0.8,
1634
+ cacheWritesPrice: 0,
1635
+ cacheReadsPrice: 0,
1636
+ description: "Mixture-of-experts LLM with math and reasoning capabilities"
1614
1637
  },
1615
- "meta.llama3-1-70b-instruct-v1:0": {
1616
- maxTokens: 8192,
1617
- contextWindow: 128e3,
1638
+ "Qwen/Qwen3-Coder-480B-A35B-Instruct": {
1639
+ maxTokens: 16384,
1640
+ contextWindow: 262144,
1618
1641
  supportsImages: false,
1619
1642
  supportsPromptCache: false,
1620
- inputPrice: 0.72,
1621
- outputPrice: 0.72,
1622
- description: "Llama 3.1 Instruct (70B)"
1643
+ inputPrice: 0.38,
1644
+ outputPrice: 1.53,
1645
+ cacheWritesPrice: 0,
1646
+ cacheReadsPrice: 0,
1647
+ description: "Mixture-of-experts LLM with advanced coding and reasoning capabilities"
1623
1648
  },
1624
- "meta.llama3-1-70b-instruct-latency-optimized-v1:0": {
1625
- maxTokens: 8192,
1626
- contextWindow: 128e3,
1649
+ "moonshotai/Kimi-K2-Instruct-0905": {
1650
+ maxTokens: 16384,
1651
+ contextWindow: 262e3,
1627
1652
  supportsImages: false,
1628
1653
  supportsPromptCache: false,
1629
- inputPrice: 0.9,
1630
- outputPrice: 0.9,
1631
- description: "Llama 3.1 Instruct (70B) (w/ latency optimized inference)"
1632
- },
1633
- "meta.llama3-1-8b-instruct-v1:0": {
1654
+ inputPrice: 0.6,
1655
+ outputPrice: 2.5,
1656
+ cacheWritesPrice: 0,
1657
+ cacheReadsPrice: 0,
1658
+ description: "State of the art language model for agentic and coding tasks. September Update."
1659
+ }
1660
+ };
1661
+ var basetenDefaultModelId = "zai-org/GLM-4.6";
1662
+
1663
+ // src/providers/bedrock.ts
1664
+ var bedrockDefaultModelId = "anthropic.claude-sonnet-4-5-20250929-v1:0";
1665
+ var bedrockDefaultPromptRouterModelId = "anthropic.claude-3-sonnet-20240229-v1:0";
1666
+ var bedrockModels = {
1667
+ "anthropic.claude-sonnet-4-5-20250929-v1:0": {
1634
1668
  maxTokens: 8192,
1635
- contextWindow: 8e3,
1636
- supportsImages: false,
1637
- supportsPromptCache: false,
1638
- inputPrice: 0.22,
1639
- outputPrice: 0.22,
1640
- description: "Llama 3.1 Instruct (8B)"
1641
- },
1642
- "meta.llama3-70b-instruct-v1:0": {
1643
- maxTokens: 2048,
1644
- contextWindow: 8e3,
1645
- supportsImages: false,
1646
- supportsPromptCache: false,
1647
- inputPrice: 2.65,
1648
- outputPrice: 3.5
1669
+ contextWindow: 2e5,
1670
+ supportsImages: true,
1671
+ supportsPromptCache: true,
1672
+ supportsReasoningBudget: true,
1673
+ inputPrice: 3,
1674
+ outputPrice: 15,
1675
+ cacheWritesPrice: 3.75,
1676
+ cacheReadsPrice: 0.3,
1677
+ minTokensPerCachePoint: 1024,
1678
+ maxCachePoints: 4,
1679
+ cachableFields: ["system", "messages", "tools"]
1649
1680
  },
1650
- "meta.llama3-8b-instruct-v1:0": {
1651
- maxTokens: 2048,
1652
- contextWindow: 4e3,
1653
- supportsImages: false,
1654
- supportsPromptCache: false,
1655
- inputPrice: 0.3,
1656
- outputPrice: 0.6
1681
+ "amazon.nova-pro-v1:0": {
1682
+ maxTokens: 5e3,
1683
+ contextWindow: 3e5,
1684
+ supportsImages: true,
1685
+ supportsPromptCache: true,
1686
+ inputPrice: 0.8,
1687
+ outputPrice: 3.2,
1688
+ cacheWritesPrice: 0.8,
1689
+ // per million tokens
1690
+ cacheReadsPrice: 0.2,
1691
+ // per million tokens
1692
+ minTokensPerCachePoint: 1,
1693
+ maxCachePoints: 1,
1694
+ cachableFields: ["system"]
1657
1695
  },
1658
- "amazon.titan-text-lite-v1:0": {
1659
- maxTokens: 4096,
1660
- contextWindow: 8e3,
1661
- supportsImages: false,
1696
+ "amazon.nova-pro-latency-optimized-v1:0": {
1697
+ maxTokens: 5e3,
1698
+ contextWindow: 3e5,
1699
+ supportsImages: true,
1662
1700
  supportsPromptCache: false,
1663
- inputPrice: 0.15,
1664
- outputPrice: 0.2,
1665
- description: "Amazon Titan Text Lite"
1701
+ inputPrice: 1,
1702
+ outputPrice: 4,
1703
+ cacheWritesPrice: 1,
1704
+ // per million tokens
1705
+ cacheReadsPrice: 0.25,
1706
+ // per million tokens
1707
+ description: "Amazon Nova Pro with latency optimized inference"
1666
1708
  },
1667
- "amazon.titan-text-express-v1:0": {
1668
- maxTokens: 4096,
1669
- contextWindow: 8e3,
1670
- supportsImages: false,
1671
- supportsPromptCache: false,
1672
- inputPrice: 0.2,
1673
- outputPrice: 0.6,
1674
- description: "Amazon Titan Text Express"
1709
+ "amazon.nova-lite-v1:0": {
1710
+ maxTokens: 5e3,
1711
+ contextWindow: 3e5,
1712
+ supportsImages: true,
1713
+ supportsPromptCache: true,
1714
+ inputPrice: 0.06,
1715
+ outputPrice: 0.24,
1716
+ cacheWritesPrice: 0.06,
1717
+ // per million tokens
1718
+ cacheReadsPrice: 0.015,
1719
+ // per million tokens
1720
+ minTokensPerCachePoint: 1,
1721
+ maxCachePoints: 1,
1722
+ cachableFields: ["system"]
1675
1723
  },
1676
- "moonshot.kimi-k2-thinking": {
1677
- maxTokens: 32e3,
1678
- contextWindow: 262144,
1679
- supportsImages: false,
1680
- supportsPromptCache: false,
1681
- preserveReasoning: true,
1682
- inputPrice: 0.6,
1683
- outputPrice: 2.5,
1684
- description: "Kimi K2 Thinking (1T parameter MoE model with 32B active parameters)"
1724
+ "amazon.nova-2-lite-v1:0": {
1725
+ maxTokens: 65535,
1726
+ contextWindow: 1e6,
1727
+ supportsImages: true,
1728
+ supportsPromptCache: true,
1729
+ inputPrice: 0.33,
1730
+ outputPrice: 2.75,
1731
+ cacheWritesPrice: 0,
1732
+ cacheReadsPrice: 0.0825,
1733
+ // 75% less than input price
1734
+ minTokensPerCachePoint: 1,
1735
+ maxCachePoints: 1,
1736
+ cachableFields: ["system"],
1737
+ description: "Amazon Nova 2 Lite - Comparable to Claude Haiku 4.5"
1685
1738
  },
1686
- "minimax.minimax-m2": {
1687
- maxTokens: 16384,
1688
- contextWindow: 196608,
1739
+ "amazon.nova-micro-v1:0": {
1740
+ maxTokens: 5e3,
1741
+ contextWindow: 128e3,
1689
1742
  supportsImages: false,
1690
- supportsPromptCache: false,
1691
- preserveReasoning: true,
1692
- inputPrice: 0.3,
1693
- outputPrice: 1.2,
1694
- description: "MiniMax M2 (230B parameter MoE model with 10B active parameters)"
1743
+ supportsPromptCache: true,
1744
+ inputPrice: 0.035,
1745
+ outputPrice: 0.14,
1746
+ cacheWritesPrice: 0.035,
1747
+ // per million tokens
1748
+ cacheReadsPrice: 875e-5,
1749
+ // per million tokens
1750
+ minTokensPerCachePoint: 1,
1751
+ maxCachePoints: 1,
1752
+ cachableFields: ["system"]
1695
1753
  },
1696
- "qwen.qwen3-next-80b-a3b": {
1754
+ "anthropic.claude-sonnet-4-20250514-v1:0": {
1697
1755
  maxTokens: 8192,
1698
- contextWindow: 262144,
1699
- supportsImages: false,
1700
- supportsPromptCache: false,
1701
- inputPrice: 0.15,
1702
- outputPrice: 1.2,
1703
- description: "Qwen3 Next 80B (MoE model with 3B active parameters)"
1756
+ contextWindow: 2e5,
1757
+ supportsImages: true,
1758
+ supportsPromptCache: true,
1759
+ supportsReasoningBudget: true,
1760
+ inputPrice: 3,
1761
+ outputPrice: 15,
1762
+ cacheWritesPrice: 3.75,
1763
+ cacheReadsPrice: 0.3,
1764
+ minTokensPerCachePoint: 1024,
1765
+ maxCachePoints: 4,
1766
+ cachableFields: ["system", "messages", "tools"]
1704
1767
  },
1705
- "qwen.qwen3-coder-480b-a35b-v1:0": {
1768
+ "anthropic.claude-opus-4-1-20250805-v1:0": {
1706
1769
  maxTokens: 8192,
1707
- contextWindow: 262144,
1708
- supportsImages: false,
1709
- supportsPromptCache: false,
1710
- inputPrice: 0.45,
1711
- outputPrice: 1.8,
1712
- description: "Qwen3 Coder 480B (MoE model with 35B active parameters)"
1713
- }
1714
- };
1715
- var BEDROCK_DEFAULT_TEMPERATURE = 0.3;
1716
- var BEDROCK_MAX_TOKENS = 4096;
1717
- var BEDROCK_DEFAULT_CONTEXT = 128e3;
1718
- var AWS_INFERENCE_PROFILE_MAPPING = [
1719
- // Australia regions (Sydney and Melbourne) → au. inference profile (most specific - 14 chars)
1720
- ["ap-southeast-2", "au."],
1721
- ["ap-southeast-4", "au."],
1722
- // Japan regions (Tokyo and Osaka) → jp. inference profile (13 chars)
1723
- ["ap-northeast-", "jp."],
1724
- // US Government Cloud → ug. inference profile (7 chars)
1725
- ["us-gov-", "ug."],
1726
- // Americas regions → us. inference profile (3 chars)
1727
- ["us-", "us."],
1728
- // Europe regions → eu. inference profile (3 chars)
1729
- ["eu-", "eu."],
1730
- // Asia Pacific regions → apac. inference profile (3 chars)
1731
- ["ap-", "apac."],
1732
- // Canada regions → ca. inference profile (3 chars)
1733
- ["ca-", "ca."],
1734
- // South America regions → sa. inference profile (3 chars)
1735
- ["sa-", "sa."]
1736
- ];
1737
- var BEDROCK_REGIONS = [
1738
- { value: "us-east-1", label: "us-east-1" },
1739
- { value: "us-east-2", label: "us-east-2" },
1740
- { value: "us-west-1", label: "us-west-1" },
1741
- { value: "us-west-2", label: "us-west-2" },
1742
- { value: "ap-northeast-1", label: "ap-northeast-1" },
1743
- { value: "ap-northeast-2", label: "ap-northeast-2" },
1744
- { value: "ap-northeast-3", label: "ap-northeast-3" },
1745
- { value: "ap-south-1", label: "ap-south-1" },
1746
- { value: "ap-south-2", label: "ap-south-2" },
1747
- { value: "ap-southeast-1", label: "ap-southeast-1" },
1748
- { value: "ap-southeast-2", label: "ap-southeast-2" },
1749
- { value: "ap-east-1", label: "ap-east-1" },
1750
- { value: "eu-central-1", label: "eu-central-1" },
1751
- { value: "eu-central-2", label: "eu-central-2" },
1752
- { value: "eu-west-1", label: "eu-west-1" },
1753
- { value: "eu-west-2", label: "eu-west-2" },
1754
- { value: "eu-west-3", label: "eu-west-3" },
1755
- { value: "eu-north-1", label: "eu-north-1" },
1756
- { value: "eu-south-1", label: "eu-south-1" },
1757
- { value: "eu-south-2", label: "eu-south-2" },
1758
- { value: "ca-central-1", label: "ca-central-1" },
1759
- { value: "sa-east-1", label: "sa-east-1" },
1760
- { value: "us-gov-east-1", label: "us-gov-east-1" },
1761
- { value: "us-gov-west-1", label: "us-gov-west-1" }
1762
- ].sort((a, b) => a.value.localeCompare(b.value));
1763
- var BEDROCK_1M_CONTEXT_MODEL_IDS = [
1764
- "anthropic.claude-sonnet-4-20250514-v1:0",
1765
- "anthropic.claude-sonnet-4-5-20250929-v1:0",
1766
- "anthropic.claude-opus-4-6-v1"
1767
- ];
1768
- var BEDROCK_GLOBAL_INFERENCE_MODEL_IDS = [
1769
- "anthropic.claude-sonnet-4-20250514-v1:0",
1770
- "anthropic.claude-sonnet-4-5-20250929-v1:0",
1771
- "anthropic.claude-haiku-4-5-20251001-v1:0",
1772
- "anthropic.claude-opus-4-5-20251101-v1:0",
1773
- "anthropic.claude-opus-4-6-v1"
1774
- ];
1775
- var BEDROCK_SERVICE_TIER_MODEL_IDS = [
1776
- // Amazon Nova models
1777
- "amazon.nova-lite-v1:0",
1778
- "amazon.nova-2-lite-v1:0",
1779
- "amazon.nova-pro-v1:0",
1780
- "amazon.nova-pro-latency-optimized-v1:0",
1781
- // DeepSeek models
1782
- "deepseek.r1-v1:0",
1783
- // Qwen models
1784
- "qwen.qwen3-next-80b-a3b",
1785
- "qwen.qwen3-coder-480b-a35b-v1:0",
1786
- // OpenAI GPT-OSS models
1787
- "openai.gpt-oss-20b-1:0",
1788
- "openai.gpt-oss-120b-1:0"
1789
- ];
1790
- var BEDROCK_SERVICE_TIER_PRICING = {
1791
- STANDARD: 1,
1792
- // Base price
1793
- FLEX: 0.5,
1794
- // 50% discount from standard
1795
- PRIORITY: 1.75
1796
- // 75% premium over standard
1797
- };
1798
-
1799
- // src/providers/cerebras.ts
1800
- var cerebrasDefaultModelId = "gpt-oss-120b";
1801
- var cerebrasModels = {
1802
- "zai-glm-4.7": {
1803
- maxTokens: 16384,
1804
- // Conservative default to avoid premature rate limiting (Cerebras reserves quota upfront)
1805
- contextWindow: 131072,
1806
- supportsImages: false,
1770
+ contextWindow: 2e5,
1771
+ supportsImages: true,
1807
1772
  supportsPromptCache: true,
1808
- supportsTemperature: true,
1809
- defaultTemperature: 1,
1810
- inputPrice: 0,
1811
- outputPrice: 0,
1812
- description: "Highly capable general-purpose model on Cerebras (up to 1,000 tokens/s), competitive with leading proprietary models on coding tasks."
1813
- },
1814
- "qwen-3-235b-a22b-instruct-2507": {
1815
- maxTokens: 16384,
1816
- // Conservative default to avoid premature rate limiting
1817
- contextWindow: 64e3,
1818
- supportsImages: false,
1819
- supportsPromptCache: false,
1820
- inputPrice: 0,
1821
- outputPrice: 0,
1822
- description: "Intelligent model with ~1400 tokens/s"
1823
- },
1824
- "llama-3.3-70b": {
1825
- maxTokens: 16384,
1826
- // Conservative default to avoid premature rate limiting
1827
- contextWindow: 64e3,
1828
- supportsImages: false,
1829
- supportsPromptCache: false,
1830
- inputPrice: 0,
1831
- outputPrice: 0,
1832
- description: "Powerful model with ~2600 tokens/s"
1833
- },
1834
- "qwen-3-32b": {
1835
- maxTokens: 16384,
1836
- // Conservative default to avoid premature rate limiting
1837
- contextWindow: 64e3,
1838
- supportsImages: false,
1839
- supportsPromptCache: false,
1840
- inputPrice: 0,
1841
- outputPrice: 0,
1842
- description: "SOTA coding performance with ~2500 tokens/s"
1843
- },
1844
- "gpt-oss-120b": {
1845
- maxTokens: 16384,
1846
- // Conservative default to avoid premature rate limiting
1847
- contextWindow: 64e3,
1848
- supportsImages: false,
1849
- supportsPromptCache: false,
1850
- inputPrice: 0,
1851
- outputPrice: 0,
1852
- description: "OpenAI GPT OSS model with ~2800 tokens/s\n\n\u2022 64K context window\n\u2022 Excels at efficient reasoning across science, math, and coding"
1853
- }
1854
- };
1855
-
1856
- // src/providers/chutes.ts
1857
- var chutesDefaultModelId = "deepseek-ai/DeepSeek-R1-0528";
1858
- var chutesModels = {
1859
- "deepseek-ai/DeepSeek-R1-0528": {
1860
- maxTokens: 32768,
1861
- contextWindow: 163840,
1862
- supportsImages: false,
1863
- supportsPromptCache: false,
1864
- inputPrice: 0,
1865
- outputPrice: 0,
1866
- description: "DeepSeek R1 0528 model."
1867
- },
1868
- "deepseek-ai/DeepSeek-R1": {
1869
- maxTokens: 32768,
1870
- contextWindow: 163840,
1871
- supportsImages: false,
1872
- supportsPromptCache: false,
1873
- inputPrice: 0,
1874
- outputPrice: 0,
1875
- description: "DeepSeek R1 model."
1876
- },
1877
- "deepseek-ai/DeepSeek-V3": {
1878
- maxTokens: 32768,
1879
- contextWindow: 163840,
1880
- supportsImages: false,
1881
- supportsPromptCache: false,
1882
- inputPrice: 0,
1883
- outputPrice: 0,
1884
- description: "DeepSeek V3 model."
1885
- },
1886
- "deepseek-ai/DeepSeek-V3.1": {
1887
- maxTokens: 32768,
1888
- contextWindow: 163840,
1889
- supportsImages: false,
1890
- supportsPromptCache: false,
1891
- inputPrice: 0,
1892
- outputPrice: 0,
1893
- description: "DeepSeek V3.1 model."
1894
- },
1895
- "deepseek-ai/DeepSeek-V3.1-Terminus": {
1896
- maxTokens: 163840,
1897
- contextWindow: 163840,
1898
- supportsImages: false,
1899
- supportsPromptCache: false,
1900
- inputPrice: 0.23,
1901
- outputPrice: 0.9,
1902
- description: "DeepSeek\u2011V3.1\u2011Terminus is an update to V3.1 that improves language consistency by reducing CN/EN mix\u2011ups and eliminating random characters, while strengthening agent capabilities with notably better Code Agent and Search Agent performance."
1903
- },
1904
- "deepseek-ai/DeepSeek-V3.1-turbo": {
1905
- maxTokens: 32768,
1906
- contextWindow: 163840,
1907
- supportsImages: false,
1908
- supportsPromptCache: false,
1909
- inputPrice: 1,
1910
- outputPrice: 3,
1911
- description: "DeepSeek-V3.1-turbo is an FP8, speculative-decoding turbo variant optimized for ultra-fast single-shot queries (~200 TPS), with outputs close to the originals and solid function calling/reasoning/structured output, priced at $1/M input and $3/M output tokens, using 2\xD7 quota per request and not intended for bulk workloads."
1773
+ supportsReasoningBudget: true,
1774
+ inputPrice: 15,
1775
+ outputPrice: 75,
1776
+ cacheWritesPrice: 18.75,
1777
+ cacheReadsPrice: 1.5,
1778
+ minTokensPerCachePoint: 1024,
1779
+ maxCachePoints: 4,
1780
+ cachableFields: ["system", "messages", "tools"]
1912
1781
  },
1913
- "deepseek-ai/DeepSeek-V3.2-Exp": {
1914
- maxTokens: 163840,
1915
- contextWindow: 163840,
1916
- supportsImages: false,
1917
- supportsPromptCache: false,
1918
- inputPrice: 0.25,
1919
- outputPrice: 0.35,
1920
- description: "DeepSeek-V3.2-Exp is an experimental LLM that introduces DeepSeek Sparse Attention to improve long\u2011context training and inference efficiency while maintaining performance comparable to V3.1\u2011Terminus."
1782
+ "anthropic.claude-opus-4-6-v1": {
1783
+ maxTokens: 8192,
1784
+ contextWindow: 2e5,
1785
+ // Default 200K, extendable to 1M with beta flag 'context-1m-2025-08-07'
1786
+ supportsImages: true,
1787
+ supportsPromptCache: true,
1788
+ supportsReasoningBudget: true,
1789
+ inputPrice: 5,
1790
+ // $5 per million input tokens (≤200K context)
1791
+ outputPrice: 25,
1792
+ // $25 per million output tokens (≤200K context)
1793
+ cacheWritesPrice: 6.25,
1794
+ // $6.25 per million tokens
1795
+ cacheReadsPrice: 0.5,
1796
+ // $0.50 per million tokens
1797
+ minTokensPerCachePoint: 1024,
1798
+ maxCachePoints: 4,
1799
+ cachableFields: ["system", "messages", "tools"],
1800
+ // Tiered pricing for extended context (requires beta flag 'context-1m-2025-08-07')
1801
+ tiers: [
1802
+ {
1803
+ contextWindow: 1e6,
1804
+ // 1M tokens with beta flag
1805
+ inputPrice: 10,
1806
+ // $10 per million input tokens (>200K context)
1807
+ outputPrice: 37.5,
1808
+ // $37.50 per million output tokens (>200K context)
1809
+ cacheWritesPrice: 12.5,
1810
+ // $12.50 per million tokens (>200K context)
1811
+ cacheReadsPrice: 1
1812
+ // $1.00 per million tokens (>200K context)
1813
+ }
1814
+ ]
1921
1815
  },
1922
- "unsloth/Llama-3.3-70B-Instruct": {
1923
- maxTokens: 32768,
1924
- // From Groq
1925
- contextWindow: 131072,
1926
- // From Groq
1927
- supportsImages: false,
1928
- supportsPromptCache: false,
1929
- inputPrice: 0,
1930
- outputPrice: 0,
1931
- description: "Unsloth Llama 3.3 70B Instruct model."
1816
+ "anthropic.claude-opus-4-5-20251101-v1:0": {
1817
+ maxTokens: 8192,
1818
+ contextWindow: 2e5,
1819
+ supportsImages: true,
1820
+ supportsPromptCache: true,
1821
+ supportsReasoningBudget: true,
1822
+ inputPrice: 5,
1823
+ outputPrice: 25,
1824
+ cacheWritesPrice: 6.25,
1825
+ cacheReadsPrice: 0.5,
1826
+ minTokensPerCachePoint: 1024,
1827
+ maxCachePoints: 4,
1828
+ cachableFields: ["system", "messages", "tools"]
1932
1829
  },
1933
- "chutesai/Llama-4-Scout-17B-16E-Instruct": {
1934
- maxTokens: 32768,
1935
- contextWindow: 512e3,
1936
- supportsImages: false,
1937
- supportsPromptCache: false,
1938
- inputPrice: 0,
1939
- outputPrice: 0,
1940
- description: "ChutesAI Llama 4 Scout 17B Instruct model, 512K context."
1830
+ "anthropic.claude-opus-4-20250514-v1:0": {
1831
+ maxTokens: 8192,
1832
+ contextWindow: 2e5,
1833
+ supportsImages: true,
1834
+ supportsPromptCache: true,
1835
+ supportsReasoningBudget: true,
1836
+ inputPrice: 15,
1837
+ outputPrice: 75,
1838
+ cacheWritesPrice: 18.75,
1839
+ cacheReadsPrice: 1.5,
1840
+ minTokensPerCachePoint: 1024,
1841
+ maxCachePoints: 4,
1842
+ cachableFields: ["system", "messages", "tools"]
1941
1843
  },
1942
- "unsloth/Mistral-Nemo-Instruct-2407": {
1943
- maxTokens: 32768,
1944
- contextWindow: 128e3,
1945
- supportsImages: false,
1946
- supportsPromptCache: false,
1947
- inputPrice: 0,
1948
- outputPrice: 0,
1949
- description: "Unsloth Mistral Nemo Instruct model."
1844
+ "anthropic.claude-3-7-sonnet-20250219-v1:0": {
1845
+ maxTokens: 8192,
1846
+ contextWindow: 2e5,
1847
+ supportsImages: true,
1848
+ supportsPromptCache: true,
1849
+ supportsReasoningBudget: true,
1850
+ inputPrice: 3,
1851
+ outputPrice: 15,
1852
+ cacheWritesPrice: 3.75,
1853
+ cacheReadsPrice: 0.3,
1854
+ minTokensPerCachePoint: 1024,
1855
+ maxCachePoints: 4,
1856
+ cachableFields: ["system", "messages", "tools"]
1950
1857
  },
1951
- "unsloth/gemma-3-12b-it": {
1952
- maxTokens: 32768,
1953
- contextWindow: 131072,
1954
- supportsImages: false,
1955
- supportsPromptCache: false,
1956
- inputPrice: 0,
1957
- outputPrice: 0,
1958
- description: "Unsloth Gemma 3 12B IT model."
1858
+ "anthropic.claude-3-5-sonnet-20241022-v2:0": {
1859
+ maxTokens: 8192,
1860
+ contextWindow: 2e5,
1861
+ supportsImages: true,
1862
+ supportsPromptCache: true,
1863
+ inputPrice: 3,
1864
+ outputPrice: 15,
1865
+ cacheWritesPrice: 3.75,
1866
+ cacheReadsPrice: 0.3,
1867
+ minTokensPerCachePoint: 1024,
1868
+ maxCachePoints: 4,
1869
+ cachableFields: ["system", "messages", "tools"]
1959
1870
  },
1960
- "NousResearch/DeepHermes-3-Llama-3-8B-Preview": {
1961
- maxTokens: 32768,
1962
- contextWindow: 131072,
1871
+ "anthropic.claude-3-5-haiku-20241022-v1:0": {
1872
+ maxTokens: 8192,
1873
+ contextWindow: 2e5,
1963
1874
  supportsImages: false,
1964
- supportsPromptCache: false,
1965
- inputPrice: 0,
1966
- outputPrice: 0,
1967
- description: "Nous DeepHermes 3 Llama 3 8B Preview model."
1875
+ supportsPromptCache: true,
1876
+ inputPrice: 0.8,
1877
+ outputPrice: 4,
1878
+ cacheWritesPrice: 1,
1879
+ cacheReadsPrice: 0.08,
1880
+ minTokensPerCachePoint: 2048,
1881
+ maxCachePoints: 4,
1882
+ cachableFields: ["system", "messages", "tools"]
1968
1883
  },
1969
- "unsloth/gemma-3-4b-it": {
1970
- maxTokens: 32768,
1971
- contextWindow: 131072,
1972
- supportsImages: false,
1973
- supportsPromptCache: false,
1974
- inputPrice: 0,
1975
- outputPrice: 0,
1976
- description: "Unsloth Gemma 3 4B IT model."
1884
+ "anthropic.claude-haiku-4-5-20251001-v1:0": {
1885
+ maxTokens: 8192,
1886
+ contextWindow: 2e5,
1887
+ supportsImages: true,
1888
+ supportsPromptCache: true,
1889
+ supportsReasoningBudget: true,
1890
+ inputPrice: 1,
1891
+ outputPrice: 5,
1892
+ cacheWritesPrice: 1.25,
1893
+ // 5m cache writes
1894
+ cacheReadsPrice: 0.1,
1895
+ // cache hits / refreshes
1896
+ minTokensPerCachePoint: 2048,
1897
+ maxCachePoints: 4,
1898
+ cachableFields: ["system", "messages", "tools"]
1977
1899
  },
1978
- "nvidia/Llama-3_3-Nemotron-Super-49B-v1": {
1979
- maxTokens: 32768,
1980
- contextWindow: 131072,
1981
- supportsImages: false,
1900
+ "anthropic.claude-3-5-sonnet-20240620-v1:0": {
1901
+ maxTokens: 8192,
1902
+ contextWindow: 2e5,
1903
+ supportsImages: true,
1982
1904
  supportsPromptCache: false,
1983
- inputPrice: 0,
1984
- outputPrice: 0,
1985
- description: "Nvidia Llama 3.3 Nemotron Super 49B model."
1905
+ inputPrice: 3,
1906
+ outputPrice: 15
1986
1907
  },
1987
- "nvidia/Llama-3_1-Nemotron-Ultra-253B-v1": {
1988
- maxTokens: 32768,
1989
- contextWindow: 131072,
1990
- supportsImages: false,
1908
+ "anthropic.claude-3-opus-20240229-v1:0": {
1909
+ maxTokens: 4096,
1910
+ contextWindow: 2e5,
1911
+ supportsImages: true,
1991
1912
  supportsPromptCache: false,
1992
- inputPrice: 0,
1993
- outputPrice: 0,
1994
- description: "Nvidia Llama 3.1 Nemotron Ultra 253B model."
1913
+ inputPrice: 15,
1914
+ outputPrice: 75
1995
1915
  },
1996
- "chutesai/Llama-4-Maverick-17B-128E-Instruct-FP8": {
1997
- maxTokens: 32768,
1998
- contextWindow: 256e3,
1999
- supportsImages: false,
1916
+ "anthropic.claude-3-sonnet-20240229-v1:0": {
1917
+ maxTokens: 4096,
1918
+ contextWindow: 2e5,
1919
+ supportsImages: true,
2000
1920
  supportsPromptCache: false,
2001
- inputPrice: 0,
2002
- outputPrice: 0,
2003
- description: "ChutesAI Llama 4 Maverick 17B Instruct FP8 model."
1921
+ inputPrice: 3,
1922
+ outputPrice: 15
2004
1923
  },
2005
- "deepseek-ai/DeepSeek-V3-Base": {
2006
- maxTokens: 32768,
2007
- contextWindow: 163840,
2008
- supportsImages: false,
1924
+ "anthropic.claude-3-haiku-20240307-v1:0": {
1925
+ maxTokens: 4096,
1926
+ contextWindow: 2e5,
1927
+ supportsImages: true,
2009
1928
  supportsPromptCache: false,
2010
- inputPrice: 0,
2011
- outputPrice: 0,
2012
- description: "DeepSeek V3 Base model."
1929
+ inputPrice: 0.25,
1930
+ outputPrice: 1.25
2013
1931
  },
2014
- "deepseek-ai/DeepSeek-R1-Zero": {
1932
+ "deepseek.r1-v1:0": {
2015
1933
  maxTokens: 32768,
2016
- contextWindow: 163840,
1934
+ contextWindow: 128e3,
2017
1935
  supportsImages: false,
2018
1936
  supportsPromptCache: false,
2019
- inputPrice: 0,
2020
- outputPrice: 0,
2021
- description: "DeepSeek R1 Zero model."
1937
+ inputPrice: 1.35,
1938
+ outputPrice: 5.4
2022
1939
  },
2023
- "deepseek-ai/DeepSeek-V3-0324": {
2024
- maxTokens: 32768,
2025
- contextWindow: 163840,
1940
+ "openai.gpt-oss-20b-1:0": {
1941
+ maxTokens: 8192,
1942
+ contextWindow: 128e3,
2026
1943
  supportsImages: false,
2027
1944
  supportsPromptCache: false,
2028
- inputPrice: 0,
2029
- outputPrice: 0,
2030
- description: "DeepSeek V3 (0324) model."
1945
+ inputPrice: 0.5,
1946
+ outputPrice: 1.5,
1947
+ description: "GPT-OSS 20B - Optimized for low latency and local/specialized use cases"
2031
1948
  },
2032
- "Qwen/Qwen3-235B-A22B-Instruct-2507": {
2033
- maxTokens: 32768,
2034
- contextWindow: 262144,
1949
+ "openai.gpt-oss-120b-1:0": {
1950
+ maxTokens: 8192,
1951
+ contextWindow: 128e3,
2035
1952
  supportsImages: false,
2036
1953
  supportsPromptCache: false,
2037
- inputPrice: 0,
2038
- outputPrice: 0,
2039
- description: "Qwen3 235B A22B Instruct 2507 model with 262K context window."
1954
+ inputPrice: 2,
1955
+ outputPrice: 6,
1956
+ description: "GPT-OSS 120B - Production-ready, general-purpose, high-reasoning model"
2040
1957
  },
2041
- "Qwen/Qwen3-235B-A22B": {
2042
- maxTokens: 32768,
2043
- contextWindow: 40960,
1958
+ "meta.llama3-3-70b-instruct-v1:0": {
1959
+ maxTokens: 8192,
1960
+ contextWindow: 128e3,
2044
1961
  supportsImages: false,
2045
1962
  supportsPromptCache: false,
2046
- inputPrice: 0,
2047
- outputPrice: 0,
2048
- description: "Qwen3 235B A22B model."
1963
+ inputPrice: 0.72,
1964
+ outputPrice: 0.72,
1965
+ description: "Llama 3.3 Instruct (70B)"
2049
1966
  },
2050
- "Qwen/Qwen3-32B": {
2051
- maxTokens: 32768,
2052
- contextWindow: 40960,
2053
- supportsImages: false,
1967
+ "meta.llama3-2-90b-instruct-v1:0": {
1968
+ maxTokens: 8192,
1969
+ contextWindow: 128e3,
1970
+ supportsImages: true,
2054
1971
  supportsPromptCache: false,
2055
- inputPrice: 0,
2056
- outputPrice: 0,
2057
- description: "Qwen3 32B model."
1972
+ inputPrice: 0.72,
1973
+ outputPrice: 0.72,
1974
+ description: "Llama 3.2 Instruct (90B)"
2058
1975
  },
2059
- "Qwen/Qwen3-30B-A3B": {
2060
- maxTokens: 32768,
2061
- contextWindow: 40960,
2062
- supportsImages: false,
1976
+ "meta.llama3-2-11b-instruct-v1:0": {
1977
+ maxTokens: 8192,
1978
+ contextWindow: 128e3,
1979
+ supportsImages: true,
2063
1980
  supportsPromptCache: false,
2064
- inputPrice: 0,
2065
- outputPrice: 0,
2066
- description: "Qwen3 30B A3B model."
1981
+ inputPrice: 0.16,
1982
+ outputPrice: 0.16,
1983
+ description: "Llama 3.2 Instruct (11B)"
2067
1984
  },
2068
- "Qwen/Qwen3-14B": {
2069
- maxTokens: 32768,
2070
- contextWindow: 40960,
1985
+ "meta.llama3-2-3b-instruct-v1:0": {
1986
+ maxTokens: 8192,
1987
+ contextWindow: 128e3,
2071
1988
  supportsImages: false,
2072
1989
  supportsPromptCache: false,
2073
- inputPrice: 0,
2074
- outputPrice: 0,
2075
- description: "Qwen3 14B model."
1990
+ inputPrice: 0.15,
1991
+ outputPrice: 0.15,
1992
+ description: "Llama 3.2 Instruct (3B)"
2076
1993
  },
2077
- "Qwen/Qwen3-8B": {
2078
- maxTokens: 32768,
2079
- contextWindow: 40960,
1994
+ "meta.llama3-2-1b-instruct-v1:0": {
1995
+ maxTokens: 8192,
1996
+ contextWindow: 128e3,
2080
1997
  supportsImages: false,
2081
1998
  supportsPromptCache: false,
2082
- inputPrice: 0,
2083
- outputPrice: 0,
2084
- description: "Qwen3 8B model."
1999
+ inputPrice: 0.1,
2000
+ outputPrice: 0.1,
2001
+ description: "Llama 3.2 Instruct (1B)"
2085
2002
  },
2086
- "microsoft/MAI-DS-R1-FP8": {
2087
- maxTokens: 32768,
2088
- contextWindow: 163840,
2003
+ "meta.llama3-1-405b-instruct-v1:0": {
2004
+ maxTokens: 8192,
2005
+ contextWindow: 128e3,
2089
2006
  supportsImages: false,
2090
2007
  supportsPromptCache: false,
2091
- inputPrice: 0,
2092
- outputPrice: 0,
2093
- description: "Microsoft MAI-DS-R1 FP8 model."
2008
+ inputPrice: 2.4,
2009
+ outputPrice: 2.4,
2010
+ description: "Llama 3.1 Instruct (405B)"
2094
2011
  },
2095
- "tngtech/DeepSeek-R1T-Chimera": {
2096
- maxTokens: 32768,
2097
- contextWindow: 163840,
2012
+ "meta.llama3-1-70b-instruct-v1:0": {
2013
+ maxTokens: 8192,
2014
+ contextWindow: 128e3,
2098
2015
  supportsImages: false,
2099
2016
  supportsPromptCache: false,
2100
- inputPrice: 0,
2101
- outputPrice: 0,
2102
- description: "TNGTech DeepSeek R1T Chimera model."
2017
+ inputPrice: 0.72,
2018
+ outputPrice: 0.72,
2019
+ description: "Llama 3.1 Instruct (70B)"
2103
2020
  },
2104
- "zai-org/GLM-4.5-Air": {
2105
- maxTokens: 32768,
2106
- contextWindow: 151329,
2021
+ "meta.llama3-1-70b-instruct-latency-optimized-v1:0": {
2022
+ maxTokens: 8192,
2023
+ contextWindow: 128e3,
2107
2024
  supportsImages: false,
2108
2025
  supportsPromptCache: false,
2109
- inputPrice: 0,
2110
- outputPrice: 0,
2111
- description: "GLM-4.5-Air model with 151,329 token context window and 106B total parameters with 12B activated."
2026
+ inputPrice: 0.9,
2027
+ outputPrice: 0.9,
2028
+ description: "Llama 3.1 Instruct (70B) (w/ latency optimized inference)"
2112
2029
  },
2113
- "zai-org/GLM-4.5-FP8": {
2114
- maxTokens: 32768,
2115
- contextWindow: 131072,
2030
+ "meta.llama3-1-8b-instruct-v1:0": {
2031
+ maxTokens: 8192,
2032
+ contextWindow: 8e3,
2116
2033
  supportsImages: false,
2117
2034
  supportsPromptCache: false,
2118
- inputPrice: 0,
2119
- outputPrice: 0,
2120
- description: "GLM-4.5-FP8 model with 128k token context window, optimized for agent-based applications with MoE architecture."
2035
+ inputPrice: 0.22,
2036
+ outputPrice: 0.22,
2037
+ description: "Llama 3.1 Instruct (8B)"
2121
2038
  },
2122
- "zai-org/GLM-4.5-turbo": {
2123
- maxTokens: 32768,
2124
- contextWindow: 131072,
2039
+ "meta.llama3-70b-instruct-v1:0": {
2040
+ maxTokens: 2048,
2041
+ contextWindow: 8e3,
2125
2042
  supportsImages: false,
2126
2043
  supportsPromptCache: false,
2127
- inputPrice: 1,
2128
- outputPrice: 3,
2129
- description: "GLM-4.5-turbo model with 128K token context window, optimized for fast inference."
2044
+ inputPrice: 2.65,
2045
+ outputPrice: 3.5
2130
2046
  },
2131
- "zai-org/GLM-4.6-FP8": {
2132
- maxTokens: 32768,
2133
- contextWindow: 202752,
2047
+ "meta.llama3-8b-instruct-v1:0": {
2048
+ maxTokens: 2048,
2049
+ contextWindow: 4e3,
2134
2050
  supportsImages: false,
2135
2051
  supportsPromptCache: false,
2136
- inputPrice: 0,
2137
- outputPrice: 0,
2138
- description: "GLM-4.6 introduces major upgrades over GLM-4.5, including a longer 200K-token context window for complex tasks, stronger coding performance in benchmarks and real-world tools (such as Claude Code, Cline, Roo Code, and Kilo Code), improved reasoning with tool use during inference, more capable and efficient agent integration, and refined writing that better matches human style, readability, and natural role-play scenarios."
2052
+ inputPrice: 0.3,
2053
+ outputPrice: 0.6
2139
2054
  },
2140
- "zai-org/GLM-4.6-turbo": {
2141
- maxTokens: 202752,
2142
- // From Chutes /v1/models: max_output_length
2143
- contextWindow: 202752,
2055
+ "amazon.titan-text-lite-v1:0": {
2056
+ maxTokens: 4096,
2057
+ contextWindow: 8e3,
2144
2058
  supportsImages: false,
2145
2059
  supportsPromptCache: false,
2146
- inputPrice: 1.15,
2147
- outputPrice: 3.25,
2148
- description: "GLM-4.6-turbo model with 200K-token context window, optimized for fast inference."
2060
+ inputPrice: 0.15,
2061
+ outputPrice: 0.2,
2062
+ description: "Amazon Titan Text Lite"
2149
2063
  },
2150
- "meituan-longcat/LongCat-Flash-Thinking-FP8": {
2151
- maxTokens: 32768,
2152
- contextWindow: 128e3,
2064
+ "amazon.titan-text-express-v1:0": {
2065
+ maxTokens: 4096,
2066
+ contextWindow: 8e3,
2153
2067
  supportsImages: false,
2154
2068
  supportsPromptCache: false,
2155
- inputPrice: 0,
2156
- outputPrice: 0,
2157
- description: "LongCat Flash Thinking FP8 model with 128K context window, optimized for complex reasoning and coding tasks."
2069
+ inputPrice: 0.2,
2070
+ outputPrice: 0.6,
2071
+ description: "Amazon Titan Text Express"
2158
2072
  },
2159
- "Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8": {
2160
- maxTokens: 32768,
2073
+ "moonshot.kimi-k2-thinking": {
2074
+ maxTokens: 32e3,
2161
2075
  contextWindow: 262144,
2162
2076
  supportsImages: false,
2163
2077
  supportsPromptCache: false,
2164
- inputPrice: 0,
2165
- outputPrice: 0,
2166
- description: "Qwen3 Coder 480B A35B Instruct FP8 model, optimized for coding tasks."
2078
+ preserveReasoning: true,
2079
+ inputPrice: 0.6,
2080
+ outputPrice: 2.5,
2081
+ description: "Kimi K2 Thinking (1T parameter MoE model with 32B active parameters)"
2167
2082
  },
2168
- "moonshotai/Kimi-K2-Instruct-75k": {
2169
- maxTokens: 32768,
2170
- contextWindow: 75e3,
2083
+ "minimax.minimax-m2": {
2084
+ maxTokens: 16384,
2085
+ contextWindow: 196608,
2171
2086
  supportsImages: false,
2172
2087
  supportsPromptCache: false,
2173
- inputPrice: 0.1481,
2174
- outputPrice: 0.5926,
2175
- description: "Moonshot AI Kimi K2 Instruct model with 75k context window."
2088
+ preserveReasoning: true,
2089
+ inputPrice: 0.3,
2090
+ outputPrice: 1.2,
2091
+ description: "MiniMax M2 (230B parameter MoE model with 10B active parameters)"
2176
2092
  },
2177
- "moonshotai/Kimi-K2-Instruct-0905": {
2178
- maxTokens: 32768,
2093
+ "qwen.qwen3-next-80b-a3b": {
2094
+ maxTokens: 8192,
2179
2095
  contextWindow: 262144,
2180
2096
  supportsImages: false,
2181
2097
  supportsPromptCache: false,
2182
- inputPrice: 0.1999,
2183
- outputPrice: 0.8001,
2184
- description: "Moonshot AI Kimi K2 Instruct 0905 model with 256k context window."
2098
+ inputPrice: 0.15,
2099
+ outputPrice: 1.2,
2100
+ description: "Qwen3 Next 80B (MoE model with 3B active parameters)"
2185
2101
  },
2186
- "Qwen/Qwen3-235B-A22B-Thinking-2507": {
2187
- maxTokens: 32768,
2102
+ "qwen.qwen3-coder-480b-a35b-v1:0": {
2103
+ maxTokens: 8192,
2188
2104
  contextWindow: 262144,
2189
2105
  supportsImages: false,
2190
2106
  supportsPromptCache: false,
2191
- inputPrice: 0.077968332,
2192
- outputPrice: 0.31202496,
2193
- description: "Qwen3 235B A22B Thinking 2507 model with 262K context window."
2194
- },
2195
- "Qwen/Qwen3-Next-80B-A3B-Instruct": {
2196
- maxTokens: 32768,
2197
- contextWindow: 131072,
2198
- supportsImages: false,
2199
- supportsPromptCache: false,
2200
- inputPrice: 0,
2201
- outputPrice: 0,
2202
- description: "Fast, stable instruction-tuned model optimized for complex tasks, RAG, and tool use without thinking traces."
2203
- },
2204
- "Qwen/Qwen3-Next-80B-A3B-Thinking": {
2205
- maxTokens: 32768,
2206
- contextWindow: 131072,
2207
- supportsImages: false,
2208
- supportsPromptCache: false,
2209
- inputPrice: 0,
2210
- outputPrice: 0,
2211
- description: "Reasoning-first model with structured thinking traces for multi-step problems, math proofs, and code synthesis."
2212
- },
2213
- "Qwen/Qwen3-VL-235B-A22B-Thinking": {
2214
- maxTokens: 262144,
2215
- contextWindow: 262144,
2216
- supportsImages: true,
2217
- supportsPromptCache: false,
2218
- inputPrice: 0.16,
2219
- outputPrice: 0.65,
2220
- description: "Qwen3\u2011VL\u2011235B\u2011A22B\u2011Thinking is an open\u2011weight MoE vision\u2011language model (235B total, ~22B activated) optimized for deliberate multi\u2011step reasoning with strong text\u2011image\u2011video understanding and long\u2011context capabilities."
2107
+ inputPrice: 0.45,
2108
+ outputPrice: 1.8,
2109
+ description: "Qwen3 Coder 480B (MoE model with 35B active parameters)"
2221
2110
  }
2222
2111
  };
2223
- var chutesDefaultModelInfo = chutesModels[chutesDefaultModelId];
2112
+ var BEDROCK_DEFAULT_TEMPERATURE = 0.3;
2113
+ var BEDROCK_MAX_TOKENS = 4096;
2114
+ var BEDROCK_DEFAULT_CONTEXT = 128e3;
2115
+ var AWS_INFERENCE_PROFILE_MAPPING = [
2116
+ // Australia regions (Sydney and Melbourne) → au. inference profile (most specific - 14 chars)
2117
+ ["ap-southeast-2", "au."],
2118
+ ["ap-southeast-4", "au."],
2119
+ // Japan regions (Tokyo and Osaka) → jp. inference profile (13 chars)
2120
+ ["ap-northeast-", "jp."],
2121
+ // US Government Cloud → ug. inference profile (7 chars)
2122
+ ["us-gov-", "ug."],
2123
+ // Americas regions → us. inference profile (3 chars)
2124
+ ["us-", "us."],
2125
+ // Europe regions → eu. inference profile (3 chars)
2126
+ ["eu-", "eu."],
2127
+ // Asia Pacific regions → apac. inference profile (3 chars)
2128
+ ["ap-", "apac."],
2129
+ // Canada regions → ca. inference profile (3 chars)
2130
+ ["ca-", "ca."],
2131
+ // South America regions → sa. inference profile (3 chars)
2132
+ ["sa-", "sa."]
2133
+ ];
2134
+ var BEDROCK_REGIONS = [
2135
+ { value: "us-east-1", label: "us-east-1" },
2136
+ { value: "us-east-2", label: "us-east-2" },
2137
+ { value: "us-west-1", label: "us-west-1" },
2138
+ { value: "us-west-2", label: "us-west-2" },
2139
+ { value: "ap-northeast-1", label: "ap-northeast-1" },
2140
+ { value: "ap-northeast-2", label: "ap-northeast-2" },
2141
+ { value: "ap-northeast-3", label: "ap-northeast-3" },
2142
+ { value: "ap-south-1", label: "ap-south-1" },
2143
+ { value: "ap-south-2", label: "ap-south-2" },
2144
+ { value: "ap-southeast-1", label: "ap-southeast-1" },
2145
+ { value: "ap-southeast-2", label: "ap-southeast-2" },
2146
+ { value: "ap-east-1", label: "ap-east-1" },
2147
+ { value: "eu-central-1", label: "eu-central-1" },
2148
+ { value: "eu-central-2", label: "eu-central-2" },
2149
+ { value: "eu-west-1", label: "eu-west-1" },
2150
+ { value: "eu-west-2", label: "eu-west-2" },
2151
+ { value: "eu-west-3", label: "eu-west-3" },
2152
+ { value: "eu-north-1", label: "eu-north-1" },
2153
+ { value: "eu-south-1", label: "eu-south-1" },
2154
+ { value: "eu-south-2", label: "eu-south-2" },
2155
+ { value: "ca-central-1", label: "ca-central-1" },
2156
+ { value: "sa-east-1", label: "sa-east-1" },
2157
+ { value: "us-gov-east-1", label: "us-gov-east-1" },
2158
+ { value: "us-gov-west-1", label: "us-gov-west-1" }
2159
+ ].sort((a, b) => a.value.localeCompare(b.value));
2160
+ var BEDROCK_1M_CONTEXT_MODEL_IDS = [
2161
+ "anthropic.claude-sonnet-4-20250514-v1:0",
2162
+ "anthropic.claude-sonnet-4-5-20250929-v1:0",
2163
+ "anthropic.claude-opus-4-6-v1"
2164
+ ];
2165
+ var BEDROCK_GLOBAL_INFERENCE_MODEL_IDS = [
2166
+ "anthropic.claude-sonnet-4-20250514-v1:0",
2167
+ "anthropic.claude-sonnet-4-5-20250929-v1:0",
2168
+ "anthropic.claude-haiku-4-5-20251001-v1:0",
2169
+ "anthropic.claude-opus-4-5-20251101-v1:0",
2170
+ "anthropic.claude-opus-4-6-v1"
2171
+ ];
2172
+ var BEDROCK_SERVICE_TIER_MODEL_IDS = [
2173
+ // Amazon Nova models
2174
+ "amazon.nova-lite-v1:0",
2175
+ "amazon.nova-2-lite-v1:0",
2176
+ "amazon.nova-pro-v1:0",
2177
+ "amazon.nova-pro-latency-optimized-v1:0",
2178
+ // DeepSeek models
2179
+ "deepseek.r1-v1:0",
2180
+ // Qwen models
2181
+ "qwen.qwen3-next-80b-a3b",
2182
+ "qwen.qwen3-coder-480b-a35b-v1:0",
2183
+ // OpenAI GPT-OSS models
2184
+ "openai.gpt-oss-20b-1:0",
2185
+ "openai.gpt-oss-120b-1:0"
2186
+ ];
2187
+ var BEDROCK_SERVICE_TIER_PRICING = {
2188
+ STANDARD: 1,
2189
+ // Base price
2190
+ FLEX: 0.5,
2191
+ // 50% discount from standard
2192
+ PRIORITY: 1.75
2193
+ // 75% premium over standard
2194
+ };
2224
2195
 
2225
2196
  // src/providers/deepseek.ts
2226
2197
  var deepSeekDefaultModelId = "deepseek-chat";
@@ -2261,109 +2232,6 @@ var deepSeekModels = {
2261
2232
  };
2262
2233
  var DEEP_SEEK_DEFAULT_TEMPERATURE = 0.3;
2263
2234
 
2264
- // src/providers/doubao.ts
2265
- var doubaoDefaultModelId = "doubao-seed-1-6-250615";
2266
- var doubaoModels = {
2267
- "doubao-seed-1-6-250615": {
2268
- maxTokens: 32768,
2269
- contextWindow: 128e3,
2270
- supportsImages: true,
2271
- supportsPromptCache: true,
2272
- inputPrice: 1e-4,
2273
- // $0.0001 per million tokens (cache miss)
2274
- outputPrice: 4e-4,
2275
- // $0.0004 per million tokens
2276
- cacheWritesPrice: 1e-4,
2277
- // $0.0001 per million tokens (cache miss)
2278
- cacheReadsPrice: 2e-5,
2279
- // $0.00002 per million tokens (cache hit)
2280
- description: `Doubao Seed 1.6 is a powerful model designed for high-performance tasks with extensive context handling.`
2281
- },
2282
- "doubao-seed-1-6-thinking-250715": {
2283
- maxTokens: 32768,
2284
- contextWindow: 128e3,
2285
- supportsImages: true,
2286
- supportsPromptCache: true,
2287
- inputPrice: 2e-4,
2288
- // $0.0002 per million tokens
2289
- outputPrice: 8e-4,
2290
- // $0.0008 per million tokens
2291
- cacheWritesPrice: 2e-4,
2292
- // $0.0002 per million
2293
- cacheReadsPrice: 4e-5,
2294
- // $0.00004 per million tokens (cache hit)
2295
- description: `Doubao Seed 1.6 Thinking is optimized for reasoning tasks, providing enhanced performance in complex problem-solving scenarios.`
2296
- },
2297
- "doubao-seed-1-6-flash-250715": {
2298
- maxTokens: 32768,
2299
- contextWindow: 128e3,
2300
- supportsImages: true,
2301
- supportsPromptCache: true,
2302
- inputPrice: 15e-5,
2303
- // $0.00015 per million tokens
2304
- outputPrice: 6e-4,
2305
- // $0.0006 per million tokens
2306
- cacheWritesPrice: 15e-5,
2307
- // $0.00015 per million
2308
- cacheReadsPrice: 3e-5,
2309
- // $0.00003 per million tokens (cache hit)
2310
- description: `Doubao Seed 1.6 Flash is tailored for speed and efficiency, making it ideal for applications requiring rapid responses.`
2311
- }
2312
- };
2313
- var doubaoDefaultModelInfo = doubaoModels[doubaoDefaultModelId];
2314
- var DOUBAO_API_BASE_URL = "https://ark.cn-beijing.volces.com/api/v3";
2315
- var DOUBAO_API_CHAT_PATH = "/chat/completions";
2316
-
2317
- // src/providers/featherless.ts
2318
- var featherlessModels = {
2319
- "deepseek-ai/DeepSeek-V3-0324": {
2320
- maxTokens: 4096,
2321
- contextWindow: 32678,
2322
- supportsImages: false,
2323
- supportsPromptCache: false,
2324
- inputPrice: 0,
2325
- outputPrice: 0,
2326
- description: "DeepSeek V3 0324 model."
2327
- },
2328
- "deepseek-ai/DeepSeek-R1-0528": {
2329
- maxTokens: 4096,
2330
- contextWindow: 32678,
2331
- supportsImages: false,
2332
- supportsPromptCache: false,
2333
- inputPrice: 0,
2334
- outputPrice: 0,
2335
- description: "DeepSeek R1 0528 model."
2336
- },
2337
- "moonshotai/Kimi-K2-Instruct": {
2338
- maxTokens: 4096,
2339
- contextWindow: 32678,
2340
- supportsImages: false,
2341
- supportsPromptCache: false,
2342
- inputPrice: 0,
2343
- outputPrice: 0,
2344
- description: "Kimi K2 Instruct model."
2345
- },
2346
- "openai/gpt-oss-120b": {
2347
- maxTokens: 4096,
2348
- contextWindow: 32678,
2349
- supportsImages: false,
2350
- supportsPromptCache: false,
2351
- inputPrice: 0,
2352
- outputPrice: 0,
2353
- description: "GPT-OSS 120B model."
2354
- },
2355
- "Qwen/Qwen3-Coder-480B-A35B-Instruct": {
2356
- maxTokens: 4096,
2357
- contextWindow: 32678,
2358
- supportsImages: false,
2359
- supportsPromptCache: false,
2360
- inputPrice: 0,
2361
- outputPrice: 0,
2362
- description: "Qwen3 Coder 480B A35B Instruct model."
2363
- }
2364
- };
2365
- var featherlessDefaultModelId = "moonshotai/Kimi-K2-Instruct";
2366
-
2367
2235
  // src/providers/fireworks.ts
2368
2236
  var fireworksDefaultModelId = "accounts/fireworks/models/kimi-k2-instruct-0905";
2369
2237
  var fireworksModels = {
@@ -2779,121 +2647,6 @@ var geminiModels = {
2779
2647
  }
2780
2648
  };
2781
2649
 
2782
- // src/providers/groq.ts
2783
- var groqDefaultModelId = "moonshotai/kimi-k2-instruct-0905";
2784
- var groqModels = {
2785
- // Models based on API response: https://api.groq.com/openai/v1/models
2786
- "llama-3.1-8b-instant": {
2787
- maxTokens: 8192,
2788
- contextWindow: 131072,
2789
- supportsImages: false,
2790
- supportsPromptCache: false,
2791
- inputPrice: 0.05,
2792
- outputPrice: 0.08,
2793
- description: "Meta Llama 3.1 8B Instant model, 128K context."
2794
- },
2795
- "llama-3.3-70b-versatile": {
2796
- maxTokens: 8192,
2797
- contextWindow: 131072,
2798
- supportsImages: false,
2799
- supportsPromptCache: false,
2800
- inputPrice: 0.59,
2801
- outputPrice: 0.79,
2802
- description: "Meta Llama 3.3 70B Versatile model, 128K context."
2803
- },
2804
- "meta-llama/llama-4-scout-17b-16e-instruct": {
2805
- maxTokens: 8192,
2806
- contextWindow: 131072,
2807
- supportsImages: false,
2808
- supportsPromptCache: false,
2809
- inputPrice: 0.11,
2810
- outputPrice: 0.34,
2811
- description: "Meta Llama 4 Scout 17B Instruct model, 128K context."
2812
- },
2813
- "qwen/qwen3-32b": {
2814
- maxTokens: 8192,
2815
- contextWindow: 131072,
2816
- supportsImages: false,
2817
- supportsPromptCache: false,
2818
- inputPrice: 0.29,
2819
- outputPrice: 0.59,
2820
- description: "Alibaba Qwen 3 32B model, 128K context."
2821
- },
2822
- "moonshotai/kimi-k2-instruct-0905": {
2823
- maxTokens: 16384,
2824
- contextWindow: 262144,
2825
- supportsImages: false,
2826
- supportsPromptCache: true,
2827
- inputPrice: 0.6,
2828
- outputPrice: 2.5,
2829
- cacheReadsPrice: 0.15,
2830
- description: "Kimi K2 model gets a new version update: Agentic coding: more accurate, better generalization across scaffolds. Frontend coding: improved aesthetics and functionalities on web, 3d, and other tasks. Context length: extended from 128k to 256k, providing better long-horizon support."
2831
- },
2832
- "openai/gpt-oss-120b": {
2833
- maxTokens: 32766,
2834
- contextWindow: 131072,
2835
- supportsImages: false,
2836
- supportsPromptCache: false,
2837
- inputPrice: 0.15,
2838
- outputPrice: 0.75,
2839
- description: "GPT-OSS 120B is OpenAI's flagship open source model, built on a Mixture-of-Experts (MoE) architecture with 20 billion parameters and 128 experts."
2840
- },
2841
- "openai/gpt-oss-20b": {
2842
- maxTokens: 32768,
2843
- contextWindow: 131072,
2844
- supportsImages: false,
2845
- supportsPromptCache: false,
2846
- inputPrice: 0.1,
2847
- outputPrice: 0.5,
2848
- description: "GPT-OSS 20B is OpenAI's flagship open source model, built on a Mixture-of-Experts (MoE) architecture with 20 billion parameters and 32 experts."
2849
- }
2850
- };
2851
-
2852
- // src/providers/huggingface.ts
2853
- var HUGGINGFACE_DEFAULT_MAX_TOKENS = 2048;
2854
- var HUGGINGFACE_MAX_TOKENS_FALLBACK = 8192;
2855
- var HUGGINGFACE_DEFAULT_CONTEXT_WINDOW = 128e3;
2856
- var HUGGINGFACE_SLIDER_STEP = 256;
2857
- var HUGGINGFACE_SLIDER_MIN = 1;
2858
- var HUGGINGFACE_TEMPERATURE_MAX_VALUE = 2;
2859
- var HUGGINGFACE_API_URL = "https://router.huggingface.co/v1/models?collection=roocode";
2860
- var HUGGINGFACE_CACHE_DURATION = 1e3 * 60 * 60;
2861
-
2862
- // src/providers/io-intelligence.ts
2863
- var ioIntelligenceDefaultModelId = "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8";
2864
- var ioIntelligenceDefaultBaseUrl = "https://api.intelligence.io.solutions/api/v1";
2865
- var IO_INTELLIGENCE_CACHE_DURATION = 1e3 * 60 * 60;
2866
- var ioIntelligenceModels = {
2867
- "deepseek-ai/DeepSeek-R1-0528": {
2868
- maxTokens: 8192,
2869
- contextWindow: 128e3,
2870
- supportsImages: false,
2871
- supportsPromptCache: false,
2872
- description: "DeepSeek R1 reasoning model"
2873
- },
2874
- "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8": {
2875
- maxTokens: 8192,
2876
- contextWindow: 43e4,
2877
- supportsImages: true,
2878
- supportsPromptCache: false,
2879
- description: "Llama 4 Maverick 17B model"
2880
- },
2881
- "Intel/Qwen3-Coder-480B-A35B-Instruct-int4-mixed-ar": {
2882
- maxTokens: 8192,
2883
- contextWindow: 106e3,
2884
- supportsImages: false,
2885
- supportsPromptCache: false,
2886
- description: "Qwen3 Coder 480B specialized for coding"
2887
- },
2888
- "openai/gpt-oss-120b": {
2889
- maxTokens: 8192,
2890
- contextWindow: 131072,
2891
- supportsImages: false,
2892
- supportsPromptCache: false,
2893
- description: "OpenAI GPT-OSS 120B model"
2894
- }
2895
- };
2896
-
2897
2650
  // src/providers/lite-llm.ts
2898
2651
  var litellmDefaultModelId = "claude-3-7-sonnet-20250219";
2899
2652
  var litellmDefaultModelInfo = {
@@ -3595,7 +3348,7 @@ var openAiModelInfoSaneDefaults = {
3595
3348
  inputPrice: 0,
3596
3349
  outputPrice: 0
3597
3350
  };
3598
- var azureOpenAiDefaultApiVersion = "2024-08-01-preview";
3351
+ var azureOpenAiDefaultApiVersion = "2025-04-01-preview";
3599
3352
  var OPENAI_NATIVE_DEFAULT_TEMPERATURE = 0;
3600
3353
  var OPENAI_AZURE_AI_INFERENCE_PATH = "/models/chat/completions";
3601
3354
 
@@ -3985,19 +3738,6 @@ var sambaNovaModels = {
3985
3738
  }
3986
3739
  };
3987
3740
 
3988
- // src/providers/unbound.ts
3989
- var unboundDefaultModelId = "anthropic/claude-sonnet-4-5";
3990
- var unboundDefaultModelInfo = {
3991
- maxTokens: 8192,
3992
- contextWindow: 2e5,
3993
- supportsImages: true,
3994
- supportsPromptCache: true,
3995
- inputPrice: 3,
3996
- outputPrice: 15,
3997
- cacheWritesPrice: 3.75,
3998
- cacheReadsPrice: 0.3
3999
- };
4000
-
4001
3741
  // src/providers/vertex.ts
4002
3742
  var vertexDefaultModelId = "claude-sonnet-4-5@20250929";
4003
3743
  var vertexModels = {
@@ -5264,18 +5004,6 @@ var zaiApiLineConfigs = {
5264
5004
  }
5265
5005
  };
5266
5006
 
5267
- // src/providers/deepinfra.ts
5268
- var deepInfraDefaultModelId = "Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo";
5269
- var deepInfraDefaultModelInfo = {
5270
- maxTokens: 16384,
5271
- contextWindow: 262144,
5272
- supportsImages: false,
5273
- supportsPromptCache: false,
5274
- inputPrice: 0.3,
5275
- outputPrice: 1.2,
5276
- description: "Qwen 3 Coder 480B A35B Instruct Turbo model, 256K context."
5277
- };
5278
-
5279
5007
  // src/providers/minimax.ts
5280
5008
  var minimaxDefaultModelId = "MiniMax-M2";
5281
5009
  var minimaxModels = {
@@ -5333,18 +5061,10 @@ function getProviderDefaultModelId(provider, options = { isChina: false }) {
5333
5061
  return openRouterDefaultModelId;
5334
5062
  case "requesty":
5335
5063
  return requestyDefaultModelId;
5336
- case "unbound":
5337
- return unboundDefaultModelId;
5338
5064
  case "litellm":
5339
5065
  return litellmDefaultModelId;
5340
5066
  case "xai":
5341
5067
  return xaiDefaultModelId;
5342
- case "groq":
5343
- return groqDefaultModelId;
5344
- case "huggingface":
5345
- return "meta-llama/Llama-3.3-70B-Instruct";
5346
- case "chutes":
5347
- return chutesDefaultModelId;
5348
5068
  case "baseten":
5349
5069
  return basetenDefaultModelId;
5350
5070
  case "bedrock":
@@ -5355,8 +5075,6 @@ function getProviderDefaultModelId(provider, options = { isChina: false }) {
5355
5075
  return geminiDefaultModelId;
5356
5076
  case "deepseek":
5357
5077
  return deepSeekDefaultModelId;
5358
- case "doubao":
5359
- return doubaoDefaultModelId;
5360
5078
  case "moonshot":
5361
5079
  return moonshotDefaultModelId;
5362
5080
  case "minimax":
@@ -5379,26 +5097,20 @@ function getProviderDefaultModelId(provider, options = { isChina: false }) {
5379
5097
  case "lmstudio":
5380
5098
  return "";
5381
5099
  // LMStudio uses dynamic model selection
5382
- case "deepinfra":
5383
- return deepInfraDefaultModelId;
5384
5100
  case "vscode-lm":
5385
5101
  return vscodeLlmDefaultModelId;
5386
- case "cerebras":
5387
- return cerebrasDefaultModelId;
5388
5102
  case "sambanova":
5389
5103
  return sambaNovaDefaultModelId;
5390
5104
  case "fireworks":
5391
5105
  return fireworksDefaultModelId;
5392
- case "featherless":
5393
- return featherlessDefaultModelId;
5394
- case "io-intelligence":
5395
- return ioIntelligenceDefaultModelId;
5396
5106
  case "roo":
5397
5107
  return rooDefaultModelId;
5398
5108
  case "qwen-code":
5399
5109
  return qwenCodeDefaultModelId;
5400
5110
  case "vercel-ai-gateway":
5401
5111
  return vercelAiGatewayDefaultModelId;
5112
+ case "azure":
5113
+ return azureDefaultModelId;
5402
5114
  case "anthropic":
5403
5115
  case "gemini-cli":
5404
5116
  case "fake-ai":
@@ -5409,18 +5121,7 @@ function getProviderDefaultModelId(provider, options = { isChina: false }) {
5409
5121
 
5410
5122
  // src/provider-settings.ts
5411
5123
  var DEFAULT_CONSECUTIVE_MISTAKE_LIMIT = 3;
5412
- var dynamicProviders = [
5413
- "openrouter",
5414
- "vercel-ai-gateway",
5415
- "huggingface",
5416
- "litellm",
5417
- "deepinfra",
5418
- "io-intelligence",
5419
- "requesty",
5420
- "unbound",
5421
- "roo",
5422
- "chutes"
5423
- ];
5124
+ var dynamicProviders = ["openrouter", "vercel-ai-gateway", "litellm", "requesty", "roo"];
5424
5125
  var isDynamicProvider = (key) => dynamicProviders.includes(key);
5425
5126
  var localProviders = ["ollama", "lmstudio"];
5426
5127
  var isLocalProvider = (key) => localProviders.includes(key);
@@ -5437,16 +5138,13 @@ var providerNames = [
5437
5138
  ...customProviders,
5438
5139
  ...fauxProviders,
5439
5140
  "anthropic",
5141
+ "azure",
5440
5142
  "bedrock",
5441
5143
  "baseten",
5442
- "cerebras",
5443
- "doubao",
5444
5144
  "deepseek",
5445
- "featherless",
5446
5145
  "fireworks",
5447
5146
  "gemini",
5448
5147
  "gemini-cli",
5449
- "groq",
5450
5148
  "mistral",
5451
5149
  "moonshot",
5452
5150
  "minimax",
@@ -5461,10 +5159,24 @@ var providerNames = [
5461
5159
  ];
5462
5160
  var providerNamesSchema = import_zod8.z.enum(providerNames);
5463
5161
  var isProviderName = (key) => typeof key === "string" && providerNames.includes(key);
5162
+ var retiredProviderNames = [
5163
+ "cerebras",
5164
+ "chutes",
5165
+ "deepinfra",
5166
+ "doubao",
5167
+ "featherless",
5168
+ "groq",
5169
+ "huggingface",
5170
+ "io-intelligence",
5171
+ "unbound"
5172
+ ];
5173
+ var retiredProviderNamesSchema = import_zod8.z.enum(retiredProviderNames);
5174
+ var isRetiredProvider = (value) => retiredProviderNames.includes(value);
5175
+ var providerNamesWithRetiredSchema = import_zod8.z.union([providerNamesSchema, retiredProviderNamesSchema]);
5464
5176
  var providerSettingsEntrySchema = import_zod8.z.object({
5465
5177
  id: import_zod8.z.string(),
5466
5178
  name: import_zod8.z.string(),
5467
- apiProvider: providerNamesSchema.optional(),
5179
+ apiProvider: providerNamesWithRetiredSchema.optional(),
5468
5180
  modelId: import_zod8.z.string().optional()
5469
5181
  });
5470
5182
  var baseProviderSettingsSchema = import_zod8.z.object({
@@ -5586,15 +5298,6 @@ var deepSeekSchema = apiModelIdProviderModelSchema.extend({
5586
5298
  deepSeekBaseUrl: import_zod8.z.string().optional(),
5587
5299
  deepSeekApiKey: import_zod8.z.string().optional()
5588
5300
  });
5589
- var deepInfraSchema = apiModelIdProviderModelSchema.extend({
5590
- deepInfraBaseUrl: import_zod8.z.string().optional(),
5591
- deepInfraApiKey: import_zod8.z.string().optional(),
5592
- deepInfraModelId: import_zod8.z.string().optional()
5593
- });
5594
- var doubaoSchema = apiModelIdProviderModelSchema.extend({
5595
- doubaoBaseUrl: import_zod8.z.string().optional(),
5596
- doubaoApiKey: import_zod8.z.string().optional()
5597
- });
5598
5301
  var moonshotSchema = apiModelIdProviderModelSchema.extend({
5599
5302
  moonshotBaseUrl: import_zod8.z.union([import_zod8.z.literal("https://api.moonshot.ai/v1"), import_zod8.z.literal("https://api.moonshot.cn/v1")]).optional(),
5600
5303
  moonshotApiKey: import_zod8.z.string().optional()
@@ -5603,10 +5306,6 @@ var minimaxSchema = apiModelIdProviderModelSchema.extend({
5603
5306
  minimaxBaseUrl: import_zod8.z.union([import_zod8.z.literal("https://api.minimax.io/v1"), import_zod8.z.literal("https://api.minimaxi.com/v1")]).optional(),
5604
5307
  minimaxApiKey: import_zod8.z.string().optional()
5605
5308
  });
5606
- var unboundSchema = baseProviderSettingsSchema.extend({
5607
- unboundApiKey: import_zod8.z.string().optional(),
5608
- unboundModelId: import_zod8.z.string().optional()
5609
- });
5610
5309
  var requestySchema = baseProviderSettingsSchema.extend({
5611
5310
  requestyBaseUrl: import_zod8.z.string().optional(),
5612
5311
  requestyApiKey: import_zod8.z.string().optional(),
@@ -5618,26 +5317,12 @@ var fakeAiSchema = baseProviderSettingsSchema.extend({
5618
5317
  var xaiSchema = apiModelIdProviderModelSchema.extend({
5619
5318
  xaiApiKey: import_zod8.z.string().optional()
5620
5319
  });
5621
- var groqSchema = apiModelIdProviderModelSchema.extend({
5622
- groqApiKey: import_zod8.z.string().optional()
5623
- });
5624
- var huggingFaceSchema = baseProviderSettingsSchema.extend({
5625
- huggingFaceApiKey: import_zod8.z.string().optional(),
5626
- huggingFaceModelId: import_zod8.z.string().optional(),
5627
- huggingFaceInferenceProvider: import_zod8.z.string().optional()
5628
- });
5629
- var chutesSchema = apiModelIdProviderModelSchema.extend({
5630
- chutesApiKey: import_zod8.z.string().optional()
5631
- });
5632
5320
  var litellmSchema = baseProviderSettingsSchema.extend({
5633
5321
  litellmBaseUrl: import_zod8.z.string().optional(),
5634
5322
  litellmApiKey: import_zod8.z.string().optional(),
5635
5323
  litellmModelId: import_zod8.z.string().optional(),
5636
5324
  litellmUsePromptCache: import_zod8.z.boolean().optional()
5637
5325
  });
5638
- var cerebrasSchema = apiModelIdProviderModelSchema.extend({
5639
- cerebrasApiKey: import_zod8.z.string().optional()
5640
- });
5641
5326
  var sambaNovaSchema = apiModelIdProviderModelSchema.extend({
5642
5327
  sambaNovaApiKey: import_zod8.z.string().optional()
5643
5328
  });
@@ -5649,13 +5334,6 @@ var zaiSchema = apiModelIdProviderModelSchema.extend({
5649
5334
  var fireworksSchema = apiModelIdProviderModelSchema.extend({
5650
5335
  fireworksApiKey: import_zod8.z.string().optional()
5651
5336
  });
5652
- var featherlessSchema = apiModelIdProviderModelSchema.extend({
5653
- featherlessApiKey: import_zod8.z.string().optional()
5654
- });
5655
- var ioIntelligenceSchema = apiModelIdProviderModelSchema.extend({
5656
- ioIntelligenceModelId: import_zod8.z.string().optional(),
5657
- ioIntelligenceApiKey: import_zod8.z.string().optional()
5658
- });
5659
5337
  var qwenCodeSchema = apiModelIdProviderModelSchema.extend({
5660
5338
  qwenCodeOauthPath: import_zod8.z.string().optional()
5661
5339
  });
@@ -5670,11 +5348,18 @@ var vercelAiGatewaySchema = baseProviderSettingsSchema.extend({
5670
5348
  var basetenSchema = apiModelIdProviderModelSchema.extend({
5671
5349
  basetenApiKey: import_zod8.z.string().optional()
5672
5350
  });
5351
+ var azureSchema = apiModelIdProviderModelSchema.extend({
5352
+ azureApiKey: import_zod8.z.string().optional(),
5353
+ azureResourceName: import_zod8.z.string().optional(),
5354
+ azureDeploymentName: import_zod8.z.string().optional(),
5355
+ azureApiVersion: import_zod8.z.string().optional()
5356
+ });
5673
5357
  var defaultSchema = import_zod8.z.object({
5674
5358
  apiProvider: import_zod8.z.undefined()
5675
5359
  });
5676
5360
  var providerSettingsSchemaDiscriminated = import_zod8.z.discriminatedUnion("apiProvider", [
5677
5361
  anthropicSchema.merge(import_zod8.z.object({ apiProvider: import_zod8.z.literal("anthropic") })),
5362
+ azureSchema.merge(import_zod8.z.object({ apiProvider: import_zod8.z.literal("azure") })),
5678
5363
  openRouterSchema.merge(import_zod8.z.object({ apiProvider: import_zod8.z.literal("openrouter") })),
5679
5364
  bedrockSchema.merge(import_zod8.z.object({ apiProvider: import_zod8.z.literal("bedrock") })),
5680
5365
  vertexSchema.merge(import_zod8.z.object({ apiProvider: import_zod8.z.literal("vertex") })),
@@ -5688,33 +5373,25 @@ var providerSettingsSchemaDiscriminated = import_zod8.z.discriminatedUnion("apiP
5688
5373
  openAiNativeSchema.merge(import_zod8.z.object({ apiProvider: import_zod8.z.literal("openai-native") })),
5689
5374
  mistralSchema.merge(import_zod8.z.object({ apiProvider: import_zod8.z.literal("mistral") })),
5690
5375
  deepSeekSchema.merge(import_zod8.z.object({ apiProvider: import_zod8.z.literal("deepseek") })),
5691
- deepInfraSchema.merge(import_zod8.z.object({ apiProvider: import_zod8.z.literal("deepinfra") })),
5692
- doubaoSchema.merge(import_zod8.z.object({ apiProvider: import_zod8.z.literal("doubao") })),
5693
5376
  moonshotSchema.merge(import_zod8.z.object({ apiProvider: import_zod8.z.literal("moonshot") })),
5694
5377
  minimaxSchema.merge(import_zod8.z.object({ apiProvider: import_zod8.z.literal("minimax") })),
5695
- unboundSchema.merge(import_zod8.z.object({ apiProvider: import_zod8.z.literal("unbound") })),
5696
5378
  requestySchema.merge(import_zod8.z.object({ apiProvider: import_zod8.z.literal("requesty") })),
5697
5379
  fakeAiSchema.merge(import_zod8.z.object({ apiProvider: import_zod8.z.literal("fake-ai") })),
5698
5380
  xaiSchema.merge(import_zod8.z.object({ apiProvider: import_zod8.z.literal("xai") })),
5699
- groqSchema.merge(import_zod8.z.object({ apiProvider: import_zod8.z.literal("groq") })),
5700
5381
  basetenSchema.merge(import_zod8.z.object({ apiProvider: import_zod8.z.literal("baseten") })),
5701
- huggingFaceSchema.merge(import_zod8.z.object({ apiProvider: import_zod8.z.literal("huggingface") })),
5702
- chutesSchema.merge(import_zod8.z.object({ apiProvider: import_zod8.z.literal("chutes") })),
5703
5382
  litellmSchema.merge(import_zod8.z.object({ apiProvider: import_zod8.z.literal("litellm") })),
5704
- cerebrasSchema.merge(import_zod8.z.object({ apiProvider: import_zod8.z.literal("cerebras") })),
5705
5383
  sambaNovaSchema.merge(import_zod8.z.object({ apiProvider: import_zod8.z.literal("sambanova") })),
5706
5384
  zaiSchema.merge(import_zod8.z.object({ apiProvider: import_zod8.z.literal("zai") })),
5707
5385
  fireworksSchema.merge(import_zod8.z.object({ apiProvider: import_zod8.z.literal("fireworks") })),
5708
- featherlessSchema.merge(import_zod8.z.object({ apiProvider: import_zod8.z.literal("featherless") })),
5709
- ioIntelligenceSchema.merge(import_zod8.z.object({ apiProvider: import_zod8.z.literal("io-intelligence") })),
5710
5386
  qwenCodeSchema.merge(import_zod8.z.object({ apiProvider: import_zod8.z.literal("qwen-code") })),
5711
5387
  rooSchema.merge(import_zod8.z.object({ apiProvider: import_zod8.z.literal("roo") })),
5712
5388
  vercelAiGatewaySchema.merge(import_zod8.z.object({ apiProvider: import_zod8.z.literal("vercel-ai-gateway") })),
5713
5389
  defaultSchema
5714
5390
  ]);
5715
5391
  var providerSettingsSchema = import_zod8.z.object({
5716
- apiProvider: providerNamesSchema.optional(),
5392
+ apiProvider: providerNamesWithRetiredSchema.optional(),
5717
5393
  ...anthropicSchema.shape,
5394
+ ...azureSchema.shape,
5718
5395
  ...openRouterSchema.shape,
5719
5396
  ...bedrockSchema.shape,
5720
5397
  ...vertexSchema.shape,
@@ -5728,25 +5405,16 @@ var providerSettingsSchema = import_zod8.z.object({
5728
5405
  ...openAiNativeSchema.shape,
5729
5406
  ...mistralSchema.shape,
5730
5407
  ...deepSeekSchema.shape,
5731
- ...deepInfraSchema.shape,
5732
- ...doubaoSchema.shape,
5733
5408
  ...moonshotSchema.shape,
5734
5409
  ...minimaxSchema.shape,
5735
- ...unboundSchema.shape,
5736
5410
  ...requestySchema.shape,
5737
5411
  ...fakeAiSchema.shape,
5738
5412
  ...xaiSchema.shape,
5739
- ...groqSchema.shape,
5740
5413
  ...basetenSchema.shape,
5741
- ...huggingFaceSchema.shape,
5742
- ...chutesSchema.shape,
5743
5414
  ...litellmSchema.shape,
5744
- ...cerebrasSchema.shape,
5745
5415
  ...sambaNovaSchema.shape,
5746
5416
  ...zaiSchema.shape,
5747
5417
  ...fireworksSchema.shape,
5748
- ...featherlessSchema.shape,
5749
- ...ioIntelligenceSchema.shape,
5750
5418
  ...qwenCodeSchema.shape,
5751
5419
  ...rooSchema.shape,
5752
5420
  ...vercelAiGatewaySchema.shape,
@@ -5764,13 +5432,9 @@ var modelIdKeys = [
5764
5432
  "ollamaModelId",
5765
5433
  "lmStudioModelId",
5766
5434
  "lmStudioDraftModelId",
5767
- "unboundModelId",
5768
5435
  "requestyModelId",
5769
5436
  "litellmModelId",
5770
- "huggingFaceModelId",
5771
- "ioIntelligenceModelId",
5772
- "vercelAiGatewayModelId",
5773
- "deepInfraModelId"
5437
+ "vercelAiGatewayModelId"
5774
5438
  ];
5775
5439
  var getModelId = (settings) => {
5776
5440
  const modelIdKey = modelIdKeys.find((key) => settings[key]);
@@ -5779,6 +5443,7 @@ var getModelId = (settings) => {
5779
5443
  var isTypicalProvider = (key) => isProviderName(key) && !isInternalProvider(key) && !isCustomProvider(key) && !isFauxProvider(key);
5780
5444
  var modelIdKeysByProvider = {
5781
5445
  anthropic: "apiModelId",
5446
+ azure: "apiModelId",
5782
5447
  openrouter: "openRouterModelId",
5783
5448
  bedrock: "apiModelId",
5784
5449
  vertex: "apiModelId",
@@ -5792,23 +5457,14 @@ var modelIdKeysByProvider = {
5792
5457
  moonshot: "apiModelId",
5793
5458
  minimax: "apiModelId",
5794
5459
  deepseek: "apiModelId",
5795
- deepinfra: "deepInfraModelId",
5796
- doubao: "apiModelId",
5797
5460
  "qwen-code": "apiModelId",
5798
- unbound: "unboundModelId",
5799
5461
  requesty: "requestyModelId",
5800
5462
  xai: "apiModelId",
5801
- groq: "apiModelId",
5802
5463
  baseten: "apiModelId",
5803
- chutes: "apiModelId",
5804
5464
  litellm: "litellmModelId",
5805
- huggingface: "huggingFaceModelId",
5806
- cerebras: "apiModelId",
5807
5465
  sambanova: "apiModelId",
5808
5466
  zai: "apiModelId",
5809
5467
  fireworks: "apiModelId",
5810
- featherless: "apiModelId",
5811
- "io-intelligence": "ioIntelligenceModelId",
5812
5468
  roo: "apiModelId",
5813
5469
  "vercel-ai-gateway": "vercelAiGatewayModelId"
5814
5470
  };
@@ -5831,27 +5487,22 @@ var MODELS_BY_PROVIDER = {
5831
5487
  label: "Anthropic",
5832
5488
  models: Object.keys(anthropicModels)
5833
5489
  },
5490
+ azure: {
5491
+ id: "azure",
5492
+ label: "Azure AI Foundry",
5493
+ // Azure uses deployment names configured by the user (not a fixed upstream model ID list)
5494
+ models: []
5495
+ },
5834
5496
  bedrock: {
5835
5497
  id: "bedrock",
5836
5498
  label: "Amazon Bedrock",
5837
5499
  models: Object.keys(bedrockModels)
5838
5500
  },
5839
- cerebras: {
5840
- id: "cerebras",
5841
- label: "Cerebras",
5842
- models: Object.keys(cerebrasModels)
5843
- },
5844
5501
  deepseek: {
5845
5502
  id: "deepseek",
5846
5503
  label: "DeepSeek",
5847
5504
  models: Object.keys(deepSeekModels)
5848
5505
  },
5849
- doubao: { id: "doubao", label: "Doubao", models: Object.keys(doubaoModels) },
5850
- featherless: {
5851
- id: "featherless",
5852
- label: "Featherless",
5853
- models: Object.keys(featherlessModels)
5854
- },
5855
5506
  fireworks: {
5856
5507
  id: "fireworks",
5857
5508
  label: "Fireworks",
@@ -5862,12 +5513,6 @@ var MODELS_BY_PROVIDER = {
5862
5513
  label: "Google Gemini",
5863
5514
  models: Object.keys(geminiModels)
5864
5515
  },
5865
- groq: { id: "groq", label: "Groq", models: Object.keys(groqModels) },
5866
- "io-intelligence": {
5867
- id: "io-intelligence",
5868
- label: "IO Intelligence",
5869
- models: Object.keys(ioIntelligenceModels)
5870
- },
5871
5516
  mistral: {
5872
5517
  id: "mistral",
5873
5518
  label: "Mistral",
@@ -5914,14 +5559,10 @@ var MODELS_BY_PROVIDER = {
5914
5559
  zai: { id: "zai", label: "Z.ai", models: Object.keys(internationalZAiModels) },
5915
5560
  baseten: { id: "baseten", label: "Baseten", models: Object.keys(basetenModels) },
5916
5561
  // Dynamic providers; models pulled from remote APIs.
5917
- huggingface: { id: "huggingface", label: "Hugging Face", models: [] },
5918
5562
  litellm: { id: "litellm", label: "LiteLLM", models: [] },
5919
5563
  openrouter: { id: "openrouter", label: "OpenRouter", models: [] },
5920
5564
  requesty: { id: "requesty", label: "Requesty", models: [] },
5921
- unbound: { id: "unbound", label: "Unbound", models: [] },
5922
- deepinfra: { id: "deepinfra", label: "DeepInfra", models: [] },
5923
5565
  "vercel-ai-gateway": { id: "vercel-ai-gateway", label: "Vercel AI Gateway", models: [] },
5924
- chutes: { id: "chutes", label: "Chutes AI", models: [] },
5925
5566
  // Local providers; models discovered from localhost endpoints.
5926
5567
  lmstudio: { id: "lmstudio", label: "LM Studio", models: [] },
5927
5568
  ollama: { id: "ollama", label: "Ollama", models: [] }
@@ -6584,19 +6225,13 @@ var SECRET_STATE_KEYS = [
6584
6225
  "ollamaApiKey",
6585
6226
  "geminiApiKey",
6586
6227
  "openAiNativeApiKey",
6587
- "cerebrasApiKey",
6588
6228
  "deepSeekApiKey",
6589
- "doubaoApiKey",
6590
6229
  "moonshotApiKey",
6591
6230
  "mistralApiKey",
6592
6231
  "minimaxApiKey",
6593
- "unboundApiKey",
6594
6232
  "requestyApiKey",
6595
6233
  "xaiApiKey",
6596
- "groqApiKey",
6597
- "chutesApiKey",
6598
6234
  "litellmApiKey",
6599
- "deepInfraApiKey",
6600
6235
  "codeIndexOpenAiKey",
6601
6236
  "codeIndexQdrantApiKey",
6602
6237
  "codebaseIndexOpenAiCompatibleApiKey",
@@ -6604,14 +6239,12 @@ var SECRET_STATE_KEYS = [
6604
6239
  "codebaseIndexMistralApiKey",
6605
6240
  "codebaseIndexVercelAiGatewayApiKey",
6606
6241
  "codebaseIndexOpenRouterApiKey",
6607
- "huggingFaceApiKey",
6608
6242
  "sambaNovaApiKey",
6609
6243
  "zaiApiKey",
6610
6244
  "fireworksApiKey",
6611
- "featherlessApiKey",
6612
- "ioIntelligenceApiKey",
6613
6245
  "vercelAiGatewayApiKey",
6614
- "basetenApiKey"
6246
+ "basetenApiKey",
6247
+ "azureApiKey"
6615
6248
  ];
6616
6249
  var GLOBAL_SECRET_KEYS = [
6617
6250
  "openRouterImageApiKey"
@@ -7215,6 +6848,9 @@ var TaskCommandName = /* @__PURE__ */ ((TaskCommandName2) => {
7215
6848
  TaskCommandName2["CloseTask"] = "CloseTask";
7216
6849
  TaskCommandName2["ResumeTask"] = "ResumeTask";
7217
6850
  TaskCommandName2["SendMessage"] = "SendMessage";
6851
+ TaskCommandName2["GetCommands"] = "GetCommands";
6852
+ TaskCommandName2["GetModes"] = "GetModes";
6853
+ TaskCommandName2["GetModels"] = "GetModels";
7218
6854
  return TaskCommandName2;
7219
6855
  })(TaskCommandName || {});
7220
6856
  var taskCommandSchema = import_zod18.z.discriminatedUnion("commandName", [
@@ -7243,6 +6879,15 @@ var taskCommandSchema = import_zod18.z.discriminatedUnion("commandName", [
7243
6879
  text: import_zod18.z.string().optional(),
7244
6880
  images: import_zod18.z.array(import_zod18.z.string()).optional()
7245
6881
  })
6882
+ }),
6883
+ import_zod18.z.object({
6884
+ commandName: import_zod18.z.literal("GetCommands" /* GetCommands */)
6885
+ }),
6886
+ import_zod18.z.object({
6887
+ commandName: import_zod18.z.literal("GetModes" /* GetModes */)
6888
+ }),
6889
+ import_zod18.z.object({
6890
+ commandName: import_zod18.z.literal("GetModels" /* GetModels */)
7246
6891
  })
7247
6892
  ]);
7248
6893
  var ipcMessageSchema = import_zod18.z.discriminatedUnion("type", [
@@ -7430,8 +7075,6 @@ var browserActions = [
7430
7075
  DEFAULT_MODES,
7431
7076
  DEFAULT_TERMINAL_OUTPUT_PREVIEW_SIZE,
7432
7077
  DEFAULT_WRITE_DELAY_MS,
7433
- DOUBAO_API_BASE_URL,
7434
- DOUBAO_API_CHAT_PATH,
7435
7078
  EVALS_SETTINGS,
7436
7079
  EVALS_TIMEOUT,
7437
7080
  EXPECTED_API_ERROR_CODES,
@@ -7442,18 +7085,9 @@ var browserActions = [
7442
7085
  GLOBAL_SETTINGS_KEYS,
7443
7086
  GLOBAL_STATE_KEYS,
7444
7087
  HEARTBEAT_INTERVAL_MS,
7445
- HUGGINGFACE_API_URL,
7446
- HUGGINGFACE_CACHE_DURATION,
7447
- HUGGINGFACE_DEFAULT_CONTEXT_WINDOW,
7448
- HUGGINGFACE_DEFAULT_MAX_TOKENS,
7449
- HUGGINGFACE_MAX_TOKENS_FALLBACK,
7450
- HUGGINGFACE_SLIDER_MIN,
7451
- HUGGINGFACE_SLIDER_STEP,
7452
- HUGGINGFACE_TEMPERATURE_MAX_VALUE,
7453
7088
  IMAGE_GENERATION_MODELS,
7454
7089
  IMAGE_GENERATION_MODEL_IDS,
7455
7090
  INSTANCE_TTL_SECONDS,
7456
- IO_INTELLIGENCE_CACHE_DURATION,
7457
7091
  IpcMessageType,
7458
7092
  IpcOrigin,
7459
7093
  LMSTUDIO_DEFAULT_TEMPERATURE,
@@ -7501,6 +7135,9 @@ var browserActions = [
7501
7135
  anthropicDefaultModelId,
7502
7136
  anthropicModels,
7503
7137
  appPropertiesSchema,
7138
+ azureDefaultModelId,
7139
+ azureDefaultModelInfo,
7140
+ azureModels,
7504
7141
  azureOpenAiDefaultApiVersion,
7505
7142
  basetenDefaultModelId,
7506
7143
  basetenModels,
@@ -7508,13 +7145,8 @@ var browserActions = [
7508
7145
  bedrockDefaultPromptRouterModelId,
7509
7146
  bedrockModels,
7510
7147
  browserActions,
7511
- cerebrasDefaultModelId,
7512
- cerebrasModels,
7513
7148
  checkoutDiffPayloadSchema,
7514
7149
  checkoutRestorePayloadSchema,
7515
- chutesDefaultModelId,
7516
- chutesDefaultModelInfo,
7517
- chutesModels,
7518
7150
  clineAskSchema,
7519
7151
  clineAsks,
7520
7152
  clineMessageSchema,
@@ -7534,15 +7166,10 @@ var browserActions = [
7534
7166
  customModesSettingsSchema,
7535
7167
  customProviders,
7536
7168
  customSupportPromptsSchema,
7537
- deepInfraDefaultModelId,
7538
- deepInfraDefaultModelInfo,
7539
7169
  deepSeekDefaultModelId,
7540
7170
  deepSeekModels,
7541
7171
  defineCustomTool,
7542
7172
  discriminatedProviderSettingsWithIdSchema,
7543
- doubaoDefaultModelId,
7544
- doubaoDefaultModelInfo,
7545
- doubaoModels,
7546
7173
  dynamicAppPropertiesSchema,
7547
7174
  dynamicProviders,
7548
7175
  experimentIds,
@@ -7555,8 +7182,6 @@ var browserActions = [
7555
7182
  extractConsecutiveMistakeErrorProperties,
7556
7183
  extractMessageFromJsonPayload,
7557
7184
  fauxProviders,
7558
- featherlessDefaultModelId,
7559
- featherlessModels,
7560
7185
  fireworksDefaultModelId,
7561
7186
  fireworksModels,
7562
7187
  followUpDataSchema,
@@ -7570,8 +7195,6 @@ var browserActions = [
7570
7195
  getProviderDefaultModelId,
7571
7196
  gitPropertiesSchema,
7572
7197
  globalSettingsSchema,
7573
- groqDefaultModelId,
7574
- groqModels,
7575
7198
  groupEntrySchema,
7576
7199
  groupOptionsSchema,
7577
7200
  historyItemSchema,
@@ -7582,9 +7205,6 @@ var browserActions = [
7582
7205
  internalProviders,
7583
7206
  internationalZAiDefaultModelId,
7584
7207
  internationalZAiModels,
7585
- ioIntelligenceDefaultBaseUrl,
7586
- ioIntelligenceDefaultModelId,
7587
- ioIntelligenceModels,
7588
7208
  ipcMessageSchema,
7589
7209
  isApiProviderError,
7590
7210
  isConsecutiveMistakeError,
@@ -7603,6 +7223,7 @@ var browserActions = [
7603
7223
  isNonBlockingAsk,
7604
7224
  isProviderName,
7605
7225
  isResumableAsk,
7226
+ isRetiredProvider,
7606
7227
  isSecretStateKey,
7607
7228
  isTypicalProvider,
7608
7229
  lMStudioDefaultModelId,
@@ -7653,6 +7274,7 @@ var browserActions = [
7653
7274
  promptComponentSchema,
7654
7275
  providerNames,
7655
7276
  providerNamesSchema,
7277
+ providerNamesWithRetiredSchema,
7656
7278
  providerSettingsEntrySchema,
7657
7279
  providerSettingsSchema,
7658
7280
  providerSettingsSchemaDiscriminated,
@@ -7670,6 +7292,8 @@ var browserActions = [
7670
7292
  requestyDefaultModelId,
7671
7293
  requestyDefaultModelInfo,
7672
7294
  resumableAsks,
7295
+ retiredProviderNames,
7296
+ retiredProviderNamesSchema,
7673
7297
  rooCodeEventsSchema,
7674
7298
  rooCodeSettingsSchema,
7675
7299
  rooCodeTelemetryEventSchema,
@@ -7702,8 +7326,6 @@ var browserActions = [
7702
7326
  toolNamesSchema,
7703
7327
  toolProgressStatusSchema,
7704
7328
  toolUsageSchema,
7705
- unboundDefaultModelId,
7706
- unboundDefaultModelInfo,
7707
7329
  usageStatsSchema,
7708
7330
  userFeaturesSchema,
7709
7331
  userSettingsConfigSchema,