@ljoukov/llm 3.0.14 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -199,9 +199,14 @@ var FIREWORKS_GLM_5_PRICING = {
199
199
  };
200
200
  var FIREWORKS_MINIMAX_M21_PRICING = {
201
201
  inputRate: 0.3 / 1e6,
202
- cachedRate: 0.15 / 1e6,
202
+ cachedRate: 0.03 / 1e6,
203
203
  outputRate: 1.2 / 1e6
204
204
  };
205
+ var FIREWORKS_GPT_OSS_120B_PRICING = {
206
+ inputRate: 0.15 / 1e6,
207
+ cachedRate: 0.075 / 1e6,
208
+ outputRate: 0.6 / 1e6
209
+ };
205
210
  function getFireworksPricing(modelId) {
206
211
  if (modelId.includes("kimi-k2.5") || modelId.includes("kimi-k2p5")) {
207
212
  return FIREWORKS_KIMI_K25_PRICING;
@@ -212,6 +217,9 @@ function getFireworksPricing(modelId) {
212
217
  if (modelId.includes("minimax-m2.1") || modelId.includes("minimax-m2p1")) {
213
218
  return FIREWORKS_MINIMAX_M21_PRICING;
214
219
  }
220
+ if (modelId.includes("gpt-oss-120b")) {
221
+ return FIREWORKS_GPT_OSS_120B_PRICING;
222
+ }
215
223
  return void 0;
216
224
  }
217
225
 
@@ -243,7 +251,16 @@ var GEMINI_2_5_FLASH_PRICING = {
243
251
  outputRateLow: 2.5 / 1e6,
244
252
  outputRateHigh: 2.5 / 1e6
245
253
  };
246
- var GEMINI_IMAGE_PREVIEW_PRICING = {
254
+ var GEMINI_2_5_FLASH_LITE_PRICING = {
255
+ threshold: 2e5,
256
+ inputRateLow: 0.1 / 1e6,
257
+ inputRateHigh: 0.1 / 1e6,
258
+ cachedRateLow: 0.025 / 1e6,
259
+ cachedRateHigh: 0.025 / 1e6,
260
+ outputRateLow: 0.4 / 1e6,
261
+ outputRateHigh: 0.4 / 1e6
262
+ };
263
+ var GEMINI_3_PRO_IMAGE_PREVIEW_PRICING = {
247
264
  inputRate: 2 / 1e6,
248
265
  cachedRate: 0.2 / 1e6,
249
266
  outputTextRate: 12 / 1e6,
@@ -254,11 +271,26 @@ var GEMINI_IMAGE_PREVIEW_PRICING = {
254
271
  "4K": 0.24
255
272
  }
256
273
  };
274
+ var GEMINI_3_1_FLASH_IMAGE_PREVIEW_PRICING = {
275
+ inputRate: 0.5 / 1e6,
276
+ cachedRate: 0.125 / 1e6,
277
+ outputTextRate: 3 / 1e6,
278
+ outputImageRate: 60 / 1e6,
279
+ imagePrices: {
280
+ "512": 0.045,
281
+ "1K": 0.067,
282
+ "2K": 0.101,
283
+ "4K": 0.15
284
+ }
285
+ };
257
286
  function getGeminiProPricing(modelId) {
258
287
  if (modelId.includes("gemini-2.5-pro")) {
259
288
  return GEMINI_2_5_PRO_PRICING;
260
289
  }
261
- if (modelId.includes("gemini-2.5-flash") || modelId.includes("gemini-flash-latest")) {
290
+ if (modelId.includes("gemini-flash-lite-latest")) {
291
+ return GEMINI_2_5_FLASH_LITE_PRICING;
292
+ }
293
+ if (modelId.includes("gemini-2.5-flash") || modelId.includes("gemini-flash-latest") || modelId.includes("gemini-3-flash-preview")) {
262
294
  return GEMINI_2_5_FLASH_PRICING;
263
295
  }
264
296
  if (modelId.includes("gemini-3-pro") || modelId.includes("gemini-3.1-pro")) {
@@ -267,8 +299,14 @@ function getGeminiProPricing(modelId) {
267
299
  return void 0;
268
300
  }
269
301
  function getGeminiImagePricing(modelId) {
302
+ if (modelId.includes("gemini-3.1-flash-image-preview")) {
303
+ return GEMINI_3_1_FLASH_IMAGE_PREVIEW_PRICING;
304
+ }
305
+ if (modelId.includes("gemini-3-pro-image-preview")) {
306
+ return GEMINI_3_PRO_IMAGE_PREVIEW_PRICING;
307
+ }
270
308
  if (modelId.includes("image-preview")) {
271
- return GEMINI_IMAGE_PREVIEW_PRICING;
309
+ return GEMINI_3_PRO_IMAGE_PREVIEW_PRICING;
272
310
  }
273
311
  return void 0;
274
312
  }
@@ -3116,9 +3154,16 @@ function resolveProvider(model) {
3116
3154
  function isOpenAiCodexModel(modelId) {
3117
3155
  return modelId.includes("codex");
3118
3156
  }
3119
- function resolveOpenAiReasoningEffort(modelId, override) {
3120
- if (override) {
3121
- return override;
3157
+ function resolveOpenAiReasoningEffort(modelId, thinkingLevel) {
3158
+ if (thinkingLevel) {
3159
+ switch (thinkingLevel) {
3160
+ case "low":
3161
+ return "low";
3162
+ case "medium":
3163
+ return "medium";
3164
+ case "high":
3165
+ return "xhigh";
3166
+ }
3122
3167
  }
3123
3168
  if (isOpenAiCodexModel(modelId)) {
3124
3169
  return "medium";
@@ -4403,10 +4448,42 @@ function extractFireworksToolCalls(message) {
4403
4448
  }
4404
4449
  return calls;
4405
4450
  }
4406
- function resolveGeminiThinkingConfig(modelId) {
4451
+ function toGeminiThinkingLevel(thinkingLevel) {
4452
+ switch (thinkingLevel) {
4453
+ case "low":
4454
+ return import_genai2.ThinkingLevel.LOW;
4455
+ case "medium":
4456
+ return import_genai2.ThinkingLevel.MEDIUM;
4457
+ case "high":
4458
+ return import_genai2.ThinkingLevel.HIGH;
4459
+ }
4460
+ }
4461
+ function toGemini25ProThinkingBudget(thinkingLevel) {
4462
+ switch (thinkingLevel) {
4463
+ case "low":
4464
+ return 256;
4465
+ case "medium":
4466
+ return 4096;
4467
+ case "high":
4468
+ return 32768;
4469
+ }
4470
+ }
4471
+ function resolveGeminiThinkingConfig(modelId, thinkingLevel) {
4407
4472
  if (isGeminiImageModelId(modelId)) {
4408
4473
  return void 0;
4409
4474
  }
4475
+ if (thinkingLevel) {
4476
+ if (modelId === "gemini-2.5-pro") {
4477
+ return {
4478
+ includeThoughts: true,
4479
+ thinkingBudget: toGemini25ProThinkingBudget(thinkingLevel)
4480
+ };
4481
+ }
4482
+ return {
4483
+ includeThoughts: true,
4484
+ thinkingLevel: toGeminiThinkingLevel(thinkingLevel)
4485
+ };
4486
+ }
4410
4487
  switch (modelId) {
4411
4488
  case "gemini-3.1-pro-preview":
4412
4489
  return { includeThoughts: true };
@@ -4493,10 +4570,7 @@ async function runTextCall(params) {
4493
4570
  if (provider === "openai") {
4494
4571
  const openAiInput = toOpenAiInput(contents);
4495
4572
  const openAiTools = toOpenAiTools(request.tools);
4496
- const reasoningEffort = resolveOpenAiReasoningEffort(
4497
- modelForProvider,
4498
- request.openAiReasoningEffort
4499
- );
4573
+ const reasoningEffort = resolveOpenAiReasoningEffort(modelForProvider, request.thinkingLevel);
4500
4574
  const openAiTextConfig = {
4501
4575
  format: request.openAiTextFormat ?? { type: "text" },
4502
4576
  verbosity: resolveOpenAiVerbosity(modelForProvider)
@@ -4566,10 +4640,7 @@ async function runTextCall(params) {
4566
4640
  }, modelForProvider);
4567
4641
  } else if (provider === "chatgpt") {
4568
4642
  const chatGptInput = toChatGptInput(contents);
4569
- const reasoningEffort = resolveOpenAiReasoningEffort(
4570
- request.model,
4571
- request.openAiReasoningEffort
4572
- );
4643
+ const reasoningEffort = resolveOpenAiReasoningEffort(request.model, request.thinkingLevel);
4573
4644
  const openAiTools = toOpenAiTools(request.tools);
4574
4645
  const requestPayload = {
4575
4646
  model: modelForProvider,
@@ -4661,7 +4732,7 @@ async function runTextCall(params) {
4661
4732
  }, modelForProvider);
4662
4733
  } else {
4663
4734
  const geminiContents = contents.map(convertLlmContentToGeminiContent);
4664
- const thinkingConfig = resolveGeminiThinkingConfig(modelForProvider);
4735
+ const thinkingConfig = resolveGeminiThinkingConfig(modelForProvider, request.thinkingLevel);
4665
4736
  const config = {
4666
4737
  maxOutputTokens: 32e3,
4667
4738
  ...thinkingConfig ? { thinkingConfig } : {},
@@ -4839,7 +4910,7 @@ function streamJson(request) {
4839
4910
  tools: request.tools,
4840
4911
  responseMimeType: request.responseMimeType ?? "application/json",
4841
4912
  responseJsonSchema,
4842
- openAiReasoningEffort: request.openAiReasoningEffort,
4913
+ thinkingLevel: request.thinkingLevel,
4843
4914
  ...openAiTextFormatForAttempt ? { openAiTextFormat: openAiTextFormatForAttempt } : {},
4844
4915
  signal
4845
4916
  });
@@ -4916,7 +4987,7 @@ async function generateJson(request) {
4916
4987
  tools: request.tools,
4917
4988
  responseMimeType: request.responseMimeType ?? "application/json",
4918
4989
  responseJsonSchema,
4919
- openAiReasoningEffort: request.openAiReasoningEffort,
4990
+ thinkingLevel: request.thinkingLevel,
4920
4991
  ...openAiTextFormatForAttempt ? { openAiTextFormat: openAiTextFormatForAttempt } : {},
4921
4992
  signal: request.signal
4922
4993
  });
@@ -5182,7 +5253,7 @@ async function runToolLoop(request) {
5182
5253
  const openAiTools = openAiNativeTools ? [...openAiNativeTools, ...openAiAgentTools] : [...openAiAgentTools];
5183
5254
  const reasoningEffort = resolveOpenAiReasoningEffort(
5184
5255
  providerInfo.model,
5185
- request.openAiReasoningEffort
5256
+ request.thinkingLevel
5186
5257
  );
5187
5258
  const textConfig = {
5188
5259
  format: { type: "text" },
@@ -5450,10 +5521,7 @@ async function runToolLoop(request) {
5450
5521
  const openAiAgentTools = buildOpenAiToolsFromToolSet(request.tools);
5451
5522
  const openAiNativeTools = toOpenAiTools(request.modelTools);
5452
5523
  const openAiTools = openAiNativeTools ? [...openAiNativeTools, ...openAiAgentTools] : [...openAiAgentTools];
5453
- const reasoningEffort = resolveOpenAiReasoningEffort(
5454
- request.model,
5455
- request.openAiReasoningEffort
5456
- );
5524
+ const reasoningEffort = resolveOpenAiReasoningEffort(request.model, request.thinkingLevel);
5457
5525
  const toolLoopInput = toChatGptInput(contents);
5458
5526
  const conversationId = `tool-loop-${(0, import_node_crypto.randomBytes)(8).toString("hex")}`;
5459
5527
  const promptCacheKey = conversationId;
@@ -5892,7 +5960,7 @@ async function runToolLoop(request) {
5892
5960
  firstModelEventAtMs = Date.now();
5893
5961
  }
5894
5962
  };
5895
- const thinkingConfig = resolveGeminiThinkingConfig(request.model);
5963
+ const thinkingConfig = resolveGeminiThinkingConfig(request.model, request.thinkingLevel);
5896
5964
  const config = {
5897
5965
  maxOutputTokens: 32e3,
5898
5966
  tools: geminiTools,
@@ -9268,7 +9336,7 @@ function createSubagentController(params) {
9268
9336
  subagentTool: params.subagentSelection,
9269
9337
  modelTools: params.toolLoopRequest.modelTools,
9270
9338
  maxSteps: subagentRequest.maxSteps,
9271
- openAiReasoningEffort: params.toolLoopRequest.openAiReasoningEffort,
9339
+ thinkingLevel: params.toolLoopRequest.thinkingLevel,
9272
9340
  signal: subagentRequest.signal
9273
9341
  },
9274
9342
  {