npm - @ljoukov/llm - Versions diffs - 3.0.14 → 4.0.0 - Mend

@ljoukov/llm 3.0.14 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

package/dist/index.cjs (changed)

@@ -199,9 +199,14 @@ var FIREWORKS_GLM_5_PRICING = {
 };
 var FIREWORKS_MINIMAX_M21_PRICING = {
   inputRate: 0.3 / 1e6,
-  cachedRate: 0.15 / 1e6,
+  cachedRate: 0.03 / 1e6,
   outputRate: 1.2 / 1e6
 };
+var FIREWORKS_GPT_OSS_120B_PRICING = {
+  inputRate: 0.15 / 1e6,
+  cachedRate: 0.075 / 1e6,
+  outputRate: 0.6 / 1e6
+};
 function getFireworksPricing(modelId) {
   if (modelId.includes("kimi-k2.5") || modelId.includes("kimi-k2p5")) {
     return FIREWORKS_KIMI_K25_PRICING;
@@ -212,6 +217,9 @@ function getFireworksPricing(modelId) {
   if (modelId.includes("minimax-m2.1") || modelId.includes("minimax-m2p1")) {
     return FIREWORKS_MINIMAX_M21_PRICING;
   }
+  if (modelId.includes("gpt-oss-120b")) {
+    return FIREWORKS_GPT_OSS_120B_PRICING;
+  }
   return void 0;
 }
@@ -243,7 +251,16 @@ var GEMINI_2_5_FLASH_PRICING = {
   outputRateLow: 2.5 / 1e6,
   outputRateHigh: 2.5 / 1e6
 };
-var GEMINI_IMAGE_PREVIEW_PRICING = {
+var GEMINI_2_5_FLASH_LITE_PRICING = {
+  threshold: 2e5,
+  inputRateLow: 0.1 / 1e6,
+  inputRateHigh: 0.1 / 1e6,
+  cachedRateLow: 0.025 / 1e6,
+  cachedRateHigh: 0.025 / 1e6,
+  outputRateLow: 0.4 / 1e6,
+  outputRateHigh: 0.4 / 1e6
+};
+var GEMINI_3_PRO_IMAGE_PREVIEW_PRICING = {
   inputRate: 2 / 1e6,
   cachedRate: 0.2 / 1e6,
   outputTextRate: 12 / 1e6,
@@ -254,11 +271,26 @@ var GEMINI_IMAGE_PREVIEW_PRICING = {
     "4K": 0.24
   }
 };
+var GEMINI_3_1_FLASH_IMAGE_PREVIEW_PRICING = {
+  inputRate: 0.5 / 1e6,
+  cachedRate: 0.125 / 1e6,
+  outputTextRate: 3 / 1e6,
+  outputImageRate: 60 / 1e6,
+  imagePrices: {
+    "512": 0.045,
+    "1K": 0.067,
+    "2K": 0.101,
+    "4K": 0.15
+  }
+};
 function getGeminiProPricing(modelId) {
   if (modelId.includes("gemini-2.5-pro")) {
     return GEMINI_2_5_PRO_PRICING;
   }
-  if (modelId.includes("gemini-2.5-flash") || modelId.includes("gemini-flash-latest")) {
+  if (modelId.includes("gemini-flash-lite-latest")) {
+    return GEMINI_2_5_FLASH_LITE_PRICING;
+  }
+  if (modelId.includes("gemini-2.5-flash") || modelId.includes("gemini-flash-latest") || modelId.includes("gemini-3-flash-preview")) {
     return GEMINI_2_5_FLASH_PRICING;
   }
   if (modelId.includes("gemini-3-pro") || modelId.includes("gemini-3.1-pro")) {
@@ -267,8 +299,14 @@ function getGeminiProPricing(modelId) {
   return void 0;
 }
 function getGeminiImagePricing(modelId) {
+  if (modelId.includes("gemini-3.1-flash-image-preview")) {
+    return GEMINI_3_1_FLASH_IMAGE_PREVIEW_PRICING;
+  }
+  if (modelId.includes("gemini-3-pro-image-preview")) {
+    return GEMINI_3_PRO_IMAGE_PREVIEW_PRICING;
+  }
   if (modelId.includes("image-preview")) {
-    return GEMINI_IMAGE_PREVIEW_PRICING;
+    return GEMINI_3_PRO_IMAGE_PREVIEW_PRICING;
   }
   return void 0;
 }
@@ -3116,9 +3154,16 @@ function resolveProvider(model) {
 function isOpenAiCodexModel(modelId) {
   return modelId.includes("codex");
 }
-function resolveOpenAiReasoningEffort(modelId, override) {
-  if (override) {
-    return override;
+function resolveOpenAiReasoningEffort(modelId, thinkingLevel) {
+  if (thinkingLevel) {
+    switch (thinkingLevel) {
+      case "low":
+        return "low";
+      case "medium":
+        return "medium";
+      case "high":
+        return "xhigh";
+    }
   }
   if (isOpenAiCodexModel(modelId)) {
     return "medium";
@@ -4403,10 +4448,42 @@ function extractFireworksToolCalls(message) {
   }
   return calls;
 }
-function resolveGeminiThinkingConfig(modelId) {
+function toGeminiThinkingLevel(thinkingLevel) {
+  switch (thinkingLevel) {
+    case "low":
+      return import_genai2.ThinkingLevel.LOW;
+    case "medium":
+      return import_genai2.ThinkingLevel.MEDIUM;
+    case "high":
+      return import_genai2.ThinkingLevel.HIGH;
+  }
+}
+function toGemini25ProThinkingBudget(thinkingLevel) {
+  switch (thinkingLevel) {
+    case "low":
+      return 256;
+    case "medium":
+      return 4096;
+    case "high":
+      return 32768;
+  }
+}
+function resolveGeminiThinkingConfig(modelId, thinkingLevel) {
   if (isGeminiImageModelId(modelId)) {
     return void 0;
   }
+  if (thinkingLevel) {
+    if (modelId === "gemini-2.5-pro") {
+      return {
+        includeThoughts: true,
+        thinkingBudget: toGemini25ProThinkingBudget(thinkingLevel)
+      };
+    }
+    return {
+      includeThoughts: true,
+      thinkingLevel: toGeminiThinkingLevel(thinkingLevel)
+    };
+  }
   switch (modelId) {
     case "gemini-3.1-pro-preview":
       return { includeThoughts: true };
@@ -4493,10 +4570,7 @@ async function runTextCall(params) {
   if (provider === "openai") {
     const openAiInput = toOpenAiInput(contents);
     const openAiTools = toOpenAiTools(request.tools);
-    const reasoningEffort = resolveOpenAiReasoningEffort(
-      modelForProvider,
-      request.openAiReasoningEffort
-    );
+    const reasoningEffort = resolveOpenAiReasoningEffort(modelForProvider, request.thinkingLevel);
     const openAiTextConfig = {
       format: request.openAiTextFormat ?? { type: "text" },
       verbosity: resolveOpenAiVerbosity(modelForProvider)
@@ -4566,10 +4640,7 @@ async function runTextCall(params) {
     }, modelForProvider);
   } else if (provider === "chatgpt") {
     const chatGptInput = toChatGptInput(contents);
-    const reasoningEffort = resolveOpenAiReasoningEffort(
-      request.model,
-      request.openAiReasoningEffort
-    );
+    const reasoningEffort = resolveOpenAiReasoningEffort(request.model, request.thinkingLevel);
     const openAiTools = toOpenAiTools(request.tools);
     const requestPayload = {
       model: modelForProvider,
@@ -4661,7 +4732,7 @@ async function runTextCall(params) {
     }, modelForProvider);
   } else {
     const geminiContents = contents.map(convertLlmContentToGeminiContent);
-    const thinkingConfig = resolveGeminiThinkingConfig(modelForProvider);
+    const thinkingConfig = resolveGeminiThinkingConfig(modelForProvider, request.thinkingLevel);
     const config = {
       maxOutputTokens: 32e3,
       ...thinkingConfig ? { thinkingConfig } : {},
@@ -4839,7 +4910,7 @@ function streamJson(request) {
           tools: request.tools,
           responseMimeType: request.responseMimeType ?? "application/json",
           responseJsonSchema,
-          openAiReasoningEffort: request.openAiReasoningEffort,
+          thinkingLevel: request.thinkingLevel,
           ...openAiTextFormatForAttempt ? { openAiTextFormat: openAiTextFormatForAttempt } : {},
           signal
         });
@@ -4916,7 +4987,7 @@ async function generateJson(request) {
         tools: request.tools,
         responseMimeType: request.responseMimeType ?? "application/json",
         responseJsonSchema,
-        openAiReasoningEffort: request.openAiReasoningEffort,
+        thinkingLevel: request.thinkingLevel,
         ...openAiTextFormatForAttempt ? { openAiTextFormat: openAiTextFormatForAttempt } : {},
         signal: request.signal
       });
@@ -5182,7 +5253,7 @@ async function runToolLoop(request) {
       const openAiTools = openAiNativeTools ? [...openAiNativeTools, ...openAiAgentTools] : [...openAiAgentTools];
       const reasoningEffort = resolveOpenAiReasoningEffort(
         providerInfo.model,
-        request.openAiReasoningEffort
+        request.thinkingLevel
       );
       const textConfig = {
         format: { type: "text" },
@@ -5450,10 +5521,7 @@ async function runToolLoop(request) {
       const openAiAgentTools = buildOpenAiToolsFromToolSet(request.tools);
       const openAiNativeTools = toOpenAiTools(request.modelTools);
       const openAiTools = openAiNativeTools ? [...openAiNativeTools, ...openAiAgentTools] : [...openAiAgentTools];
-      const reasoningEffort = resolveOpenAiReasoningEffort(
-        request.model,
-        request.openAiReasoningEffort
-      );
+      const reasoningEffort = resolveOpenAiReasoningEffort(request.model, request.thinkingLevel);
       const toolLoopInput = toChatGptInput(contents);
       const conversationId = `tool-loop-${(0, import_node_crypto.randomBytes)(8).toString("hex")}`;
       const promptCacheKey = conversationId;
@@ -5892,7 +5960,7 @@ async function runToolLoop(request) {
           firstModelEventAtMs = Date.now();
         }
       };
-      const thinkingConfig = resolveGeminiThinkingConfig(request.model);
+      const thinkingConfig = resolveGeminiThinkingConfig(request.model, request.thinkingLevel);
       const config = {
         maxOutputTokens: 32e3,
         tools: geminiTools,
@@ -9268,7 +9336,7 @@ function createSubagentController(params) {
           subagentTool: params.subagentSelection,
           modelTools: params.toolLoopRequest.modelTools,
           maxSteps: subagentRequest.maxSteps,
-          openAiReasoningEffort: params.toolLoopRequest.openAiReasoningEffort,
+          thinkingLevel: params.toolLoopRequest.thinkingLevel,
           signal: subagentRequest.signal
         },
         {