npm - plugin-custom-llm - Versions diffs - 1.3.1 → 1.3.2 - Mend

plugin-custom-llm 1.3.1 → 1.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3)

package/dist/server/llm-providers/custom-llm.js +43 -36
package/package.json +1 -1
package/src/server/llm-providers/custom-llm.ts +59 -45

package/dist/server/llm-providers/custom-llm.js CHANGED

@@ -542,44 +542,51 @@ function fixEmptyToolProperties(model) {
   };
   return model;
 }
-function wrapWithToolCallIdSanitizer(model) {
-  var _a, _b;
-  const originalGenerate = (_a = model._generate) == null ? void 0 : _a.bind(model);
-  if (originalGenerate) {
-    model._generate = async function(...args) {
-      const result = await originalGenerate(...args);
-      for (const gen of (result == null ? void 0 : result.generations) ?? []) {
-        const msg = gen == null ? void 0 : gen.message;
-        if (msg == null ? void 0 : msg.tool_calls) {
-          for (const tc of msg.tool_calls) {
-            tc.id = sanitizeToolCallId(tc.id);
-          }
-        }
+function sanitizeGenerateResult(result) {
+  if (!result) return result;
+  for (const gen of (result == null ? void 0 : result.generations) ?? []) {
+    const msg = gen == null ? void 0 : gen.message;
+    if (msg == null ? void 0 : msg.tool_calls) {
+      for (const tc of msg.tool_calls) {
+        tc.id = sanitizeToolCallId(tc.id);
       }
-      return result;
-    };
+    }
   }
-  const streamMethod = typeof model._streamResponseChunks === "function" ? "_streamResponseChunks" : "_stream";
-  const originalStream = (_b = model[streamMethod]) == null ? void 0 : _b.bind(model);
-  if (originalStream) {
-    model[streamMethod] = async function* (...args) {
-      for await (const chunk of originalStream(...args)) {
-        const msg = chunk == null ? void 0 : chunk.message;
-        if (msg == null ? void 0 : msg.tool_call_chunks) {
-          for (const tc of msg.tool_call_chunks) {
-            tc.id = sanitizeToolCallId(tc.id);
-          }
-        }
-        if (msg == null ? void 0 : msg.tool_calls) {
-          for (const tc of msg.tool_calls) {
-            tc.id = sanitizeToolCallId(tc.id);
-          }
+  return result;
+}
+function sanitizeStreamChunk(chunk) {
+  const msg = chunk == null ? void 0 : chunk.message;
+  if (msg == null ? void 0 : msg.tool_call_chunks) {
+    for (const tc of msg.tool_call_chunks) {
+      tc.id = sanitizeToolCallId(tc.id);
+    }
+  }
+  if (msg == null ? void 0 : msg.tool_calls) {
+    for (const tc of msg.tool_calls) {
+      tc.id = sanitizeToolCallId(tc.id);
+    }
+  }
+  return chunk;
+}
+function createSanitizedChatClass(BaseClass) {
+  return class SanitizedChatModel extends BaseClass {
+    async _generate(messages, options, runManager) {
+      const result = await super._generate(messages, options, runManager);
+      return sanitizeGenerateResult(result);
+    }
+    async *_streamResponseChunks(messages, options, runManager) {
+      for await (const chunk of super._streamResponseChunks(messages, options, runManager)) {
+        yield sanitizeStreamChunk(chunk);
+      }
+    }
+    async *_stream(messages, options, runManager) {
+      if (typeof super._stream === "function") {
+        for await (const chunk of super._stream(messages, options, runManager)) {
+          yield sanitizeStreamChunk(chunk);
         }
-        yield chunk;
       }
-    };
-  }
-  return model;
+    }
+  };
 }
 class CustomLLMProvider extends import_plugin_ai.LLMProvider {
   get baseURL() {
@@ -618,7 +625,8 @@ class CustomLLMProvider extends import_plugin_ai.LLMProvider {
     if (reqConfig.extraBody && typeof reqConfig.extraBody === "object") {
       Object.assign(modelKwargs, reqConfig.extraBody);
     }
-    const ChatClass = enableReasoning ? createReasoningChatClass() : getChatOpenAI();
+    const BaseChatClass = enableReasoning ? createReasoningChatClass() : getChatOpenAI();
+    const ChatClass = createSanitizedChatClass(BaseChatClass);
     const config = {
       apiKey,
       ...this.modelOptions,
@@ -644,7 +652,6 @@ class CustomLLMProvider extends import_plugin_ai.LLMProvider {
     }
     let model = new ChatClass(config);
     model = fixEmptyToolProperties(model);
-    model = wrapWithToolCallIdSanitizer(model);
     if (streamKeepAlive && !disableStream) {
       return wrapWithStreamKeepAlive(model, {
         intervalMs: Number(keepAliveIntervalMs) || 5e3,

package/package.json CHANGED

@@ -3,7 +3,7 @@
   "displayName": "AI LLM: Custom (OpenAI Compatible)",
   "displayName.zh-CN": "AI LLM：自定义（OpenAI 兼容）",
   "description": "OpenAI-compatible LLM provider with auto response format detection for external LLM services.",
-  "version": "1.3.1",
+  "version": "1.3.2",
   "main": "dist/server/index.js",
   "files": [
     "dist",

package/src/server/llm-providers/custom-llm.ts CHANGED

@@ -727,53 +727,66 @@ function fixEmptyToolProperties(model: any) {
 }
 /**
- * Wrap a chat model to sanitize tool call IDs in outputs.
- * Gemini models can return IDs like `call_xxx__thought__<long_base64>`
- * which are too long for langgraph to handle on message replay.
- * This strips the `__thought__...` suffix at the model output level
- * so downstream code (convertAIMessage, etc.) only sees clean IDs.
+ * Sanitize all tool call IDs in a ChatResult (used after _generate).
  */
-function wrapWithToolCallIdSanitizer(model: any) {
-  // Patch _generate (used by invoke / non-streaming)
-  const originalGenerate = model._generate?.bind(model);
-  if (originalGenerate) {
-    model._generate = async function (...args: any[]) {
-      const result = await originalGenerate(...args);
-      for (const gen of result?.generations ?? []) {
-        const msg = gen?.message;
-        if (msg?.tool_calls) {
-          for (const tc of msg.tool_calls) {
-            tc.id = sanitizeToolCallId(tc.id);
-          }
-        }
+function sanitizeGenerateResult(result: any): any {
+  if (!result) return result;
+  for (const gen of result?.generations ?? []) {
+    const msg = gen?.message;
+    if (msg?.tool_calls) {
+      for (const tc of msg.tool_calls) {
+        tc.id = sanitizeToolCallId(tc.id);
       }
-      return result;
-    };
+    }
   }
+  return result;
+}
-  // Patch _streamResponseChunks or _stream (used by streamEvents / streaming)
-  const streamMethod = typeof model._streamResponseChunks === 'function' ? '_streamResponseChunks' : '_stream';
-  const originalStream = model[streamMethod]?.bind(model);
-  if (originalStream) {
-    model[streamMethod] = async function* (...args: any[]) {
-      for await (const chunk of originalStream(...args)) {
-        const msg = chunk?.message;
-        if (msg?.tool_call_chunks) {
-          for (const tc of msg.tool_call_chunks) {
-            tc.id = sanitizeToolCallId(tc.id);
-          }
-        }
-        if (msg?.tool_calls) {
-          for (const tc of msg.tool_calls) {
-            tc.id = sanitizeToolCallId(tc.id);
-          }
-        }
-        yield chunk;
-      }
-    };
+/**
+ * Sanitize tool call IDs in a streaming chunk.
+ */
+function sanitizeStreamChunk(chunk: any): any {
+  const msg = chunk?.message;
+  if (msg?.tool_call_chunks) {
+    for (const tc of msg.tool_call_chunks) {
+      tc.id = sanitizeToolCallId(tc.id);
+    }
+  }
+  if (msg?.tool_calls) {
+    for (const tc of msg.tool_calls) {
+      tc.id = sanitizeToolCallId(tc.id);
+    }
   }
+  return chunk;
+}
-  return model;
+/**
+ * Create a subclass of the given ChatModel class that sanitizes tool call IDs
+ * in all outputs. Gemini models return IDs like `call_xxx__thought__<long_base64>`
+ * which are too long for langgraph. Using class-level overrides (instead of
+ * instance patching) ensures the sanitization survives bindTools/RunnableBinding.
+ */
+function createSanitizedChatClass(BaseClass: any) {
+  return class SanitizedChatModel extends BaseClass {
+    async _generate(messages: any[], options: any, runManager?: any) {
+      const result = await super._generate(messages, options, runManager);
+      return sanitizeGenerateResult(result);
+    }
+    async *_streamResponseChunks(messages: any[], options: any, runManager?: any) {
+      for await (const chunk of super._streamResponseChunks(messages, options, runManager)) {
+        yield sanitizeStreamChunk(chunk);
+      }
+    }
+    async *_stream(messages: any[], options: any, runManager?: any) {
+      if (typeof super._stream === 'function') {
+        for await (const chunk of super._stream(messages, options, runManager)) {
+          yield sanitizeStreamChunk(chunk);
+        }
+      }
+    }
+  };
 }
 export class CustomLLMProvider extends LLMProvider {
@@ -816,7 +829,11 @@ export class CustomLLMProvider extends LLMProvider {
     // Issue #4: Use ReasoningChatOpenAI when enableReasoning is set.
     // This ensures reasoning_content is preserved and patched back into
     // assistant messages during tool call round-trips (required by DeepSeek-R1, etc.)
-    const ChatClass = enableReasoning ? createReasoningChatClass() : getChatOpenAI();
+    // Wrap with tool call ID sanitizer at the class level — ensures
+    // __thought__<base64> suffixes from Gemini are stripped in all code paths
+    // (invoke, stream, bindTools bindings) via prototype chain.
+    const BaseChatClass = enableReasoning ? createReasoningChatClass() : getChatOpenAI();
+    const ChatClass = createSanitizedChatClass(BaseChatClass);
     const config: Record<string, any> = {
       apiKey,
       ...this.modelOptions,
@@ -855,9 +872,6 @@ export class CustomLLMProvider extends LLMProvider {
     // Fix empty tool properties for strict providers (Gemini, etc.)
     model = fixEmptyToolProperties(model);
-    // Sanitize Gemini's __thought__<base64> suffixes in tool call IDs
-    model = wrapWithToolCallIdSanitizer(model);
     // Wrap with keepalive proxy if enabled (and streaming is not disabled)
     if (streamKeepAlive && !disableStream) {
       return wrapWithStreamKeepAlive(model, {