plugin-custom-llm 1.3.1 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -542,44 +542,72 @@ function fixEmptyToolProperties(model) {
542
542
  };
543
543
  return model;
544
544
  }
545
- function wrapWithToolCallIdSanitizer(model) {
546
- var _a, _b;
547
- const originalGenerate = (_a = model._generate) == null ? void 0 : _a.bind(model);
548
- if (originalGenerate) {
549
- model._generate = async function(...args) {
550
- const result = await originalGenerate(...args);
551
- for (const gen of (result == null ? void 0 : result.generations) ?? []) {
552
- const msg = gen == null ? void 0 : gen.message;
553
- if (msg == null ? void 0 : msg.tool_calls) {
554
- for (const tc of msg.tool_calls) {
555
- tc.id = sanitizeToolCallId(tc.id);
556
- }
557
- }
558
- }
545
+ function sanitizeAIMessageToolCalls(msg) {
546
+ if (!msg) return;
547
+ if (msg.tool_calls) {
548
+ for (const tc of msg.tool_calls) {
549
+ tc.id = sanitizeToolCallId(tc.id);
550
+ }
551
+ }
552
+ if (msg.tool_call_chunks) {
553
+ for (const tc of msg.tool_call_chunks) {
554
+ tc.id = sanitizeToolCallId(tc.id);
555
+ }
556
+ }
557
+ }
558
+ function patchRunnableForSanitization(runnable) {
559
+ var _a, _b, _c, _d;
560
+ if (!runnable || runnable.__toolCallSanitized) return runnable;
561
+ runnable.__toolCallSanitized = true;
562
+ const origInvoke = (_a = runnable.invoke) == null ? void 0 : _a.bind(runnable);
563
+ if (origInvoke) {
564
+ runnable.invoke = async function(...args) {
565
+ const result = await origInvoke(...args);
566
+ sanitizeAIMessageToolCalls(result);
559
567
  return result;
560
568
  };
561
569
  }
562
- const streamMethod = typeof model._streamResponseChunks === "function" ? "_streamResponseChunks" : "_stream";
563
- const originalStream = (_b = model[streamMethod]) == null ? void 0 : _b.bind(model);
564
- if (originalStream) {
565
- model[streamMethod] = async function* (...args) {
566
- for await (const chunk of originalStream(...args)) {
567
- const msg = chunk == null ? void 0 : chunk.message;
568
- if (msg == null ? void 0 : msg.tool_call_chunks) {
569
- for (const tc of msg.tool_call_chunks) {
570
- tc.id = sanitizeToolCallId(tc.id);
571
- }
572
- }
573
- if (msg == null ? void 0 : msg.tool_calls) {
574
- for (const tc of msg.tool_calls) {
575
- tc.id = sanitizeToolCallId(tc.id);
576
- }
577
- }
578
- yield chunk;
570
+ const origStream = (_b = runnable.stream) == null ? void 0 : _b.bind(runnable);
571
+ if (origStream) {
572
+ runnable.stream = async function(...args) {
573
+ var _a2;
574
+ const iter = await origStream(...args);
575
+ const origIterator = (_a2 = iter[Symbol.asyncIterator]) == null ? void 0 : _a2.bind(iter);
576
+ if (origIterator) {
577
+ iter[Symbol.asyncIterator] = function() {
578
+ var _a3, _b2;
579
+ const it = origIterator();
580
+ return {
581
+ async next() {
582
+ const { value, done } = await it.next();
583
+ if (!done && value) {
584
+ sanitizeAIMessageToolCalls(value);
585
+ }
586
+ return { value, done };
587
+ },
588
+ return: (_a3 = it.return) == null ? void 0 : _a3.bind(it),
589
+ throw: (_b2 = it.throw) == null ? void 0 : _b2.bind(it)
590
+ };
591
+ };
579
592
  }
593
+ return iter;
580
594
  };
581
595
  }
582
- return model;
596
+ const origBindTools = (_c = runnable.bindTools) == null ? void 0 : _c.bind(runnable);
597
+ if (origBindTools) {
598
+ runnable.bindTools = function(...args) {
599
+ const bound = origBindTools(...args);
600
+ return patchRunnableForSanitization(bound);
601
+ };
602
+ }
603
+ const origBind = (_d = runnable.bind) == null ? void 0 : _d.bind(runnable);
604
+ if (origBind) {
605
+ runnable.bind = function(...args) {
606
+ const bound = origBind(...args);
607
+ return patchRunnableForSanitization(bound);
608
+ };
609
+ }
610
+ return runnable;
583
611
  }
584
612
  class CustomLLMProvider extends import_plugin_ai.LLMProvider {
585
613
  get baseURL() {
@@ -644,13 +672,13 @@ class CustomLLMProvider extends import_plugin_ai.LLMProvider {
644
672
  }
645
673
  let model = new ChatClass(config);
646
674
  model = fixEmptyToolProperties(model);
647
- model = wrapWithToolCallIdSanitizer(model);
648
675
  if (streamKeepAlive && !disableStream) {
649
- return wrapWithStreamKeepAlive(model, {
676
+ model = wrapWithStreamKeepAlive(model, {
650
677
  intervalMs: Number(keepAliveIntervalMs) || 5e3,
651
678
  keepAliveContent: keepAliveContent || "..."
652
679
  });
653
680
  }
681
+ model = patchRunnableForSanitization(model);
654
682
  return model;
655
683
  }
656
684
  parseResponseChunk(chunk) {
package/package.json CHANGED
@@ -3,7 +3,7 @@
3
3
  "displayName": "AI LLM: Custom (OpenAI Compatible)",
4
4
  "displayName.zh-CN": "AI LLM:自定义(OpenAI 兼容)",
5
5
  "description": "OpenAI-compatible LLM provider with auto response format detection for external LLM services.",
6
- "version": "1.3.1",
6
+ "version": "1.4.0",
7
7
  "main": "dist/server/index.js",
8
8
  "files": [
9
9
  "dist",
@@ -727,53 +727,89 @@ function fixEmptyToolProperties(model: any) {
727
727
  }
728
728
 
729
729
  /**
730
- * Wrap a chat model to sanitize tool call IDs in outputs.
731
- * Gemini models can return IDs like `call_xxx__thought__<long_base64>`
732
- * which are too long for langgraph to handle on message replay.
733
- * This strips the `__thought__...` suffix at the model output level
734
- * so downstream code (convertAIMessage, etc.) only sees clean IDs.
730
+ * Sanitize tool_calls on an AIMessage (mutates in place).
735
731
  */
736
- function wrapWithToolCallIdSanitizer(model: any) {
737
- // Patch _generate (used by invoke / non-streaming)
738
- const originalGenerate = model._generate?.bind(model);
739
- if (originalGenerate) {
740
- model._generate = async function (...args: any[]) {
741
- const result = await originalGenerate(...args);
742
- for (const gen of result?.generations ?? []) {
743
- const msg = gen?.message;
744
- if (msg?.tool_calls) {
745
- for (const tc of msg.tool_calls) {
746
- tc.id = sanitizeToolCallId(tc.id);
747
- }
748
- }
749
- }
732
+ function sanitizeAIMessageToolCalls(msg: any): void {
733
+ if (!msg) return;
734
+ if (msg.tool_calls) {
735
+ for (const tc of msg.tool_calls) {
736
+ tc.id = sanitizeToolCallId(tc.id);
737
+ }
738
+ }
739
+ if (msg.tool_call_chunks) {
740
+ for (const tc of msg.tool_call_chunks) {
741
+ tc.id = sanitizeToolCallId(tc.id);
742
+ }
743
+ }
744
+ }
745
+
746
+ /**
747
+ * Patch a runnable (model or bound model) so that `invoke` and `stream`
748
+ * sanitize tool call IDs on every AIMessage output.
749
+ * Also patches `bindTools` and `bind` so that derived runnables inherit
750
+ * the sanitization — this is critical because langgraph calls
751
+ * `model.bindTools(tools)` and then uses the BOUND model.
752
+ */
753
+ function patchRunnableForSanitization(runnable: any): any {
754
+ if (!runnable || runnable.__toolCallSanitized) return runnable;
755
+ runnable.__toolCallSanitized = true;
756
+
757
+ // Patch invoke — covers non-streaming and internal streaming-via-invoke
758
+ const origInvoke = runnable.invoke?.bind(runnable);
759
+ if (origInvoke) {
760
+ runnable.invoke = async function (...args: any[]) {
761
+ const result = await origInvoke(...args);
762
+ sanitizeAIMessageToolCalls(result);
750
763
  return result;
751
764
  };
752
765
  }
753
766
 
754
- // Patch _streamResponseChunks or _stream (used by streamEvents / streaming)
755
- const streamMethod = typeof model._streamResponseChunks === 'function' ? '_streamResponseChunks' : '_stream';
756
- const originalStream = model[streamMethod]?.bind(model);
757
- if (originalStream) {
758
- model[streamMethod] = async function* (...args: any[]) {
759
- for await (const chunk of originalStream(...args)) {
760
- const msg = chunk?.message;
761
- if (msg?.tool_call_chunks) {
762
- for (const tc of msg.tool_call_chunks) {
763
- tc.id = sanitizeToolCallId(tc.id);
764
- }
765
- }
766
- if (msg?.tool_calls) {
767
- for (const tc of msg.tool_calls) {
768
- tc.id = sanitizeToolCallId(tc.id);
769
- }
770
- }
771
- yield chunk;
767
+ // Patch stream — covers the streaming path
768
+ const origStream = runnable.stream?.bind(runnable);
769
+ if (origStream) {
770
+ runnable.stream = async function (...args: any[]) {
771
+ const iter = await origStream(...args);
772
+ // Wrap the async iterable to sanitize each chunk
773
+ const origIterator = iter[Symbol.asyncIterator]?.bind(iter);
774
+ if (origIterator) {
775
+ iter[Symbol.asyncIterator] = function () {
776
+ const it = origIterator();
777
+ return {
778
+ async next() {
779
+ const { value, done } = await it.next();
780
+ if (!done && value) {
781
+ sanitizeAIMessageToolCalls(value);
782
+ }
783
+ return { value, done };
784
+ },
785
+ return: it.return?.bind(it),
786
+ throw: it.throw?.bind(it),
787
+ };
788
+ };
772
789
  }
790
+ return iter;
773
791
  };
774
792
  }
775
793
 
776
- return model;
794
+ // Patch bindTools — the result is a RunnableBinding used by langgraph
795
+ const origBindTools = runnable.bindTools?.bind(runnable);
796
+ if (origBindTools) {
797
+ runnable.bindTools = function (...args: any[]) {
798
+ const bound = origBindTools(...args);
799
+ return patchRunnableForSanitization(bound);
800
+ };
801
+ }
802
+
803
+ // Patch bind — bindTools internally calls bind(), some runnables use it directly
804
+ const origBind = runnable.bind?.bind(runnable);
805
+ if (origBind) {
806
+ runnable.bind = function (...args: any[]) {
807
+ const bound = origBind(...args);
808
+ return patchRunnableForSanitization(bound);
809
+ };
810
+ }
811
+
812
+ return runnable;
777
813
  }
778
814
 
779
815
  export class CustomLLMProvider extends LLMProvider {
@@ -855,17 +891,20 @@ export class CustomLLMProvider extends LLMProvider {
855
891
  // Fix empty tool properties for strict providers (Gemini, etc.)
856
892
  model = fixEmptyToolProperties(model);
857
893
 
858
- // Sanitize Gemini's __thought__<base64> suffixes in tool call IDs
859
- model = wrapWithToolCallIdSanitizer(model);
860
-
861
894
  // Wrap with keepalive proxy if enabled (and streaming is not disabled)
862
895
  if (streamKeepAlive && !disableStream) {
863
- return wrapWithStreamKeepAlive(model, {
896
+ model = wrapWithStreamKeepAlive(model, {
864
897
  intervalMs: Number(keepAliveIntervalMs) || 5000,
865
898
  keepAliveContent: keepAliveContent || '...',
866
899
  });
867
900
  }
868
901
 
902
+ // Sanitize Gemini's __thought__<base64> suffixes in tool call IDs.
903
+ // Patches invoke/stream/bindTools/bind at the public API level so that
904
+ // ALL code paths (including langgraph's internal model calls via
905
+ // RunnableBinding after bindTools) return clean IDs.
906
+ model = patchRunnableForSanitization(model);
907
+
869
908
  return model;
870
909
  }
871
910