plugin-custom-llm 1.3.1 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -542,44 +542,72 @@ function fixEmptyToolProperties(model) {
542
542
  };
543
543
  return model;
544
544
  }
545
- function wrapWithToolCallIdSanitizer(model) {
546
- var _a, _b;
547
- const originalGenerate = (_a = model._generate) == null ? void 0 : _a.bind(model);
548
- if (originalGenerate) {
549
- model._generate = async function(...args) {
550
- const result = await originalGenerate(...args);
551
- for (const gen of (result == null ? void 0 : result.generations) ?? []) {
552
- const msg = gen == null ? void 0 : gen.message;
553
- if (msg == null ? void 0 : msg.tool_calls) {
554
- for (const tc of msg.tool_calls) {
555
- tc.id = sanitizeToolCallId(tc.id);
556
- }
557
- }
558
- }
545
+ function sanitizeAIMessageToolCalls(msg) {
546
+ if (!msg) return;
547
+ if (msg.tool_calls) {
548
+ for (const tc of msg.tool_calls) {
549
+ tc.id = sanitizeToolCallId(tc.id);
550
+ }
551
+ }
552
+ if (msg.tool_call_chunks) {
553
+ for (const tc of msg.tool_call_chunks) {
554
+ tc.id = sanitizeToolCallId(tc.id);
555
+ }
556
+ }
557
+ }
558
+ function patchRunnableForSanitization(runnable) {
559
+ var _a, _b, _c, _d;
560
+ if (!runnable || runnable.__toolCallSanitized) return runnable;
561
+ runnable.__toolCallSanitized = true;
562
+ const origInvoke = (_a = runnable.invoke) == null ? void 0 : _a.bind(runnable);
563
+ if (origInvoke) {
564
+ runnable.invoke = async function(...args) {
565
+ const result = await origInvoke(...args);
566
+ sanitizeAIMessageToolCalls(result);
559
567
  return result;
560
568
  };
561
569
  }
562
- const streamMethod = typeof model._streamResponseChunks === "function" ? "_streamResponseChunks" : "_stream";
563
- const originalStream = (_b = model[streamMethod]) == null ? void 0 : _b.bind(model);
564
- if (originalStream) {
565
- model[streamMethod] = async function* (...args) {
566
- for await (const chunk of originalStream(...args)) {
567
- const msg = chunk == null ? void 0 : chunk.message;
568
- if (msg == null ? void 0 : msg.tool_call_chunks) {
569
- for (const tc of msg.tool_call_chunks) {
570
- tc.id = sanitizeToolCallId(tc.id);
571
- }
572
- }
573
- if (msg == null ? void 0 : msg.tool_calls) {
574
- for (const tc of msg.tool_calls) {
575
- tc.id = sanitizeToolCallId(tc.id);
576
- }
577
- }
578
- yield chunk;
570
+ const origStream = (_b = runnable.stream) == null ? void 0 : _b.bind(runnable);
571
+ if (origStream) {
572
+ runnable.stream = async function(...args) {
573
+ var _a2;
574
+ const iter = await origStream(...args);
575
+ const origIterator = (_a2 = iter[Symbol.asyncIterator]) == null ? void 0 : _a2.bind(iter);
576
+ if (origIterator) {
577
+ iter[Symbol.asyncIterator] = function() {
578
+ var _a3, _b2;
579
+ const it = origIterator();
580
+ return {
581
+ async next() {
582
+ const { value, done } = await it.next();
583
+ if (!done && value) {
584
+ sanitizeAIMessageToolCalls(value);
585
+ }
586
+ return { value, done };
587
+ },
588
+ return: (_a3 = it.return) == null ? void 0 : _a3.bind(it),
589
+ throw: (_b2 = it.throw) == null ? void 0 : _b2.bind(it)
590
+ };
591
+ };
579
592
  }
593
+ return iter;
580
594
  };
581
595
  }
582
- return model;
596
+ const origBindTools = (_c = runnable.bindTools) == null ? void 0 : _c.bind(runnable);
597
+ if (origBindTools) {
598
+ runnable.bindTools = function(...args) {
599
+ const bound = origBindTools(...args);
600
+ return patchRunnableForSanitization(bound);
601
+ };
602
+ }
603
+ const origBind = (_d = runnable.bind) == null ? void 0 : _d.bind(runnable);
604
+ if (origBind) {
605
+ runnable.bind = function(...args) {
606
+ const bound = origBind(...args);
607
+ return patchRunnableForSanitization(bound);
608
+ };
609
+ }
610
+ return runnable;
583
611
  }
584
612
  class CustomLLMProvider extends import_plugin_ai.LLMProvider {
585
613
  get baseURL() {
@@ -644,13 +672,13 @@ class CustomLLMProvider extends import_plugin_ai.LLMProvider {
644
672
  }
645
673
  let model = new ChatClass(config);
646
674
  model = fixEmptyToolProperties(model);
647
- model = wrapWithToolCallIdSanitizer(model);
648
675
  if (streamKeepAlive && !disableStream) {
649
- return wrapWithStreamKeepAlive(model, {
676
+ model = wrapWithStreamKeepAlive(model, {
650
677
  intervalMs: Number(keepAliveIntervalMs) || 5e3,
651
678
  keepAliveContent: keepAliveContent || "..."
652
679
  });
653
680
  }
681
+ model = patchRunnableForSanitization(model);
654
682
  return model;
655
683
  }
656
684
  parseResponseChunk(chunk) {
package/package.json CHANGED
@@ -3,7 +3,7 @@
3
3
  "displayName": "AI LLM: Custom (OpenAI Compatible)",
4
4
  "displayName.zh-CN": "AI LLM:自定义(OpenAI 兼容)",
5
5
  "description": "OpenAI-compatible LLM provider with auto response format detection for external LLM services.",
6
- "version": "1.3.1",
6
+ "version": "1.4.0",
7
7
  "main": "dist/server/index.js",
8
8
  "files": [
9
9
  "dist",
@@ -727,53 +727,89 @@ function fixEmptyToolProperties(model: any) {
727
727
  }
728
728
 
729
729
  /**
730
- * Wrap a chat model to sanitize tool call IDs in outputs.
731
- * Gemini models can return IDs like `call_xxx__thought__<long_base64>`
732
- * which are too long for langgraph to handle on message replay.
733
- * This strips the `__thought__...` suffix at the model output level
734
- * so downstream code (convertAIMessage, etc.) only sees clean IDs.
730
+ * Sanitize tool_calls on an AIMessage (mutates in place).
735
731
  */
736
- function wrapWithToolCallIdSanitizer(model: any) {
737
- // Patch _generate (used by invoke / non-streaming)
738
- const originalGenerate = model._generate?.bind(model);
739
- if (originalGenerate) {
740
- model._generate = async function (...args: any[]) {
741
- const result = await originalGenerate(...args);
742
- for (const gen of result?.generations ?? []) {
743
- const msg = gen?.message;
744
- if (msg?.tool_calls) {
745
- for (const tc of msg.tool_calls) {
746
- tc.id = sanitizeToolCallId(tc.id);
747
- }
748
- }
749
- }
732
+ function sanitizeAIMessageToolCalls(msg: any): void {
733
+ if (!msg) return;
734
+ if (msg.tool_calls) {
735
+ for (const tc of msg.tool_calls) {
736
+ tc.id = sanitizeToolCallId(tc.id);
737
+ }
738
+ }
739
+ if (msg.tool_call_chunks) {
740
+ for (const tc of msg.tool_call_chunks) {
741
+ tc.id = sanitizeToolCallId(tc.id);
742
+ }
743
+ }
744
+ }
745
+
746
+ /**
747
+ * Patch a runnable (model or bound model) so that `invoke` and `stream`
748
+ * sanitize tool call IDs on every AIMessage output.
749
+ * Also patches `bindTools` and `bind` so that derived runnables inherit
750
+ * the sanitization — this is critical because langgraph calls
751
+ * `model.bindTools(tools)` and then uses the BOUND model.
752
+ */
753
+ function patchRunnableForSanitization(runnable: any): any {
754
+ if (!runnable || runnable.__toolCallSanitized) return runnable;
755
+ runnable.__toolCallSanitized = true;
756
+
757
+ // Patch invoke — covers non-streaming and internal streaming-via-invoke
758
+ const origInvoke = runnable.invoke?.bind(runnable);
759
+ if (origInvoke) {
760
+ runnable.invoke = async function (...args: any[]) {
761
+ const result = await origInvoke(...args);
762
+ sanitizeAIMessageToolCalls(result);
750
763
  return result;
751
764
  };
752
765
  }
753
766
 
754
- // Patch _streamResponseChunks or _stream (used by streamEvents / streaming)
755
- const streamMethod = typeof model._streamResponseChunks === 'function' ? '_streamResponseChunks' : '_stream';
756
- const originalStream = model[streamMethod]?.bind(model);
757
- if (originalStream) {
758
- model[streamMethod] = async function* (...args: any[]) {
759
- for await (const chunk of originalStream(...args)) {
760
- const msg = chunk?.message;
761
- if (msg?.tool_call_chunks) {
762
- for (const tc of msg.tool_call_chunks) {
763
- tc.id = sanitizeToolCallId(tc.id);
764
- }
765
- }
766
- if (msg?.tool_calls) {
767
- for (const tc of msg.tool_calls) {
768
- tc.id = sanitizeToolCallId(tc.id);
769
- }
770
- }
771
- yield chunk;
767
+ // Patch stream — covers the streaming path
768
+ const origStream = runnable.stream?.bind(runnable);
769
+ if (origStream) {
770
+ runnable.stream = async function (...args: any[]) {
771
+ const iter = await origStream(...args);
772
+ // Wrap the async iterable to sanitize each chunk
773
+ const origIterator = iter[Symbol.asyncIterator]?.bind(iter);
774
+ if (origIterator) {
775
+ iter[Symbol.asyncIterator] = function () {
776
+ const it = origIterator();
777
+ return {
778
+ async next() {
779
+ const { value, done } = await it.next();
780
+ if (!done && value) {
781
+ sanitizeAIMessageToolCalls(value);
782
+ }
783
+ return { value, done };
784
+ },
785
+ return: it.return?.bind(it),
786
+ throw: it.throw?.bind(it),
787
+ };
788
+ };
772
789
  }
790
+ return iter;
773
791
  };
774
792
  }
775
793
 
776
- return model;
794
+ // Patch bindTools — the result is a RunnableBinding used by langgraph
795
+ const origBindTools = runnable.bindTools?.bind(runnable);
796
+ if (origBindTools) {
797
+ runnable.bindTools = function (...args: any[]) {
798
+ const bound = origBindTools(...args);
799
+ return patchRunnableForSanitization(bound);
800
+ };
801
+ }
802
+
803
+ // Patch bind — bindTools internally calls bind(), some runnables use it directly
804
+ const origBind = runnable.bind?.bind(runnable);
805
+ if (origBind) {
806
+ runnable.bind = function (...args: any[]) {
807
+ const bound = origBind(...args);
808
+ return patchRunnableForSanitization(bound);
809
+ };
810
+ }
811
+
812
+ return runnable;
777
813
  }
778
814
 
779
815
  export class CustomLLMProvider extends LLMProvider {
@@ -855,17 +891,20 @@ export class CustomLLMProvider extends LLMProvider {
855
891
  // Fix empty tool properties for strict providers (Gemini, etc.)
856
892
  model = fixEmptyToolProperties(model);
857
893
 
858
- // Sanitize Gemini's __thought__<base64> suffixes in tool call IDs
859
- model = wrapWithToolCallIdSanitizer(model);
860
-
861
894
  // Wrap with keepalive proxy if enabled (and streaming is not disabled)
862
895
  if (streamKeepAlive && !disableStream) {
863
- return wrapWithStreamKeepAlive(model, {
896
+ model = wrapWithStreamKeepAlive(model, {
864
897
  intervalMs: Number(keepAliveIntervalMs) || 5000,
865
898
  keepAliveContent: keepAliveContent || '...',
866
899
  });
867
900
  }
868
901
 
902
+ // Sanitize Gemini's __thought__<base64> suffixes in tool call IDs.
903
+ // Patches invoke/stream/bindTools/bind at the public API level so that
904
+ // ALL code paths (including langgraph's internal model calls via
905
+ // RunnableBinding after bindTools) return clean IDs.
906
+ model = patchRunnableForSanitization(model);
907
+
869
908
  return model;
870
909
  }
871
910