plugin-custom-llm 1.3.2 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -542,51 +542,72 @@ function fixEmptyToolProperties(model) {
542
542
  };
543
543
  return model;
544
544
  }
545
- function sanitizeGenerateResult(result) {
546
- if (!result) return result;
547
- for (const gen of (result == null ? void 0 : result.generations) ?? []) {
548
- const msg = gen == null ? void 0 : gen.message;
549
- if (msg == null ? void 0 : msg.tool_calls) {
550
- for (const tc of msg.tool_calls) {
551
- tc.id = sanitizeToolCallId(tc.id);
552
- }
553
- }
554
- }
555
- return result;
556
- }
557
- function sanitizeStreamChunk(chunk) {
558
- const msg = chunk == null ? void 0 : chunk.message;
559
- if (msg == null ? void 0 : msg.tool_call_chunks) {
560
- for (const tc of msg.tool_call_chunks) {
545
+ function sanitizeAIMessageToolCalls(msg) {
546
+ if (!msg) return;
547
+ if (msg.tool_calls) {
548
+ for (const tc of msg.tool_calls) {
561
549
  tc.id = sanitizeToolCallId(tc.id);
562
550
  }
563
551
  }
564
- if (msg == null ? void 0 : msg.tool_calls) {
565
- for (const tc of msg.tool_calls) {
552
+ if (msg.tool_call_chunks) {
553
+ for (const tc of msg.tool_call_chunks) {
566
554
  tc.id = sanitizeToolCallId(tc.id);
567
555
  }
568
556
  }
569
- return chunk;
570
557
  }
571
- function createSanitizedChatClass(BaseClass) {
572
- return class SanitizedChatModel extends BaseClass {
573
- async _generate(messages, options, runManager) {
574
- const result = await super._generate(messages, options, runManager);
575
- return sanitizeGenerateResult(result);
576
- }
577
- async *_streamResponseChunks(messages, options, runManager) {
578
- for await (const chunk of super._streamResponseChunks(messages, options, runManager)) {
579
- yield sanitizeStreamChunk(chunk);
580
- }
581
- }
582
- async *_stream(messages, options, runManager) {
583
- if (typeof super._stream === "function") {
584
- for await (const chunk of super._stream(messages, options, runManager)) {
585
- yield sanitizeStreamChunk(chunk);
586
- }
558
+ function patchRunnableForSanitization(runnable) {
559
+ var _a, _b, _c, _d;
560
+ if (!runnable || runnable.__toolCallSanitized) return runnable;
561
+ runnable.__toolCallSanitized = true;
562
+ const origInvoke = (_a = runnable.invoke) == null ? void 0 : _a.bind(runnable);
563
+ if (origInvoke) {
564
+ runnable.invoke = async function(...args) {
565
+ const result = await origInvoke(...args);
566
+ sanitizeAIMessageToolCalls(result);
567
+ return result;
568
+ };
569
+ }
570
+ const origStream = (_b = runnable.stream) == null ? void 0 : _b.bind(runnable);
571
+ if (origStream) {
572
+ runnable.stream = async function(...args) {
573
+ var _a2;
574
+ const iter = await origStream(...args);
575
+ const origIterator = (_a2 = iter[Symbol.asyncIterator]) == null ? void 0 : _a2.bind(iter);
576
+ if (origIterator) {
577
+ iter[Symbol.asyncIterator] = function() {
578
+ var _a3, _b2;
579
+ const it = origIterator();
580
+ return {
581
+ async next() {
582
+ const { value, done } = await it.next();
583
+ if (!done && value) {
584
+ sanitizeAIMessageToolCalls(value);
585
+ }
586
+ return { value, done };
587
+ },
588
+ return: (_a3 = it.return) == null ? void 0 : _a3.bind(it),
589
+ throw: (_b2 = it.throw) == null ? void 0 : _b2.bind(it)
590
+ };
591
+ };
587
592
  }
588
- }
589
- };
593
+ return iter;
594
+ };
595
+ }
596
+ const origBindTools = (_c = runnable.bindTools) == null ? void 0 : _c.bind(runnable);
597
+ if (origBindTools) {
598
+ runnable.bindTools = function(...args) {
599
+ const bound = origBindTools(...args);
600
+ return patchRunnableForSanitization(bound);
601
+ };
602
+ }
603
+ const origBind = (_d = runnable.bind) == null ? void 0 : _d.bind(runnable);
604
+ if (origBind) {
605
+ runnable.bind = function(...args) {
606
+ const bound = origBind(...args);
607
+ return patchRunnableForSanitization(bound);
608
+ };
609
+ }
610
+ return runnable;
590
611
  }
591
612
  class CustomLLMProvider extends import_plugin_ai.LLMProvider {
592
613
  get baseURL() {
@@ -625,8 +646,7 @@ class CustomLLMProvider extends import_plugin_ai.LLMProvider {
625
646
  if (reqConfig.extraBody && typeof reqConfig.extraBody === "object") {
626
647
  Object.assign(modelKwargs, reqConfig.extraBody);
627
648
  }
628
- const BaseChatClass = enableReasoning ? createReasoningChatClass() : getChatOpenAI();
629
- const ChatClass = createSanitizedChatClass(BaseChatClass);
649
+ const ChatClass = enableReasoning ? createReasoningChatClass() : getChatOpenAI();
630
650
  const config = {
631
651
  apiKey,
632
652
  ...this.modelOptions,
@@ -653,11 +673,12 @@ class CustomLLMProvider extends import_plugin_ai.LLMProvider {
653
673
  let model = new ChatClass(config);
654
674
  model = fixEmptyToolProperties(model);
655
675
  if (streamKeepAlive && !disableStream) {
656
- return wrapWithStreamKeepAlive(model, {
676
+ model = wrapWithStreamKeepAlive(model, {
657
677
  intervalMs: Number(keepAliveIntervalMs) || 5e3,
658
678
  keepAliveContent: keepAliveContent || "..."
659
679
  });
660
680
  }
681
+ model = patchRunnableForSanitization(model);
661
682
  return model;
662
683
  }
663
684
  parseResponseChunk(chunk) {
package/package.json CHANGED
@@ -3,7 +3,7 @@
3
3
  "displayName": "AI LLM: Custom (OpenAI Compatible)",
4
4
  "displayName.zh-CN": "AI LLM:自定义(OpenAI 兼容)",
5
5
  "description": "OpenAI-compatible LLM provider with auto response format detection for external LLM services.",
6
- "version": "1.3.2",
6
+ "version": "1.4.0",
7
7
  "main": "dist/server/index.js",
8
8
  "files": [
9
9
  "dist",
@@ -727,66 +727,89 @@ function fixEmptyToolProperties(model: any) {
727
727
  }
728
728
 
729
729
  /**
730
- * Sanitize all tool call IDs in a ChatResult (used after _generate).
730
+ * Sanitize tool_calls on an AIMessage (mutates in place).
731
731
  */
732
- function sanitizeGenerateResult(result: any): any {
733
- if (!result) return result;
734
- for (const gen of result?.generations ?? []) {
735
- const msg = gen?.message;
736
- if (msg?.tool_calls) {
737
- for (const tc of msg.tool_calls) {
738
- tc.id = sanitizeToolCallId(tc.id);
739
- }
740
- }
741
- }
742
- return result;
743
- }
744
-
745
- /**
746
- * Sanitize tool call IDs in a streaming chunk.
747
- */
748
- function sanitizeStreamChunk(chunk: any): any {
749
- const msg = chunk?.message;
750
- if (msg?.tool_call_chunks) {
751
- for (const tc of msg.tool_call_chunks) {
732
+ function sanitizeAIMessageToolCalls(msg: any): void {
733
+ if (!msg) return;
734
+ if (msg.tool_calls) {
735
+ for (const tc of msg.tool_calls) {
752
736
  tc.id = sanitizeToolCallId(tc.id);
753
737
  }
754
738
  }
755
- if (msg?.tool_calls) {
756
- for (const tc of msg.tool_calls) {
739
+ if (msg.tool_call_chunks) {
740
+ for (const tc of msg.tool_call_chunks) {
757
741
  tc.id = sanitizeToolCallId(tc.id);
758
742
  }
759
743
  }
760
- return chunk;
761
744
  }
762
745
 
763
746
  /**
764
- * Create a subclass of the given ChatModel class that sanitizes tool call IDs
765
- * in all outputs. Gemini models return IDs like `call_xxx__thought__<long_base64>`
766
- * which are too long for langgraph. Using class-level overrides (instead of
767
- * instance patching) ensures the sanitization survives bindTools/RunnableBinding.
747
+ * Patch a runnable (model or bound model) so that `invoke` and `stream`
748
+ * sanitize tool call IDs on every AIMessage output.
749
+ * Also patches `bindTools` and `bind` so that derived runnables inherit
750
+ * the sanitization — this is critical because langgraph calls
751
+ * `model.bindTools(tools)` and then uses the BOUND model.
768
752
  */
769
- function createSanitizedChatClass(BaseClass: any) {
770
- return class SanitizedChatModel extends BaseClass {
771
- async _generate(messages: any[], options: any, runManager?: any) {
772
- const result = await super._generate(messages, options, runManager);
773
- return sanitizeGenerateResult(result);
774
- }
753
+ function patchRunnableForSanitization(runnable: any): any {
754
+ if (!runnable || runnable.__toolCallSanitized) return runnable;
755
+ runnable.__toolCallSanitized = true;
756
+
757
+ // Patch invoke — covers non-streaming and internal streaming-via-invoke
758
+ const origInvoke = runnable.invoke?.bind(runnable);
759
+ if (origInvoke) {
760
+ runnable.invoke = async function (...args: any[]) {
761
+ const result = await origInvoke(...args);
762
+ sanitizeAIMessageToolCalls(result);
763
+ return result;
764
+ };
765
+ }
775
766
 
776
- async *_streamResponseChunks(messages: any[], options: any, runManager?: any) {
777
- for await (const chunk of super._streamResponseChunks(messages, options, runManager)) {
778
- yield sanitizeStreamChunk(chunk);
767
+ // Patch stream — covers streaming path
768
+ const origStream = runnable.stream?.bind(runnable);
769
+ if (origStream) {
770
+ runnable.stream = async function (...args: any[]) {
771
+ const iter = await origStream(...args);
772
+ // Wrap the async iterable to sanitize each chunk
773
+ const origIterator = iter[Symbol.asyncIterator]?.bind(iter);
774
+ if (origIterator) {
775
+ iter[Symbol.asyncIterator] = function () {
776
+ const it = origIterator();
777
+ return {
778
+ async next() {
779
+ const { value, done } = await it.next();
780
+ if (!done && value) {
781
+ sanitizeAIMessageToolCalls(value);
782
+ }
783
+ return { value, done };
784
+ },
785
+ return: it.return?.bind(it),
786
+ throw: it.throw?.bind(it),
787
+ };
788
+ };
779
789
  }
780
- }
790
+ return iter;
791
+ };
792
+ }
781
793
 
782
- async *_stream(messages: any[], options: any, runManager?: any) {
783
- if (typeof super._stream === 'function') {
784
- for await (const chunk of super._stream(messages, options, runManager)) {
785
- yield sanitizeStreamChunk(chunk);
786
- }
787
- }
788
- }
789
- };
794
+ // Patch bindTools — the result is a RunnableBinding used by langgraph
795
+ const origBindTools = runnable.bindTools?.bind(runnable);
796
+ if (origBindTools) {
797
+ runnable.bindTools = function (...args: any[]) {
798
+ const bound = origBindTools(...args);
799
+ return patchRunnableForSanitization(bound);
800
+ };
801
+ }
802
+
803
+ // Patch bind — bindTools internally calls bind(), some runnables use it directly
804
+ const origBind = runnable.bind?.bind(runnable);
805
+ if (origBind) {
806
+ runnable.bind = function (...args: any[]) {
807
+ const bound = origBind(...args);
808
+ return patchRunnableForSanitization(bound);
809
+ };
810
+ }
811
+
812
+ return runnable;
790
813
  }
791
814
 
792
815
  export class CustomLLMProvider extends LLMProvider {
@@ -829,11 +852,7 @@ export class CustomLLMProvider extends LLMProvider {
829
852
  // Issue #4: Use ReasoningChatOpenAI when enableReasoning is set.
830
853
  // This ensures reasoning_content is preserved and patched back into
831
854
  // assistant messages during tool call round-trips (required by DeepSeek-R1, etc.)
832
- // Wrap with tool call ID sanitizer at the class level — ensures
833
- // __thought__<base64> suffixes from Gemini are stripped in all code paths
834
- // (invoke, stream, bindTools bindings) via prototype chain.
835
- const BaseChatClass = enableReasoning ? createReasoningChatClass() : getChatOpenAI();
836
- const ChatClass = createSanitizedChatClass(BaseChatClass);
855
+ const ChatClass = enableReasoning ? createReasoningChatClass() : getChatOpenAI();
837
856
  const config: Record<string, any> = {
838
857
  apiKey,
839
858
  ...this.modelOptions,
@@ -874,12 +893,18 @@ export class CustomLLMProvider extends LLMProvider {
874
893
 
875
894
  // Wrap with keepalive proxy if enabled (and streaming is not disabled)
876
895
  if (streamKeepAlive && !disableStream) {
877
- return wrapWithStreamKeepAlive(model, {
896
+ model = wrapWithStreamKeepAlive(model, {
878
897
  intervalMs: Number(keepAliveIntervalMs) || 5000,
879
898
  keepAliveContent: keepAliveContent || '...',
880
899
  });
881
900
  }
882
901
 
902
+ // Sanitize Gemini's __thought__<base64> suffixes in tool call IDs.
903
+ // Patches invoke/stream/bindTools/bind at the public API level so that
904
+ // ALL code paths (including langgraph's internal model calls via
905
+ // RunnableBinding after bindTools) return clean IDs.
906
+ model = patchRunnableForSanitization(model);
907
+
883
908
  return model;
884
909
  }
885
910