npm - @librechat/agents - Versions diffs - 3.1.71-dev.0 → 3.1.71 - Mend

@librechat/agents 3.1.71-dev.0 → 3.1.71

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (38)

package/dist/cjs/graphs/Graph.cjs +7 -0
package/dist/cjs/graphs/Graph.cjs.map +1 -1
package/dist/cjs/llm/invoke.cjs +13 -2
package/dist/cjs/llm/invoke.cjs.map +1 -1
package/dist/cjs/tools/BashExecutor.cjs +3 -1
package/dist/cjs/tools/BashExecutor.cjs.map +1 -1
package/dist/cjs/tools/ToolNode.cjs +84 -55
package/dist/cjs/tools/ToolNode.cjs.map +1 -1
package/dist/cjs/tools/toolOutputReferences.cjs +195 -0
package/dist/cjs/tools/toolOutputReferences.cjs.map +1 -1
package/dist/esm/graphs/Graph.mjs +7 -0
package/dist/esm/graphs/Graph.mjs.map +1 -1
package/dist/esm/llm/invoke.mjs +13 -2
package/dist/esm/llm/invoke.mjs.map +1 -1
package/dist/esm/tools/BashExecutor.mjs +3 -1
package/dist/esm/tools/BashExecutor.mjs.map +1 -1
package/dist/esm/tools/ToolNode.mjs +85 -56
package/dist/esm/tools/ToolNode.mjs.map +1 -1
package/dist/esm/tools/toolOutputReferences.mjs +195 -1
package/dist/esm/tools/toolOutputReferences.mjs.map +1 -1
package/dist/types/graphs/Graph.d.ts +9 -2
package/dist/types/llm/invoke.d.ts +29 -3
package/dist/types/tools/ToolNode.d.ts +11 -13
package/dist/types/tools/toolOutputReferences.d.ts +31 -0
package/dist/types/types/index.d.ts +1 -0
package/dist/types/types/messages.d.ts +26 -0
package/package.json +1 -1
package/src/graphs/Graph.ts +8 -1
package/src/llm/invoke.test.ts +446 -0
package/src/llm/invoke.ts +45 -5
package/src/tools/BashExecutor.ts +3 -1
package/src/tools/ToolNode.ts +94 -81
package/src/tools/__tests__/BashExecutor.test.ts +13 -0
package/src/tools/__tests__/ToolNode.outputReferences.test.ts +98 -55
package/src/tools/__tests__/annotateMessagesForLLM.test.ts +479 -0
package/src/tools/toolOutputReferences.ts +235 -0
package/src/types/index.ts +1 -0
package/src/types/messages.ts +27 -0

package/src/tools/ToolNode.ts CHANGED

@@ -36,7 +36,6 @@ import { executeHooks } from '@/hooks';
 import { Constants, GraphEvents, CODE_EXECUTION_TOOLS } from '@/common';
 import {
   buildReferenceKey,
-  annotateToolOutputWithReference,
   ToolOutputReferenceRegistry,
 } from '@/tools/toolOutputReferences';
@@ -429,21 +428,17 @@ export class ToolNode<T = any> extends RunnableCallable<T, T> {
         const isError = toolMsg.status === 'error';
         if (isError) {
           /**
-           * Error ToolMessages bypass registration/annotation but must
-           * still carry the unresolved-refs hint so the LLM can
-           * self-correct when its reference key caused the failure.
+           * Error ToolMessages bypass registration but still stamp the
+           * unresolved-refs hint into `additional_kwargs` so the lazy
+           * annotation transform surfaces it to the LLM, letting the
+           * model self-correct when its reference key caused the
+           * failure. Persisted `content` stays clean.
            */
-          if (
-            unresolvedRefs.length > 0 &&
-            typeof toolMsg.content === 'string'
-          ) {
-            toolMsg.content = this.applyOutputReference(
-              runId,
-              toolMsg.content,
-              toolMsg.content,
-              undefined,
-              unresolvedRefs
-            );
+          if (unresolvedRefs.length > 0) {
+            toolMsg.additional_kwargs = {
+              ...toolMsg.additional_kwargs,
+              _unresolvedRefs: unresolvedRefs,
+            };
           }
           return toolMsg;
         }
@@ -454,35 +449,35 @@ export class ToolNode<T = any> extends RunnableCallable<T, T> {
               rawContent,
               this.maxToolResultChars
             );
-            toolMsg.content = this.applyOutputReference(
+            toolMsg.content = llmContent;
+            const refMeta = this.recordOutputReference(
               runId,
-              llmContent,
               rawContent,
               refKey,
               unresolvedRefs
             );
+            if (refMeta != null) {
+              toolMsg.additional_kwargs = {
+                ...toolMsg.additional_kwargs,
+                ...refMeta,
+              };
+            }
           } else {
             /**
              * Non-string content (multi-part content blocks — text +
              * image). Known limitation: we cannot register under a
              * reference key because there's no canonical serialized
              * form. Warn once per tool per run when the caller
-             * intended to register.
-             *
-             * Still surface unresolved-ref warnings so the LLM gets
-             * the self-correction signal that the string and error
-             * paths already emit. Prepended as a leading text block
-             * to keep the original content ordering intact.
+             * intended to register. The unresolved-refs hint is still
+             * stamped as metadata; the lazy transform prepends a text
+             * block at request time so the LLM gets the self-correction
+             * signal.
              */
-            if (unresolvedRefs.length > 0 && Array.isArray(toolMsg.content)) {
-              const warningBlock = {
-                type: 'text',
-                text: `[unresolved refs: ${unresolvedRefs.join(', ')}]`,
+            if (unresolvedRefs.length > 0) {
+              toolMsg.additional_kwargs = {
+                ...toolMsg.additional_kwargs,
+                _unresolvedRefs: unresolvedRefs,
               };
-              toolMsg.content = [
-                warningBlock,
-                ...toolMsg.content,
-              ] as typeof toolMsg.content;
             }
             if (
               refKey != null &&
@@ -504,9 +499,8 @@ export class ToolNode<T = any> extends RunnableCallable<T, T> {
         rawContent,
         this.maxToolResultChars
       );
-      const content = this.applyOutputReference(
+      const refMeta = this.recordOutputReference(
         runId,
-        truncated,
         rawContent,
         refKey,
         unresolvedRefs
@@ -514,8 +508,11 @@ export class ToolNode<T = any> extends RunnableCallable<T, T> {
       return new ToolMessage({
         status: 'success',
         name: tool.name,
-        content,
+        content: truncated,
         tool_call_id: call.id!,
+        ...(refMeta != null && {
+          additional_kwargs: refMeta as Record<string, unknown>,
+        }),
       });
     } catch (_e: unknown) {
       const e = _e as Error;
@@ -561,64 +558,73 @@ export class ToolNode<T = any> extends RunnableCallable<T, T> {
           });
         }
       }
-      let errorContent = `Error: ${e.message}\n Please fix your mistakes.`;
-      if (unresolvedRefs.length > 0) {
-        errorContent = this.applyOutputReference(
-          runId,
-          errorContent,
-          errorContent,
-          undefined,
-          unresolvedRefs
-        );
-      }
+      const errorContent = `Error: ${e.message}\n Please fix your mistakes.`;
+      const refMeta =
+        unresolvedRefs.length > 0
+          ? this.recordOutputReference(
+            runId,
+            errorContent,
+            undefined,
+            unresolvedRefs
+          )
+          : undefined;
       return new ToolMessage({
         status: 'error',
         content: errorContent,
         name: call.name,
         tool_call_id: call.id ?? '',
+        ...(refMeta != null && {
+          additional_kwargs: refMeta as Record<string, unknown>,
+        }),
       });
     }
   }
   /**
-   * Finalizes the LLM-visible content for a tool call and (when a
-   * `refKey` is provided) registers the full, raw output under that
-   * key.
+   * Registers the full, raw output under `refKey` (when provided) and
+   * builds the per-message ref metadata stamped onto the resulting
+   * `ToolMessage.additional_kwargs`. The metadata is read at LLM-
+   * request time by `annotateMessagesForLLM` to produce a transient
+   * annotated copy of the message — the persisted `content` itself
+   * stays clean.
    *
-   * @param llmContent  The content string the LLM will see. This is
-   *   the already-truncated, post-hook view; the annotation is
-   *   applied on top of it.
    * @param registryContent  The full, untruncated output to store in
    *   the registry so `{{tool<i>turn<n>}}` substitutions deliver the
    *   complete payload. Ignored when `refKey` is undefined.
    * @param refKey  Precomputed `tool<i>turn<n>` key, or undefined when
    *   the output is not to be registered (errors, disabled feature,
    *   unavailable batch/turn).
-   * @param unresolved  Placeholder keys that did not resolve; appended
-   *   as `[unresolved refs: …]` so the LLM can self-correct.
-   *
-   * `refKey` is passed in (rather than built from `this.currentTurn`)
-   * so parallel `invoke()` calls on the same ToolNode cannot race on
-   * the shared turn field.
+   * @param unresolved  Placeholder keys that did not resolve; surfaced
+   *   to the LLM lazily so it can self-correct.
+   * @returns A `ToolMessageRefMetadata` object when there is anything
+   *   to stamp, otherwise `undefined`.
    */
-  private applyOutputReference(
+  private recordOutputReference(
     runId: string | undefined,
-    llmContent: string,
     registryContent: string,
     refKey: string | undefined,
     unresolved: string[]
-  ): string {
+  ): t.ToolMessageRefMetadata | undefined {
     if (this.toolOutputRegistry != null && refKey != null) {
       this.toolOutputRegistry.set(runId, refKey, registryContent);
     }
-    /**
-     * `annotateToolOutputWithReference` handles both the ref-key and
-     * unresolved-refs cases together so JSON-object outputs stay
-     * parseable: unresolved refs land in an `_unresolved_refs` field
-     * instead of as a trailing text line that would break
-     * `JSON.parse` for downstream consumers.
-     */
-    return annotateToolOutputWithReference(llmContent, refKey, unresolved);
+    if (refKey == null && unresolved.length === 0) return undefined;
+    const meta: t.ToolMessageRefMetadata = {};
+    if (refKey != null) {
+      meta._refKey = refKey;
+      /**
+       * Stamp the registry scope alongside the key so the lazy
+       * annotation transform can look up the right bucket. Anonymous
+       * invocations get a synthetic per-batch scope (`\0anon-<n>`)
+       * that `attemptInvoke` cannot derive from
+       * `config.configurable.run_id` — without this, anonymous-run
+       * refs would silently fail registry lookup and the LLM would
+       * never see `[ref: …]` markers for outputs that were registered.
+       */
+      if (runId != null) meta._refScope = runId;
+    }
+    if (unresolved.length > 0) meta._unresolvedRefs = unresolved;
+    return meta;
   }
   /**
@@ -1054,25 +1060,30 @@ export class ToolNode<T = any> extends RunnableCallable<T, T> {
         if (result.status === 'error') {
           contentString = `Error: ${result.errorMessage ?? 'Unknown error'}\n Please fix your mistakes.`;
           /**
-           * Error results bypass registration/annotation but must still
-           * carry the unresolved-refs hint so the LLM can self-correct
-           * when its reference key caused the failure.
+           * Error results bypass registration but stamp the
+           * unresolved-refs hint into `additional_kwargs` so the lazy
+           * annotation transform surfaces it to the LLM at request
+           * time, letting the model self-correct when its reference
+           * key caused the failure. Persisted `content` stays clean.
            */
           const unresolved = unresolvedByCallId.get(result.toolCallId) ?? [];
-          if (unresolved.length > 0) {
-            contentString = this.applyOutputReference(
-              registryRunId,
-              contentString,
-              contentString,
-              undefined,
-              unresolved
-            );
-          }
+          const errorRefMeta =
+            unresolved.length > 0
+              ? this.recordOutputReference(
+                registryRunId,
+                contentString,
+                undefined,
+                unresolved
+              )
+              : undefined;
           toolMessage = new ToolMessage({
             status: 'error',
             content: contentString,
             name: toolName,
             tool_call_id: result.toolCallId,
+            ...(errorRefMeta != null && {
+              additional_kwargs: errorRefMeta as Record<string, unknown>,
+            }),
           });
           if (hasFailureHook) {
@@ -1145,9 +1156,8 @@ export class ToolNode<T = any> extends RunnableCallable<T, T> {
             turn != null
               ? buildReferenceKey(batchIndex, turn)
               : undefined;
-          contentString = this.applyOutputReference(
+          const successRefMeta = this.recordOutputReference(
             registryRunId,
-            contentString,
             registryRaw,
             refKey,
             unresolved
@@ -1159,6 +1169,9 @@ export class ToolNode<T = any> extends RunnableCallable<T, T> {
             content: contentString,
             artifact: result.artifact,
             tool_call_id: result.toolCallId,
+            ...(successRefMeta != null && {
+              additional_kwargs: successRefMeta as Record<string, unknown>,
+            }),
           });
         }

package/src/tools/__tests__/BashExecutor.test.ts CHANGED

@@ -27,6 +27,19 @@ describe('buildBashExecutionToolDescription', () => {
     expect(composed).toContain('{{tool<idx>turn<turn>}}');
   });
+  it('nudges the model toward heredoc when payloads may contain shell metacharacters', () => {
+    /**
+     * Real-world failure observed against ClickHouse + bash piping:
+     * the model emitted `echo '{{ref}}' | wc -c` and the substituted
+     * binary payload contained literal single quotes, breaking the
+     * shell. The model self-corrected to a heredoc on retry. Surface
+     * the heredoc pattern upfront so the round-trip isn't burned to
+     * rediscover it.
+     */
+    expect(BashToolOutputReferencesGuide).toContain('heredoc');
+    expect(BashToolOutputReferencesGuide).toContain('<< \'EOF\'');
+  });
   it('separates base and guide with a blank line', () => {
     const composed = buildBashExecutionToolDescription({
       enableToolOutputReferences: true,

package/src/tools/__tests__/ToolNode.outputReferences.test.ts CHANGED

@@ -7,10 +7,27 @@ import type * as t from '@/types';
 import * as events from '@/utils/events';
 import { HookRegistry } from '@/hooks';
 import { ToolNode } from '../ToolNode';
-import {
-  ToolOutputReferenceRegistry,
-  TOOL_OUTPUT_REF_KEY,
-} from '../toolOutputReferences';
+import { ToolOutputReferenceRegistry } from '../toolOutputReferences';
+/**
+ * Reads the lazy ref-metadata stamped onto a `ToolMessage` by ToolNode.
+ * The metadata replaces the durable `[ref: …]` content mutation that the
+ * earlier eager-annotation design used; the LLM-facing annotation is
+ * applied at request time by `annotateMessagesForLLM` instead.
+ */
+function getRefKey(msg: ToolMessage): string | undefined {
+  return (msg.additional_kwargs as { _refKey?: string } | undefined)?._refKey;
+}
+function getRefScope(msg: ToolMessage): string | undefined {
+  return (msg.additional_kwargs as { _refScope?: string } | undefined)
+    ?._refScope;
+}
+function getUnresolvedRefs(msg: ToolMessage): string[] {
+  return (
+    (msg.additional_kwargs as { _unresolvedRefs?: string[] } | undefined)
+      ?._unresolvedRefs ?? []
+  );
+}
 /**
  * Captures the `command` arg each time the tool is invoked and returns
@@ -98,7 +115,7 @@ describe('ToolNode tool output references', () => {
   });
   describe('enabled', () => {
-    it('annotates string outputs with a [ref: …] prefix line', async () => {
+    it('keeps string outputs clean and stamps the ref key as metadata', async () => {
       const t1 = createEchoTool({
         capturedArgs: [],
         outputs: ['hello world'],
@@ -112,10 +129,19 @@ describe('ToolNode tool output references', () => {
         { id: 'c1', name: 'echo', command: 'run' },
       ]);
-      expect(msg.content).toBe('[ref: tool0turn0]\nhello world');
+      expect(msg.content).toBe('hello world');
+      expect(getRefKey(msg)).toBe('tool0turn0');
+      /**
+       * `_refScope` is what lets `annotateMessagesForLLM` recover the
+       * registry bucket at request time without re-deriving it from
+       * `config.configurable.run_id` (which fails for anonymous
+       * batches). For named runs it equals the run_id.
+       */
+      expect(getRefScope(msg)).toBe('test-run');
+      expect(getUnresolvedRefs(msg)).toEqual([]);
     });
-    it('injects _ref into JSON-object string outputs', async () => {
+    it('keeps JSON-object string outputs unmodified and stamps ref metadata', async () => {
       const t1 = createEchoTool({
         capturedArgs: [],
         outputs: ['{"a":1,"b":"x"}'],
@@ -130,11 +156,13 @@ describe('ToolNode tool output references', () => {
       ]);
       const parsed = JSON.parse(msg.content as string);
-      expect(parsed[TOOL_OUTPUT_REF_KEY]).toBe('tool0turn0');
       expect(parsed.a).toBe(1);
+      expect(parsed.b).toBe('x');
+      expect(parsed._ref).toBeUndefined();
+      expect(getRefKey(msg)).toBe('tool0turn0');
     });
-    it('uses the [ref: …] prefix for JSON array outputs', async () => {
+    it('keeps JSON array outputs unmodified and stamps ref metadata', async () => {
       const t1 = createEchoTool({ capturedArgs: [], outputs: ['[1,2,3]'] });
       const node = new ToolNode({
         tools: [t1],
@@ -145,7 +173,8 @@ describe('ToolNode tool output references', () => {
         { id: 'c1', name: 'echo', command: 'run' },
       ]);
-      expect(msg.content).toBe('[ref: tool0turn0]\n[1,2,3]');
+      expect(msg.content).toBe('[1,2,3]');
+      expect(getRefKey(msg)).toBe('tool0turn0');
     });
     it('registers the un-annotated output for piping into later calls', async () => {
@@ -192,9 +221,9 @@ describe('ToolNode tool output references', () => {
         { id: 'b3c1', name: 'echo', command: '{{tool0turn1}}' },
       ]);
-      expect(m0.content).toContain('[ref: tool0turn0]');
-      expect(m1.content).toContain('[ref: tool0turn1]');
-      expect(m2.content).toContain('[ref: tool0turn2]');
+      expect(getRefKey(m0)).toBe('tool0turn0');
+      expect(getRefKey(m1)).toBe('tool0turn1');
+      expect(getRefKey(m2)).toBe('tool0turn2');
       expect(capturedArgs[2]).toBe('two');
     });
@@ -221,8 +250,8 @@ describe('ToolNode tool output references', () => {
         { id: 'c2', name: 'beta', command: 'b' },
       ]);
-      expect(messages[0].content).toContain('[ref: tool0turn0]');
-      expect(messages[1].content).toContain('[ref: tool1turn0]');
+      expect(getRefKey(messages[0])).toBe('tool0turn0');
+      expect(getRefKey(messages[1])).toBe('tool1turn0');
     });
     it('reports unresolved placeholders after the output', async () => {
@@ -242,7 +271,8 @@ describe('ToolNode tool output references', () => {
       ]);
       expect(capturedArgs[0]).toBe('see {{tool9turn9}}');
-      expect(msg.content).toContain('[unresolved refs: tool9turn9]');
+      expect(msg.content).toBe('done');
+      expect(getUnresolvedRefs(msg)).toEqual(['tool9turn9']);
     });
     it('stores the raw untruncated output in the registry, independent of the LLM-visible truncation', async () => {
@@ -343,10 +373,10 @@ describe('ToolNode tool output references', () => {
         messages: ToolMessage[];
       }>;
-      expect(resA.messages[0].content).toContain('[ref: tool0turn0]');
-      expect(resA.messages[0].content).toContain('output-A');
-      expect(resB.messages[0].content).toContain('[ref: tool0turn1]');
-      expect(resB.messages[0].content).toContain('output-B');
+      expect(getRefKey(resA.messages[0])).toBe('tool0turn0');
+      expect(resA.messages[0].content).toBe('output-A');
+      expect(getRefKey(resB.messages[0])).toBe('tool0turn1');
+      expect(resB.messages[0].content).toBe('output-B');
       const registry = node._unsafeGetToolOutputRegistry()!;
       expect(registry.get('concurrent-run', 'tool0turn0')).toBe('output-A');
@@ -417,7 +447,7 @@ describe('ToolNode tool output references', () => {
         { id: 'c1', name: 'boom', command: 'x' },
       ]);
-      expect((msg.content as string).startsWith('[ref:')).toBe(false);
+      expect(getRefKey(msg)).toBeUndefined();
       expect(
         node._unsafeGetToolOutputRegistry()!.get('test-run', 'tool0turn0')
       ).toBeUndefined();
@@ -445,7 +475,8 @@ describe('ToolNode tool output references', () => {
       ]);
       expect(msg.content).toContain('Error: nope');
-      expect(msg.content).toContain('[unresolved refs: tool9turn9]');
+      expect(msg.content as string).not.toContain('[unresolved refs:');
+      expect(getUnresolvedRefs(msg)).toEqual(['tool9turn9']);
     });
     it('surfaces unresolved refs on tool-returned error ToolMessages', async () => {
@@ -473,8 +504,8 @@ describe('ToolNode tool output references', () => {
         { id: 'c1', name: 'errReturn', command: 'see {{tool9turn9}}' },
       ]);
-      expect(msg.content).toContain('handled failure');
-      expect(msg.content).toContain('[unresolved refs: tool9turn9]');
+      expect(msg.content).toBe('handled failure');
+      expect(getUnresolvedRefs(msg)).toEqual(['tool9turn9']);
     });
     it('isolates state between overlapping runs on the same ToolNode', async () => {
@@ -584,13 +615,26 @@ describe('ToolNode tool output references', () => {
         messages: ToolMessage[];
       }>;
-      // Each invocation produces its own annotated output — neither's
+      // Each invocation stamps its own ref metadata — neither's
       // registered tool0turn0 was clobbered by the other's sync-prefix
       // reset.
-      expect(resA.messages[0].content).toContain('[ref: tool0turn0]');
-      expect(resA.messages[0].content).toContain('out-A');
-      expect(resB.messages[0].content).toContain('[ref: tool0turn0]');
-      expect(resB.messages[0].content).toContain('out-B');
+      expect(getRefKey(resA.messages[0])).toBe('tool0turn0');
+      expect(resA.messages[0].content).toBe('out-A');
+      expect(getRefKey(resB.messages[0])).toBe('tool0turn0');
+      expect(resB.messages[0].content).toBe('out-B');
+      /**
+       * Each anonymous invocation stamps a distinct synthetic
+       * `_refScope` so the lazy annotation transform can later look
+       * up the right registry bucket — `config.configurable.run_id`
+       * is undefined for both calls and would collapse them to the
+       * same `\0anon` bucket without this stamping.
+       */
+      const scopeA = getRefScope(resA.messages[0]);
+      const scopeB = getRefScope(resB.messages[0]);
+      expect(scopeA).toMatch(/^\0anon-\d+$/);
+      expect(scopeB).toMatch(/^\0anon-\d+$/);
+      expect(scopeA).not.toBe(scopeB);
     });
     it('clears state on every batch when run_id is absent (anonymous caller)', async () => {
@@ -616,9 +660,7 @@ describe('ToolNode tool output references', () => {
       })) as { messages: ToolMessage[] };
       expect(capturedArgs[1]).toBe('echo {{tool0turn0}}');
-      expect(result.messages[0].content).toContain(
-        '[unresolved refs: tool0turn0]'
-      );
+      expect(getUnresolvedRefs(result.messages[0])).toEqual(['tool0turn0']);
     });
     it('lets two ToolNodes sharing a registry resolve each other\'s refs', async () => {
@@ -728,7 +770,7 @@ describe('ToolNode tool output references', () => {
       expect(JSON.parse(stepCompletedArgs[1]).command).toBe('echo STORED');
     });
-    it('prepends unresolved-refs warning to non-string ToolMessage content', async () => {
+    it('records unresolved refs as metadata on non-string ToolMessage content (content untouched)', async () => {
       const complexTool = tool(
         async () =>
           new ToolMessage({
@@ -760,12 +802,13 @@ describe('ToolNode tool output references', () => {
       expect(Array.isArray(msg.content)).toBe(true);
       const blocks = msg.content as Array<{ type: string; text?: string }>;
+      // Multi-part content is untouched at storage time — the lazy
+      // transform handles the unresolved-refs warning at request time.
+      expect(blocks).toHaveLength(2);
       expect(blocks[0].type).toBe('text');
-      expect(blocks[0].text).toContain('[unresolved refs: tool9turn9]');
-      // Original blocks follow the warning.
-      expect(blocks[1].type).toBe('text');
-      expect(blocks[1].text).toBe('data');
-      expect(blocks[2].type).toBe('image_url');
+      expect(blocks[0].text).toBe('data');
+      expect(blocks[1].type).toBe('image_url');
+      expect(getUnresolvedRefs(msg)).toEqual(['tool9turn9']);
     });
     it('resets the registry and turn counter when the runId changes', async () => {
@@ -800,10 +843,9 @@ describe('ToolNode tool output references', () => {
       )) as { messages: ToolMessage[] };
       expect(capturedArgs[1]).toBe('echo {{tool0turn0}}');
-      expect(resultB.messages[0].content).toContain('[ref: tool0turn0]');
-      expect(resultB.messages[0].content).toContain(
-        '[unresolved refs: tool0turn0]'
-      );
+      expect(resultB.messages[0].content).toBe('from-run-B');
+      expect(getRefKey(resultB.messages[0])).toBe('tool0turn0');
+      expect(getUnresolvedRefs(resultB.messages[0])).toEqual(['tool0turn0']);
     });
   });
@@ -836,7 +878,7 @@ describe('ToolNode tool output references', () => {
       }) as unknown as StructuredToolInterface;
     }
-    it('annotates the output the host returns', async () => {
+    it('keeps host-returned output clean and stamps the ref key as metadata', async () => {
       const node = new ToolNode({
         tools: [createSchemaStub('echo')],
         eventDrivenMode: true,
@@ -858,7 +900,8 @@ describe('ToolNode tool output references', () => {
         { configurable: { run_id: 'run-host' } }
       )) as { messages: ToolMessage[] };
-      expect(result.messages[0].content).toBe('[ref: tool0turn0]\nhost-output');
+      expect(result.messages[0].content).toBe('host-output');
+      expect(getRefKey(result.messages[0])).toBe('tool0turn0');
       expect(
         node._unsafeGetToolOutputRegistry()!.get('run-host', 'tool0turn0')
       ).toBe('host-output');
@@ -963,9 +1006,10 @@ describe('ToolNode tool output references', () => {
       })) as { messages: ToolMessage[] };
       expect(result.messages[0].content).toContain('Error: host failure');
-      expect(result.messages[0].content).toContain(
-        '[unresolved refs: tool9turn9]'
+      expect(result.messages[0].content as string).not.toContain(
+        '[unresolved refs:'
       );
+      expect(getUnresolvedRefs(result.messages[0])).toEqual(['tool9turn9']);
     });
     it('reports unresolved refs even when the host succeeds', async () => {
@@ -995,9 +1039,8 @@ describe('ToolNode tool output references', () => {
         ],
       })) as { messages: ToolMessage[] };
-      expect(result.messages[0].content).toContain(
-        '[unresolved refs: tool9turn9]'
-      );
+      expect(result.messages[0].content).toBe('done');
+      expect(getUnresolvedRefs(result.messages[0])).toEqual(['tool9turn9']);
     });
     it('registers the post-hook output when PostToolUse replaces it', async () => {
@@ -1035,9 +1078,8 @@ describe('ToolNode tool output references', () => {
         { configurable: { run_id: 'run-posthook' } }
       )) as { messages: ToolMessage[] };
-      expect(result.messages[0].content).toBe(
-        '[ref: tool0turn0]\nhooked-output'
-      );
+      expect(result.messages[0].content).toBe('hooked-output');
+      expect(getRefKey(result.messages[0])).toBe('tool0turn0');
       expect(
         node._unsafeGetToolOutputRegistry()!.get('run-posthook', 'tool0turn0')
       ).toBe('hooked-output');
@@ -1252,9 +1294,10 @@ describe('ToolNode tool output references', () => {
       // call attempts `{{tool0turn0}}` — which points at the direct
       // call running *in the same batch*. Correct behavior: the
       // placeholder stays unresolved (cross-batch only), and the
-      // event args received by the host must carry the literal
-      // template string plus the LLM-visible `[unresolved refs:…]`
-      // trailer.
+      // event args received by the host carry the literal template
+      // string. The unresolved-refs hint is stamped into the resulting
+      // ToolMessage's `additional_kwargs._unresolvedRefs` so the lazy
+      // annotation transform surfaces it to the LLM at request time.
       await node.invoke(
         {
           messages: [