@librechat/agents 3.1.71-dev.0 → 3.1.71-dev.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -36,7 +36,6 @@ import { executeHooks } from '@/hooks';
36
36
  import { Constants, GraphEvents, CODE_EXECUTION_TOOLS } from '@/common';
37
37
  import {
38
38
  buildReferenceKey,
39
- annotateToolOutputWithReference,
40
39
  ToolOutputReferenceRegistry,
41
40
  } from '@/tools/toolOutputReferences';
42
41
 
@@ -429,21 +428,17 @@ export class ToolNode<T = any> extends RunnableCallable<T, T> {
429
428
  const isError = toolMsg.status === 'error';
430
429
  if (isError) {
431
430
  /**
432
- * Error ToolMessages bypass registration/annotation but must
433
- * still carry the unresolved-refs hint so the LLM can
434
- * self-correct when its reference key caused the failure.
431
+ * Error ToolMessages bypass registration but still stamp the
432
+ * unresolved-refs hint into `additional_kwargs` so the lazy
433
+ * annotation transform surfaces it to the LLM, letting the
434
+ * model self-correct when its reference key caused the
435
+ * failure. Persisted `content` stays clean.
435
436
  */
436
- if (
437
- unresolvedRefs.length > 0 &&
438
- typeof toolMsg.content === 'string'
439
- ) {
440
- toolMsg.content = this.applyOutputReference(
441
- runId,
442
- toolMsg.content,
443
- toolMsg.content,
444
- undefined,
445
- unresolvedRefs
446
- );
437
+ if (unresolvedRefs.length > 0) {
438
+ toolMsg.additional_kwargs = {
439
+ ...toolMsg.additional_kwargs,
440
+ _unresolvedRefs: unresolvedRefs,
441
+ };
447
442
  }
448
443
  return toolMsg;
449
444
  }
@@ -454,35 +449,35 @@ export class ToolNode<T = any> extends RunnableCallable<T, T> {
454
449
  rawContent,
455
450
  this.maxToolResultChars
456
451
  );
457
- toolMsg.content = this.applyOutputReference(
452
+ toolMsg.content = llmContent;
453
+ const refMeta = this.recordOutputReference(
458
454
  runId,
459
- llmContent,
460
455
  rawContent,
461
456
  refKey,
462
457
  unresolvedRefs
463
458
  );
459
+ if (refMeta != null) {
460
+ toolMsg.additional_kwargs = {
461
+ ...toolMsg.additional_kwargs,
462
+ ...refMeta,
463
+ };
464
+ }
464
465
  } else {
465
466
  /**
466
467
  * Non-string content (multi-part content blocks — text +
467
468
  * image). Known limitation: we cannot register under a
468
469
  * reference key because there's no canonical serialized
469
470
  * form. Warn once per tool per run when the caller
470
- * intended to register.
471
- *
472
- * Still surface unresolved-ref warnings so the LLM gets
473
- * the self-correction signal that the string and error
474
- * paths already emit. Prepended as a leading text block
475
- * to keep the original content ordering intact.
471
+ * intended to register. The unresolved-refs hint is still
472
+ * stamped as metadata; the lazy transform prepends a text
473
+ * block at request time so the LLM gets the self-correction
474
+ * signal.
476
475
  */
477
- if (unresolvedRefs.length > 0 && Array.isArray(toolMsg.content)) {
478
- const warningBlock = {
479
- type: 'text',
480
- text: `[unresolved refs: ${unresolvedRefs.join(', ')}]`,
476
+ if (unresolvedRefs.length > 0) {
477
+ toolMsg.additional_kwargs = {
478
+ ...toolMsg.additional_kwargs,
479
+ _unresolvedRefs: unresolvedRefs,
481
480
  };
482
- toolMsg.content = [
483
- warningBlock,
484
- ...toolMsg.content,
485
- ] as typeof toolMsg.content;
486
481
  }
487
482
  if (
488
483
  refKey != null &&
@@ -504,9 +499,8 @@ export class ToolNode<T = any> extends RunnableCallable<T, T> {
504
499
  rawContent,
505
500
  this.maxToolResultChars
506
501
  );
507
- const content = this.applyOutputReference(
502
+ const refMeta = this.recordOutputReference(
508
503
  runId,
509
- truncated,
510
504
  rawContent,
511
505
  refKey,
512
506
  unresolvedRefs
@@ -514,8 +508,11 @@ export class ToolNode<T = any> extends RunnableCallable<T, T> {
514
508
  return new ToolMessage({
515
509
  status: 'success',
516
510
  name: tool.name,
517
- content,
511
+ content: truncated,
518
512
  tool_call_id: call.id!,
513
+ ...(refMeta != null && {
514
+ additional_kwargs: refMeta as Record<string, unknown>,
515
+ }),
519
516
  });
520
517
  } catch (_e: unknown) {
521
518
  const e = _e as Error;
@@ -561,64 +558,73 @@ export class ToolNode<T = any> extends RunnableCallable<T, T> {
561
558
  });
562
559
  }
563
560
  }
564
- let errorContent = `Error: ${e.message}\n Please fix your mistakes.`;
565
- if (unresolvedRefs.length > 0) {
566
- errorContent = this.applyOutputReference(
567
- runId,
568
- errorContent,
569
- errorContent,
570
- undefined,
571
- unresolvedRefs
572
- );
573
- }
561
+ const errorContent = `Error: ${e.message}\n Please fix your mistakes.`;
562
+ const refMeta =
563
+ unresolvedRefs.length > 0
564
+ ? this.recordOutputReference(
565
+ runId,
566
+ errorContent,
567
+ undefined,
568
+ unresolvedRefs
569
+ )
570
+ : undefined;
574
571
  return new ToolMessage({
575
572
  status: 'error',
576
573
  content: errorContent,
577
574
  name: call.name,
578
575
  tool_call_id: call.id ?? '',
576
+ ...(refMeta != null && {
577
+ additional_kwargs: refMeta as Record<string, unknown>,
578
+ }),
579
579
  });
580
580
  }
581
581
  }
582
582
 
583
583
  /**
584
- * Finalizes the LLM-visible content for a tool call and (when a
585
- * `refKey` is provided) registers the full, raw output under that
586
- * key.
584
+ * Registers the full, raw output under `refKey` (when provided) and
585
+ * builds the per-message ref metadata stamped onto the resulting
586
+ * `ToolMessage.additional_kwargs`. The metadata is read at LLM-
587
+ * request time by `annotateMessagesForLLM` to produce a transient
588
+ * annotated copy of the message — the persisted `content` itself
589
+ * stays clean.
587
590
  *
588
- * @param llmContent The content string the LLM will see. This is
589
- * the already-truncated, post-hook view; the annotation is
590
- * applied on top of it.
591
591
  * @param registryContent The full, untruncated output to store in
592
592
  * the registry so `{{tool<i>turn<n>}}` substitutions deliver the
593
593
  * complete payload. Ignored when `refKey` is undefined.
594
594
  * @param refKey Precomputed `tool<i>turn<n>` key, or undefined when
595
595
  * the output is not to be registered (errors, disabled feature,
596
596
  * unavailable batch/turn).
597
- * @param unresolved Placeholder keys that did not resolve; appended
598
- * as `[unresolved refs: …]` so the LLM can self-correct.
599
- *
600
- * `refKey` is passed in (rather than built from `this.currentTurn`)
601
- * so parallel `invoke()` calls on the same ToolNode cannot race on
602
- * the shared turn field.
597
+ * @param unresolved Placeholder keys that did not resolve; surfaced
598
+ * to the LLM lazily so it can self-correct.
599
+ * @returns A `ToolMessageRefMetadata` object when there is anything
600
+ * to stamp, otherwise `undefined`.
603
601
  */
604
- private applyOutputReference(
602
+ private recordOutputReference(
605
603
  runId: string | undefined,
606
- llmContent: string,
607
604
  registryContent: string,
608
605
  refKey: string | undefined,
609
606
  unresolved: string[]
610
- ): string {
607
+ ): t.ToolMessageRefMetadata | undefined {
611
608
  if (this.toolOutputRegistry != null && refKey != null) {
612
609
  this.toolOutputRegistry.set(runId, refKey, registryContent);
613
610
  }
614
- /**
615
- * `annotateToolOutputWithReference` handles both the ref-key and
616
- * unresolved-refs cases together so JSON-object outputs stay
617
- * parseable: unresolved refs land in an `_unresolved_refs` field
618
- * instead of as a trailing text line that would break
619
- * `JSON.parse` for downstream consumers.
620
- */
621
- return annotateToolOutputWithReference(llmContent, refKey, unresolved);
611
+ if (refKey == null && unresolved.length === 0) return undefined;
612
+ const meta: t.ToolMessageRefMetadata = {};
613
+ if (refKey != null) {
614
+ meta._refKey = refKey;
615
+ /**
616
+ * Stamp the registry scope alongside the key so the lazy
617
+ * annotation transform can look up the right bucket. Anonymous
618
+ * invocations get a synthetic per-batch scope (`\0anon-<n>`)
619
+ * that `attemptInvoke` cannot derive from
620
+ * `config.configurable.run_id` — without this, anonymous-run
621
+ * refs would silently fail registry lookup and the LLM would
622
+ * never see `[ref: …]` markers for outputs that were registered.
623
+ */
624
+ if (runId != null) meta._refScope = runId;
625
+ }
626
+ if (unresolved.length > 0) meta._unresolvedRefs = unresolved;
627
+ return meta;
622
628
  }
623
629
 
624
630
  /**
@@ -1054,25 +1060,30 @@ export class ToolNode<T = any> extends RunnableCallable<T, T> {
1054
1060
  if (result.status === 'error') {
1055
1061
  contentString = `Error: ${result.errorMessage ?? 'Unknown error'}\n Please fix your mistakes.`;
1056
1062
  /**
1057
- * Error results bypass registration/annotation but must still
1058
- * carry the unresolved-refs hint so the LLM can self-correct
1059
- * when its reference key caused the failure.
1063
+ * Error results bypass registration but stamp the
1064
+ * unresolved-refs hint into `additional_kwargs` so the lazy
1065
+ * annotation transform surfaces it to the LLM at request
1066
+ * time, letting the model self-correct when its reference
1067
+ * key caused the failure. Persisted `content` stays clean.
1060
1068
  */
1061
1069
  const unresolved = unresolvedByCallId.get(result.toolCallId) ?? [];
1062
- if (unresolved.length > 0) {
1063
- contentString = this.applyOutputReference(
1064
- registryRunId,
1065
- contentString,
1066
- contentString,
1067
- undefined,
1068
- unresolved
1069
- );
1070
- }
1070
+ const errorRefMeta =
1071
+ unresolved.length > 0
1072
+ ? this.recordOutputReference(
1073
+ registryRunId,
1074
+ contentString,
1075
+ undefined,
1076
+ unresolved
1077
+ )
1078
+ : undefined;
1071
1079
  toolMessage = new ToolMessage({
1072
1080
  status: 'error',
1073
1081
  content: contentString,
1074
1082
  name: toolName,
1075
1083
  tool_call_id: result.toolCallId,
1084
+ ...(errorRefMeta != null && {
1085
+ additional_kwargs: errorRefMeta as Record<string, unknown>,
1086
+ }),
1076
1087
  });
1077
1088
 
1078
1089
  if (hasFailureHook) {
@@ -1145,9 +1156,8 @@ export class ToolNode<T = any> extends RunnableCallable<T, T> {
1145
1156
  turn != null
1146
1157
  ? buildReferenceKey(batchIndex, turn)
1147
1158
  : undefined;
1148
- contentString = this.applyOutputReference(
1159
+ const successRefMeta = this.recordOutputReference(
1149
1160
  registryRunId,
1150
- contentString,
1151
1161
  registryRaw,
1152
1162
  refKey,
1153
1163
  unresolved
@@ -1159,6 +1169,9 @@ export class ToolNode<T = any> extends RunnableCallable<T, T> {
1159
1169
  content: contentString,
1160
1170
  artifact: result.artifact,
1161
1171
  tool_call_id: result.toolCallId,
1172
+ ...(successRefMeta != null && {
1173
+ additional_kwargs: successRefMeta as Record<string, unknown>,
1174
+ }),
1162
1175
  });
1163
1176
  }
1164
1177
 
@@ -7,10 +7,27 @@ import type * as t from '@/types';
7
7
  import * as events from '@/utils/events';
8
8
  import { HookRegistry } from '@/hooks';
9
9
  import { ToolNode } from '../ToolNode';
10
- import {
11
- ToolOutputReferenceRegistry,
12
- TOOL_OUTPUT_REF_KEY,
13
- } from '../toolOutputReferences';
10
+ import { ToolOutputReferenceRegistry } from '../toolOutputReferences';
11
+
12
+ /**
13
+ * Reads the lazy ref-metadata stamped onto a `ToolMessage` by ToolNode.
14
+ * The metadata replaces the durable `[ref: …]` content mutation that the
15
+ * earlier eager-annotation design used; the LLM-facing annotation is
16
+ * applied at request time by `annotateMessagesForLLM` instead.
17
+ */
18
+ function getRefKey(msg: ToolMessage): string | undefined {
19
+ return (msg.additional_kwargs as { _refKey?: string } | undefined)?._refKey;
20
+ }
21
+ function getRefScope(msg: ToolMessage): string | undefined {
22
+ return (msg.additional_kwargs as { _refScope?: string } | undefined)
23
+ ?._refScope;
24
+ }
25
+ function getUnresolvedRefs(msg: ToolMessage): string[] {
26
+ return (
27
+ (msg.additional_kwargs as { _unresolvedRefs?: string[] } | undefined)
28
+ ?._unresolvedRefs ?? []
29
+ );
30
+ }
14
31
 
15
32
  /**
16
33
  * Captures the `command` arg each time the tool is invoked and returns
@@ -98,7 +115,7 @@ describe('ToolNode tool output references', () => {
98
115
  });
99
116
 
100
117
  describe('enabled', () => {
101
- it('annotates string outputs with a [ref: …] prefix line', async () => {
118
+ it('keeps string outputs clean and stamps the ref key as metadata', async () => {
102
119
  const t1 = createEchoTool({
103
120
  capturedArgs: [],
104
121
  outputs: ['hello world'],
@@ -112,10 +129,19 @@ describe('ToolNode tool output references', () => {
112
129
  { id: 'c1', name: 'echo', command: 'run' },
113
130
  ]);
114
131
 
115
- expect(msg.content).toBe('[ref: tool0turn0]\nhello world');
132
+ expect(msg.content).toBe('hello world');
133
+ expect(getRefKey(msg)).toBe('tool0turn0');
134
+ /**
135
+ * `_refScope` is what lets `annotateMessagesForLLM` recover the
136
+ * registry bucket at request time without re-deriving it from
137
+ * `config.configurable.run_id` (which fails for anonymous
138
+ * batches). For named runs it equals the run_id.
139
+ */
140
+ expect(getRefScope(msg)).toBe('test-run');
141
+ expect(getUnresolvedRefs(msg)).toEqual([]);
116
142
  });
117
143
 
118
- it('injects _ref into JSON-object string outputs', async () => {
144
+ it('keeps JSON-object string outputs unmodified and stamps ref metadata', async () => {
119
145
  const t1 = createEchoTool({
120
146
  capturedArgs: [],
121
147
  outputs: ['{"a":1,"b":"x"}'],
@@ -130,11 +156,13 @@ describe('ToolNode tool output references', () => {
130
156
  ]);
131
157
 
132
158
  const parsed = JSON.parse(msg.content as string);
133
- expect(parsed[TOOL_OUTPUT_REF_KEY]).toBe('tool0turn0');
134
159
  expect(parsed.a).toBe(1);
160
+ expect(parsed.b).toBe('x');
161
+ expect(parsed._ref).toBeUndefined();
162
+ expect(getRefKey(msg)).toBe('tool0turn0');
135
163
  });
136
164
 
137
- it('uses the [ref: …] prefix for JSON array outputs', async () => {
165
+ it('keeps JSON array outputs unmodified and stamps ref metadata', async () => {
138
166
  const t1 = createEchoTool({ capturedArgs: [], outputs: ['[1,2,3]'] });
139
167
  const node = new ToolNode({
140
168
  tools: [t1],
@@ -145,7 +173,8 @@ describe('ToolNode tool output references', () => {
145
173
  { id: 'c1', name: 'echo', command: 'run' },
146
174
  ]);
147
175
 
148
- expect(msg.content).toBe('[ref: tool0turn0]\n[1,2,3]');
176
+ expect(msg.content).toBe('[1,2,3]');
177
+ expect(getRefKey(msg)).toBe('tool0turn0');
149
178
  });
150
179
 
151
180
  it('registers the un-annotated output for piping into later calls', async () => {
@@ -192,9 +221,9 @@ describe('ToolNode tool output references', () => {
192
221
  { id: 'b3c1', name: 'echo', command: '{{tool0turn1}}' },
193
222
  ]);
194
223
 
195
- expect(m0.content).toContain('[ref: tool0turn0]');
196
- expect(m1.content).toContain('[ref: tool0turn1]');
197
- expect(m2.content).toContain('[ref: tool0turn2]');
224
+ expect(getRefKey(m0)).toBe('tool0turn0');
225
+ expect(getRefKey(m1)).toBe('tool0turn1');
226
+ expect(getRefKey(m2)).toBe('tool0turn2');
198
227
  expect(capturedArgs[2]).toBe('two');
199
228
  });
200
229
 
@@ -221,8 +250,8 @@ describe('ToolNode tool output references', () => {
221
250
  { id: 'c2', name: 'beta', command: 'b' },
222
251
  ]);
223
252
 
224
- expect(messages[0].content).toContain('[ref: tool0turn0]');
225
- expect(messages[1].content).toContain('[ref: tool1turn0]');
253
+ expect(getRefKey(messages[0])).toBe('tool0turn0');
254
+ expect(getRefKey(messages[1])).toBe('tool1turn0');
226
255
  });
227
256
 
228
257
  it('reports unresolved placeholders after the output', async () => {
@@ -242,7 +271,8 @@ describe('ToolNode tool output references', () => {
242
271
  ]);
243
272
 
244
273
  expect(capturedArgs[0]).toBe('see {{tool9turn9}}');
245
- expect(msg.content).toContain('[unresolved refs: tool9turn9]');
274
+ expect(msg.content).toBe('done');
275
+ expect(getUnresolvedRefs(msg)).toEqual(['tool9turn9']);
246
276
  });
247
277
 
248
278
  it('stores the raw untruncated output in the registry, independent of the LLM-visible truncation', async () => {
@@ -343,10 +373,10 @@ describe('ToolNode tool output references', () => {
343
373
  messages: ToolMessage[];
344
374
  }>;
345
375
 
346
- expect(resA.messages[0].content).toContain('[ref: tool0turn0]');
347
- expect(resA.messages[0].content).toContain('output-A');
348
- expect(resB.messages[0].content).toContain('[ref: tool0turn1]');
349
- expect(resB.messages[0].content).toContain('output-B');
376
+ expect(getRefKey(resA.messages[0])).toBe('tool0turn0');
377
+ expect(resA.messages[0].content).toBe('output-A');
378
+ expect(getRefKey(resB.messages[0])).toBe('tool0turn1');
379
+ expect(resB.messages[0].content).toBe('output-B');
350
380
 
351
381
  const registry = node._unsafeGetToolOutputRegistry()!;
352
382
  expect(registry.get('concurrent-run', 'tool0turn0')).toBe('output-A');
@@ -417,7 +447,7 @@ describe('ToolNode tool output references', () => {
417
447
  { id: 'c1', name: 'boom', command: 'x' },
418
448
  ]);
419
449
 
420
- expect((msg.content as string).startsWith('[ref:')).toBe(false);
450
+ expect(getRefKey(msg)).toBeUndefined();
421
451
  expect(
422
452
  node._unsafeGetToolOutputRegistry()!.get('test-run', 'tool0turn0')
423
453
  ).toBeUndefined();
@@ -445,7 +475,8 @@ describe('ToolNode tool output references', () => {
445
475
  ]);
446
476
 
447
477
  expect(msg.content).toContain('Error: nope');
448
- expect(msg.content).toContain('[unresolved refs: tool9turn9]');
478
+ expect(msg.content as string).not.toContain('[unresolved refs:');
479
+ expect(getUnresolvedRefs(msg)).toEqual(['tool9turn9']);
449
480
  });
450
481
 
451
482
  it('surfaces unresolved refs on tool-returned error ToolMessages', async () => {
@@ -473,8 +504,8 @@ describe('ToolNode tool output references', () => {
473
504
  { id: 'c1', name: 'errReturn', command: 'see {{tool9turn9}}' },
474
505
  ]);
475
506
 
476
- expect(msg.content).toContain('handled failure');
477
- expect(msg.content).toContain('[unresolved refs: tool9turn9]');
507
+ expect(msg.content).toBe('handled failure');
508
+ expect(getUnresolvedRefs(msg)).toEqual(['tool9turn9']);
478
509
  });
479
510
 
480
511
  it('isolates state between overlapping runs on the same ToolNode', async () => {
@@ -584,13 +615,26 @@ describe('ToolNode tool output references', () => {
584
615
  messages: ToolMessage[];
585
616
  }>;
586
617
 
587
- // Each invocation produces its own annotated output — neither's
618
+ // Each invocation stamps its own ref metadata — neither's
588
619
  // registered tool0turn0 was clobbered by the other's sync-prefix
589
620
  // reset.
590
- expect(resA.messages[0].content).toContain('[ref: tool0turn0]');
591
- expect(resA.messages[0].content).toContain('out-A');
592
- expect(resB.messages[0].content).toContain('[ref: tool0turn0]');
593
- expect(resB.messages[0].content).toContain('out-B');
621
+ expect(getRefKey(resA.messages[0])).toBe('tool0turn0');
622
+ expect(resA.messages[0].content).toBe('out-A');
623
+ expect(getRefKey(resB.messages[0])).toBe('tool0turn0');
624
+ expect(resB.messages[0].content).toBe('out-B');
625
+
626
+ /**
627
+ * Each anonymous invocation stamps a distinct synthetic
628
+ * `_refScope` so the lazy annotation transform can later look
629
+ * up the right registry bucket — `config.configurable.run_id`
630
+ * is undefined for both calls and would collapse them to the
631
+ * same `\0anon` bucket without this stamping.
632
+ */
633
+ const scopeA = getRefScope(resA.messages[0]);
634
+ const scopeB = getRefScope(resB.messages[0]);
635
+ expect(scopeA).toMatch(/^\0anon-\d+$/);
636
+ expect(scopeB).toMatch(/^\0anon-\d+$/);
637
+ expect(scopeA).not.toBe(scopeB);
594
638
  });
595
639
 
596
640
  it('clears state on every batch when run_id is absent (anonymous caller)', async () => {
@@ -616,9 +660,7 @@ describe('ToolNode tool output references', () => {
616
660
  })) as { messages: ToolMessage[] };
617
661
 
618
662
  expect(capturedArgs[1]).toBe('echo {{tool0turn0}}');
619
- expect(result.messages[0].content).toContain(
620
- '[unresolved refs: tool0turn0]'
621
- );
663
+ expect(getUnresolvedRefs(result.messages[0])).toEqual(['tool0turn0']);
622
664
  });
623
665
 
624
666
  it('lets two ToolNodes sharing a registry resolve each other\'s refs', async () => {
@@ -728,7 +770,7 @@ describe('ToolNode tool output references', () => {
728
770
  expect(JSON.parse(stepCompletedArgs[1]).command).toBe('echo STORED');
729
771
  });
730
772
 
731
- it('prepends unresolved-refs warning to non-string ToolMessage content', async () => {
773
+ it('records unresolved refs as metadata on non-string ToolMessage content (content untouched)', async () => {
732
774
  const complexTool = tool(
733
775
  async () =>
734
776
  new ToolMessage({
@@ -760,12 +802,13 @@ describe('ToolNode tool output references', () => {
760
802
 
761
803
  expect(Array.isArray(msg.content)).toBe(true);
762
804
  const blocks = msg.content as Array<{ type: string; text?: string }>;
805
+ // Multi-part content is untouched at storage time — the lazy
806
+ // transform handles the unresolved-refs warning at request time.
807
+ expect(blocks).toHaveLength(2);
763
808
  expect(blocks[0].type).toBe('text');
764
- expect(blocks[0].text).toContain('[unresolved refs: tool9turn9]');
765
- // Original blocks follow the warning.
766
- expect(blocks[1].type).toBe('text');
767
- expect(blocks[1].text).toBe('data');
768
- expect(blocks[2].type).toBe('image_url');
809
+ expect(blocks[0].text).toBe('data');
810
+ expect(blocks[1].type).toBe('image_url');
811
+ expect(getUnresolvedRefs(msg)).toEqual(['tool9turn9']);
769
812
  });
770
813
 
771
814
  it('resets the registry and turn counter when the runId changes', async () => {
@@ -800,10 +843,9 @@ describe('ToolNode tool output references', () => {
800
843
  )) as { messages: ToolMessage[] };
801
844
 
802
845
  expect(capturedArgs[1]).toBe('echo {{tool0turn0}}');
803
- expect(resultB.messages[0].content).toContain('[ref: tool0turn0]');
804
- expect(resultB.messages[0].content).toContain(
805
- '[unresolved refs: tool0turn0]'
806
- );
846
+ expect(resultB.messages[0].content).toBe('from-run-B');
847
+ expect(getRefKey(resultB.messages[0])).toBe('tool0turn0');
848
+ expect(getUnresolvedRefs(resultB.messages[0])).toEqual(['tool0turn0']);
807
849
  });
808
850
  });
809
851
 
@@ -836,7 +878,7 @@ describe('ToolNode tool output references', () => {
836
878
  }) as unknown as StructuredToolInterface;
837
879
  }
838
880
 
839
- it('annotates the output the host returns', async () => {
881
+ it('keeps host-returned output clean and stamps the ref key as metadata', async () => {
840
882
  const node = new ToolNode({
841
883
  tools: [createSchemaStub('echo')],
842
884
  eventDrivenMode: true,
@@ -858,7 +900,8 @@ describe('ToolNode tool output references', () => {
858
900
  { configurable: { run_id: 'run-host' } }
859
901
  )) as { messages: ToolMessage[] };
860
902
 
861
- expect(result.messages[0].content).toBe('[ref: tool0turn0]\nhost-output');
903
+ expect(result.messages[0].content).toBe('host-output');
904
+ expect(getRefKey(result.messages[0])).toBe('tool0turn0');
862
905
  expect(
863
906
  node._unsafeGetToolOutputRegistry()!.get('run-host', 'tool0turn0')
864
907
  ).toBe('host-output');
@@ -963,9 +1006,10 @@ describe('ToolNode tool output references', () => {
963
1006
  })) as { messages: ToolMessage[] };
964
1007
 
965
1008
  expect(result.messages[0].content).toContain('Error: host failure');
966
- expect(result.messages[0].content).toContain(
967
- '[unresolved refs: tool9turn9]'
1009
+ expect(result.messages[0].content as string).not.toContain(
1010
+ '[unresolved refs:'
968
1011
  );
1012
+ expect(getUnresolvedRefs(result.messages[0])).toEqual(['tool9turn9']);
969
1013
  });
970
1014
 
971
1015
  it('reports unresolved refs even when the host succeeds', async () => {
@@ -995,9 +1039,8 @@ describe('ToolNode tool output references', () => {
995
1039
  ],
996
1040
  })) as { messages: ToolMessage[] };
997
1041
 
998
- expect(result.messages[0].content).toContain(
999
- '[unresolved refs: tool9turn9]'
1000
- );
1042
+ expect(result.messages[0].content).toBe('done');
1043
+ expect(getUnresolvedRefs(result.messages[0])).toEqual(['tool9turn9']);
1001
1044
  });
1002
1045
 
1003
1046
  it('registers the post-hook output when PostToolUse replaces it', async () => {
@@ -1035,9 +1078,8 @@ describe('ToolNode tool output references', () => {
1035
1078
  { configurable: { run_id: 'run-posthook' } }
1036
1079
  )) as { messages: ToolMessage[] };
1037
1080
 
1038
- expect(result.messages[0].content).toBe(
1039
- '[ref: tool0turn0]\nhooked-output'
1040
- );
1081
+ expect(result.messages[0].content).toBe('hooked-output');
1082
+ expect(getRefKey(result.messages[0])).toBe('tool0turn0');
1041
1083
  expect(
1042
1084
  node._unsafeGetToolOutputRegistry()!.get('run-posthook', 'tool0turn0')
1043
1085
  ).toBe('hooked-output');
@@ -1252,9 +1294,10 @@ describe('ToolNode tool output references', () => {
1252
1294
  // call attempts `{{tool0turn0}}` — which points at the direct
1253
1295
  // call running *in the same batch*. Correct behavior: the
1254
1296
  // placeholder stays unresolved (cross-batch only), and the
1255
- // event args received by the host must carry the literal
1256
- // template string plus the LLM-visible `[unresolved refs:…]`
1257
- // trailer.
1297
+ // event args received by the host carry the literal template
1298
+ // string. The unresolved-refs hint is stamped into the resulting
1299
+ // ToolMessage's `additional_kwargs._unresolvedRefs` so the lazy
1300
+ // annotation transform surfaces it to the LLM at request time.
1258
1301
  await node.invoke(
1259
1302
  {
1260
1303
  messages: [