@librechat/agents 3.1.72 → 3.1.74

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. package/dist/cjs/agents/AgentContext.cjs +62 -20
  2. package/dist/cjs/agents/AgentContext.cjs.map +1 -1
  3. package/dist/cjs/graphs/Graph.cjs +11 -1
  4. package/dist/cjs/graphs/Graph.cjs.map +1 -1
  5. package/dist/cjs/main.cjs +1 -0
  6. package/dist/cjs/main.cjs.map +1 -1
  7. package/dist/cjs/messages/format.cjs +27 -1
  8. package/dist/cjs/messages/format.cjs.map +1 -1
  9. package/dist/cjs/tools/BashExecutor.cjs +21 -11
  10. package/dist/cjs/tools/BashExecutor.cjs.map +1 -1
  11. package/dist/cjs/tools/CodeExecutor.cjs +37 -10
  12. package/dist/cjs/tools/CodeExecutor.cjs.map +1 -1
  13. package/dist/cjs/tools/ProgrammaticToolCalling.cjs +16 -11
  14. package/dist/cjs/tools/ProgrammaticToolCalling.cjs.map +1 -1
  15. package/dist/cjs/tools/ToolNode.cjs +21 -2
  16. package/dist/cjs/tools/ToolNode.cjs.map +1 -1
  17. package/dist/esm/agents/AgentContext.mjs +62 -20
  18. package/dist/esm/agents/AgentContext.mjs.map +1 -1
  19. package/dist/esm/graphs/Graph.mjs +11 -1
  20. package/dist/esm/graphs/Graph.mjs.map +1 -1
  21. package/dist/esm/main.mjs +1 -1
  22. package/dist/esm/messages/format.mjs +27 -1
  23. package/dist/esm/messages/format.mjs.map +1 -1
  24. package/dist/esm/tools/BashExecutor.mjs +22 -12
  25. package/dist/esm/tools/BashExecutor.mjs.map +1 -1
  26. package/dist/esm/tools/CodeExecutor.mjs +37 -11
  27. package/dist/esm/tools/CodeExecutor.mjs.map +1 -1
  28. package/dist/esm/tools/ProgrammaticToolCalling.mjs +17 -12
  29. package/dist/esm/tools/ProgrammaticToolCalling.mjs.map +1 -1
  30. package/dist/esm/tools/ToolNode.mjs +21 -2
  31. package/dist/esm/tools/ToolNode.mjs.map +1 -1
  32. package/dist/types/agents/AgentContext.d.ts +15 -0
  33. package/dist/types/messages/format.d.ts +11 -1
  34. package/dist/types/tools/CodeExecutor.d.ts +6 -0
  35. package/dist/types/types/tools.d.ts +9 -0
  36. package/package.json +1 -1
  37. package/src/agents/AgentContext.ts +66 -27
  38. package/src/agents/__tests__/AgentContext.test.ts +178 -0
  39. package/src/graphs/Graph.ts +12 -1
  40. package/src/messages/ensureThinkingBlock.test.ts +167 -0
  41. package/src/messages/format.ts +29 -1
  42. package/src/tools/BashExecutor.ts +37 -13
  43. package/src/tools/CodeExecutor.ts +55 -11
  44. package/src/tools/ProgrammaticToolCalling.ts +29 -14
  45. package/src/tools/ToolNode.ts +21 -2
  46. package/src/tools/__tests__/ProgrammaticToolCalling.test.ts +60 -0
  47. package/src/tools/__tests__/ToolNode.session.test.ts +124 -0
  48. package/src/types/tools.ts +9 -0
@@ -681,10 +681,47 @@ export class AgentContext {
681
681
  if (!this.toolDefinitions) {
682
682
  return [];
683
683
  }
684
- return this.toolDefinitions.filter(
685
- (def) =>
684
+ /**
685
+ * Mirror `getEventDrivenToolsForBinding`'s gate: a definition is only
686
+ * bound to the model when its `allowed_callers` include `'direct'` and
687
+ * (if deferred) it has been discovered. Filtering by `defer_loading`
688
+ * alone left programmatic-only definitions counted in
689
+ * `toolSchemaTokens` even though they were never bound.
690
+ */
691
+ return this.toolDefinitions.filter((def) => {
692
+ const allowedCallers = def.allowed_callers ?? ['direct'];
693
+ if (!allowedCallers.includes('direct')) {
694
+ return false;
695
+ }
696
+ return (
686
697
  def.defer_loading !== true || this.discoveredToolNames.has(def.name)
687
- );
698
+ );
699
+ });
700
+ }
701
+
702
+ /**
703
+ * Single source of truth for "which entries of `this.tools` should be
704
+ * treated as actually bound". Callers:
705
+ * - `getToolsForBinding` (non-event-driven branch)
706
+ * - `getEventDrivenToolsForBinding` (appends instance tools alongside
707
+ * schema-only definitions)
708
+ * - `calculateInstructionTokens` (counts schema bytes for accounting)
709
+ *
710
+ * In event-driven mode (`toolDefinitions` present) instance tools are
711
+ * appended unfiltered; outside event-driven mode they pass through
712
+ * `filterToolsForBinding`. Centralizing the decision here prevents the
713
+ * accounting/binding paths from drifting apart, which was the root
714
+ * cause of the original miscount.
715
+ */
716
+ private getEffectiveInstanceTools(): t.GraphTools | undefined {
717
+ if (!this.tools) {
718
+ return undefined;
719
+ }
720
+ const isEventDriven = (this.toolDefinitions?.length ?? 0) > 0;
721
+ if (isEventDriven || !this.toolRegistry) {
722
+ return this.tools;
723
+ }
724
+ return this.filterToolsForBinding(this.tools);
688
725
  }
689
726
 
690
727
  /**
@@ -703,9 +740,17 @@ export class AgentContext {
703
740
  * populated after `fromConfig()` kicks off the initial calculation, so
704
741
  * callers that mutate `graphTools` must re-trigger this method to
705
742
  * refresh `toolSchemaTokens`.
743
+ *
744
+ * Use `getEffectiveInstanceTools()` so accounting reflects exactly the
745
+ * subset that `getToolsForBinding` would emit — preventing the
746
+ * worst-case-ceiling miscount that triggered spurious `empty_messages`
747
+ * preflight rejections at low `maxContextTokens`. Deferred and
748
+ * non-`'direct'` `toolDefinitions` are excluded by
749
+ * `getActiveToolDefinitions()` below.
706
750
  */
707
751
  const instanceTools: t.GraphTools = [
708
- ...((this.tools as t.GenericTool[] | undefined) ?? []),
752
+ ...((this.getEffectiveInstanceTools() as t.GenericTool[] | undefined) ??
753
+ []),
709
754
  ...((this.graphTools as t.GenericTool[] | undefined) ?? []),
710
755
  ];
711
756
 
@@ -900,8 +945,16 @@ export class AgentContext {
900
945
  */
901
946
  getTokenBudgetBreakdown(messages?: BaseMessage[]): t.TokenBudgetBreakdown {
902
947
  const maxContextTokens = this.maxContextTokens ?? 0;
903
- const toolCount =
904
- (this.tools?.length ?? 0) + this.getActiveToolDefinitions().length;
948
+ /**
949
+ * Derive `toolCount` from `getToolsForBinding()` so the diagnostic stays
950
+ * aligned with what is actually bound to the model — and with what
951
+ * `calculateInstructionTokens` counts into `toolSchemaTokens`. Using raw
952
+ * `this.tools.length` would inflate the count whenever the registry
953
+ * marks instance tools as deferred-undiscovered or non-`'direct'`,
954
+ * producing the same misleading "N tools" diagnostic this fix is meant
955
+ * to eliminate.
956
+ */
957
+ const toolCount = this.getToolsForBinding()?.length ?? 0;
905
958
  const messageCount = messages?.length ?? 0;
906
959
 
907
960
  let messageTokens = 0;
@@ -1014,10 +1067,7 @@ export class AgentContext {
1014
1067
  return this.getEventDrivenToolsForBinding();
1015
1068
  }
1016
1069
 
1017
- const filtered =
1018
- !this.tools || !this.toolRegistry
1019
- ? this.tools
1020
- : this.filterToolsForBinding(this.tools);
1070
+ const filtered = this.getEffectiveInstanceTools();
1021
1071
 
1022
1072
  if (this.graphTools && this.graphTools.length > 0) {
1023
1073
  return [...(filtered ?? []), ...this.graphTools];
@@ -1032,21 +1082,9 @@ export class AgentContext {
1032
1082
  return this.graphTools ?? [];
1033
1083
  }
1034
1084
 
1035
- const defsToInclude = this.toolDefinitions.filter((def) => {
1036
- const allowedCallers = def.allowed_callers ?? ['direct'];
1037
- if (!allowedCallers.includes('direct')) {
1038
- return false;
1039
- }
1040
- if (
1041
- def.defer_loading === true &&
1042
- !this.discoveredToolNames.has(def.name)
1043
- ) {
1044
- return false;
1045
- }
1046
- return true;
1047
- });
1048
-
1049
- const schemaTools = createSchemaOnlyTools(defsToInclude) as t.GraphTools;
1085
+ const schemaTools = createSchemaOnlyTools(
1086
+ this.getActiveToolDefinitions()
1087
+ ) as t.GraphTools;
1050
1088
 
1051
1089
  const allTools = [...schemaTools];
1052
1090
 
@@ -1054,8 +1092,9 @@ export class AgentContext {
1054
1092
  allTools.push(...this.graphTools);
1055
1093
  }
1056
1094
 
1057
- if (this.tools && this.tools.length > 0) {
1058
- allTools.push(...this.tools);
1095
+ const instanceTools = this.getEffectiveInstanceTools();
1096
+ if (instanceTools && instanceTools.length > 0) {
1097
+ allTools.push(...instanceTools);
1059
1098
  }
1060
1099
 
1061
1100
  return allTools;
@@ -404,6 +404,141 @@ describe('AgentContext', () => {
404
404
  expect(ctxWithDeferred.toolSchemaTokens).toBe(ctxBase.toolSchemaTokens);
405
405
  });
406
406
 
407
+ it('excludes programmatic-only toolDefinitions from toolSchemaTokens', async () => {
408
+ // getEventDrivenToolsForBinding excludes definitions whose
409
+ // allowed_callers omit 'direct'. Accounting must mirror that — a
410
+ // programmatic-only definition is never bound to the model and
411
+ // shouldn't inflate toolSchemaTokens.
412
+ const activeDef: t.LCTool = {
413
+ name: 'active_tool',
414
+ description: 'Always loaded',
415
+ parameters: { type: 'object', properties: {} },
416
+ };
417
+ const programmaticDef: t.LCTool = {
418
+ name: 'programmatic_tool',
419
+ description: 'Only callable via code execution',
420
+ parameters: { type: 'object', properties: {} },
421
+ allowed_callers: ['code_execution'],
422
+ };
423
+
424
+ const ctxBase = createBasicContext({
425
+ agentConfig: { toolDefinitions: [activeDef] },
426
+ tokenCounter: mockTokenCounter,
427
+ });
428
+ const ctxWithProgrammatic = createBasicContext({
429
+ agentConfig: { toolDefinitions: [activeDef, programmaticDef] },
430
+ tokenCounter: mockTokenCounter,
431
+ });
432
+
433
+ await ctxBase.tokenCalculationPromise;
434
+ await ctxWithProgrammatic.tokenCalculationPromise;
435
+
436
+ expect(ctxWithProgrammatic.toolSchemaTokens).toBe(
437
+ ctxBase.toolSchemaTokens
438
+ );
439
+ });
440
+
441
+ it('excludes deferred-undiscovered instance tools from toolSchemaTokens', async () => {
442
+ const activeTool = createMockTool('active_tool');
443
+ const deferredTool = createMockTool('deferred_tool');
444
+ const programmaticTool = createMockTool('programmatic_tool');
445
+ const toolRegistry: t.LCToolRegistry = new Map([
446
+ ['active_tool', { name: 'active_tool' }],
447
+ ['deferred_tool', { name: 'deferred_tool', defer_loading: true }],
448
+ [
449
+ 'programmatic_tool',
450
+ {
451
+ name: 'programmatic_tool',
452
+ allowed_callers: ['code_execution'],
453
+ },
454
+ ],
455
+ ]);
456
+
457
+ const ctxBase = createBasicContext({
458
+ agentConfig: { tools: [activeTool], toolRegistry },
459
+ tokenCounter: mockTokenCounter,
460
+ });
461
+ const ctxWithExcluded = createBasicContext({
462
+ agentConfig: {
463
+ tools: [activeTool, deferredTool, programmaticTool],
464
+ toolRegistry,
465
+ },
466
+ tokenCounter: mockTokenCounter,
467
+ });
468
+
469
+ await ctxBase.tokenCalculationPromise;
470
+ await ctxWithExcluded.tokenCalculationPromise;
471
+
472
+ expect(ctxWithExcluded.toolSchemaTokens).toBe(ctxBase.toolSchemaTokens);
473
+ });
474
+
475
+ it('includes deferred instance tools once discovered via discoveredTools input', async () => {
476
+ const tools = [createMockTool('deferred_tool')];
477
+ const toolRegistry: t.LCToolRegistry = new Map([
478
+ ['deferred_tool', { name: 'deferred_tool', defer_loading: true }],
479
+ ]);
480
+
481
+ const ctxUndiscovered = createBasicContext({
482
+ agentConfig: { tools, toolRegistry },
483
+ tokenCounter: mockTokenCounter,
484
+ });
485
+ const ctxDiscovered = createBasicContext({
486
+ agentConfig: {
487
+ tools,
488
+ toolRegistry,
489
+ discoveredTools: ['deferred_tool'],
490
+ },
491
+ tokenCounter: mockTokenCounter,
492
+ });
493
+
494
+ await ctxUndiscovered.tokenCalculationPromise;
495
+ await ctxDiscovered.tokenCalculationPromise;
496
+
497
+ expect(ctxUndiscovered.toolSchemaTokens).toBe(0);
498
+ expect(ctxDiscovered.toolSchemaTokens).toBeGreaterThan(0);
499
+ });
500
+
501
+ it('does not filter instance tools in event-driven mode (matches getEventDrivenToolsForBinding)', async () => {
502
+ // In event-driven mode, getEventDrivenToolsForBinding appends
503
+ // `this.tools` UNFILTERED. Accounting must do the same — otherwise we
504
+ // under-count and risk exceeding the model's context budget.
505
+ const activeDef: t.LCTool = {
506
+ name: 'active_def',
507
+ description: 'Always loaded',
508
+ parameters: { type: 'object', properties: {} },
509
+ };
510
+ const nativeTool = createMockTool('native_tool');
511
+ // Registry marks the native tool as deferred-undiscovered. In the
512
+ // non-event-driven path this would exclude it; in event-driven mode
513
+ // it is still bound and must still be counted.
514
+ const toolRegistry: t.LCToolRegistry = new Map([
515
+ ['native_tool', { name: 'native_tool', defer_loading: true }],
516
+ ]);
517
+
518
+ const ctxWithoutNative = createBasicContext({
519
+ agentConfig: {
520
+ toolDefinitions: [activeDef],
521
+ toolRegistry,
522
+ },
523
+ tokenCounter: mockTokenCounter,
524
+ });
525
+ const ctxWithNative = createBasicContext({
526
+ agentConfig: {
527
+ toolDefinitions: [activeDef],
528
+ tools: [nativeTool],
529
+ toolRegistry,
530
+ },
531
+ tokenCounter: mockTokenCounter,
532
+ });
533
+
534
+ await ctxWithoutNative.tokenCalculationPromise;
535
+ await ctxWithNative.tokenCalculationPromise;
536
+
537
+ expect(ctxWithNative.toolSchemaTokens).toBeGreaterThan(
538
+ ctxWithoutNative.toolSchemaTokens
539
+ );
540
+ });
541
+
407
542
  it('includes deferred toolDefinitions once discovered via discoveredTools input', async () => {
408
543
  const toolDefinitions: t.LCTool[] = [
409
544
  {
@@ -448,6 +583,36 @@ describe('AgentContext', () => {
448
583
  expect(ctx.getTokenBudgetBreakdown().toolCount).toBe(1);
449
584
  });
450
585
 
586
+ it('getTokenBudgetBreakdown toolCount excludes deferred-undiscovered instance tools', () => {
587
+ // Mirrors the toolDefinitions test for the instance-tools path so
588
+ // toolCount stays aligned with toolSchemaTokens (and with what
589
+ // getToolsForBinding actually emits) for non-event-driven runs.
590
+ const tools = [
591
+ createMockTool('active_tool'),
592
+ createMockTool('deferred_tool'),
593
+ createMockTool('programmatic_tool'),
594
+ ];
595
+ const toolRegistry: t.LCToolRegistry = new Map([
596
+ ['active_tool', { name: 'active_tool' }],
597
+ ['deferred_tool', { name: 'deferred_tool', defer_loading: true }],
598
+ [
599
+ 'programmatic_tool',
600
+ {
601
+ name: 'programmatic_tool',
602
+ allowed_callers: ['code_execution'],
603
+ },
604
+ ],
605
+ ]);
606
+
607
+ const ctx = createBasicContext({
608
+ agentConfig: { tools, toolRegistry },
609
+ });
610
+
611
+ expect(ctx.getTokenBudgetBreakdown().toolCount).toBe(1);
612
+ ctx.markToolsAsDiscovered(['deferred_tool']);
613
+ expect(ctx.getTokenBudgetBreakdown().toolCount).toBe(2);
614
+ });
615
+
451
616
  it('getTokenBudgetBreakdown toolCount reflects newly discovered deferred tools', () => {
452
617
  const toolDefinitions: t.LCTool[] = [
453
618
  {
@@ -464,6 +629,19 @@ describe('AgentContext', () => {
464
629
  expect(ctx.getTokenBudgetBreakdown().toolCount).toBe(1);
465
630
  });
466
631
 
632
+ it('getTokenBudgetBreakdown toolCount includes graphTools', () => {
633
+ // graphTools (handoff/subagent) are bound to the model alongside
634
+ // instance tools. Now that toolCount derives from getToolsForBinding(),
635
+ // graphTools are reflected in the diagnostic just like they're
636
+ // counted in toolSchemaTokens. Locks in that alignment.
637
+ const ctx = createBasicContext({
638
+ agentConfig: { tools: [createMockTool('direct_tool')] },
639
+ });
640
+ ctx.graphTools = [createMockTool('handoff_tool')];
641
+
642
+ expect(ctx.getTokenBudgetBreakdown().toolCount).toBe(2);
643
+ });
644
+
467
645
  it('toolSchemaTokens snapshot does not auto-update after markToolsAsDiscovered', async () => {
468
646
  const toolDefinitions: t.LCTool[] = [
469
647
  {
@@ -898,10 +898,21 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
898
898
  if (
899
899
  isThinkingEnabled(agentContext.provider, agentContext.clientOptions)
900
900
  ) {
901
+ /**
902
+ * Pass `this.startIndex` so the function can distinguish CURRENT-run
903
+ * AI messages (the agent's own iterations — possibly without a
904
+ * leading thinking block, which Claude is allowed to skip) from
905
+ * historical context that genuinely needs the
906
+ * `[Previous agent context]` placeholder. Without this signal the
907
+ * function would convert the agent's own in-run tool_use messages,
908
+ * polluting the next iteration's prompt with a placeholder the
909
+ * model treats as suspicious injected content.
910
+ */
901
911
  finalMessages = ensureThinkingBlockInMessages(
902
912
  finalMessages,
903
913
  agentContext.provider,
904
- config
914
+ config,
915
+ this.startIndex
905
916
  );
906
917
  }
907
918
 
@@ -1209,4 +1209,171 @@ describe('ensureThinkingBlockInMessages', () => {
1209
1209
  expect(outputImageBlock).not.toBe(originalImageBlock);
1210
1210
  });
1211
1211
  });
1212
+
1213
+ describe('runStartIndex (current-run boundary)', () => {
1214
+ /**
1215
+ * Claude is allowed to skip a thinking block before a tool_use (cf.
1216
+ * PR #116). When the agent's own first iteration produces an
1217
+ * `AI(tool_use, no thinking)`, the function would otherwise convert
1218
+ * it to a `[Previous agent context]` HumanMessage — polluting the
1219
+ * next iteration's prompt with text the model treats as suspicious
1220
+ * injected content. The model then ignores its own real prior tool
1221
+ * result and re-runs the tool to verify, often failing because the
1222
+ * subsequent sandbox doesn't have the file.
1223
+ *
1224
+ * The `runStartIndex` parameter tells the function which messages
1225
+ * are the agent's own in-run work: those at or after it must NEVER
1226
+ * be converted, even if no thinking block appears in the chain.
1227
+ */
1228
+
1229
+ test('preserves the agent first-iteration AI(tool_use) when its index is at runStartIndex', () => {
1230
+ const messages = [
1231
+ new HumanMessage({ content: 'fetch the data' }),
1232
+ // No thinking block — Claude validly skipped it before tool_use
1233
+ new AIMessage({
1234
+ content: '',
1235
+ tool_calls: [
1236
+ { id: 'c1', name: 'fetch', args: {}, type: 'tool_call' as const },
1237
+ ],
1238
+ }),
1239
+ new ToolMessage({ content: 'data', tool_call_id: 'c1' }),
1240
+ ];
1241
+
1242
+ const result = ensureThinkingBlockInMessages(
1243
+ messages,
1244
+ Providers.BEDROCK,
1245
+ undefined,
1246
+ /* runStartIndex */ 1
1247
+ );
1248
+
1249
+ // All 3 preserved — the AI at index 1 is the agent's own work
1250
+ expect(result).toHaveLength(3);
1251
+ expect(result[1]).toBeInstanceOf(AIMessage);
1252
+ expect((result[1] as AIMessage).tool_calls).toHaveLength(1);
1253
+ expect(result[2]).toBeInstanceOf(ToolMessage);
1254
+ // No placeholder leaked in
1255
+ expect(getTextContent(result[1])).not.toContain(
1256
+ '[Previous agent context]'
1257
+ );
1258
+ });
1259
+
1260
+ test('preserves multiple in-run AI(tool_use) iterations without thinking blocks', () => {
1261
+ const messages = [
1262
+ new HumanMessage({ content: 'do work' }),
1263
+ new AIMessage({
1264
+ content: '',
1265
+ tool_calls: [
1266
+ { id: 'c1', name: 'step1', args: {}, type: 'tool_call' as const },
1267
+ ],
1268
+ }),
1269
+ new ToolMessage({ content: 'r1', tool_call_id: 'c1' }),
1270
+ new AIMessage({
1271
+ content: '',
1272
+ tool_calls: [
1273
+ { id: 'c2', name: 'step2', args: {}, type: 'tool_call' as const },
1274
+ ],
1275
+ }),
1276
+ new ToolMessage({ content: 'r2', tool_call_id: 'c2' }),
1277
+ ];
1278
+
1279
+ const result = ensureThinkingBlockInMessages(
1280
+ messages,
1281
+ Providers.BEDROCK,
1282
+ undefined,
1283
+ /* runStartIndex */ 1
1284
+ );
1285
+
1286
+ expect(result).toHaveLength(5);
1287
+ expect(result[1]).toBeInstanceOf(AIMessage);
1288
+ expect(result[3]).toBeInstanceOf(AIMessage);
1289
+ // Neither AI was converted
1290
+ expect(getTextContent(result[1])).not.toContain(
1291
+ '[Previous agent context]'
1292
+ );
1293
+ expect(getTextContent(result[3])).not.toContain(
1294
+ '[Previous agent context]'
1295
+ );
1296
+ });
1297
+
1298
+ test('still converts pre-runStartIndex history that lacks thinking blocks', () => {
1299
+ // Real handoff scenario: a prior non-thinking agent's tool calls
1300
+ // appear before this run started. They genuinely need the
1301
+ // placeholder (the legacy reason this function exists).
1302
+ const messages = [
1303
+ new HumanMessage({ content: 'first request' }),
1304
+ new AIMessage({
1305
+ content: 'using tool',
1306
+ tool_calls: [
1307
+ { id: 'old', name: 'legacy', args: {}, type: 'tool_call' as const },
1308
+ ],
1309
+ }),
1310
+ new ToolMessage({ content: 'old result', tool_call_id: 'old' }),
1311
+ // Current run starts here — say after a handoff. Index >= 3 is
1312
+ // the new agent's own work.
1313
+ ];
1314
+
1315
+ const result = ensureThinkingBlockInMessages(
1316
+ messages,
1317
+ Providers.BEDROCK,
1318
+ undefined,
1319
+ /* runStartIndex */ 3
1320
+ );
1321
+
1322
+ // The pre-run AI(tool_use)+Tool got converted to a placeholder
1323
+ expect(result).toHaveLength(2);
1324
+ expect(result[0]).toBeInstanceOf(HumanMessage);
1325
+ expect(result[1]).toBeInstanceOf(HumanMessage);
1326
+ expect(getTextContent(result[1])).toContain('[Previous agent context]');
1327
+ });
1328
+
1329
+ test('falls back to chainHasThinkingBlock heuristic when runStartIndex is undefined (backward compat)', () => {
1330
+ const messages = [
1331
+ new HumanMessage({ content: 'do work' }),
1332
+ // No reasoning + no runStartIndex hint → still gets converted
1333
+ // (preserves the prior behavior for callers that haven't been
1334
+ // updated to pass the boundary).
1335
+ new AIMessage({
1336
+ content: 'using tool',
1337
+ tool_calls: [
1338
+ { id: 'c1', name: 'tool', args: {}, type: 'tool_call' as const },
1339
+ ],
1340
+ }),
1341
+ new ToolMessage({ content: 'r', tool_call_id: 'c1' }),
1342
+ ];
1343
+
1344
+ const result = ensureThinkingBlockInMessages(messages, Providers.BEDROCK);
1345
+
1346
+ expect(result).toHaveLength(2);
1347
+ expect(result[1]).toBeInstanceOf(HumanMessage);
1348
+ expect(getTextContent(result[1])).toContain('[Previous agent context]');
1349
+ });
1350
+
1351
+ test('runStartIndex of 0 is honored (whole array is the current run)', () => {
1352
+ // Edge: a fresh run with no prior history at all. Everything is
1353
+ // in-run and must be preserved even without thinking blocks.
1354
+ const messages = [
1355
+ new HumanMessage({ content: 'do work' }),
1356
+ new AIMessage({
1357
+ content: '',
1358
+ tool_calls: [
1359
+ { id: 'c1', name: 'tool', args: {}, type: 'tool_call' as const },
1360
+ ],
1361
+ }),
1362
+ new ToolMessage({ content: 'r', tool_call_id: 'c1' }),
1363
+ ];
1364
+
1365
+ const result = ensureThinkingBlockInMessages(
1366
+ messages,
1367
+ Providers.BEDROCK,
1368
+ undefined,
1369
+ /* runStartIndex */ 0
1370
+ );
1371
+
1372
+ expect(result).toHaveLength(3);
1373
+ expect(result[1]).toBeInstanceOf(AIMessage);
1374
+ expect(getTextContent(result[1])).not.toContain(
1375
+ '[Previous agent context]'
1376
+ );
1377
+ });
1378
+ });
1212
1379
  });
@@ -1391,12 +1391,23 @@ function appendToolCalls(
1391
1391
  * @param messages - Array of messages to process
1392
1392
  * @param provider - The provider being used (unused but kept for future compatibility)
1393
1393
  * @param config - Optional RunnableConfig for structured agent logging
1394
+ * @param runStartIndex - Index in `messages` where the CURRENT run's own
1395
+ * appended AI/Tool messages begin (i.e. anything at this index or later
1396
+ * was just produced by this run's own iterations, not historical
1397
+ * context). When provided, AI messages at or after this index are
1398
+ * never converted to `[Previous agent context]` placeholders — Claude
1399
+ * can validly skip a thinking block before a tool_use (cf. PR #116),
1400
+ * so the agent's own in-run iterations must not be misclassified as
1401
+ * foreign history. Without the signal the function falls back to its
1402
+ * prior heuristic (`chainHasThinkingBlock`), preserving backward
1403
+ * compatibility for callers that don't yet pass the boundary.
1394
1404
  * @returns The messages array with tool sequences converted to buffer strings if necessary
1395
1405
  */
1396
1406
  export function ensureThinkingBlockInMessages(
1397
1407
  messages: BaseMessage[],
1398
1408
  _provider: Providers,
1399
- config?: RunnableConfig
1409
+ config?: RunnableConfig,
1410
+ runStartIndex?: number
1400
1411
  ): BaseMessage[] {
1401
1412
  if (messages.length === 0) {
1402
1413
  return messages;
@@ -1483,6 +1494,23 @@ export function ensureThinkingBlockInMessages(
1483
1494
  // but follow-ups have content: "" with only tool_calls. These are the
1484
1495
  // same agent's turn and must NOT be converted to HumanMessages.
1485
1496
  if (hasToolUse && !hasThinkingBlock) {
1497
+ // Current-run boundary check: anything at or after `runStartIndex`
1498
+ // is the current run's own work — preserve it. Claude is allowed
1499
+ // to skip a thinking block before a tool_use (cf. PR #116 in the
1500
+ // agents repo), so the agent's own first-iteration AI message can
1501
+ // legitimately have tool_calls without reasoning. Converting it to
1502
+ // a `[Previous agent context]` placeholder pollutes the next
1503
+ // iteration's prompt — the LLM sees the placeholder, treats it as
1504
+ // suspicious injected content, ignores its own real prior tool
1505
+ // result, and re-runs the tool to verify (which then often fails
1506
+ // because subsequent calls land in fresh sandboxes without the
1507
+ // file). Skip the conversion when we know this is in-run.
1508
+ if (runStartIndex !== undefined && i >= runStartIndex) {
1509
+ result.push(msg);
1510
+ i++;
1511
+ continue;
1512
+ }
1513
+
1486
1514
  // Walk backwards — if an earlier AI message in the same chain (before
1487
1515
  // the nearest HumanMessage) has a thinking/reasoning block, this is a
1488
1516
  // continuation of a thinking-enabled turn, not a non-thinking handoff.
@@ -3,17 +3,23 @@ import fetch, { RequestInit } from 'node-fetch';
3
3
  import { HttpsProxyAgent } from 'https-proxy-agent';
4
4
  import { tool, DynamicStructuredTool } from '@langchain/core/tools';
5
5
  import type * as t from '@/types';
6
- import { imageExtRegex, getCodeBaseURL } from './CodeExecutor';
6
+ import { getCodeBaseURL, renderFileSection } from './CodeExecutor';
7
7
  import { Constants } from '@/common';
8
8
 
9
9
  config();
10
10
 
11
- const imageMessage = 'Image is already displayed to the user';
12
11
  const otherMessage = 'File is already downloaded by the user';
12
+ const inheritedFileMessage =
13
+ 'Available as an input — already known to the user';
13
14
  const accessMessage =
14
15
  'Note: Files from previous executions are automatically available and can be modified.';
15
16
  const emptyOutputMessage =
16
17
  'stdout: Empty. Ensure you\'re writing output explicitly.\n';
18
+ const inheritedFilesHeader =
19
+ 'Available files (inputs, not generated by this execution):';
20
+ const generatedFilesHeader = 'Generated files:';
21
+ const inheritedNote =
22
+ 'Note: Files in "Available files" are inputs the user (or a skill) already provided to the sandbox. They were not produced by this execution and you should not present them as new outputs in your response.';
17
23
 
18
24
  const baseEndpoint = getCodeBaseURL();
19
25
  const EXEC_ENDPOINT = `${baseEndpoint}/exec`;
@@ -198,20 +204,38 @@ function createBashExecutionTool(
198
204
  }
199
205
  if (result.stderr) formattedOutput += `stderr:\n${result.stderr}\n`;
200
206
  if (result.files && result.files.length > 0) {
201
- formattedOutput += 'Generated files:\n';
207
+ /* Split inherited (read-only / unchanged-input passthroughs from
208
+ * codeapi) from genuine generated outputs. The LLM was previously
209
+ * shown skill files under "Generated files:" with the message
210
+ * "File is already downloaded by the user", which led it to
211
+ * (a) believe it had just produced files it merely referenced
212
+ * and (b) sometimes invent paths like /mnt/user-data/uploads/
213
+ * trying to find the "originals". Labeling them as inputs makes
214
+ * the mental model accurate. */
215
+ const inheritedFiles = result.files.filter(
216
+ (f) => f.inherited === true
217
+ );
218
+ const generatedFiles = result.files.filter(
219
+ (f) => f.inherited !== true
220
+ );
202
221
 
203
- const fileCount = result.files.length;
204
- for (let i = 0; i < fileCount; i++) {
205
- const file = result.files[i];
206
- const isImage = imageExtRegex.test(file.name);
207
- formattedOutput += `- /mnt/data/${file.name} | ${isImage ? imageMessage : otherMessage}`;
222
+ formattedOutput += renderFileSection(
223
+ generatedFilesHeader,
224
+ generatedFiles,
225
+ otherMessage
226
+ );
227
+ formattedOutput += renderFileSection(
228
+ inheritedFilesHeader,
229
+ inheritedFiles,
230
+ inheritedFileMessage
231
+ );
208
232
 
209
- if (i < fileCount - 1) {
210
- formattedOutput += fileCount <= 3 ? ', ' : ',\n';
211
- }
233
+ if (generatedFiles.length > 0) {
234
+ formattedOutput += `\n\n${accessMessage}`;
235
+ }
236
+ if (inheritedFiles.length > 0) {
237
+ formattedOutput += `\n\n${inheritedNote}`;
212
238
  }
213
-
214
- formattedOutput += `\n\n${accessMessage}`;
215
239
  return [
216
240
  formattedOutput.trim(),
217
241
  {