@librechat/agents 3.1.72 → 3.1.73

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1209,4 +1209,171 @@ describe('ensureThinkingBlockInMessages', () => {
1209
1209
  expect(outputImageBlock).not.toBe(originalImageBlock);
1210
1210
  });
1211
1211
  });
1212
+
1213
+ describe('runStartIndex (current-run boundary)', () => {
1214
+ /**
1215
+ * Claude is allowed to skip a thinking block before a tool_use (cf.
1216
+ * PR #116). When the agent's own first iteration produces an
1217
+ * `AI(tool_use, no thinking)`, the function would otherwise convert
1218
+ * it to a `[Previous agent context]` HumanMessage — polluting the
1219
+ * next iteration's prompt with text the model treats as suspicious
1220
+ * injected content. The model then ignores its own real prior tool
1221
+ * result and re-runs the tool to verify, often failing because the
1222
+ * subsequent sandbox doesn't have the file.
1223
+ *
1224
+ * The `runStartIndex` parameter tells the function which messages
1225
+ * are the agent's own in-run work: those at or after it must NEVER
1226
+ * be converted, even if no thinking block appears in the chain.
1227
+ */
1228
+
1229
+ test('preserves the agent first-iteration AI(tool_use) when its index is at runStartIndex', () => {
1230
+ const messages = [
1231
+ new HumanMessage({ content: 'fetch the data' }),
1232
+ // No thinking block — Claude validly skipped it before tool_use
1233
+ new AIMessage({
1234
+ content: '',
1235
+ tool_calls: [
1236
+ { id: 'c1', name: 'fetch', args: {}, type: 'tool_call' as const },
1237
+ ],
1238
+ }),
1239
+ new ToolMessage({ content: 'data', tool_call_id: 'c1' }),
1240
+ ];
1241
+
1242
+ const result = ensureThinkingBlockInMessages(
1243
+ messages,
1244
+ Providers.BEDROCK,
1245
+ undefined,
1246
+ /* runStartIndex */ 1
1247
+ );
1248
+
1249
+ // All 3 preserved — the AI at index 1 is the agent's own work
1250
+ expect(result).toHaveLength(3);
1251
+ expect(result[1]).toBeInstanceOf(AIMessage);
1252
+ expect((result[1] as AIMessage).tool_calls).toHaveLength(1);
1253
+ expect(result[2]).toBeInstanceOf(ToolMessage);
1254
+ // No placeholder leaked in
1255
+ expect(getTextContent(result[1])).not.toContain(
1256
+ '[Previous agent context]'
1257
+ );
1258
+ });
1259
+
1260
+ test('preserves multiple in-run AI(tool_use) iterations without thinking blocks', () => {
1261
+ const messages = [
1262
+ new HumanMessage({ content: 'do work' }),
1263
+ new AIMessage({
1264
+ content: '',
1265
+ tool_calls: [
1266
+ { id: 'c1', name: 'step1', args: {}, type: 'tool_call' as const },
1267
+ ],
1268
+ }),
1269
+ new ToolMessage({ content: 'r1', tool_call_id: 'c1' }),
1270
+ new AIMessage({
1271
+ content: '',
1272
+ tool_calls: [
1273
+ { id: 'c2', name: 'step2', args: {}, type: 'tool_call' as const },
1274
+ ],
1275
+ }),
1276
+ new ToolMessage({ content: 'r2', tool_call_id: 'c2' }),
1277
+ ];
1278
+
1279
+ const result = ensureThinkingBlockInMessages(
1280
+ messages,
1281
+ Providers.BEDROCK,
1282
+ undefined,
1283
+ /* runStartIndex */ 1
1284
+ );
1285
+
1286
+ expect(result).toHaveLength(5);
1287
+ expect(result[1]).toBeInstanceOf(AIMessage);
1288
+ expect(result[3]).toBeInstanceOf(AIMessage);
1289
+ // Neither AI was converted
1290
+ expect(getTextContent(result[1])).not.toContain(
1291
+ '[Previous agent context]'
1292
+ );
1293
+ expect(getTextContent(result[3])).not.toContain(
1294
+ '[Previous agent context]'
1295
+ );
1296
+ });
1297
+
1298
+ test('still converts pre-runStartIndex history that lacks thinking blocks', () => {
1299
+ // Real handoff scenario: a prior non-thinking agent's tool calls
1300
+ // appear before this run started. They genuinely need the
1301
+ // placeholder (the legacy reason this function exists).
1302
+ const messages = [
1303
+ new HumanMessage({ content: 'first request' }),
1304
+ new AIMessage({
1305
+ content: 'using tool',
1306
+ tool_calls: [
1307
+ { id: 'old', name: 'legacy', args: {}, type: 'tool_call' as const },
1308
+ ],
1309
+ }),
1310
+ new ToolMessage({ content: 'old result', tool_call_id: 'old' }),
1311
+ // Current run starts here — say after a handoff. Index >= 3 is
1312
+ // the new agent's own work.
1313
+ ];
1314
+
1315
+ const result = ensureThinkingBlockInMessages(
1316
+ messages,
1317
+ Providers.BEDROCK,
1318
+ undefined,
1319
+ /* runStartIndex */ 3
1320
+ );
1321
+
1322
+ // The pre-run AI(tool_use)+Tool got converted to a placeholder
1323
+ expect(result).toHaveLength(2);
1324
+ expect(result[0]).toBeInstanceOf(HumanMessage);
1325
+ expect(result[1]).toBeInstanceOf(HumanMessage);
1326
+ expect(getTextContent(result[1])).toContain('[Previous agent context]');
1327
+ });
1328
+
1329
+ test('falls back to chainHasThinkingBlock heuristic when runStartIndex is undefined (backward compat)', () => {
1330
+ const messages = [
1331
+ new HumanMessage({ content: 'do work' }),
1332
+ // No reasoning + no runStartIndex hint → still gets converted
1333
+ // (preserves the prior behavior for callers that haven't been
1334
+ // updated to pass the boundary).
1335
+ new AIMessage({
1336
+ content: 'using tool',
1337
+ tool_calls: [
1338
+ { id: 'c1', name: 'tool', args: {}, type: 'tool_call' as const },
1339
+ ],
1340
+ }),
1341
+ new ToolMessage({ content: 'r', tool_call_id: 'c1' }),
1342
+ ];
1343
+
1344
+ const result = ensureThinkingBlockInMessages(messages, Providers.BEDROCK);
1345
+
1346
+ expect(result).toHaveLength(2);
1347
+ expect(result[1]).toBeInstanceOf(HumanMessage);
1348
+ expect(getTextContent(result[1])).toContain('[Previous agent context]');
1349
+ });
1350
+
1351
+ test('runStartIndex of 0 is honored (whole array is the current run)', () => {
1352
+ // Edge: a fresh run with no prior history at all. Everything is
1353
+ // in-run and must be preserved even without thinking blocks.
1354
+ const messages = [
1355
+ new HumanMessage({ content: 'do work' }),
1356
+ new AIMessage({
1357
+ content: '',
1358
+ tool_calls: [
1359
+ { id: 'c1', name: 'tool', args: {}, type: 'tool_call' as const },
1360
+ ],
1361
+ }),
1362
+ new ToolMessage({ content: 'r', tool_call_id: 'c1' }),
1363
+ ];
1364
+
1365
+ const result = ensureThinkingBlockInMessages(
1366
+ messages,
1367
+ Providers.BEDROCK,
1368
+ undefined,
1369
+ /* runStartIndex */ 0
1370
+ );
1371
+
1372
+ expect(result).toHaveLength(3);
1373
+ expect(result[1]).toBeInstanceOf(AIMessage);
1374
+ expect(getTextContent(result[1])).not.toContain(
1375
+ '[Previous agent context]'
1376
+ );
1377
+ });
1378
+ });
1212
1379
  });
@@ -1391,12 +1391,23 @@ function appendToolCalls(
1391
1391
  * @param messages - Array of messages to process
1392
1392
  * @param _provider - The provider being used (unused but kept for future compatibility)
1393
1393
  * @param config - Optional RunnableConfig for structured agent logging
1394
+ * @param runStartIndex - Index in `messages` where the CURRENT run's own
1395
+ * appended AI/Tool messages begin (i.e. anything at this index or later
1396
+ * was just produced by this run's own iterations, not historical
1397
+ * context). When provided, AI messages at or after this index are
1398
+ * never converted to `[Previous agent context]` placeholders — Claude
1399
+ * can validly skip a thinking block before a tool_use (cf. PR #116),
1400
+ * so the agent's own in-run iterations must not be misclassified as
1401
+ * foreign history. Without the signal the function falls back to its
1402
+ * prior heuristic (`chainHasThinkingBlock`), preserving backward
1403
+ * compatibility for callers that don't yet pass the boundary.
1394
1404
  * @returns The messages array with tool sequences converted to buffer strings if necessary
1395
1405
  */
1396
1406
  export function ensureThinkingBlockInMessages(
1397
1407
  messages: BaseMessage[],
1398
1408
  _provider: Providers,
1399
- config?: RunnableConfig
1409
+ config?: RunnableConfig,
1410
+ runStartIndex?: number
1400
1411
  ): BaseMessage[] {
1401
1412
  if (messages.length === 0) {
1402
1413
  return messages;
@@ -1483,6 +1494,23 @@ export function ensureThinkingBlockInMessages(
1483
1494
  // but follow-ups have content: "" with only tool_calls. These are the
1484
1495
  // same agent's turn and must NOT be converted to HumanMessages.
1485
1496
  if (hasToolUse && !hasThinkingBlock) {
1497
+ // Current-run boundary check: anything at or after `runStartIndex`
1498
+ // is the current run's own work — preserve it. Claude is allowed
1499
+ // to skip a thinking block before a tool_use (cf. PR #116 in the
1500
+ // agents repo), so the agent's own first-iteration AI message can
1501
+ // legitimately have tool_calls without reasoning. Converting it to
1502
+ // a `[Previous agent context]` placeholder pollutes the next
1503
+ // iteration's prompt — the LLM sees the placeholder, treats it as
1504
+ // suspicious injected content, ignores its own real prior tool
1505
+ // result, and re-runs the tool to verify (which then often fails
1506
+ // because subsequent calls land in fresh sandboxes without the
1507
+ // file). Skip the conversion when we know this is in-run.
1508
+ if (runStartIndex !== undefined && i >= runStartIndex) {
1509
+ result.push(msg);
1510
+ i++;
1511
+ continue;
1512
+ }
1513
+
1486
1514
  // Walk backwards — if an earlier AI message in the same chain (before
1487
1515
  // the nearest HumanMessage) has a thinking/reasoning block, this is a
1488
1516
  // continuation of a thinking-enabled turn, not a non-thinking handoff.
@@ -89,7 +89,26 @@ function isSend(value: unknown): value is Send {
89
89
  return value instanceof Send;
90
90
  }
91
91
 
92
- /** Merges code execution session context into the sessions map. */
92
+ /**
93
+ * Merges code execution session context into the sessions map.
94
+ *
95
+ * The codeapi worker reports two distinct ids on a code-execution result:
96
+ * - `artifact.session_id` (the `sessionId` arg here) is the EXEC session
97
+ * — the sandbox VM that ran the code. It's transient and torn down
98
+ * post-execution; subsequent calls cannot reuse it as a sandbox.
99
+ * - `file.session_id` on each `artifact.files[i]` is the STORAGE
100
+ * session — the file-server bucket prefix where the artifact actually
101
+ * lives and is served from.
102
+ *
103
+ * Per-file `session_id` is preserved (not overwritten with the exec id)
104
+ * because `_injected_files` are looked up against the file-server's
105
+ * storage path on subsequent tool calls. Stomping the storage id with
106
+ * the exec id silently 404s every follow-up tool call within the same
107
+ * run — `cat /mnt/data/foo.txt` reports "No such file or directory"
108
+ * because the worker can't mount a file at a path the storage doesn't
109
+ * know about. Fall back to `sessionId` only when the per-file id is
110
+ * absent (older worker payloads).
111
+ */
93
112
  function updateCodeSession(
94
113
  sessions: t.ToolSessionMap,
95
114
  sessionId: string,
@@ -104,7 +123,7 @@ function updateCodeSession(
104
123
  if (newFiles.length > 0) {
105
124
  const filesWithSession: t.FileRefs = newFiles.map((file) => ({
106
125
  ...file,
107
- session_id: sessionId,
126
+ session_id: file.session_id ?? sessionId,
108
127
  }));
109
128
  const newFileNames = new Set(filesWithSession.map((f) => f.name));
110
129
  const filteredExisting = existingFiles.filter(
@@ -472,6 +472,130 @@ describe('ToolNode code execution session management', () => {
472
472
 
473
473
  expect(sessions.has(Constants.EXECUTE_CODE)).toBe(false);
474
474
  });
475
+
476
+ it('preserves per-file storage session_id (not overwritten with the exec session_id)', () => {
477
+ /**
478
+ * Regression: the codeapi worker reports `artifact.session_id` (EXEC
479
+ * session — torn down post-run) and per-file `session_id` (STORAGE
480
+ * session where the file lives). Stomping the storage id with the
481
+ * exec id silently 404s every follow-up tool call within the same
482
+ * run because `_injected_files` carry the wrong path on the next
483
+ * `/exec`. The worker tries to mount `<exec_session>/<id>` against
484
+ * file-server, gets 404, mounts nothing — `cat /mnt/data/foo.txt`
485
+ * → "No such file or directory".
486
+ */
487
+ const sessions: t.ToolSessionMap = new Map();
488
+ const mockTool = createMockCodeTool({ capturedConfigs: [] });
489
+ const toolNode = new ToolNode({
490
+ tools: [mockTool],
491
+ sessions,
492
+ eventDrivenMode: true,
493
+ });
494
+ const storeMethod = (
495
+ toolNode as unknown as {
496
+ storeCodeSessionFromResults: (
497
+ results: t.ToolExecuteResult[],
498
+ requestMap: Map<string, t.ToolCallRequest>
499
+ ) => void;
500
+ }
501
+ ).storeCodeSessionFromResults.bind(toolNode);
502
+
503
+ storeMethod(
504
+ [
505
+ {
506
+ toolCallId: 'tc-storage',
507
+ content: 'output',
508
+ artifact: {
509
+ /* EXEC session — transient, torn down after this run */
510
+ session_id: 'exec-session-123',
511
+ files: [
512
+ /* STORAGE session — persistent file-server bucket prefix */
513
+ {
514
+ id: 'f1',
515
+ name: 'sentinel.txt',
516
+ session_id: 'storage-session-A',
517
+ },
518
+ { id: 'f2', name: 'data.csv', session_id: 'storage-session-B' },
519
+ ],
520
+ },
521
+ status: 'success',
522
+ },
523
+ ],
524
+ new Map([
525
+ [
526
+ 'tc-storage',
527
+ { id: 'tc-storage', name: Constants.EXECUTE_CODE, args: {} },
528
+ ],
529
+ ])
530
+ );
531
+
532
+ const stored = sessions.get(
533
+ Constants.EXECUTE_CODE
534
+ ) as t.CodeSessionContext;
535
+ /* The session-level id is the (latest) exec id — fine for tracking
536
+ "what session ran last" — but per-file storage ids must survive. */
537
+ expect(stored.session_id).toBe('exec-session-123');
538
+ expect(stored.files).toHaveLength(2);
539
+ expect(stored.files![0]).toEqual({
540
+ id: 'f1',
541
+ name: 'sentinel.txt',
542
+ session_id: 'storage-session-A',
543
+ });
544
+ expect(stored.files![1]).toEqual({
545
+ id: 'f2',
546
+ name: 'data.csv',
547
+ session_id: 'storage-session-B',
548
+ });
549
+ });
550
+
551
+ it('falls back to exec session_id only when per-file session_id is absent (older worker payloads)', () => {
552
+ const sessions: t.ToolSessionMap = new Map();
553
+ const mockTool = createMockCodeTool({ capturedConfigs: [] });
554
+ const toolNode = new ToolNode({
555
+ tools: [mockTool],
556
+ sessions,
557
+ eventDrivenMode: true,
558
+ });
559
+ const storeMethod = (
560
+ toolNode as unknown as {
561
+ storeCodeSessionFromResults: (
562
+ results: t.ToolExecuteResult[],
563
+ requestMap: Map<string, t.ToolCallRequest>
564
+ ) => void;
565
+ }
566
+ ).storeCodeSessionFromResults.bind(toolNode);
567
+
568
+ storeMethod(
569
+ [
570
+ {
571
+ toolCallId: 'tc-mixed',
572
+ content: 'output',
573
+ artifact: {
574
+ session_id: 'exec-mixed',
575
+ files: [
576
+ /* Mix: one file with storage id, one without (older payload). */
577
+ { id: 'f1', name: 'fresh.csv', session_id: 'storage-fresh' },
578
+ { id: 'f2', name: 'legacy.csv' },
579
+ ],
580
+ },
581
+ status: 'success',
582
+ },
583
+ ],
584
+ new Map([
585
+ [
586
+ 'tc-mixed',
587
+ { id: 'tc-mixed', name: Constants.EXECUTE_CODE, args: {} },
588
+ ],
589
+ ])
590
+ );
591
+
592
+ const stored = sessions.get(
593
+ Constants.EXECUTE_CODE
594
+ ) as t.CodeSessionContext;
595
+ expect(stored.files![0].session_id).toBe('storage-fresh');
596
+ /* Fallback only when the per-file id is missing. */
597
+ expect(stored.files![1].session_id).toBe('exec-mixed');
598
+ });
475
599
  });
476
600
 
477
601
  describe('codeSessionContext emission gate (event-driven request building)', () => {