@librechat/agents 3.1.71 → 3.1.73
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/graphs/Graph.cjs +11 -1
- package/dist/cjs/graphs/Graph.cjs.map +1 -1
- package/dist/cjs/messages/format.cjs +27 -1
- package/dist/cjs/messages/format.cjs.map +1 -1
- package/dist/cjs/tools/ToolNode.cjs +34 -3
- package/dist/cjs/tools/ToolNode.cjs.map +1 -1
- package/dist/esm/graphs/Graph.mjs +11 -1
- package/dist/esm/graphs/Graph.mjs.map +1 -1
- package/dist/esm/messages/format.mjs +27 -1
- package/dist/esm/messages/format.mjs.map +1 -1
- package/dist/esm/tools/ToolNode.mjs +34 -3
- package/dist/esm/tools/ToolNode.mjs.map +1 -1
- package/dist/types/messages/format.d.ts +11 -1
- package/package.json +1 -1
- package/src/graphs/Graph.ts +12 -1
- package/src/messages/ensureThinkingBlock.test.ts +167 -0
- package/src/messages/format.ts +29 -1
- package/src/tools/ToolNode.ts +34 -3
- package/src/tools/__tests__/ToolNode.session.test.ts +279 -6
|
@@ -1209,4 +1209,171 @@ describe('ensureThinkingBlockInMessages', () => {
|
|
|
1209
1209
|
expect(outputImageBlock).not.toBe(originalImageBlock);
|
|
1210
1210
|
});
|
|
1211
1211
|
});
|
|
1212
|
+
|
|
1213
|
+
describe('runStartIndex (current-run boundary)', () => {
|
|
1214
|
+
/**
|
|
1215
|
+
* Claude is allowed to skip a thinking block before a tool_use (cf.
|
|
1216
|
+
* PR #116). When the agent's own first iteration produces an
|
|
1217
|
+
* `AI(tool_use, no thinking)`, the function would otherwise convert
|
|
1218
|
+
* it to a `[Previous agent context]` HumanMessage — polluting the
|
|
1219
|
+
* next iteration's prompt with text the model treats as suspicious
|
|
1220
|
+
* injected content. The model then ignores its own real prior tool
|
|
1221
|
+
* result and re-runs the tool to verify, often failing because the
|
|
1222
|
+
* subsequent sandbox doesn't have the file.
|
|
1223
|
+
*
|
|
1224
|
+
* The `runStartIndex` parameter tells the function which messages
|
|
1225
|
+
* are the agent's own in-run work: those at or after it must NEVER
|
|
1226
|
+
* be converted, even if no thinking block appears in the chain.
|
|
1227
|
+
*/
|
|
1228
|
+
|
|
1229
|
+
test('preserves the agent first-iteration AI(tool_use) when its index is at runStartIndex', () => {
|
|
1230
|
+
const messages = [
|
|
1231
|
+
new HumanMessage({ content: 'fetch the data' }),
|
|
1232
|
+
// No thinking block — Claude validly skipped it before tool_use
|
|
1233
|
+
new AIMessage({
|
|
1234
|
+
content: '',
|
|
1235
|
+
tool_calls: [
|
|
1236
|
+
{ id: 'c1', name: 'fetch', args: {}, type: 'tool_call' as const },
|
|
1237
|
+
],
|
|
1238
|
+
}),
|
|
1239
|
+
new ToolMessage({ content: 'data', tool_call_id: 'c1' }),
|
|
1240
|
+
];
|
|
1241
|
+
|
|
1242
|
+
const result = ensureThinkingBlockInMessages(
|
|
1243
|
+
messages,
|
|
1244
|
+
Providers.BEDROCK,
|
|
1245
|
+
undefined,
|
|
1246
|
+
/* runStartIndex */ 1
|
|
1247
|
+
);
|
|
1248
|
+
|
|
1249
|
+
// All 3 preserved — the AI at index 1 is the agent's own work
|
|
1250
|
+
expect(result).toHaveLength(3);
|
|
1251
|
+
expect(result[1]).toBeInstanceOf(AIMessage);
|
|
1252
|
+
expect((result[1] as AIMessage).tool_calls).toHaveLength(1);
|
|
1253
|
+
expect(result[2]).toBeInstanceOf(ToolMessage);
|
|
1254
|
+
// No placeholder leaked in
|
|
1255
|
+
expect(getTextContent(result[1])).not.toContain(
|
|
1256
|
+
'[Previous agent context]'
|
|
1257
|
+
);
|
|
1258
|
+
});
|
|
1259
|
+
|
|
1260
|
+
test('preserves multiple in-run AI(tool_use) iterations without thinking blocks', () => {
|
|
1261
|
+
const messages = [
|
|
1262
|
+
new HumanMessage({ content: 'do work' }),
|
|
1263
|
+
new AIMessage({
|
|
1264
|
+
content: '',
|
|
1265
|
+
tool_calls: [
|
|
1266
|
+
{ id: 'c1', name: 'step1', args: {}, type: 'tool_call' as const },
|
|
1267
|
+
],
|
|
1268
|
+
}),
|
|
1269
|
+
new ToolMessage({ content: 'r1', tool_call_id: 'c1' }),
|
|
1270
|
+
new AIMessage({
|
|
1271
|
+
content: '',
|
|
1272
|
+
tool_calls: [
|
|
1273
|
+
{ id: 'c2', name: 'step2', args: {}, type: 'tool_call' as const },
|
|
1274
|
+
],
|
|
1275
|
+
}),
|
|
1276
|
+
new ToolMessage({ content: 'r2', tool_call_id: 'c2' }),
|
|
1277
|
+
];
|
|
1278
|
+
|
|
1279
|
+
const result = ensureThinkingBlockInMessages(
|
|
1280
|
+
messages,
|
|
1281
|
+
Providers.BEDROCK,
|
|
1282
|
+
undefined,
|
|
1283
|
+
/* runStartIndex */ 1
|
|
1284
|
+
);
|
|
1285
|
+
|
|
1286
|
+
expect(result).toHaveLength(5);
|
|
1287
|
+
expect(result[1]).toBeInstanceOf(AIMessage);
|
|
1288
|
+
expect(result[3]).toBeInstanceOf(AIMessage);
|
|
1289
|
+
// Neither AI was converted
|
|
1290
|
+
expect(getTextContent(result[1])).not.toContain(
|
|
1291
|
+
'[Previous agent context]'
|
|
1292
|
+
);
|
|
1293
|
+
expect(getTextContent(result[3])).not.toContain(
|
|
1294
|
+
'[Previous agent context]'
|
|
1295
|
+
);
|
|
1296
|
+
});
|
|
1297
|
+
|
|
1298
|
+
test('still converts pre-runStartIndex history that lacks thinking blocks', () => {
|
|
1299
|
+
// Real handoff scenario: a prior non-thinking agent's tool calls
|
|
1300
|
+
// appear before this run started. They genuinely need the
|
|
1301
|
+
// placeholder (the legacy reason this function exists).
|
|
1302
|
+
const messages = [
|
|
1303
|
+
new HumanMessage({ content: 'first request' }),
|
|
1304
|
+
new AIMessage({
|
|
1305
|
+
content: 'using tool',
|
|
1306
|
+
tool_calls: [
|
|
1307
|
+
{ id: 'old', name: 'legacy', args: {}, type: 'tool_call' as const },
|
|
1308
|
+
],
|
|
1309
|
+
}),
|
|
1310
|
+
new ToolMessage({ content: 'old result', tool_call_id: 'old' }),
|
|
1311
|
+
// Current run starts here — say after a handoff. Index >= 3 is
|
|
1312
|
+
// the new agent's own work.
|
|
1313
|
+
];
|
|
1314
|
+
|
|
1315
|
+
const result = ensureThinkingBlockInMessages(
|
|
1316
|
+
messages,
|
|
1317
|
+
Providers.BEDROCK,
|
|
1318
|
+
undefined,
|
|
1319
|
+
/* runStartIndex */ 3
|
|
1320
|
+
);
|
|
1321
|
+
|
|
1322
|
+
// The pre-run AI(tool_use)+Tool got converted to a placeholder
|
|
1323
|
+
expect(result).toHaveLength(2);
|
|
1324
|
+
expect(result[0]).toBeInstanceOf(HumanMessage);
|
|
1325
|
+
expect(result[1]).toBeInstanceOf(HumanMessage);
|
|
1326
|
+
expect(getTextContent(result[1])).toContain('[Previous agent context]');
|
|
1327
|
+
});
|
|
1328
|
+
|
|
1329
|
+
test('falls back to chainHasThinkingBlock heuristic when runStartIndex is undefined (backward compat)', () => {
|
|
1330
|
+
const messages = [
|
|
1331
|
+
new HumanMessage({ content: 'do work' }),
|
|
1332
|
+
// No reasoning + no runStartIndex hint → still gets converted
|
|
1333
|
+
// (preserves the prior behavior for callers that haven't been
|
|
1334
|
+
// updated to pass the boundary).
|
|
1335
|
+
new AIMessage({
|
|
1336
|
+
content: 'using tool',
|
|
1337
|
+
tool_calls: [
|
|
1338
|
+
{ id: 'c1', name: 'tool', args: {}, type: 'tool_call' as const },
|
|
1339
|
+
],
|
|
1340
|
+
}),
|
|
1341
|
+
new ToolMessage({ content: 'r', tool_call_id: 'c1' }),
|
|
1342
|
+
];
|
|
1343
|
+
|
|
1344
|
+
const result = ensureThinkingBlockInMessages(messages, Providers.BEDROCK);
|
|
1345
|
+
|
|
1346
|
+
expect(result).toHaveLength(2);
|
|
1347
|
+
expect(result[1]).toBeInstanceOf(HumanMessage);
|
|
1348
|
+
expect(getTextContent(result[1])).toContain('[Previous agent context]');
|
|
1349
|
+
});
|
|
1350
|
+
|
|
1351
|
+
test('runStartIndex of 0 is honored (whole array is the current run)', () => {
|
|
1352
|
+
// Edge: a fresh run with no prior history at all. Everything is
|
|
1353
|
+
// in-run and must be preserved even without thinking blocks.
|
|
1354
|
+
const messages = [
|
|
1355
|
+
new HumanMessage({ content: 'do work' }),
|
|
1356
|
+
new AIMessage({
|
|
1357
|
+
content: '',
|
|
1358
|
+
tool_calls: [
|
|
1359
|
+
{ id: 'c1', name: 'tool', args: {}, type: 'tool_call' as const },
|
|
1360
|
+
],
|
|
1361
|
+
}),
|
|
1362
|
+
new ToolMessage({ content: 'r', tool_call_id: 'c1' }),
|
|
1363
|
+
];
|
|
1364
|
+
|
|
1365
|
+
const result = ensureThinkingBlockInMessages(
|
|
1366
|
+
messages,
|
|
1367
|
+
Providers.BEDROCK,
|
|
1368
|
+
undefined,
|
|
1369
|
+
/* runStartIndex */ 0
|
|
1370
|
+
);
|
|
1371
|
+
|
|
1372
|
+
expect(result).toHaveLength(3);
|
|
1373
|
+
expect(result[1]).toBeInstanceOf(AIMessage);
|
|
1374
|
+
expect(getTextContent(result[1])).not.toContain(
|
|
1375
|
+
'[Previous agent context]'
|
|
1376
|
+
);
|
|
1377
|
+
});
|
|
1378
|
+
});
|
|
1212
1379
|
});
|
package/src/messages/format.ts
CHANGED
|
@@ -1391,12 +1391,23 @@ function appendToolCalls(
|
|
|
1391
1391
|
* @param messages - Array of messages to process
|
|
1392
1392
|
* @param provider - The provider being used (unused but kept for future compatibility)
|
|
1393
1393
|
* @param config - Optional RunnableConfig for structured agent logging
|
|
1394
|
+
* @param runStartIndex - Index in `messages` where the CURRENT run's own
|
|
1395
|
+
* appended AI/Tool messages begin (i.e. anything at this index or later
|
|
1396
|
+
* was just produced by this run's own iterations, not historical
|
|
1397
|
+
* context). When provided, AI messages at or after this index are
|
|
1398
|
+
* never converted to `[Previous agent context]` placeholders — Claude
|
|
1399
|
+
* can validly skip a thinking block before a tool_use (cf. PR #116),
|
|
1400
|
+
* so the agent's own in-run iterations must not be misclassified as
|
|
1401
|
+
* foreign history. Without the signal the function falls back to its
|
|
1402
|
+
* prior heuristic (`chainHasThinkingBlock`), preserving backward
|
|
1403
|
+
* compatibility for callers that don't yet pass the boundary.
|
|
1394
1404
|
* @returns The messages array with tool sequences converted to buffer strings if necessary
|
|
1395
1405
|
*/
|
|
1396
1406
|
export function ensureThinkingBlockInMessages(
|
|
1397
1407
|
messages: BaseMessage[],
|
|
1398
1408
|
_provider: Providers,
|
|
1399
|
-
config?: RunnableConfig
|
|
1409
|
+
config?: RunnableConfig,
|
|
1410
|
+
runStartIndex?: number
|
|
1400
1411
|
): BaseMessage[] {
|
|
1401
1412
|
if (messages.length === 0) {
|
|
1402
1413
|
return messages;
|
|
@@ -1483,6 +1494,23 @@ export function ensureThinkingBlockInMessages(
|
|
|
1483
1494
|
// but follow-ups have content: "" with only tool_calls. These are the
|
|
1484
1495
|
// same agent's turn and must NOT be converted to HumanMessages.
|
|
1485
1496
|
if (hasToolUse && !hasThinkingBlock) {
|
|
1497
|
+
// Current-run boundary check: anything at or after `runStartIndex`
|
|
1498
|
+
// is the current run's own work — preserve it. Claude is allowed
|
|
1499
|
+
// to skip a thinking block before a tool_use (cf. PR #116 in the
|
|
1500
|
+
// agents repo), so the agent's own first-iteration AI message can
|
|
1501
|
+
// legitimately have tool_calls without reasoning. Converting it to
|
|
1502
|
+
// a `[Previous agent context]` placeholder pollutes the next
|
|
1503
|
+
// iteration's prompt — the LLM sees the placeholder, treats it as
|
|
1504
|
+
// suspicious injected content, ignores its own real prior tool
|
|
1505
|
+
// result, and re-runs the tool to verify (which then often fails
|
|
1506
|
+
// because subsequent calls land in fresh sandboxes without the
|
|
1507
|
+
// file). Skip the conversion when we know this is in-run.
|
|
1508
|
+
if (runStartIndex !== undefined && i >= runStartIndex) {
|
|
1509
|
+
result.push(msg);
|
|
1510
|
+
i++;
|
|
1511
|
+
continue;
|
|
1512
|
+
}
|
|
1513
|
+
|
|
1486
1514
|
// Walk backwards — if an earlier AI message in the same chain (before
|
|
1487
1515
|
// the nearest HumanMessage) has a thinking/reasoning block, this is a
|
|
1488
1516
|
// continuation of a thinking-enabled turn, not a non-thinking handoff.
|
package/src/tools/ToolNode.ts
CHANGED
|
@@ -89,7 +89,26 @@ function isSend(value: unknown): value is Send {
|
|
|
89
89
|
return value instanceof Send;
|
|
90
90
|
}
|
|
91
91
|
|
|
92
|
-
/**
|
|
92
|
+
/**
|
|
93
|
+
* Merges code execution session context into the sessions map.
|
|
94
|
+
*
|
|
95
|
+
* The codeapi worker reports two distinct ids on a code-execution result:
|
|
96
|
+
* - `artifact.session_id` (the `sessionId` arg here) is the EXEC session
|
|
97
|
+
* — the sandbox VM that ran the code. It's transient and torn down
|
|
98
|
+
* post-execution; subsequent calls cannot reuse it as a sandbox.
|
|
99
|
+
* - `file.session_id` on each `artifact.files[i]` is the STORAGE
|
|
100
|
+
* session — the file-server bucket prefix where the artifact actually
|
|
101
|
+
* lives and is served from.
|
|
102
|
+
*
|
|
103
|
+
* Per-file `session_id` is preserved (not overwritten with the exec id)
|
|
104
|
+
* because `_injected_files` are looked up against the file-server's
|
|
105
|
+
* storage path on subsequent tool calls. Stomping the storage id with
|
|
106
|
+
* the exec id silently 404s every follow-up tool call within the same
|
|
107
|
+
* run — `cat /mnt/data/foo.txt` reports "No such file or directory"
|
|
108
|
+
* because the worker can't mount a file at a path the storage doesn't
|
|
109
|
+
* know about. Fall back to `sessionId` only when the per-file id is
|
|
110
|
+
* absent (older worker payloads).
|
|
111
|
+
*/
|
|
93
112
|
function updateCodeSession(
|
|
94
113
|
sessions: t.ToolSessionMap,
|
|
95
114
|
sessionId: string,
|
|
@@ -104,7 +123,7 @@ function updateCodeSession(
|
|
|
104
123
|
if (newFiles.length > 0) {
|
|
105
124
|
const filesWithSession: t.FileRefs = newFiles.map((file) => ({
|
|
106
125
|
...file,
|
|
107
|
-
session_id: sessionId,
|
|
126
|
+
session_id: file.session_id ?? sessionId,
|
|
108
127
|
}));
|
|
109
128
|
const newFileNames = new Set(filesWithSession.map((f) => f.name));
|
|
110
129
|
const filteredExisting = existingFiles.filter(
|
|
@@ -996,9 +1015,21 @@ export class ToolNode<T = any> extends RunnableCallable<T, T> {
|
|
|
996
1015
|
turn,
|
|
997
1016
|
};
|
|
998
1017
|
|
|
1018
|
+
/**
|
|
1019
|
+
* Emit `codeSessionContext` for any tool whose host handler may need
|
|
1020
|
+
* to reach into the code-execution sandbox:
|
|
1021
|
+
* - `CODE_EXECUTION_TOOLS` — direct executors that POST to /exec.
|
|
1022
|
+
* - `SKILL_TOOL` — skill files live alongside code-env state.
|
|
1023
|
+
* - `READ_FILE` — when the requested path is a code-env artifact
|
|
1024
|
+
* (e.g. `/mnt/data/...`) the host falls back to reading via the
|
|
1025
|
+
* same sandbox session; without the seeded `session_id` /
|
|
1026
|
+
* `_injected_files` here, that fallback can't see prior-turn
|
|
1027
|
+
* artifacts on the very first call of a turn.
|
|
1028
|
+
*/
|
|
999
1029
|
if (
|
|
1000
1030
|
CODE_EXECUTION_TOOLS.has(entry.call.name) ||
|
|
1001
|
-
entry.call.name === Constants.SKILL_TOOL
|
|
1031
|
+
entry.call.name === Constants.SKILL_TOOL ||
|
|
1032
|
+
entry.call.name === Constants.READ_FILE
|
|
1002
1033
|
) {
|
|
1003
1034
|
request.codeSessionContext = this.getCodeSessionContext();
|
|
1004
1035
|
}
|
|
@@ -1,11 +1,12 @@
|
|
|
1
1
|
import { z } from 'zod';
|
|
2
2
|
import { tool } from '@langchain/core/tools';
|
|
3
3
|
import { AIMessage } from '@langchain/core/messages';
|
|
4
|
-
import { describe, it, expect } from '@jest/globals';
|
|
4
|
+
import { describe, it, expect, jest, afterEach } from '@jest/globals';
|
|
5
5
|
import type { StructuredToolInterface } from '@langchain/core/tools';
|
|
6
6
|
import type * as t from '@/types';
|
|
7
7
|
import { ToolNode } from '../ToolNode';
|
|
8
8
|
import { Constants } from '@/common';
|
|
9
|
+
import * as events from '@/utils/events';
|
|
9
10
|
|
|
10
11
|
/**
|
|
11
12
|
* Creates a mock execute_code tool that captures the toolCall config it receives.
|
|
@@ -233,7 +234,9 @@ describe('ToolNode code execution session management', () => {
|
|
|
233
234
|
status: 'success',
|
|
234
235
|
},
|
|
235
236
|
],
|
|
236
|
-
new Map([
|
|
237
|
+
new Map([
|
|
238
|
+
['tc1', { id: 'tc1', name: Constants.EXECUTE_CODE, args: {} }],
|
|
239
|
+
])
|
|
237
240
|
);
|
|
238
241
|
|
|
239
242
|
const stored = sessions.get(
|
|
@@ -279,7 +282,9 @@ describe('ToolNode code execution session management', () => {
|
|
|
279
282
|
status: 'success',
|
|
280
283
|
},
|
|
281
284
|
],
|
|
282
|
-
new Map([
|
|
285
|
+
new Map([
|
|
286
|
+
['tc2', { id: 'tc2', name: Constants.EXECUTE_CODE, args: {} }],
|
|
287
|
+
])
|
|
283
288
|
);
|
|
284
289
|
|
|
285
290
|
const stored = sessions.get(
|
|
@@ -329,7 +334,9 @@ describe('ToolNode code execution session management', () => {
|
|
|
329
334
|
status: 'success',
|
|
330
335
|
},
|
|
331
336
|
],
|
|
332
|
-
new Map([
|
|
337
|
+
new Map([
|
|
338
|
+
['tc3', { id: 'tc3', name: Constants.EXECUTE_CODE, args: {} }],
|
|
339
|
+
])
|
|
333
340
|
);
|
|
334
341
|
|
|
335
342
|
const stored = sessions.get(
|
|
@@ -379,7 +386,9 @@ describe('ToolNode code execution session management', () => {
|
|
|
379
386
|
status: 'success',
|
|
380
387
|
},
|
|
381
388
|
],
|
|
382
|
-
new Map([
|
|
389
|
+
new Map([
|
|
390
|
+
['tc4', { id: 'tc4', name: Constants.EXECUTE_CODE, args: {} }],
|
|
391
|
+
])
|
|
383
392
|
);
|
|
384
393
|
|
|
385
394
|
const stored = sessions.get(
|
|
@@ -456,10 +465,274 @@ describe('ToolNode code execution session management', () => {
|
|
|
456
465
|
errorMessage: 'execution failed',
|
|
457
466
|
},
|
|
458
467
|
],
|
|
459
|
-
new Map([
|
|
468
|
+
new Map([
|
|
469
|
+
['tc6', { id: 'tc6', name: Constants.EXECUTE_CODE, args: {} }],
|
|
470
|
+
])
|
|
460
471
|
);
|
|
461
472
|
|
|
462
473
|
expect(sessions.has(Constants.EXECUTE_CODE)).toBe(false);
|
|
463
474
|
});
|
|
475
|
+
|
|
476
|
+
it('preserves per-file storage session_id (not overwritten with the exec session_id)', () => {
|
|
477
|
+
/**
|
|
478
|
+
* Regression: the codeapi worker reports `artifact.session_id` (EXEC
|
|
479
|
+
* session — torn down post-run) and per-file `session_id` (STORAGE
|
|
480
|
+
* session where the file lives). Stomping the storage id with the
|
|
481
|
+
* exec id silently 404s every follow-up tool call within the same
|
|
482
|
+
* run because `_injected_files` carry the wrong path on the next
|
|
483
|
+
* `/exec`. The worker tries to mount `<exec_session>/<id>` against
|
|
484
|
+
* file-server, gets 404, mounts nothing — `cat /mnt/data/foo.txt`
|
|
485
|
+
* → "No such file or directory".
|
|
486
|
+
*/
|
|
487
|
+
const sessions: t.ToolSessionMap = new Map();
|
|
488
|
+
const mockTool = createMockCodeTool({ capturedConfigs: [] });
|
|
489
|
+
const toolNode = new ToolNode({
|
|
490
|
+
tools: [mockTool],
|
|
491
|
+
sessions,
|
|
492
|
+
eventDrivenMode: true,
|
|
493
|
+
});
|
|
494
|
+
const storeMethod = (
|
|
495
|
+
toolNode as unknown as {
|
|
496
|
+
storeCodeSessionFromResults: (
|
|
497
|
+
results: t.ToolExecuteResult[],
|
|
498
|
+
requestMap: Map<string, t.ToolCallRequest>
|
|
499
|
+
) => void;
|
|
500
|
+
}
|
|
501
|
+
).storeCodeSessionFromResults.bind(toolNode);
|
|
502
|
+
|
|
503
|
+
storeMethod(
|
|
504
|
+
[
|
|
505
|
+
{
|
|
506
|
+
toolCallId: 'tc-storage',
|
|
507
|
+
content: 'output',
|
|
508
|
+
artifact: {
|
|
509
|
+
/* EXEC session — transient, torn down after this run */
|
|
510
|
+
session_id: 'exec-session-123',
|
|
511
|
+
files: [
|
|
512
|
+
/* STORAGE session — persistent file-server bucket prefix */
|
|
513
|
+
{
|
|
514
|
+
id: 'f1',
|
|
515
|
+
name: 'sentinel.txt',
|
|
516
|
+
session_id: 'storage-session-A',
|
|
517
|
+
},
|
|
518
|
+
{ id: 'f2', name: 'data.csv', session_id: 'storage-session-B' },
|
|
519
|
+
],
|
|
520
|
+
},
|
|
521
|
+
status: 'success',
|
|
522
|
+
},
|
|
523
|
+
],
|
|
524
|
+
new Map([
|
|
525
|
+
[
|
|
526
|
+
'tc-storage',
|
|
527
|
+
{ id: 'tc-storage', name: Constants.EXECUTE_CODE, args: {} },
|
|
528
|
+
],
|
|
529
|
+
])
|
|
530
|
+
);
|
|
531
|
+
|
|
532
|
+
const stored = sessions.get(
|
|
533
|
+
Constants.EXECUTE_CODE
|
|
534
|
+
) as t.CodeSessionContext;
|
|
535
|
+
/* The session-level id is the (latest) exec id — fine for tracking
|
|
536
|
+
"what session ran last" — but per-file storage ids must survive. */
|
|
537
|
+
expect(stored.session_id).toBe('exec-session-123');
|
|
538
|
+
expect(stored.files).toHaveLength(2);
|
|
539
|
+
expect(stored.files![0]).toEqual({
|
|
540
|
+
id: 'f1',
|
|
541
|
+
name: 'sentinel.txt',
|
|
542
|
+
session_id: 'storage-session-A',
|
|
543
|
+
});
|
|
544
|
+
expect(stored.files![1]).toEqual({
|
|
545
|
+
id: 'f2',
|
|
546
|
+
name: 'data.csv',
|
|
547
|
+
session_id: 'storage-session-B',
|
|
548
|
+
});
|
|
549
|
+
});
|
|
550
|
+
|
|
551
|
+
it('falls back to exec session_id only when per-file session_id is absent (older worker payloads)', () => {
|
|
552
|
+
const sessions: t.ToolSessionMap = new Map();
|
|
553
|
+
const mockTool = createMockCodeTool({ capturedConfigs: [] });
|
|
554
|
+
const toolNode = new ToolNode({
|
|
555
|
+
tools: [mockTool],
|
|
556
|
+
sessions,
|
|
557
|
+
eventDrivenMode: true,
|
|
558
|
+
});
|
|
559
|
+
const storeMethod = (
|
|
560
|
+
toolNode as unknown as {
|
|
561
|
+
storeCodeSessionFromResults: (
|
|
562
|
+
results: t.ToolExecuteResult[],
|
|
563
|
+
requestMap: Map<string, t.ToolCallRequest>
|
|
564
|
+
) => void;
|
|
565
|
+
}
|
|
566
|
+
).storeCodeSessionFromResults.bind(toolNode);
|
|
567
|
+
|
|
568
|
+
storeMethod(
|
|
569
|
+
[
|
|
570
|
+
{
|
|
571
|
+
toolCallId: 'tc-mixed',
|
|
572
|
+
content: 'output',
|
|
573
|
+
artifact: {
|
|
574
|
+
session_id: 'exec-mixed',
|
|
575
|
+
files: [
|
|
576
|
+
/* Mix: one file with storage id, one without (older payload). */
|
|
577
|
+
{ id: 'f1', name: 'fresh.csv', session_id: 'storage-fresh' },
|
|
578
|
+
{ id: 'f2', name: 'legacy.csv' },
|
|
579
|
+
],
|
|
580
|
+
},
|
|
581
|
+
status: 'success',
|
|
582
|
+
},
|
|
583
|
+
],
|
|
584
|
+
new Map([
|
|
585
|
+
[
|
|
586
|
+
'tc-mixed',
|
|
587
|
+
{ id: 'tc-mixed', name: Constants.EXECUTE_CODE, args: {} },
|
|
588
|
+
],
|
|
589
|
+
])
|
|
590
|
+
);
|
|
591
|
+
|
|
592
|
+
const stored = sessions.get(
|
|
593
|
+
Constants.EXECUTE_CODE
|
|
594
|
+
) as t.CodeSessionContext;
|
|
595
|
+
expect(stored.files![0].session_id).toBe('storage-fresh');
|
|
596
|
+
/* Fallback only when the per-file id is missing. */
|
|
597
|
+
expect(stored.files![1].session_id).toBe('exec-mixed');
|
|
598
|
+
});
|
|
599
|
+
});
|
|
600
|
+
|
|
601
|
+
describe('codeSessionContext emission gate (event-driven request building)', () => {
|
|
602
|
+
/**
|
|
603
|
+
* Captures the `ToolExecuteBatchRequest` dispatched on ON_TOOL_EXECUTE so
|
|
604
|
+
* we can assert which `request.name`s receive `codeSessionContext`. Returns
|
|
605
|
+
* the captured requests; resolves the dispatched event with empty results
|
|
606
|
+
* to let `dispatchToolEvents` complete.
|
|
607
|
+
*/
|
|
608
|
+
function captureBatchRequests(): {
|
|
609
|
+
capturedRequests: t.ToolCallRequest[];
|
|
610
|
+
} {
|
|
611
|
+
const capturedRequests: t.ToolCallRequest[] = [];
|
|
612
|
+
jest
|
|
613
|
+
.spyOn(events, 'safeDispatchCustomEvent')
|
|
614
|
+
.mockImplementation(async (_event, data) => {
|
|
615
|
+
const batch = data as t.ToolExecuteBatchRequest;
|
|
616
|
+
if (Array.isArray(batch.toolCalls)) {
|
|
617
|
+
capturedRequests.push(...batch.toolCalls);
|
|
618
|
+
}
|
|
619
|
+
if (typeof batch.resolve === 'function') {
|
|
620
|
+
batch.resolve(
|
|
621
|
+
batch.toolCalls.map((tc) => ({
|
|
622
|
+
toolCallId: tc.id,
|
|
623
|
+
content: '',
|
|
624
|
+
status: 'success' as const,
|
|
625
|
+
}))
|
|
626
|
+
);
|
|
627
|
+
}
|
|
628
|
+
});
|
|
629
|
+
return { capturedRequests };
|
|
630
|
+
}
|
|
631
|
+
|
|
632
|
+
const createDummyTool = (name: string): StructuredToolInterface =>
|
|
633
|
+
tool(async () => 'ok', {
|
|
634
|
+
name,
|
|
635
|
+
description: 'dummy',
|
|
636
|
+
schema: z.object({ x: z.string().optional() }),
|
|
637
|
+
}) as unknown as StructuredToolInterface;
|
|
638
|
+
|
|
639
|
+
afterEach(() => {
|
|
640
|
+
jest.restoreAllMocks();
|
|
641
|
+
});
|
|
642
|
+
|
|
643
|
+
it('attaches codeSessionContext to read_file requests so the host can fall back to the code-env sandbox', async () => {
|
|
644
|
+
const sessions: t.ToolSessionMap = new Map();
|
|
645
|
+
sessions.set(Constants.EXECUTE_CODE, {
|
|
646
|
+
session_id: 'rf-session',
|
|
647
|
+
files: [{ id: 'rf1', name: 'data.csv', session_id: 'rf-session' }],
|
|
648
|
+
lastUpdated: Date.now(),
|
|
649
|
+
} satisfies t.CodeSessionContext);
|
|
650
|
+
|
|
651
|
+
const { capturedRequests } = captureBatchRequests();
|
|
652
|
+
|
|
653
|
+
const toolNode = new ToolNode({
|
|
654
|
+
tools: [createDummyTool(Constants.READ_FILE)],
|
|
655
|
+
sessions,
|
|
656
|
+
eventDrivenMode: true,
|
|
657
|
+
toolCallStepIds: new Map([['call_rf', 'step_rf']]),
|
|
658
|
+
});
|
|
659
|
+
|
|
660
|
+
const aiMsg = new AIMessage({
|
|
661
|
+
content: '',
|
|
662
|
+
tool_calls: [
|
|
663
|
+
{
|
|
664
|
+
id: 'call_rf',
|
|
665
|
+
name: Constants.READ_FILE,
|
|
666
|
+
args: { file_path: '/mnt/data/data.csv' },
|
|
667
|
+
},
|
|
668
|
+
],
|
|
669
|
+
});
|
|
670
|
+
|
|
671
|
+
await toolNode.invoke({ messages: [aiMsg] });
|
|
672
|
+
|
|
673
|
+
expect(capturedRequests).toHaveLength(1);
|
|
674
|
+
expect(capturedRequests[0].name).toBe(Constants.READ_FILE);
|
|
675
|
+
expect(capturedRequests[0].codeSessionContext).toEqual({
|
|
676
|
+
session_id: 'rf-session',
|
|
677
|
+
files: [{ session_id: 'rf-session', id: 'rf1', name: 'data.csv' }],
|
|
678
|
+
});
|
|
679
|
+
});
|
|
680
|
+
|
|
681
|
+
it('does not attach codeSessionContext to read_file when no session exists yet', async () => {
|
|
682
|
+
const { capturedRequests } = captureBatchRequests();
|
|
683
|
+
|
|
684
|
+
const toolNode = new ToolNode({
|
|
685
|
+
tools: [createDummyTool(Constants.READ_FILE)],
|
|
686
|
+
sessions: new Map(),
|
|
687
|
+
eventDrivenMode: true,
|
|
688
|
+
toolCallStepIds: new Map([['call_rf2', 'step_rf2']]),
|
|
689
|
+
});
|
|
690
|
+
|
|
691
|
+
const aiMsg = new AIMessage({
|
|
692
|
+
content: '',
|
|
693
|
+
tool_calls: [
|
|
694
|
+
{
|
|
695
|
+
id: 'call_rf2',
|
|
696
|
+
name: Constants.READ_FILE,
|
|
697
|
+
args: { file_path: 'some-skill/notes.md' },
|
|
698
|
+
},
|
|
699
|
+
],
|
|
700
|
+
});
|
|
701
|
+
|
|
702
|
+
await toolNode.invoke({ messages: [aiMsg] });
|
|
703
|
+
|
|
704
|
+
expect(capturedRequests).toHaveLength(1);
|
|
705
|
+
expect(capturedRequests[0].name).toBe(Constants.READ_FILE);
|
|
706
|
+
expect(capturedRequests[0].codeSessionContext).toBeUndefined();
|
|
707
|
+
});
|
|
708
|
+
|
|
709
|
+
it('does not attach codeSessionContext to unrelated tools', async () => {
|
|
710
|
+
const sessions: t.ToolSessionMap = new Map();
|
|
711
|
+
sessions.set(Constants.EXECUTE_CODE, {
|
|
712
|
+
session_id: 'unrelated-session',
|
|
713
|
+
files: [],
|
|
714
|
+
lastUpdated: Date.now(),
|
|
715
|
+
} satisfies t.CodeSessionContext);
|
|
716
|
+
|
|
717
|
+
const { capturedRequests } = captureBatchRequests();
|
|
718
|
+
|
|
719
|
+
const toolNode = new ToolNode({
|
|
720
|
+
tools: [createDummyTool('web_search')],
|
|
721
|
+
sessions,
|
|
722
|
+
eventDrivenMode: true,
|
|
723
|
+
toolCallStepIds: new Map([['call_ws', 'step_ws']]),
|
|
724
|
+
});
|
|
725
|
+
|
|
726
|
+
const aiMsg = new AIMessage({
|
|
727
|
+
content: '',
|
|
728
|
+
tool_calls: [{ id: 'call_ws', name: 'web_search', args: { x: 'q' } }],
|
|
729
|
+
});
|
|
730
|
+
|
|
731
|
+
await toolNode.invoke({ messages: [aiMsg] });
|
|
732
|
+
|
|
733
|
+
expect(capturedRequests).toHaveLength(1);
|
|
734
|
+
expect(capturedRequests[0].name).toBe('web_search');
|
|
735
|
+
expect(capturedRequests[0].codeSessionContext).toBeUndefined();
|
|
736
|
+
});
|
|
464
737
|
});
|
|
465
738
|
});
|