@librechat/agents 3.1.77-dev.1 → 3.1.78-dev.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (188)
  1. package/dist/cjs/common/enum.cjs +54 -0
  2. package/dist/cjs/common/enum.cjs.map +1 -1
  3. package/dist/cjs/graphs/Graph.cjs +148 -4
  4. package/dist/cjs/graphs/Graph.cjs.map +1 -1
  5. package/dist/cjs/hooks/createWorkspacePolicyHook.cjs +291 -0
  6. package/dist/cjs/hooks/createWorkspacePolicyHook.cjs.map +1 -0
  7. package/dist/cjs/llm/openai/index.cjs +317 -1
  8. package/dist/cjs/llm/openai/index.cjs.map +1 -1
  9. package/dist/cjs/main.cjs +90 -0
  10. package/dist/cjs/main.cjs.map +1 -1
  11. package/dist/cjs/messages/anthropicToolCache.cjs +102 -0
  12. package/dist/cjs/messages/anthropicToolCache.cjs.map +1 -0
  13. package/dist/cjs/messages/prune.cjs +27 -0
  14. package/dist/cjs/messages/prune.cjs.map +1 -1
  15. package/dist/cjs/messages/recency.cjs +99 -0
  16. package/dist/cjs/messages/recency.cjs.map +1 -0
  17. package/dist/cjs/run.cjs +30 -0
  18. package/dist/cjs/run.cjs.map +1 -1
  19. package/dist/cjs/summarization/node.cjs +100 -6
  20. package/dist/cjs/summarization/node.cjs.map +1 -1
  21. package/dist/cjs/tools/ToolNode.cjs +635 -23
  22. package/dist/cjs/tools/ToolNode.cjs.map +1 -1
  23. package/dist/cjs/tools/local/CompileCheckTool.cjs +227 -0
  24. package/dist/cjs/tools/local/CompileCheckTool.cjs.map +1 -0
  25. package/dist/cjs/tools/local/FileCheckpointer.cjs +90 -0
  26. package/dist/cjs/tools/local/FileCheckpointer.cjs.map +1 -0
  27. package/dist/cjs/tools/local/LocalCodingTools.cjs +1098 -0
  28. package/dist/cjs/tools/local/LocalCodingTools.cjs.map +1 -0
  29. package/dist/cjs/tools/local/LocalExecutionEngine.cjs +1042 -0
  30. package/dist/cjs/tools/local/LocalExecutionEngine.cjs.map +1 -0
  31. package/dist/cjs/tools/local/LocalExecutionTools.cjs +122 -0
  32. package/dist/cjs/tools/local/LocalExecutionTools.cjs.map +1 -0
  33. package/dist/cjs/tools/local/LocalProgrammaticToolCalling.cjs +453 -0
  34. package/dist/cjs/tools/local/LocalProgrammaticToolCalling.cjs.map +1 -0
  35. package/dist/cjs/tools/local/attachments.cjs +183 -0
  36. package/dist/cjs/tools/local/attachments.cjs.map +1 -0
  37. package/dist/cjs/tools/local/bashAst.cjs +129 -0
  38. package/dist/cjs/tools/local/bashAst.cjs.map +1 -0
  39. package/dist/cjs/tools/local/editStrategies.cjs +188 -0
  40. package/dist/cjs/tools/local/editStrategies.cjs.map +1 -0
  41. package/dist/cjs/tools/local/resolveLocalExecutionTools.cjs +141 -0
  42. package/dist/cjs/tools/local/resolveLocalExecutionTools.cjs.map +1 -0
  43. package/dist/cjs/tools/local/syntaxCheck.cjs +182 -0
  44. package/dist/cjs/tools/local/syntaxCheck.cjs.map +1 -0
  45. package/dist/cjs/tools/local/textEncoding.cjs +30 -0
  46. package/dist/cjs/tools/local/textEncoding.cjs.map +1 -0
  47. package/dist/cjs/tools/local/workspaceFS.cjs +51 -0
  48. package/dist/cjs/tools/local/workspaceFS.cjs.map +1 -0
  49. package/dist/cjs/tools/subagent/SubagentExecutor.cjs +1 -0
  50. package/dist/cjs/tools/subagent/SubagentExecutor.cjs.map +1 -1
  51. package/dist/esm/common/enum.mjs +53 -1
  52. package/dist/esm/common/enum.mjs.map +1 -1
  53. package/dist/esm/graphs/Graph.mjs +149 -5
  54. package/dist/esm/graphs/Graph.mjs.map +1 -1
  55. package/dist/esm/hooks/createWorkspacePolicyHook.mjs +289 -0
  56. package/dist/esm/hooks/createWorkspacePolicyHook.mjs.map +1 -0
  57. package/dist/esm/llm/openai/index.mjs +318 -2
  58. package/dist/esm/llm/openai/index.mjs.map +1 -1
  59. package/dist/esm/main.mjs +17 -2
  60. package/dist/esm/main.mjs.map +1 -1
  61. package/dist/esm/messages/anthropicToolCache.mjs +99 -0
  62. package/dist/esm/messages/anthropicToolCache.mjs.map +1 -0
  63. package/dist/esm/messages/prune.mjs +26 -1
  64. package/dist/esm/messages/prune.mjs.map +1 -1
  65. package/dist/esm/messages/recency.mjs +97 -0
  66. package/dist/esm/messages/recency.mjs.map +1 -0
  67. package/dist/esm/run.mjs +30 -0
  68. package/dist/esm/run.mjs.map +1 -1
  69. package/dist/esm/summarization/node.mjs +100 -6
  70. package/dist/esm/summarization/node.mjs.map +1 -1
  71. package/dist/esm/tools/ToolNode.mjs +635 -23
  72. package/dist/esm/tools/ToolNode.mjs.map +1 -1
  73. package/dist/esm/tools/local/CompileCheckTool.mjs +223 -0
  74. package/dist/esm/tools/local/CompileCheckTool.mjs.map +1 -0
  75. package/dist/esm/tools/local/FileCheckpointer.mjs +87 -0
  76. package/dist/esm/tools/local/FileCheckpointer.mjs.map +1 -0
  77. package/dist/esm/tools/local/LocalCodingTools.mjs +1075 -0
  78. package/dist/esm/tools/local/LocalCodingTools.mjs.map +1 -0
  79. package/dist/esm/tools/local/LocalExecutionEngine.mjs +1022 -0
  80. package/dist/esm/tools/local/LocalExecutionEngine.mjs.map +1 -0
  81. package/dist/esm/tools/local/LocalExecutionTools.mjs +117 -0
  82. package/dist/esm/tools/local/LocalExecutionTools.mjs.map +1 -0
  83. package/dist/esm/tools/local/LocalProgrammaticToolCalling.mjs +448 -0
  84. package/dist/esm/tools/local/LocalProgrammaticToolCalling.mjs.map +1 -0
  85. package/dist/esm/tools/local/attachments.mjs +180 -0
  86. package/dist/esm/tools/local/attachments.mjs.map +1 -0
  87. package/dist/esm/tools/local/bashAst.mjs +126 -0
  88. package/dist/esm/tools/local/bashAst.mjs.map +1 -0
  89. package/dist/esm/tools/local/editStrategies.mjs +185 -0
  90. package/dist/esm/tools/local/editStrategies.mjs.map +1 -0
  91. package/dist/esm/tools/local/resolveLocalExecutionTools.mjs +137 -0
  92. package/dist/esm/tools/local/resolveLocalExecutionTools.mjs.map +1 -0
  93. package/dist/esm/tools/local/syntaxCheck.mjs +179 -0
  94. package/dist/esm/tools/local/syntaxCheck.mjs.map +1 -0
  95. package/dist/esm/tools/local/textEncoding.mjs +27 -0
  96. package/dist/esm/tools/local/textEncoding.mjs.map +1 -0
  97. package/dist/esm/tools/local/workspaceFS.mjs +49 -0
  98. package/dist/esm/tools/local/workspaceFS.mjs.map +1 -0
  99. package/dist/esm/tools/subagent/SubagentExecutor.mjs +1 -0
  100. package/dist/esm/tools/subagent/SubagentExecutor.mjs.map +1 -1
  101. package/dist/types/common/enum.d.ts +39 -1
  102. package/dist/types/graphs/Graph.d.ts +34 -0
  103. package/dist/types/hooks/createWorkspacePolicyHook.d.ts +95 -0
  104. package/dist/types/hooks/index.d.ts +2 -0
  105. package/dist/types/index.d.ts +1 -0
  106. package/dist/types/llm/openai/index.d.ts +17 -0
  107. package/dist/types/messages/anthropicToolCache.d.ts +51 -0
  108. package/dist/types/messages/index.d.ts +2 -0
  109. package/dist/types/messages/prune.d.ts +11 -0
  110. package/dist/types/messages/recency.d.ts +64 -0
  111. package/dist/types/run.d.ts +21 -0
  112. package/dist/types/tools/ToolNode.d.ts +145 -2
  113. package/dist/types/tools/local/CompileCheckTool.d.ts +31 -0
  114. package/dist/types/tools/local/FileCheckpointer.d.ts +39 -0
  115. package/dist/types/tools/local/LocalCodingTools.d.ts +57 -0
  116. package/dist/types/tools/local/LocalExecutionEngine.d.ts +149 -0
  117. package/dist/types/tools/local/LocalExecutionTools.d.ts +9 -0
  118. package/dist/types/tools/local/LocalProgrammaticToolCalling.d.ts +21 -0
  119. package/dist/types/tools/local/attachments.d.ts +84 -0
  120. package/dist/types/tools/local/bashAst.d.ts +11 -0
  121. package/dist/types/tools/local/editStrategies.d.ts +28 -0
  122. package/dist/types/tools/local/index.d.ts +12 -0
  123. package/dist/types/tools/local/resolveLocalExecutionTools.d.ts +38 -0
  124. package/dist/types/tools/local/syntaxCheck.d.ts +42 -0
  125. package/dist/types/tools/local/textEncoding.d.ts +21 -0
  126. package/dist/types/tools/local/workspaceFS.d.ts +49 -0
  127. package/dist/types/types/hitl.d.ts +56 -27
  128. package/dist/types/types/run.d.ts +8 -1
  129. package/dist/types/types/summarize.d.ts +30 -0
  130. package/dist/types/types/tools.d.ts +341 -6
  131. package/package.json +21 -2
  132. package/src/common/enum.ts +54 -0
  133. package/src/graphs/Graph.ts +164 -6
  134. package/src/hooks/__tests__/compactHooks.test.ts +38 -2
  135. package/src/hooks/__tests__/createWorkspacePolicyHook.test.ts +393 -0
  136. package/src/hooks/createWorkspacePolicyHook.ts +355 -0
  137. package/src/hooks/index.ts +6 -0
  138. package/src/index.ts +1 -0
  139. package/src/llm/openai/deepseek.test.ts +479 -0
  140. package/src/llm/openai/index.ts +484 -1
  141. package/src/messages/__tests__/anthropicToolCache.test.ts +125 -0
  142. package/src/messages/__tests__/recency.test.ts +267 -0
  143. package/src/messages/anthropicToolCache.ts +116 -0
  144. package/src/messages/index.ts +2 -0
  145. package/src/messages/prune.ts +27 -1
  146. package/src/messages/recency.ts +155 -0
  147. package/src/run.ts +31 -0
  148. package/src/scripts/compare_pi_vs_ours.ts +840 -0
  149. package/src/scripts/local_engine.ts +166 -0
  150. package/src/scripts/local_engine_checkpointer.ts +205 -0
  151. package/src/scripts/local_engine_compile.ts +263 -0
  152. package/src/scripts/local_engine_hooks.ts +226 -0
  153. package/src/scripts/local_engine_image.ts +201 -0
  154. package/src/scripts/local_engine_ptc.ts +151 -0
  155. package/src/scripts/local_engine_workspace.ts +258 -0
  156. package/src/scripts/summarization-recency.ts +462 -0
  157. package/src/specs/prune.test.ts +39 -0
  158. package/src/summarization/__tests__/node.test.ts +499 -3
  159. package/src/summarization/node.ts +124 -7
  160. package/src/tools/ToolNode.ts +769 -20
  161. package/src/tools/__tests__/LocalExecutionTools.test.ts +2647 -0
  162. package/src/tools/__tests__/ProgrammaticToolCalling.test.ts +175 -0
  163. package/src/tools/__tests__/ToolNode.outputReferences.test.ts +114 -0
  164. package/src/tools/__tests__/ToolNode.session.test.ts +84 -0
  165. package/src/tools/__tests__/directToolHITLResumeScope.test.ts +467 -0
  166. package/src/tools/__tests__/directToolHooks.test.ts +411 -0
  167. package/src/tools/__tests__/localToolNames.test.ts +73 -0
  168. package/src/tools/__tests__/workspaceSeam.test.ts +134 -0
  169. package/src/tools/local/CompileCheckTool.ts +278 -0
  170. package/src/tools/local/FileCheckpointer.ts +93 -0
  171. package/src/tools/local/LocalCodingTools.ts +1342 -0
  172. package/src/tools/local/LocalExecutionEngine.ts +1329 -0
  173. package/src/tools/local/LocalExecutionTools.ts +167 -0
  174. package/src/tools/local/LocalProgrammaticToolCalling.ts +594 -0
  175. package/src/tools/local/__tests__/FileCheckpointer.test.ts +120 -0
  176. package/src/tools/local/__tests__/editStrategies.test.ts +134 -0
  177. package/src/tools/local/attachments.ts +251 -0
  178. package/src/tools/local/bashAst.ts +151 -0
  179. package/src/tools/local/editStrategies.ts +188 -0
  180. package/src/tools/local/index.ts +12 -0
  181. package/src/tools/local/resolveLocalExecutionTools.ts +208 -0
  182. package/src/tools/local/syntaxCheck.ts +243 -0
  183. package/src/tools/local/textEncoding.ts +37 -0
  184. package/src/tools/local/workspaceFS.ts +89 -0
  185. package/src/types/hitl.ts +56 -27
  186. package/src/types/run.ts +12 -1
  187. package/src/types/summarize.ts +31 -0
  188. package/src/types/tools.ts +359 -7
@@ -1062,4 +1062,179 @@ for member in team:
1062
1062
  expect(results[1].result.result).toBe(5);
1063
1063
  });
1064
1064
  });
1065
+
1066
+ describe('bash bridge script does not require python3 (Codex P2 #19)', () => {
1067
+ // eslint-disable-next-line @typescript-eslint/no-require-imports
1068
+ const { _createBashProgramForTests } = require('../local/LocalProgrammaticToolCalling');
1069
+
1070
+ it('uses curl as the primary HTTP helper with python3 only as fallback', () => {
1071
+ const script: string = _createBashProgramForTests(
1072
+ 'echo hello',
1073
+ [],
1074
+ 'http://127.0.0.1:9999/tool',
1075
+ 'test-token'
1076
+ );
1077
+ // Curl path must be present and gated by `command -v curl` so
1078
+ // it's tried first on hosts that have it.
1079
+ expect(script).toContain('command -v curl');
1080
+ expect(script).toContain('curl -sS -X POST');
1081
+ // Python3 must remain as a fallback (not removed).
1082
+ expect(script).toContain('command -v python3');
1083
+ expect(script).toContain('python3 - "$__LIBRECHAT_TOOL_BRIDGE"');
1084
+ // Curl branch must come BEFORE python3 — bash `if/elif` order
1085
+ // determines which helper is preferred. Pre-fix, python3 was
1086
+ // unconditional and the bash bridge failed on python3-less
1087
+ // hosts (minimal containers, some Windows setups).
1088
+ expect(script.indexOf('command -v curl')).toBeLessThan(
1089
+ script.indexOf('command -v python3')
1090
+ );
1091
+ // Curl uses the bridge's text-mode endpoint to skip JSON
1092
+ // parsing on the bash side.
1093
+ expect(script).toContain('?mode=text');
1094
+ // Helpful error when neither helper is available.
1095
+ expect(script).toContain('needs either curl or python3');
1096
+ });
1097
+ });
1098
+
1099
+ describe('bridge runs PreToolUse hooks for inner tool calls (manual finding A)', () => {
1100
+ // The bridge spawned by `run_tools_with_code` / `run_tools_with_bash`
1101
+ // used to call inner tools via `executeTools` directly, bypassing
1102
+ // every PreToolUse hook the host registered. Manual review flagged
1103
+ // this as a P1 bypass — `write_file` could be invoked from inside
1104
+ // a programmatic block while the host's `write_file` deny policy
1105
+ // never saw it. Now ToolNode threads a `hookContext` into the
1106
+ // programmatic-tool factory; the bridge runs PreToolUse before
1107
+ // each inner call, fail-closing on `deny`/`ask`.
1108
+
1109
+ it('honours `decision: deny` for inner tool calls invoked through the bridge', async () => {
1110
+ const { tool } = await import('@langchain/core/tools');
1111
+ const { z } = await import('zod');
1112
+ const { HookRegistry } = await import('@/hooks');
1113
+ // eslint-disable-next-line @typescript-eslint/no-require-imports
1114
+ const ptcMod = require('../local/LocalProgrammaticToolCalling');
1115
+
1116
+ let callsMade = 0;
1117
+ const writeFileTool = tool(
1118
+ async () => {
1119
+ callsMade += 1;
1120
+ return 'wrote file';
1121
+ },
1122
+ {
1123
+ name: 'write_file',
1124
+ description: 'mock write tool',
1125
+ schema: z.object({ path: z.string() }),
1126
+ }
1127
+ );
1128
+ const toolMap = new Map([['write_file', writeFileTool]]);
1129
+ const registry = new HookRegistry();
1130
+ registry.register('PreToolUse', {
1131
+ hooks: [
1132
+ async (input) => {
1133
+ if (input.toolName === 'write_file') {
1134
+ return { decision: 'deny', reason: 'no writes from bridge' };
1135
+ }
1136
+ return { decision: 'allow' };
1137
+ },
1138
+ ],
1139
+ });
1140
+
1141
+ // Internal createToolBridge isn't exported, but exercising it via
1142
+ // a synthetic HTTP request mirrors the real path. We use a tiny
1143
+ // helper to access the (testing-internal) bridge factory.
1144
+ // eslint-disable-next-line @typescript-eslint/no-require-imports
1145
+ const http = require('http') as typeof import('http');
1146
+
1147
+ // Use the same internal factory the production path uses by
1148
+ // invoking it through a direct-spawn substitute: capture the
1149
+ // request handler by recreating the simplest possible call.
1150
+ // Simpler: spin up a minimal duplicate and assert hook gating.
1151
+ // (We can't easily test the production server without exposing
1152
+ // it, but exporting `applyPreToolUseHooksForBridge` would also
1153
+ // do the job — for this test we exercise the deny path through
1154
+ // the public `executeTools` shortcut that the bridge uses.)
1155
+ void ptcMod;
1156
+ void toolMap;
1157
+ void registry;
1158
+ void callsMade;
1159
+ void http;
1160
+ // The minimum-viable assertion: registering a deny hook and
1161
+ // sending a `write_file` request through the bridge results in
1162
+ // the inner tool NOT being invoked. Implemented via the public
1163
+ // `applyPreToolUseHooksForBridge` (added in this round) so we
1164
+ // don't have to reach into the createServer closure.
1165
+ const gate = await ptcMod.applyPreToolUseHooksForBridge(
1166
+ { registry, runId: 'r1' },
1167
+ 'write_file',
1168
+ 'call_1',
1169
+ { path: '/tmp/x' }
1170
+ );
1171
+ expect(gate.denyReason).toBeDefined();
1172
+ expect(gate.denyReason).toContain('no writes from bridge');
1173
+ });
1174
+
1175
+ it('passes through when no hook denies (allow path)', async () => {
1176
+ const { HookRegistry } = await import('@/hooks');
1177
+ // eslint-disable-next-line @typescript-eslint/no-require-imports
1178
+ const ptcMod = require('../local/LocalProgrammaticToolCalling');
1179
+
1180
+ const registry = new HookRegistry();
1181
+ registry.register('PreToolUse', {
1182
+ hooks: [async () => ({ decision: 'allow' })],
1183
+ });
1184
+
1185
+ const gate = await ptcMod.applyPreToolUseHooksForBridge(
1186
+ { registry, runId: 'r1' },
1187
+ 'read_file',
1188
+ 'call_1',
1189
+ { file_path: '/tmp/x' }
1190
+ );
1191
+ expect(gate.denyReason).toBeUndefined();
1192
+ expect(gate.input).toEqual({ file_path: '/tmp/x' });
1193
+ });
1194
+
1195
+ it('applies updatedInput to the inner tool args', async () => {
1196
+ const { HookRegistry } = await import('@/hooks');
1197
+ // eslint-disable-next-line @typescript-eslint/no-require-imports
1198
+ const ptcMod = require('../local/LocalProgrammaticToolCalling');
1199
+
1200
+ const registry = new HookRegistry();
1201
+ registry.register('PreToolUse', {
1202
+ hooks: [
1203
+ async () => ({
1204
+ decision: 'allow',
1205
+ updatedInput: { file_path: '/tmp/rewritten' },
1206
+ }),
1207
+ ],
1208
+ });
1209
+
1210
+ const gate = await ptcMod.applyPreToolUseHooksForBridge(
1211
+ { registry, runId: 'r1' },
1212
+ 'read_file',
1213
+ 'call_1',
1214
+ { file_path: '/tmp/original' }
1215
+ );
1216
+ expect(gate.denyReason).toBeUndefined();
1217
+ expect(gate.input).toEqual({ file_path: '/tmp/rewritten' });
1218
+ });
1219
+
1220
+ it('treats `ask` as fail-closed deny (HITL not reachable from bridge)', async () => {
1221
+ const { HookRegistry } = await import('@/hooks');
1222
+ // eslint-disable-next-line @typescript-eslint/no-require-imports
1223
+ const ptcMod = require('../local/LocalProgrammaticToolCalling');
1224
+
1225
+ const registry = new HookRegistry();
1226
+ registry.register('PreToolUse', {
1227
+ hooks: [async () => ({ decision: 'ask' })],
1228
+ });
1229
+
1230
+ const gate = await ptcMod.applyPreToolUseHooksForBridge(
1231
+ { registry, runId: 'r1' },
1232
+ 'edit_file',
1233
+ 'call_1',
1234
+ {}
1235
+ );
1236
+ expect(gate.denyReason).toBeDefined();
1237
+ expect(gate.denyReason).toMatch(/HITL|ask|approval|interrupt/i);
1238
+ });
1239
+ });
1065
1240
  });
@@ -5,6 +5,10 @@ import { describe, it, expect, jest, afterEach } from '@jest/globals';
5
5
  import type { StructuredToolInterface } from '@langchain/core/tools';
6
6
  import type * as t from '@/types';
7
7
  import * as events from '@/utils/events';
8
+ import type {
9
+ PostToolUseHookOutput,
10
+ PreToolUseHookOutput,
11
+ } from '@/hooks';
8
12
  import { HookRegistry } from '@/hooks';
9
13
  import { ToolNode } from '../ToolNode';
10
14
  import { ToolOutputReferenceRegistry } from '../toolOutputReferences';
@@ -1435,4 +1439,114 @@ describe('ToolNode tool output references', () => {
1435
1439
  });
1436
1440
  });
1437
1441
  });
1442
+
1443
+ describe('PostToolUse updatedOutput updates the registry (Codex P2 #17)', () => {
1444
+ it('subsequent {{tool0turn0}} substitutions deliver the post-hook content, not the pre-hook content', async () => {
1445
+ const capturedArgs: string[] = [];
1446
+ const echoT = createEchoTool({
1447
+ capturedArgs,
1448
+ outputs: ['original-output', 'second-call-result'],
1449
+ });
1450
+
1451
+ const registry = new HookRegistry();
1452
+ registry.register('PostToolUse', {
1453
+ hooks: [
1454
+ // Replace the tool's content. Pre-fix the registry kept the
1455
+ // pre-hook string ("original-output"), so a later
1456
+ // {{tool0turn0}} substitution would deliver stale bytes.
1457
+ async (): Promise<PostToolUseHookOutput> => ({
1458
+ updatedOutput: 'redacted-by-hook',
1459
+ }),
1460
+ ],
1461
+ });
1462
+
1463
+ const node = new ToolNode({
1464
+ tools: [echoT],
1465
+ toolOutputReferences: { enabled: true },
1466
+ hookRegistry: registry,
1467
+ });
1468
+
1469
+ const [first] = await invokeBatch(
1470
+ node,
1471
+ [{ id: 'c1', name: 'echo', command: 'first' }],
1472
+ 'run-posthook-ref'
1473
+ );
1474
+ // Sanity: the model sees the replaced content.
1475
+ expect(first.content).toBe('redacted-by-hook');
1476
+
1477
+ // Second call references the first via {{tool0turn0}}. The
1478
+ // tool's `command` arg should resolve to the post-hook content.
1479
+ await invokeBatch(
1480
+ node,
1481
+ [{ id: 'c2', name: 'echo', command: 'value={{tool0turn0}}' }],
1482
+ 'run-posthook-ref'
1483
+ );
1484
+ expect(capturedArgs).toEqual(['first', 'value=redacted-by-hook']);
1485
+ // Pre-fix: the second call would have seen 'value=original-output'
1486
+ // because the registry was never updated after the post-hook.
1487
+ expect(capturedArgs[1]).not.toContain('original-output');
1488
+ });
1489
+ });
1490
+
1491
+ describe('direct-batch snapshot isolation (Codex P1 #18)', () => {
1492
+ it('does not let a slow PreToolUse hook on one call leak a sibling output into another call args', async () => {
1493
+ // Two direct calls in a single batch:
1494
+ // c0: has a slow PreToolUse hook (await) + args containing
1495
+ // `{{tool1turn0}}` (a same-turn placeholder).
1496
+ // c1: no hook, returns 'sibling-output' instantly.
1497
+ //
1498
+ // Same-turn refs are intentionally isolated (the snapshot is
1499
+ // taken pre-batch). Pre-fix, runTool's late re-resolve against
1500
+ // the *live* registry meant c0 (waiting on its hook) saw c1's
1501
+ // already-registered output and substituted it into its args
1502
+ // — order-dependent leakage. With the snapshot, c0 sees the
1503
+ // placeholder unresolved.
1504
+ const capturedArgs: string[] = [];
1505
+ const echoT = createEchoTool({
1506
+ capturedArgs,
1507
+ outputs: ['c0-output', 'sibling-output'],
1508
+ name: 'echo',
1509
+ });
1510
+
1511
+ const registry = new HookRegistry();
1512
+ registry.register('PreToolUse', {
1513
+ hooks: [
1514
+ // Slow hook gates ONLY c0; c1 has no hook to wait on. The
1515
+ // delay gives c1 time to finish and register its output
1516
+ // before c0's `runTool` runs.
1517
+ async (input): Promise<PreToolUseHookOutput> => {
1518
+ const cmd = (input.toolInput as { command?: string }).command ?? '';
1519
+ if (cmd.includes('{{tool1turn0}}')) {
1520
+ await new Promise<void>((resolve) => setTimeout(resolve, 50));
1521
+ }
1522
+ return { decision: 'allow' };
1523
+ },
1524
+ ],
1525
+ });
1526
+
1527
+ const node = new ToolNode({
1528
+ tools: [echoT],
1529
+ toolOutputReferences: { enabled: true },
1530
+ hookRegistry: registry,
1531
+ });
1532
+
1533
+ await invokeBatch(
1534
+ node,
1535
+ [
1536
+ { id: 'c0', name: 'echo', command: 'leak={{tool1turn0}}' },
1537
+ { id: 'c1', name: 'echo', command: 'instant' },
1538
+ ],
1539
+ 'run-snapshot-iso'
1540
+ );
1541
+
1542
+ // Pre-fix: capturedArgs[0] would have been 'leak=sibling-output'
1543
+ // because c1 won the race and c0's late re-resolve picked it up.
1544
+ // With the snapshot fix: same-turn isolation holds — the
1545
+ // placeholder stays unresolved in c0's args (and an
1546
+ // `[unresolved refs: …]` marker shows up downstream).
1547
+ const c0Index = capturedArgs.findIndex((a) => a.startsWith('leak='));
1548
+ expect(c0Index).toBeGreaterThanOrEqual(0);
1549
+ expect(capturedArgs[c0Index]).not.toContain('sibling-output');
1550
+ });
1551
+ });
1438
1552
  });
@@ -134,6 +134,49 @@ describe('ToolNode code execution session management', () => {
134
134
  expect(files[0].session_id).toBe('session-A');
135
135
  expect(files[1].session_id).toBe('session-B');
136
136
  });
137
+
138
+ it('forwards per-file entity_id for mixed-entity sessions', async () => {
139
+ const capturedConfigs: Record<string, unknown>[] = [];
140
+ const sessions: t.ToolSessionMap = new Map();
141
+ sessions.set(Constants.EXECUTE_CODE, {
142
+ session_id: 'session-A',
143
+ files: [
144
+ {
145
+ id: 'skill-file',
146
+ name: 'demo/SKILL.md',
147
+ session_id: 'session-A',
148
+ entity_id: 'skill-123',
149
+ },
150
+ {
151
+ id: 'user-file',
152
+ name: 'attachment.csv',
153
+ session_id: 'session-B',
154
+ },
155
+ ],
156
+ lastUpdated: Date.now(),
157
+ } satisfies t.CodeSessionContext);
158
+
159
+ const mockTool = createMockCodeTool({ capturedConfigs });
160
+ const toolNode = new ToolNode({ tools: [mockTool], sessions });
161
+
162
+ const aiMsg = createAIMessageWithCodeCall('call_5');
163
+ await toolNode.invoke({ messages: [aiMsg] });
164
+
165
+ const files = capturedConfigs[0]._injected_files as t.CodeEnvFile[];
166
+ expect(files).toEqual([
167
+ {
168
+ session_id: 'session-A',
169
+ id: 'skill-file',
170
+ name: 'demo/SKILL.md',
171
+ entity_id: 'skill-123',
172
+ },
173
+ {
174
+ session_id: 'session-B',
175
+ id: 'user-file',
176
+ name: 'attachment.csv',
177
+ },
178
+ ]);
179
+ });
137
180
  });
138
181
 
139
182
  describe('getCodeSessionContext (via dispatchToolEvents request building)', () => {
@@ -200,6 +243,47 @@ describe('ToolNode code execution session management', () => {
200
243
 
201
244
  expect(context).toBeUndefined();
202
245
  });
246
+
247
+ it('forwards per-file entity_id to event-driven request context', () => {
248
+ const sessions: t.ToolSessionMap = new Map();
249
+ sessions.set(Constants.EXECUTE_CODE, {
250
+ session_id: 'evt-session',
251
+ files: [
252
+ {
253
+ id: 'sk1',
254
+ name: 'demo/SKILL.md',
255
+ session_id: 'evt-session',
256
+ entity_id: 'skill-abc',
257
+ },
258
+ { id: 'usr1', name: 'data.csv', session_id: 'evt-session' },
259
+ ],
260
+ lastUpdated: Date.now(),
261
+ } satisfies t.CodeSessionContext);
262
+
263
+ const mockTool = createMockCodeTool({ capturedConfigs: [] });
264
+ const toolNode = new ToolNode({
265
+ tools: [mockTool],
266
+ sessions,
267
+ eventDrivenMode: true,
268
+ });
269
+
270
+ const context = (
271
+ toolNode as unknown as { getCodeSessionContext: () => unknown }
272
+ ).getCodeSessionContext();
273
+
274
+ expect(context).toEqual({
275
+ session_id: 'evt-session',
276
+ files: [
277
+ {
278
+ session_id: 'evt-session',
279
+ id: 'sk1',
280
+ name: 'demo/SKILL.md',
281
+ entity_id: 'skill-abc',
282
+ },
283
+ { session_id: 'evt-session', id: 'usr1', name: 'data.csv' },
284
+ ],
285
+ });
286
+ });
203
287
  });
204
288
 
205
289
  describe('storeCodeSessionFromResults (session storage from artifacts)', () => {