@librechat/agents 3.1.75 → 3.1.77-dev.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (272)
  1. package/dist/cjs/graphs/Graph.cjs +22 -3
  2. package/dist/cjs/graphs/Graph.cjs.map +1 -1
  3. package/dist/cjs/hitl/askUserQuestion.cjs +67 -0
  4. package/dist/cjs/hitl/askUserQuestion.cjs.map +1 -0
  5. package/dist/cjs/hooks/HookRegistry.cjs +54 -0
  6. package/dist/cjs/hooks/HookRegistry.cjs.map +1 -1
  7. package/dist/cjs/hooks/createToolPolicyHook.cjs +115 -0
  8. package/dist/cjs/hooks/createToolPolicyHook.cjs.map +1 -0
  9. package/dist/cjs/hooks/executeHooks.cjs +40 -1
  10. package/dist/cjs/hooks/executeHooks.cjs.map +1 -1
  11. package/dist/cjs/hooks/types.cjs +1 -0
  12. package/dist/cjs/hooks/types.cjs.map +1 -1
  13. package/dist/cjs/langchain/google-common.cjs +3 -0
  14. package/dist/cjs/langchain/google-common.cjs.map +1 -0
  15. package/dist/cjs/langchain/index.cjs +86 -0
  16. package/dist/cjs/langchain/index.cjs.map +1 -0
  17. package/dist/cjs/langchain/language_models/chat_models.cjs +3 -0
  18. package/dist/cjs/langchain/language_models/chat_models.cjs.map +1 -0
  19. package/dist/cjs/langchain/messages/tool.cjs +3 -0
  20. package/dist/cjs/langchain/messages/tool.cjs.map +1 -0
  21. package/dist/cjs/langchain/messages.cjs +51 -0
  22. package/dist/cjs/langchain/messages.cjs.map +1 -0
  23. package/dist/cjs/langchain/openai.cjs +3 -0
  24. package/dist/cjs/langchain/openai.cjs.map +1 -0
  25. package/dist/cjs/langchain/prompts.cjs +11 -0
  26. package/dist/cjs/langchain/prompts.cjs.map +1 -0
  27. package/dist/cjs/langchain/runnables.cjs +19 -0
  28. package/dist/cjs/langchain/runnables.cjs.map +1 -0
  29. package/dist/cjs/langchain/tools.cjs +23 -0
  30. package/dist/cjs/langchain/tools.cjs.map +1 -0
  31. package/dist/cjs/langchain/utils/env.cjs +11 -0
  32. package/dist/cjs/langchain/utils/env.cjs.map +1 -0
  33. package/dist/cjs/llm/anthropic/index.cjs +145 -52
  34. package/dist/cjs/llm/anthropic/index.cjs.map +1 -1
  35. package/dist/cjs/llm/anthropic/types.cjs.map +1 -1
  36. package/dist/cjs/llm/anthropic/utils/message_inputs.cjs +21 -14
  37. package/dist/cjs/llm/anthropic/utils/message_inputs.cjs.map +1 -1
  38. package/dist/cjs/llm/anthropic/utils/message_outputs.cjs +84 -70
  39. package/dist/cjs/llm/anthropic/utils/message_outputs.cjs.map +1 -1
  40. package/dist/cjs/llm/bedrock/index.cjs +1 -1
  41. package/dist/cjs/llm/bedrock/index.cjs.map +1 -1
  42. package/dist/cjs/llm/bedrock/utils/message_inputs.cjs +213 -3
  43. package/dist/cjs/llm/bedrock/utils/message_inputs.cjs.map +1 -1
  44. package/dist/cjs/llm/bedrock/utils/message_outputs.cjs +2 -1
  45. package/dist/cjs/llm/bedrock/utils/message_outputs.cjs.map +1 -1
  46. package/dist/cjs/llm/google/utils/common.cjs +5 -4
  47. package/dist/cjs/llm/google/utils/common.cjs.map +1 -1
  48. package/dist/cjs/llm/openai/index.cjs +519 -655
  49. package/dist/cjs/llm/openai/index.cjs.map +1 -1
  50. package/dist/cjs/llm/openai/utils/index.cjs +20 -458
  51. package/dist/cjs/llm/openai/utils/index.cjs.map +1 -1
  52. package/dist/cjs/llm/openrouter/index.cjs +57 -175
  53. package/dist/cjs/llm/openrouter/index.cjs.map +1 -1
  54. package/dist/cjs/llm/vertexai/index.cjs +5 -3
  55. package/dist/cjs/llm/vertexai/index.cjs.map +1 -1
  56. package/dist/cjs/main.cjs +112 -3
  57. package/dist/cjs/main.cjs.map +1 -1
  58. package/dist/cjs/messages/cache.cjs +2 -1
  59. package/dist/cjs/messages/cache.cjs.map +1 -1
  60. package/dist/cjs/messages/core.cjs +7 -6
  61. package/dist/cjs/messages/core.cjs.map +1 -1
  62. package/dist/cjs/messages/format.cjs +73 -15
  63. package/dist/cjs/messages/format.cjs.map +1 -1
  64. package/dist/cjs/messages/langchain.cjs +26 -0
  65. package/dist/cjs/messages/langchain.cjs.map +1 -0
  66. package/dist/cjs/messages/prune.cjs +7 -6
  67. package/dist/cjs/messages/prune.cjs.map +1 -1
  68. package/dist/cjs/run.cjs +400 -42
  69. package/dist/cjs/run.cjs.map +1 -1
  70. package/dist/cjs/tools/ToolNode.cjs +556 -56
  71. package/dist/cjs/tools/ToolNode.cjs.map +1 -1
  72. package/dist/cjs/tools/search/search.cjs +55 -66
  73. package/dist/cjs/tools/search/search.cjs.map +1 -1
  74. package/dist/cjs/tools/search/tavily-scraper.cjs +189 -0
  75. package/dist/cjs/tools/search/tavily-scraper.cjs.map +1 -0
  76. package/dist/cjs/tools/search/tavily-search.cjs +372 -0
  77. package/dist/cjs/tools/search/tavily-search.cjs.map +1 -0
  78. package/dist/cjs/tools/search/tool.cjs +26 -4
  79. package/dist/cjs/tools/search/tool.cjs.map +1 -1
  80. package/dist/cjs/tools/search/utils.cjs +10 -3
  81. package/dist/cjs/tools/search/utils.cjs.map +1 -1
  82. package/dist/esm/graphs/Graph.mjs +22 -3
  83. package/dist/esm/graphs/Graph.mjs.map +1 -1
  84. package/dist/esm/hitl/askUserQuestion.mjs +65 -0
  85. package/dist/esm/hitl/askUserQuestion.mjs.map +1 -0
  86. package/dist/esm/hooks/HookRegistry.mjs +54 -0
  87. package/dist/esm/hooks/HookRegistry.mjs.map +1 -1
  88. package/dist/esm/hooks/createToolPolicyHook.mjs +113 -0
  89. package/dist/esm/hooks/createToolPolicyHook.mjs.map +1 -0
  90. package/dist/esm/hooks/executeHooks.mjs +40 -1
  91. package/dist/esm/hooks/executeHooks.mjs.map +1 -1
  92. package/dist/esm/hooks/types.mjs +1 -0
  93. package/dist/esm/hooks/types.mjs.map +1 -1
  94. package/dist/esm/langchain/google-common.mjs +2 -0
  95. package/dist/esm/langchain/google-common.mjs.map +1 -0
  96. package/dist/esm/langchain/index.mjs +5 -0
  97. package/dist/esm/langchain/index.mjs.map +1 -0
  98. package/dist/esm/langchain/language_models/chat_models.mjs +2 -0
  99. package/dist/esm/langchain/language_models/chat_models.mjs.map +1 -0
  100. package/dist/esm/langchain/messages/tool.mjs +2 -0
  101. package/dist/esm/langchain/messages/tool.mjs.map +1 -0
  102. package/dist/esm/langchain/messages.mjs +2 -0
  103. package/dist/esm/langchain/messages.mjs.map +1 -0
  104. package/dist/esm/langchain/openai.mjs +2 -0
  105. package/dist/esm/langchain/openai.mjs.map +1 -0
  106. package/dist/esm/langchain/prompts.mjs +2 -0
  107. package/dist/esm/langchain/prompts.mjs.map +1 -0
  108. package/dist/esm/langchain/runnables.mjs +2 -0
  109. package/dist/esm/langchain/runnables.mjs.map +1 -0
  110. package/dist/esm/langchain/tools.mjs +2 -0
  111. package/dist/esm/langchain/tools.mjs.map +1 -0
  112. package/dist/esm/langchain/utils/env.mjs +2 -0
  113. package/dist/esm/langchain/utils/env.mjs.map +1 -0
  114. package/dist/esm/llm/anthropic/index.mjs +146 -54
  115. package/dist/esm/llm/anthropic/index.mjs.map +1 -1
  116. package/dist/esm/llm/anthropic/types.mjs.map +1 -1
  117. package/dist/esm/llm/anthropic/utils/message_inputs.mjs +21 -14
  118. package/dist/esm/llm/anthropic/utils/message_inputs.mjs.map +1 -1
  119. package/dist/esm/llm/anthropic/utils/message_outputs.mjs +84 -71
  120. package/dist/esm/llm/anthropic/utils/message_outputs.mjs.map +1 -1
  121. package/dist/esm/llm/bedrock/index.mjs +1 -1
  122. package/dist/esm/llm/bedrock/index.mjs.map +1 -1
  123. package/dist/esm/llm/bedrock/utils/message_inputs.mjs +214 -4
  124. package/dist/esm/llm/bedrock/utils/message_inputs.mjs.map +1 -1
  125. package/dist/esm/llm/bedrock/utils/message_outputs.mjs +2 -1
  126. package/dist/esm/llm/bedrock/utils/message_outputs.mjs.map +1 -1
  127. package/dist/esm/llm/google/utils/common.mjs +5 -4
  128. package/dist/esm/llm/google/utils/common.mjs.map +1 -1
  129. package/dist/esm/llm/openai/index.mjs +520 -656
  130. package/dist/esm/llm/openai/index.mjs.map +1 -1
  131. package/dist/esm/llm/openai/utils/index.mjs +23 -459
  132. package/dist/esm/llm/openai/utils/index.mjs.map +1 -1
  133. package/dist/esm/llm/openrouter/index.mjs +57 -175
  134. package/dist/esm/llm/openrouter/index.mjs.map +1 -1
  135. package/dist/esm/llm/vertexai/index.mjs +5 -3
  136. package/dist/esm/llm/vertexai/index.mjs.map +1 -1
  137. package/dist/esm/main.mjs +7 -0
  138. package/dist/esm/main.mjs.map +1 -1
  139. package/dist/esm/messages/cache.mjs +2 -1
  140. package/dist/esm/messages/cache.mjs.map +1 -1
  141. package/dist/esm/messages/core.mjs +7 -6
  142. package/dist/esm/messages/core.mjs.map +1 -1
  143. package/dist/esm/messages/format.mjs +73 -15
  144. package/dist/esm/messages/format.mjs.map +1 -1
  145. package/dist/esm/messages/langchain.mjs +23 -0
  146. package/dist/esm/messages/langchain.mjs.map +1 -0
  147. package/dist/esm/messages/prune.mjs +7 -6
  148. package/dist/esm/messages/prune.mjs.map +1 -1
  149. package/dist/esm/run.mjs +400 -42
  150. package/dist/esm/run.mjs.map +1 -1
  151. package/dist/esm/tools/ToolNode.mjs +557 -57
  152. package/dist/esm/tools/ToolNode.mjs.map +1 -1
  153. package/dist/esm/tools/search/search.mjs +55 -66
  154. package/dist/esm/tools/search/search.mjs.map +1 -1
  155. package/dist/esm/tools/search/tavily-scraper.mjs +186 -0
  156. package/dist/esm/tools/search/tavily-scraper.mjs.map +1 -0
  157. package/dist/esm/tools/search/tavily-search.mjs +370 -0
  158. package/dist/esm/tools/search/tavily-search.mjs.map +1 -0
  159. package/dist/esm/tools/search/tool.mjs +26 -4
  160. package/dist/esm/tools/search/tool.mjs.map +1 -1
  161. package/dist/esm/tools/search/utils.mjs +10 -3
  162. package/dist/esm/tools/search/utils.mjs.map +1 -1
  163. package/dist/types/graphs/Graph.d.ts +7 -0
  164. package/dist/types/hitl/askUserQuestion.d.ts +55 -0
  165. package/dist/types/hitl/index.d.ts +6 -0
  166. package/dist/types/hooks/HookRegistry.d.ts +58 -0
  167. package/dist/types/hooks/createToolPolicyHook.d.ts +87 -0
  168. package/dist/types/hooks/index.d.ts +4 -1
  169. package/dist/types/hooks/types.d.ts +109 -3
  170. package/dist/types/index.d.ts +10 -0
  171. package/dist/types/langchain/google-common.d.ts +1 -0
  172. package/dist/types/langchain/index.d.ts +8 -0
  173. package/dist/types/langchain/language_models/chat_models.d.ts +1 -0
  174. package/dist/types/langchain/messages/tool.d.ts +1 -0
  175. package/dist/types/langchain/messages.d.ts +2 -0
  176. package/dist/types/langchain/openai.d.ts +1 -0
  177. package/dist/types/langchain/prompts.d.ts +1 -0
  178. package/dist/types/langchain/runnables.d.ts +2 -0
  179. package/dist/types/langchain/tools.d.ts +2 -0
  180. package/dist/types/langchain/utils/env.d.ts +1 -0
  181. package/dist/types/llm/anthropic/index.d.ts +22 -9
  182. package/dist/types/llm/anthropic/types.d.ts +5 -1
  183. package/dist/types/llm/anthropic/utils/message_outputs.d.ts +13 -6
  184. package/dist/types/llm/anthropic/utils/output_parsers.d.ts +1 -1
  185. package/dist/types/llm/openai/index.d.ts +21 -24
  186. package/dist/types/llm/openrouter/index.d.ts +11 -9
  187. package/dist/types/llm/vertexai/index.d.ts +1 -0
  188. package/dist/types/messages/cache.d.ts +4 -1
  189. package/dist/types/messages/format.d.ts +4 -1
  190. package/dist/types/messages/langchain.d.ts +27 -0
  191. package/dist/types/run.d.ts +117 -1
  192. package/dist/types/tools/ToolNode.d.ts +26 -1
  193. package/dist/types/tools/search/tavily-scraper.d.ts +19 -0
  194. package/dist/types/tools/search/tavily-search.d.ts +4 -0
  195. package/dist/types/tools/search/types.d.ts +99 -5
  196. package/dist/types/tools/search/utils.d.ts +2 -2
  197. package/dist/types/types/graph.d.ts +23 -37
  198. package/dist/types/types/hitl.d.ts +272 -0
  199. package/dist/types/types/index.d.ts +1 -0
  200. package/dist/types/types/llm.d.ts +3 -3
  201. package/dist/types/types/run.d.ts +33 -0
  202. package/dist/types/types/stream.d.ts +1 -1
  203. package/dist/types/types/tools.d.ts +19 -0
  204. package/package.json +80 -17
  205. package/src/graphs/Graph.ts +33 -4
  206. package/src/graphs/__tests__/composition.smoke.test.ts +188 -0
  207. package/src/hitl/askUserQuestion.ts +72 -0
  208. package/src/hitl/index.ts +7 -0
  209. package/src/hooks/HookRegistry.ts +71 -0
  210. package/src/hooks/__tests__/createToolPolicyHook.test.ts +259 -0
  211. package/src/hooks/createToolPolicyHook.ts +184 -0
  212. package/src/hooks/executeHooks.ts +50 -1
  213. package/src/hooks/index.ts +6 -0
  214. package/src/hooks/types.ts +112 -0
  215. package/src/index.ts +22 -0
  216. package/src/langchain/google-common.ts +1 -0
  217. package/src/langchain/index.ts +8 -0
  218. package/src/langchain/language_models/chat_models.ts +1 -0
  219. package/src/langchain/messages/tool.ts +5 -0
  220. package/src/langchain/messages.ts +21 -0
  221. package/src/langchain/openai.ts +1 -0
  222. package/src/langchain/prompts.ts +1 -0
  223. package/src/langchain/runnables.ts +7 -0
  224. package/src/langchain/tools.ts +8 -0
  225. package/src/langchain/utils/env.ts +1 -0
  226. package/src/llm/anthropic/index.ts +252 -84
  227. package/src/llm/anthropic/llm.spec.ts +751 -102
  228. package/src/llm/anthropic/types.ts +9 -1
  229. package/src/llm/anthropic/utils/message_inputs.ts +37 -19
  230. package/src/llm/anthropic/utils/message_outputs.ts +119 -101
  231. package/src/llm/bedrock/index.ts +2 -2
  232. package/src/llm/bedrock/llm.spec.ts +341 -0
  233. package/src/llm/bedrock/utils/message_inputs.ts +303 -4
  234. package/src/llm/bedrock/utils/message_outputs.ts +2 -1
  235. package/src/llm/custom-chat-models.smoke.test.ts +836 -0
  236. package/src/llm/google/llm.spec.ts +339 -57
  237. package/src/llm/google/utils/common.ts +53 -48
  238. package/src/llm/openai/contentBlocks.test.ts +346 -0
  239. package/src/llm/openai/index.ts +856 -833
  240. package/src/llm/openai/utils/index.ts +107 -78
  241. package/src/llm/openai/utils/messages.test.ts +159 -0
  242. package/src/llm/openrouter/index.ts +124 -247
  243. package/src/llm/openrouter/reasoning.test.ts +8 -1
  244. package/src/llm/vertexai/index.ts +11 -5
  245. package/src/llm/vertexai/llm.spec.ts +28 -1
  246. package/src/messages/cache.test.ts +4 -3
  247. package/src/messages/cache.ts +3 -2
  248. package/src/messages/core.ts +16 -9
  249. package/src/messages/format.ts +96 -16
  250. package/src/messages/formatAgentMessages.test.ts +166 -1
  251. package/src/messages/langchain.ts +39 -0
  252. package/src/messages/prune.ts +12 -8
  253. package/src/run.ts +456 -47
  254. package/src/scripts/caching.ts +2 -3
  255. package/src/specs/summarization.test.ts +51 -58
  256. package/src/tools/ToolNode.ts +706 -63
  257. package/src/tools/__tests__/hitl.test.ts +3593 -0
  258. package/src/tools/search/search.ts +83 -73
  259. package/src/tools/search/tavily-scraper.ts +235 -0
  260. package/src/tools/search/tavily-search.ts +424 -0
  261. package/src/tools/search/tavily.test.ts +965 -0
  262. package/src/tools/search/tool.ts +36 -26
  263. package/src/tools/search/types.ts +133 -8
  264. package/src/tools/search/utils.ts +13 -5
  265. package/src/types/graph.ts +32 -87
  266. package/src/types/hitl.ts +303 -0
  267. package/src/types/index.ts +1 -0
  268. package/src/types/llm.ts +3 -3
  269. package/src/types/run.ts +33 -0
  270. package/src/types/stream.ts +1 -1
  271. package/src/types/tools.ts +19 -0
  272. package/src/utils/llmConfig.ts +1 -6
@@ -0,0 +1,3593 @@
1
+ import { z } from 'zod';
2
+ import { tool } from '@langchain/core/tools';
3
+ import {
4
+ END,
5
+ START,
6
+ Command,
7
+ StateGraph,
8
+ MemorySaver,
9
+ isInterrupted,
10
+ MessagesAnnotation,
11
+ } from '@langchain/langgraph';
12
+ import { AIMessage, ToolMessage } from '@langchain/core/messages';
13
+ import {
14
+ describe,
15
+ it,
16
+ expect,
17
+ jest,
18
+ afterEach,
19
+ beforeEach,
20
+ } from '@jest/globals';
21
+ import type { StructuredToolInterface } from '@langchain/core/tools';
22
+ import type { BaseMessage } from '@langchain/core/messages';
23
+ import type { Runnable, RunnableConfig } from '@langchain/core/runnables';
24
+ import type {
25
+ PreToolUseHookOutput,
26
+ PostToolUseHookOutput,
27
+ PostToolUseFailureHookOutput,
28
+ PostToolBatchEntry,
29
+ PostToolBatchHookInput,
30
+ PostToolBatchHookOutput,
31
+ RunStartHookOutput,
32
+ UserPromptSubmitHookOutput,
33
+ } from '@/hooks';
34
+ import type * as t from '@/types';
35
+ import * as events from '@/utils/events';
36
+ import { HookRegistry } from '@/hooks';
37
+ import { Providers as providers, GraphEvents } from '@/common';
38
+ import { ToolNode } from '../ToolNode';
39
+
40
+ /**
41
+ * Schema-only tool stub. ToolNode in event-driven mode uses the schema
42
+ * for binding/discovery but routes execution through the host via
43
+ * `ON_TOOL_EXECUTE`, so the actual `func` here is never called.
44
+ */
45
+ function createSchemaStub(name: string): StructuredToolInterface {
46
+ return tool(async () => 'unused', {
47
+ name,
48
+ description: 'schema-only stub; host executes via ON_TOOL_EXECUTE',
49
+ schema: z.object({ command: z.string() }),
50
+ }) as unknown as StructuredToolInterface;
51
+ }
52
+
53
+ /**
54
+ * Wires a fake host that responds to every `ON_TOOL_EXECUTE` event by
55
+ * resolving the request promise with `mockResults`. Mirrors the pattern
56
+ * used in `ToolNode.outputReferences.test.ts` so the event-driven path
57
+ * actually returns ToolMessages without spinning up a real host.
58
+ */
59
+ function mockEventDispatch(mockResults: t.ToolExecuteResult[]): void {
60
+ jest
61
+ .spyOn(events, 'safeDispatchCustomEvent')
62
+ .mockImplementation(async (event, data) => {
63
+ if (event !== 'on_tool_execute') {
64
+ return;
65
+ }
66
+ const request = data as Record<string, unknown>;
67
+ if (typeof request.resolve === 'function') {
68
+ (request.resolve as (r: t.ToolExecuteResult[]) => void)(mockResults);
69
+ }
70
+ });
71
+ }
72
+
73
/** Partial state update returned by graph nodes: just the new messages. */
type MessagesUpdate = { messages: BaseMessage[] };
/**
 * Narrowed view of a compiled LangGraph: a Runnable producing a messages
 * state, with `invoke` re-declared so tests can pass arbitrary inputs
 * (initial state or a resume `Command`) without fighting generics.
 */
type CompiledMessagesGraph = Runnable<unknown, { messages: BaseMessage[] }> & {
  invoke(input: unknown, config?: RunnableConfig): Promise<unknown>;
};
77
+
78
+ /** Factory for a minimal `agent → tools → END` graph wrapping the ToolNode. */
79
+ function buildHITLGraph(
80
+ toolNode: ToolNode,
81
+ toolCalls: Array<{ id: string; name: string; args: Record<string, unknown> }>
82
+ ): CompiledMessagesGraph {
83
+ let agentInvocations = 0;
84
+ const builder = new StateGraph(MessagesAnnotation)
85
+ .addNode('agent', (): MessagesUpdate => {
86
+ agentInvocations += 1;
87
+ /**
88
+ * First entry → emit the AIMessage carrying tool_calls so the
89
+ * ToolNode actually has work. After resume the agent re-enters
90
+ * once more (a normal LangGraph loop), but at that point any
91
+ * approved tool already has a ToolMessage in state, so we emit
92
+ * an empty AIMessage to satisfy the loop and end the run.
93
+ */
94
+ if (agentInvocations === 1) {
95
+ return {
96
+ messages: [new AIMessage({ content: '', tool_calls: toolCalls })],
97
+ };
98
+ }
99
+ return { messages: [new AIMessage({ content: 'done' })] };
100
+ })
101
+ .addNode('tools', toolNode)
102
+ .addEdge(START, 'agent')
103
+ .addEdge('agent', 'tools')
104
+ .addEdge('tools', END);
105
+ return builder.compile({
106
+ checkpointer: new MemorySaver(),
107
+ }) as unknown as CompiledMessagesGraph;
108
+ }
109
+
110
+ function makeHookRegistry(
111
+ decision: 'allow' | 'deny' | 'ask',
112
+ reason?: string
113
+ ): HookRegistry {
114
+ const registry = new HookRegistry();
115
+ registry.register('PreToolUse', {
116
+ hooks: [
117
+ async (): Promise<PreToolUseHookOutput> => ({
118
+ decision,
119
+ ...(reason != null ? { reason } : {}),
120
+ }),
121
+ ],
122
+ });
123
+ return registry;
124
+ }
125
+
126
describe('ToolNode HITL — `ask` decision raises interrupt() when humanInTheLoop is enabled', () => {
  afterEach(() => {
    // Every test installs its own spy on safeDispatchCustomEvent.
    jest.restoreAllMocks();
  });

  it('raises a tool_approval interrupt with the pending tool call payload', async () => {
    // The mock result would only surface if the tool ran — the
    // 'should-not-run' sentinel guards against premature execution.
    mockEventDispatch([
      { toolCallId: 'call_1', content: 'should-not-run', status: 'success' },
    ]);
    const node = new ToolNode({
      tools: [createSchemaStub('echo')],
      eventDrivenMode: true,
      agentId: 'agent-x',
      toolCallStepIds: new Map([['call_1', 'step_call_1']]),
      hookRegistry: makeHookRegistry('ask', 'review tool args'),
      humanInTheLoop: { enabled: true },
    });

    const graph = buildHITLGraph(node, [
      { id: 'call_1', name: 'echo', args: { command: 'list /' } },
    ]);
    const config = { configurable: { thread_id: 'thread-hitl-1' } };

    const result = await graph.invoke({ messages: [] }, config);

    // The second isInterrupted call is the same check repeated purely as a
    // type guard so `result.__interrupt__` narrows below.
    expect(isInterrupted<t.HumanInterruptPayload>(result)).toBe(true);
    if (!isInterrupted<t.HumanInterruptPayload>(result)) {
      throw new Error('expected interrupt');
    }
    const interrupts = result.__interrupt__;
    expect(interrupts).toHaveLength(1);
    const payload = interrupts[0].value!;
    if (payload.type !== 'tool_approval') {
      throw new Error('expected tool_approval payload');
    }
    // The hook's `reason` flows through as the action description.
    expect(payload.action_requests).toEqual([
      {
        tool_call_id: 'call_1',
        name: 'echo',
        arguments: { command: 'list /' },
        description: 'review tool args',
      },
    ]);
    expect(payload.review_configs).toEqual([
      {
        action_name: 'echo',
        tool_call_id: 'call_1',
        allowed_decisions: ['approve', 'reject', 'edit', 'respond'],
      },
    ]);
  });

  it('resume with approve runs the tool through the host event path', async () => {
    mockEventDispatch([
      { toolCallId: 'call_1', content: 'host-result', status: 'success' },
    ]);
    const node = new ToolNode({
      tools: [createSchemaStub('echo')],
      eventDrivenMode: true,
      agentId: 'agent-x',
      toolCallStepIds: new Map([['call_1', 'step_call_1']]),
      hookRegistry: makeHookRegistry('ask'),
      humanInTheLoop: { enabled: true },
    });

    const graph = buildHITLGraph(node, [
      { id: 'call_1', name: 'echo', args: { command: 'do-it' } },
    ]);
    const config = { configurable: { thread_id: 'thread-hitl-approve' } };

    // First invoke pauses at the approval interrupt.
    const interrupted = await graph.invoke({ messages: [] }, config);
    expect(isInterrupted(interrupted)).toBe(true);

    // Resume with an ordered decision list (positional form).
    const resumed = (await graph.invoke(
      new Command({ resume: [{ type: 'approve' }] }),
      config
    )) as { messages: BaseMessage[] };

    const toolMessages = resumed.messages.filter(
      (m): m is ToolMessage => m._getType() === 'tool'
    );
    expect(toolMessages).toHaveLength(1);
    expect(toolMessages[0].tool_call_id).toBe('call_1');
    // Content comes from the mocked host, proving the event path ran.
    expect(toolMessages[0].content).toBe('host-result');
    expect(toolMessages[0].status).not.toBe('error');
  });

  it('resume with reject blocks the tool and emits an error ToolMessage', async () => {
    // Empty host results: a rejected tool must never reach the host anyway.
    mockEventDispatch([]);
    const node = new ToolNode({
      tools: [createSchemaStub('echo')],
      eventDrivenMode: true,
      agentId: 'agent-x',
      toolCallStepIds: new Map([['call_1', 'step_call_1']]),
      hookRegistry: makeHookRegistry('ask'),
      humanInTheLoop: { enabled: true },
    });

    const graph = buildHITLGraph(node, [
      { id: 'call_1', name: 'echo', args: { command: 'rm -rf /' } },
    ]);
    const config = { configurable: { thread_id: 'thread-hitl-reject' } };

    await graph.invoke({ messages: [] }, config);

    const resumed = (await graph.invoke(
      new Command({
        resume: [{ type: 'reject', reason: 'destructive command' }],
      }),
      config
    )) as { messages: BaseMessage[] };

    const toolMessages = resumed.messages.filter(
      (m): m is ToolMessage => m._getType() === 'tool'
    );
    expect(toolMessages).toHaveLength(1);
    expect(toolMessages[0].status).toBe('error');
    // The rejection reason must surface to the model in the ToolMessage.
    expect(String(toolMessages[0].content)).toContain('destructive command');
  });

  it('resume with edit substitutes the tool input before invocation', async () => {
    // Custom spy (instead of mockEventDispatch) so we can capture the exact
    // tool-call args the host receives after the edit is applied.
    const capturedRequests: t.ToolCallRequest[] = [];
    jest
      .spyOn(events, 'safeDispatchCustomEvent')
      .mockImplementation(async (event, data) => {
        if (event !== 'on_tool_execute') {
          return;
        }
        const request = data as {
          toolCalls: t.ToolCallRequest[];
          resolve: (r: t.ToolExecuteResult[]) => void;
        };
        capturedRequests.push(...request.toolCalls);
        request.resolve(
          request.toolCalls.map((c) => ({
            toolCallId: c.id,
            content: 'host-result',
            status: 'success' as const,
          }))
        );
      });

    const node = new ToolNode({
      tools: [createSchemaStub('echo')],
      eventDrivenMode: true,
      agentId: 'agent-x',
      toolCallStepIds: new Map([['call_1', 'step_call_1']]),
      hookRegistry: makeHookRegistry('ask'),
      humanInTheLoop: { enabled: true },
    });

    const graph = buildHITLGraph(node, [
      { id: 'call_1', name: 'echo', args: { command: 'original' } },
    ]);
    const config = { configurable: { thread_id: 'thread-hitl-edit' } };

    await graph.invoke({ messages: [] }, config);

    await graph.invoke(
      new Command({
        resume: [{ type: 'edit', updatedInput: { command: 'patched' } }],
      }),
      config
    );

    // The host saw the edited input, not the original.
    expect(capturedRequests).toHaveLength(1);
    expect(capturedRequests[0].args).toEqual({ command: 'patched' });
  });

  it('resume with respond emits the user-supplied text as a successful ToolMessage and skips host execution', async () => {
    // Keep a handle on the spy so we can count on_tool_execute dispatches
    // before vs. after the resume — respond must not add any.
    const dispatchSpy = jest
      .spyOn(events, 'safeDispatchCustomEvent')
      .mockImplementation(async (event, data) => {
        if (event !== 'on_tool_execute') {
          return;
        }
        const request = data as {
          toolCalls: t.ToolCallRequest[];
          resolve: (r: t.ToolExecuteResult[]) => void;
        };
        request.resolve([]);
      });

    const node = new ToolNode({
      tools: [createSchemaStub('echo')],
      eventDrivenMode: true,
      agentId: 'agent-x',
      toolCallStepIds: new Map([['call_1', 'step_call_1']]),
      hookRegistry: makeHookRegistry('ask'),
      humanInTheLoop: { enabled: true },
    });

    const graph = buildHITLGraph(node, [
      { id: 'call_1', name: 'echo', args: { command: 'search' } },
    ]);
    const config = { configurable: { thread_id: 'thread-hitl-respond' } };

    await graph.invoke({ messages: [] }, config);

    const dispatchCallsBefore = dispatchSpy.mock.calls.filter(
      ([event]) => event === 'on_tool_execute'
    ).length;

    const resumed = (await graph.invoke(
      new Command({
        resume: [{ type: 'respond', responseText: 'no relevant results' }],
      }),
      config
    )) as { messages: BaseMessage[] };

    const dispatchCallsAfter = dispatchSpy.mock.calls.filter(
      ([event]) => event === 'on_tool_execute'
    ).length;

    const toolMessages = resumed.messages.filter(
      (m): m is ToolMessage => m._getType() === 'tool'
    );
    expect(toolMessages).toHaveLength(1);
    expect(toolMessages[0].tool_call_id).toBe('call_1');
    // The human-supplied text becomes the tool output verbatim.
    expect(toolMessages[0].content).toBe('no relevant results');
    expect(toolMessages[0].status).not.toBe('error');
    // No additional host dispatches happened during the resume.
    expect(dispatchCallsAfter).toBe(dispatchCallsBefore);
  });

  it('advertises respond in review_configs.allowed_decisions', async () => {
    mockEventDispatch([]);
    const node = new ToolNode({
      tools: [createSchemaStub('echo')],
      eventDrivenMode: true,
      agentId: 'agent-x',
      toolCallStepIds: new Map([['call_1', 'step_call_1']]),
      hookRegistry: makeHookRegistry('ask'),
      humanInTheLoop: { enabled: true },
    });

    const graph = buildHITLGraph(node, [
      { id: 'call_1', name: 'echo', args: { command: 'x' } },
    ]);
    const config = {
      configurable: { thread_id: 'thread-hitl-allowed-decisions' },
    };

    const interrupted = await graph.invoke({ messages: [] }, config);
    if (!isInterrupted<t.HumanInterruptPayload>(interrupted)) {
      throw new Error('expected interrupt');
    }
    const payload = interrupted.__interrupt__[0].value!;
    if (payload.type !== 'tool_approval') {
      throw new Error('expected tool_approval payload');
    }
    // Full decision surface must be advertised to the reviewing UI.
    expect(payload.review_configs[0].allowed_decisions).toEqual([
      'approve',
      'reject',
      'edit',
      'respond',
    ]);
  });

  it('resume with a record keyed by tool_call_id is accepted', async () => {
    mockEventDispatch([
      { toolCallId: 'call_1', content: 'host-result', status: 'success' },
    ]);
    const node = new ToolNode({
      tools: [createSchemaStub('echo')],
      eventDrivenMode: true,
      agentId: 'agent-x',
      toolCallStepIds: new Map([['call_1', 'step_call_1']]),
      hookRegistry: makeHookRegistry('ask'),
      humanInTheLoop: { enabled: true },
    });

    const graph = buildHITLGraph(node, [
      { id: 'call_1', name: 'echo', args: { command: 'do-it' } },
    ]);
    const config = { configurable: { thread_id: 'thread-hitl-map' } };

    await graph.invoke({ messages: [] }, config);

    // Keyed-record form (alternative to the ordered-array form above).
    const resumed = (await graph.invoke(
      new Command({ resume: { call_1: { type: 'approve' } } }),
      config
    )) as { messages: BaseMessage[] };

    const toolMessages = resumed.messages.filter(
      (m): m is ToolMessage => m._getType() === 'tool'
    );
    expect(toolMessages).toHaveLength(1);
    expect(toolMessages[0].content).toBe('host-result');
  });
});
416
+
417
describe('ToolNode HITL — opt-out (`humanInTheLoop: { enabled: false }`) is fail-closed', () => {
  afterEach(() => {
    // Every test installs its own spy on safeDispatchCustomEvent.
    jest.restoreAllMocks();
  });

  it('blocks the tool with a ToolMessage error and never raises an interrupt', async () => {
    mockEventDispatch([]);
    const node = new ToolNode({
      tools: [createSchemaStub('echo')],
      eventDrivenMode: true,
      agentId: 'agent-x',
      toolCallStepIds: new Map([['call_1', 'step_call_1']]),
      hookRegistry: makeHookRegistry('ask', 'HITL explicitly disabled'),
      humanInTheLoop: { enabled: false },
    });

    const graph = buildHITLGraph(node, [
      { id: 'call_1', name: 'echo', args: { command: 'list /' } },
    ]);
    const config = { configurable: { thread_id: 'thread-hitl-optout' } };

    const result = (await graph.invoke({ messages: [] }, config)) as {
      messages: BaseMessage[];
    };

    // With HITL off, `ask` collapses to a synchronous block: no interrupt,
    // just an error ToolMessage carrying the hook's reason.
    expect(isInterrupted(result)).toBe(false);
    const toolMessages = result.messages.filter(
      (m): m is ToolMessage => m._getType() === 'tool'
    );
    expect(toolMessages).toHaveLength(1);
    expect(toolMessages[0].status).toBe('error');
    expect(String(toolMessages[0].content)).toContain(
      'HITL explicitly disabled'
    );
  });

  it('blocks the tool when `humanInTheLoop` is omitted (default-off)', async () => {
    /**
     * Default is OFF until host UIs (notably LibreChat) ship the
     * approval-rendering affordances. With HITL omitted, an `ask`
     * decision must collapse into a synchronous block — same fail-
     * closed behavior as the explicit `{ enabled: false }` opt-out.
     * This test guards against accidentally re-enabling the default-on
     * path before the consumer ecosystem is ready.
     */
    mockEventDispatch([
      { toolCallId: 'call_1', content: 'host-result', status: 'success' },
    ]);
    const node = new ToolNode({
      tools: [createSchemaStub('echo')],
      eventDrivenMode: true,
      agentId: 'agent-x',
      toolCallStepIds: new Map([['call_1', 'step_call_1']]),
      hookRegistry: makeHookRegistry('ask', 'default-off-blocks'),
      // humanInTheLoop intentionally omitted — should default to disabled
    });

    const graph = buildHITLGraph(node, [
      { id: 'call_1', name: 'echo', args: { command: 'list /' } },
    ]);
    const config = { configurable: { thread_id: 'thread-hitl-default' } };

    const out = (await graph.invoke({ messages: [] }, config)) as {
      messages: BaseMessage[];
    };
    expect(isInterrupted<t.HumanInterruptPayload>(out)).toBe(false);
    const toolMessages = out.messages.filter(
      (m): m is ToolMessage => m._getType() === 'tool'
    );
    expect(toolMessages).toHaveLength(1);
    expect(toolMessages[0].tool_call_id).toBe('call_1');
    // Blocked — the mocked 'host-result' must never appear.
    expect(toolMessages[0].status).toBe('error');
    expect(String(toolMessages[0].content)).toContain('default-off-blocks');
  });
});
492
+
493
describe('ToolNode HITL — multi-tool batches', () => {
  afterEach(() => {
    jest.restoreAllMocks();
  });

  it('bundles multiple ask decisions into a single interrupt and resolves per call', async () => {
    // Capture every tool call the host is asked to execute so we can assert
    // that rejected calls are never dispatched.
    const capturedRequests: t.ToolCallRequest[] = [];
    jest
      .spyOn(events, 'safeDispatchCustomEvent')
      .mockImplementation(async (event, data) => {
        if (event !== 'on_tool_execute') {
          return;
        }
        const request = data as {
          toolCalls: t.ToolCallRequest[];
          resolve: (r: t.ToolExecuteResult[]) => void;
        };
        capturedRequests.push(...request.toolCalls);
        // Resolve each dispatched call successfully with a marker payload
        // (`ran:<tool>`) so assertions can tell which tool actually ran.
        request.resolve(
          request.toolCalls.map(
            (c): t.ToolExecuteResult => ({
              toolCallId: c.id,
              content: `ran:${c.name}`,
              status: 'success',
            })
          )
        );
      });

    // Every tool call receives an `ask` decision, forcing a HITL interrupt.
    const registry = new HookRegistry();
    registry.register('PreToolUse', {
      hooks: [
        async (): Promise<PreToolUseHookOutput> => ({
          decision: 'ask',
          reason: 'review',
        }),
      ],
    });

    const node = new ToolNode({
      tools: [createSchemaStub('echo'), createSchemaStub('cat')],
      eventDrivenMode: true,
      agentId: 'agent-x',
      toolCallStepIds: new Map([
        ['call_1', 'step_call_1'],
        ['call_2', 'step_call_2'],
      ]),
      hookRegistry: registry,
      humanInTheLoop: { enabled: true },
    });

    const graph = buildHITLGraph(node, [
      { id: 'call_1', name: 'echo', args: { command: 'one' } },
      { id: 'call_2', name: 'cat', args: { command: 'two' } },
    ]);
    const config = { configurable: { thread_id: 'thread-hitl-batch' } };

    // First pass: both `ask` decisions are folded into ONE interrupt.
    const interrupted = await graph.invoke({ messages: [] }, config);
    expect(isInterrupted<t.HumanInterruptPayload>(interrupted)).toBe(true);
    if (!isInterrupted<t.HumanInterruptPayload>(interrupted)) {
      throw new Error('expected interrupt');
    }
    const payload = interrupted.__interrupt__[0].value!;
    if (payload.type !== 'tool_approval') {
      throw new Error('expected tool_approval payload');
    }
    // Interrupt payload lists both calls, in batch order.
    expect(payload.action_requests.map((r) => r.tool_call_id)).toEqual([
      'call_1',
      'call_2',
    ]);

    // Resume with per-call decisions: approve call_1, reject call_2.
    const resumed = (await graph.invoke(
      new Command({
        resume: [{ type: 'approve' }, { type: 'reject', reason: 'too risky' }],
      }),
      config
    )) as { messages: BaseMessage[] };

    const toolMessages = resumed.messages.filter(
      (m): m is ToolMessage => m._getType() === 'tool'
    );
    expect(toolMessages).toHaveLength(2);
    const byId = new Map(toolMessages.map((m) => [m.tool_call_id, m]));
    // Approved call executed via the host; rejected call became an error
    // ToolMessage carrying the rejection reason.
    expect(byId.get('call_1')!.content).toBe('ran:echo');
    expect(byId.get('call_1')!.status).not.toBe('error');
    expect(byId.get('call_2')!.status).toBe('error');
    expect(String(byId.get('call_2')!.content)).toContain('too risky');

    // Only the approved call was ever dispatched to the host.
    expect(capturedRequests).toHaveLength(1);
    expect(capturedRequests[0].id).toBe('call_1');
  });
});
585
+
586
describe('Run integration — HITL fallback checkpointer + resume', () => {
  beforeEach(() => {
    jest.restoreAllMocks();
  });
  afterEach(() => {
    jest.restoreAllMocks();
  });

  it('Run.create does NOT install a MemorySaver fallback by default (HITL is off until host UI ships)', async () => {
    /**
     * Default-off rationale: HITL ships the interrupt machinery but
     * stays opt-in until host UIs (notably LibreChat) can render and
     * resolve `tool_approval` interrupts. With HITL omitted, the SDK
     * must NOT silently install a checkpointer — that would suggest
     * the run can pause/resume when in fact the `ask` path will
     * fail-closed. Plan of record: flip the default to ON in a future
     * minor once the consumer ecosystem is ready.
     */
    const { Run } = await import('@/run');
    const { Providers } = await import('@/common');

    const run = await Run.create<t.IState>({
      runId: 'hitl-default-run',
      graphConfig: {
        type: 'standard',
        agents: [
          {
            agentId: 'a',
            provider: Providers.OPENAI,
            clientOptions: { modelName: 'gpt-4o-mini', apiKey: 'test-key' },
            instructions: 'noop',
            maxContextTokens: 8000,
          },
        ],
      },
      // humanInTheLoop intentionally omitted — default is OFF
    });

    expect(run.Graph?.compileOptions?.checkpointer).toBeUndefined();
  });

  it('Run.create installs a MemorySaver fallback when HITL is explicitly enabled', async () => {
    const { Run } = await import('@/run');
    const { Providers } = await import('@/common');

    const run = await Run.create<t.IState>({
      runId: 'hitl-explicit-run',
      graphConfig: {
        type: 'standard',
        agents: [
          {
            agentId: 'a',
            provider: Providers.OPENAI,
            clientOptions: { modelName: 'gpt-4o-mini', apiKey: 'test-key' },
            instructions: 'noop',
            maxContextTokens: 8000,
          },
        ],
      },
      humanInTheLoop: { enabled: true },
    });

    // Opt-in HITL with no host checkpointer gets an in-memory fallback so
    // interrupt/resume works out of the box.
    expect(run.Graph?.compileOptions?.checkpointer).toBeInstanceOf(MemorySaver);
    expect(run.Graph?.humanInTheLoop?.enabled).toBe(true);
  });

  it('Run.create preserves a host-supplied checkpointer when HITL is explicitly enabled', async () => {
    const { Run } = await import('@/run');
    const { Providers } = await import('@/common');

    const hostCheckpointer = new MemorySaver();
    const run = await Run.create<t.IState>({
      runId: 'hitl-host-checkpointer',
      graphConfig: {
        type: 'standard',
        agents: [
          {
            agentId: 'a',
            provider: Providers.OPENAI,
            clientOptions: { modelName: 'gpt-4o-mini', apiKey: 'test-key' },
            instructions: 'noop',
            maxContextTokens: 8000,
          },
        ],
        compileOptions: { checkpointer: hostCheckpointer },
      },
      humanInTheLoop: { enabled: true },
    });

    // Host-provided checkpointer must win over the MemorySaver fallback
    // (same instance, not a copy).
    expect(run.Graph?.compileOptions?.checkpointer).toBe(hostCheckpointer);
  });

  it('re-exports langgraph HITL primitives from the SDK barrel for host use', async () => {
    // Hosts should not need a direct @langchain/langgraph dependency to
    // drive HITL — the SDK barrel re-exports the primitives.
    const indexExports = await import('@/index');
    expect(indexExports.MemorySaver).toBe(MemorySaver);
    expect(indexExports.Command).toBe(Command);
    expect(indexExports.INTERRUPT).toBeDefined();
    expect(typeof indexExports.interrupt).toBe('function');
    expect(typeof indexExports.isInterrupted).toBe('function');
    expect(typeof indexExports.BaseCheckpointSaver).toBe('function');
  });

  it('Run.create does not attach a checkpointer when HITL is explicitly disabled', async () => {
    const { Run } = await import('@/run');
    const { Providers } = await import('@/common');

    const run = await Run.create<t.IState>({
      runId: 'hitl-optout-run',
      graphConfig: {
        type: 'standard',
        agents: [
          {
            agentId: 'a',
            provider: Providers.OPENAI,
            clientOptions: { modelName: 'gpt-4o-mini', apiKey: 'test-key' },
            instructions: 'noop',
            maxContextTokens: 8000,
          },
        ],
      },
      humanInTheLoop: { enabled: false },
    });

    expect(run.Graph?.compileOptions?.checkpointer).toBeUndefined();
  });

  it('Run.resume() drives the host all the way through the resume command path', async () => {
    /** End-to-end on the Run wrapper: build a HITL graph that
     * interrupts on first invoke, then drive resume via the Run's
     * own `resume()` method (not raw graph.invoke + Command).
     * Validates the full Run.resume → processStream(Command) path. */
    let dispatchCount = 0;
    jest
      .spyOn(events, 'safeDispatchCustomEvent')
      .mockImplementation(async (event, data) => {
        if (event !== 'on_tool_execute') {
          return;
        }
        dispatchCount += 1;
        const request = data as {
          toolCalls: t.ToolCallRequest[];
          resolve: (r: t.ToolExecuteResult[]) => void;
        };
        request.resolve(
          request.toolCalls.map((c) => ({
            toolCallId: c.id,
            content: 'host-result',
            status: 'success' as const,
          }))
        );
      });

    const registry = new HookRegistry();
    registry.register('PreToolUse', {
      hooks: [
        async (): Promise<PreToolUseHookOutput> => ({
          decision: 'ask',
          reason: 'review',
        }),
      ],
    });

    const node = new ToolNode({
      tools: [createSchemaStub('echo')],
      eventDrivenMode: true,
      agentId: 'agent-x',
      toolCallStepIds: new Map([['call_1', 'step_1']]),
      hookRegistry: registry,
      humanInTheLoop: { enabled: true },
    });

    // Handcrafted agent→tools graph: the agent node emits one tool call so
    // the `ask` decision interrupts on the first invoke.
    const builder = new StateGraph(MessagesAnnotation)
      .addNode(
        'agent',
        (): MessagesUpdate => ({
          messages: [
            new AIMessage({
              content: '',
              tool_calls: [
                { id: 'call_1', name: 'echo', args: { command: 'x' } },
              ],
            }),
          ],
        })
      )
      .addNode('tools', node)
      .addEdge(START, 'agent')
      .addEdge('agent', 'tools')
      .addEdge('tools', END);
    const graph = builder.compile({ checkpointer: new MemorySaver() });

    const { Run } = await import('@/run');
    const run = await Run.create<t.IState>({
      runId: 'run-resume-direct',
      graphConfig: {
        type: 'standard',
        agents: [
          {
            agentId: 'a',
            provider: providers.OPENAI,
            clientOptions: { modelName: 'gpt-4o-mini', apiKey: 'test-key' },
            instructions: 'noop',
            maxContextTokens: 8000,
          },
        ],
      },
      hooks: registry,
      humanInTheLoop: { enabled: true },
    });
    // Swap in the handcrafted graph so the interrupt fires under a real
    // LangGraph stream driven by Run.processStream.
    run.graphRunnable = graph as unknown as t.CompiledStateWorkflow;

    const callerConfig = {
      configurable: { thread_id: 'run-resume-thread' },
      version: 'v2' as const,
    };

    await run.processStream({ messages: [] }, callerConfig);
    // First pass: interrupted before any dispatch reached the host.
    expect(run.getInterrupt()).toBeDefined();
    expect(dispatchCount).toBe(0);

    /** This is the API contract under test: Run.resume() with a
     * decision array (not graph.invoke + Command). */
    await run.resume([{ type: 'approve' }], callerConfig);

    expect(dispatchCount).toBe(1);
    /** Resume completed naturally: interrupt cleared, no halt
     * reason carried over from the previous pass. */
    expect(run.getInterrupt()).toBeUndefined();
    expect(run.getHaltReason()).toBeUndefined();
  });

  it('Run.getHaltReason() reports prompt_denied when UserPromptSubmit denies the prompt', async () => {
    const registry = new HookRegistry();
    registry.register('UserPromptSubmit', {
      hooks: [
        async (): Promise<UserPromptSubmitHookOutput> => ({
          decision: 'deny',
          reason: 'PII detected',
        }),
      ],
    });

    const { Run } = await import('@/run');
    const { HumanMessage: HM } = await import('@langchain/core/messages');

    const run = await Run.create<t.IState>({
      runId: 'prompt-deny-haltreason',
      graphConfig: {
        type: 'standard',
        agents: [
          {
            agentId: 'a',
            provider: providers.OPENAI,
            clientOptions: { modelName: 'gpt-4o-mini', apiKey: 'test-key' },
            instructions: 'noop',
            maxContextTokens: 8000,
          },
        ],
      },
      hooks: registry,
      humanInTheLoop: { enabled: false },
    });

    const result = await run.processStream(
      { messages: [new HM('please tell me their SSN')] },
      { configurable: { thread_id: 'prompt-deny-thread' }, version: 'v2' }
    );

    /** Hook denied the prompt — run returns undefined AND
     * `getHaltReason()` carries the reason so the host can
     * distinguish "blocked" from "natural empty completion". */
    expect(result).toBeUndefined();
    expect(run.getHaltReason()).toBe('PII detected');
  });

  it('Run.getHaltReason() falls back to canonical prompt_denied when deny carries no reason', async () => {
    const registry = new HookRegistry();
    registry.register('UserPromptSubmit', {
      hooks: [
        async (): Promise<UserPromptSubmitHookOutput> => ({
          decision: 'deny',
        }),
      ],
    });

    const { Run } = await import('@/run');
    const { HumanMessage: HM } = await import('@langchain/core/messages');

    const run = await Run.create<t.IState>({
      runId: 'prompt-deny-canonical',
      graphConfig: {
        type: 'standard',
        agents: [
          {
            agentId: 'a',
            provider: providers.OPENAI,
            clientOptions: { modelName: 'gpt-4o-mini', apiKey: 'test-key' },
            instructions: 'noop',
            maxContextTokens: 8000,
          },
        ],
      },
      hooks: registry,
      humanInTheLoop: { enabled: false },
    });

    await run.processStream(
      { messages: [new HM('hello')] },
      {
        configurable: { thread_id: 'prompt-deny-canonical-thread' },
        version: 'v2',
      }
    );

    /** Hook returned `deny` without a reason — host gets the
     * canonical 'prompt_denied' string so it can route on a stable
     * discriminator. */
    expect(run.getHaltReason()).toBe('prompt_denied');
  });

  it('Run.getHaltReason() reports prompt_requires_approval when UserPromptSubmit asks', async () => {
    const registry = new HookRegistry();
    registry.register('UserPromptSubmit', {
      hooks: [
        async (): Promise<UserPromptSubmitHookOutput> => ({
          decision: 'ask',
        }),
      ],
    });

    const { Run } = await import('@/run');
    const { HumanMessage: HM } = await import('@langchain/core/messages');

    const run = await Run.create<t.IState>({
      runId: 'prompt-ask-haltreason',
      graphConfig: {
        type: 'standard',
        agents: [
          {
            agentId: 'a',
            provider: providers.OPENAI,
            clientOptions: { modelName: 'gpt-4o-mini', apiKey: 'test-key' },
            instructions: 'noop',
            maxContextTokens: 8000,
          },
        ],
      },
      hooks: registry,
      humanInTheLoop: { enabled: false },
    });

    await run.processStream(
      { messages: [new HM('hello')] },
      { configurable: { thread_id: 'prompt-ask-thread' }, version: 'v2' }
    );

    /** Default reason when the hook didn't supply one — host can
     * route on the canonical string. */
    expect(run.getHaltReason()).toBe('prompt_requires_approval');
  });
});
947
+
948
+ describe('ToolNode HITL — additionalContext injection from hooks', () => {
949
+ afterEach(() => {
950
+ jest.restoreAllMocks();
951
+ });
952
+
953
+ it('injects PreToolUse + PostToolUse additionalContexts as a single HumanMessage', async () => {
954
+ mockEventDispatch([
955
+ { toolCallId: 'call_1', content: 'host-result', status: 'success' },
956
+ ]);
957
+
958
+ const registry = new HookRegistry();
959
+ registry.register('PreToolUse', {
960
+ hooks: [
961
+ async (): Promise<PreToolUseHookOutput> => ({
962
+ decision: 'allow',
963
+ additionalContext: 'pre-context: be careful',
964
+ }),
965
+ ],
966
+ });
967
+ registry.register('PostToolUse', {
968
+ hooks: [
969
+ async (): Promise<PostToolUseHookOutput> => ({
970
+ additionalContext: 'post-context: tool ran',
971
+ }),
972
+ ],
973
+ });
974
+
975
+ const node = new ToolNode({
976
+ tools: [createSchemaStub('echo')],
977
+ eventDrivenMode: true,
978
+ agentId: 'agent-x',
979
+ toolCallStepIds: new Map([['call_1', 'step_call_1']]),
980
+ hookRegistry: registry,
981
+ humanInTheLoop: { enabled: false },
982
+ });
983
+
984
+ const graph = buildHITLGraph(node, [
985
+ { id: 'call_1', name: 'echo', args: { command: 'do' } },
986
+ ]);
987
+ const result = (await graph.invoke(
988
+ { messages: [] },
989
+ { configurable: { thread_id: 'ctx-thread-1' } }
990
+ )) as { messages: BaseMessage[] };
991
+
992
+ const injected = result.messages.find(
993
+ (m) =>
994
+ m._getType() === 'human' &&
995
+ (m as { additional_kwargs?: { source?: string } }).additional_kwargs
996
+ ?.source === 'hook'
997
+ );
998
+ expect(injected).toBeDefined();
999
+ expect(String(injected!.content)).toContain('pre-context: be careful');
1000
+ expect(String(injected!.content)).toContain('post-context: tool ran');
1001
+ });
1002
+
1003
+ it('does not inject anything when no hook returns additionalContext', async () => {
1004
+ mockEventDispatch([
1005
+ { toolCallId: 'call_1', content: 'host-result', status: 'success' },
1006
+ ]);
1007
+
1008
+ const registry = new HookRegistry();
1009
+ registry.register('PreToolUse', {
1010
+ hooks: [
1011
+ async (): Promise<PreToolUseHookOutput> => ({ decision: 'allow' }),
1012
+ ],
1013
+ });
1014
+
1015
+ const node = new ToolNode({
1016
+ tools: [createSchemaStub('echo')],
1017
+ eventDrivenMode: true,
1018
+ agentId: 'agent-x',
1019
+ toolCallStepIds: new Map([['call_1', 'step_call_1']]),
1020
+ hookRegistry: registry,
1021
+ humanInTheLoop: { enabled: false },
1022
+ });
1023
+
1024
+ const graph = buildHITLGraph(node, [
1025
+ { id: 'call_1', name: 'echo', args: { command: 'do' } },
1026
+ ]);
1027
+ const result = (await graph.invoke(
1028
+ { messages: [] },
1029
+ { configurable: { thread_id: 'ctx-thread-2' } }
1030
+ )) as { messages: BaseMessage[] };
1031
+
1032
+ const injected = result.messages.find(
1033
+ (m) =>
1034
+ m._getType() === 'human' &&
1035
+ (m as { additional_kwargs?: { source?: string } }).additional_kwargs
1036
+ ?.source === 'hook'
1037
+ );
1038
+ expect(injected).toBeUndefined();
1039
+ });
1040
+ });
1041
+
1042
describe('ToolNode HITL — PostToolBatch hook', () => {
  afterEach(() => {
    jest.restoreAllMocks();
  });

  it('fires once per dispatch with all entries (success + error mix), in batch order', async () => {
    // Host resolves call_1 successfully and call_2 with an error so the
    // batch hook observes a mixed-status entry list.
    jest
      .spyOn(events, 'safeDispatchCustomEvent')
      .mockImplementation(async (event, data) => {
        if (event !== 'on_tool_execute') {
          return;
        }
        const request = data as {
          toolCalls: t.ToolCallRequest[];
          resolve: (r: t.ToolExecuteResult[]) => void;
        };
        request.resolve([
          { toolCallId: 'call_1', content: 'ok', status: 'success' },
          {
            toolCallId: 'call_2',
            content: '',
            status: 'error',
            errorMessage: 'boom',
          },
        ]);
      });

    const registry = new HookRegistry();
    // Capture the entries passed to the batch hook for later assertions.
    let captured: PostToolBatchEntry[] | undefined;
    registry.register('PostToolBatch', {
      hooks: [
        async (input): Promise<PostToolBatchHookOutput> => {
          captured = (input as PostToolBatchHookInput).entries;
          return {};
        },
      ],
    });

    const node = new ToolNode({
      tools: [createSchemaStub('echo'), createSchemaStub('cat')],
      eventDrivenMode: true,
      agentId: 'agent-x',
      toolCallStepIds: new Map([
        ['call_1', 'step_1'],
        ['call_2', 'step_2'],
      ]),
      hookRegistry: registry,
      humanInTheLoop: { enabled: false },
    });

    const graph = buildHITLGraph(node, [
      { id: 'call_1', name: 'echo', args: { command: 'a' } },
      { id: 'call_2', name: 'cat', args: { command: 'b' } },
    ]);
    await graph.invoke(
      { messages: [] },
      { configurable: { thread_id: 'batch-thread' } }
    );

    // One hook invocation for the whole dispatch; entries preserve batch
    // order and carry per-call status/output/error.
    expect(captured).toBeDefined();
    expect(captured!).toHaveLength(2);
    expect(captured![0].toolUseId).toBe('call_1');
    expect(captured![0].status).toBe('success');
    expect(captured![0].toolOutput).toBe('ok');
    expect(captured![1].toolUseId).toBe('call_2');
    expect(captured![1].status).toBe('error');
    expect(captured![1].error).toContain('boom');
  });

  it('a PostToolBatch additionalContext gets injected as a HumanMessage', async () => {
    mockEventDispatch([
      { toolCallId: 'call_1', content: 'ok', status: 'success' },
    ]);

    const registry = new HookRegistry();
    registry.register('PostToolBatch', {
      hooks: [
        async (): Promise<PostToolBatchHookOutput> => ({
          additionalContext: 'remember to format the response as JSON',
        }),
      ],
    });

    const node = new ToolNode({
      tools: [createSchemaStub('echo')],
      eventDrivenMode: true,
      agentId: 'agent-x',
      toolCallStepIds: new Map([['call_1', 'step_1']]),
      hookRegistry: registry,
      humanInTheLoop: { enabled: false },
    });

    const graph = buildHITLGraph(node, [
      { id: 'call_1', name: 'echo', args: { command: 'a' } },
    ]);
    const result = (await graph.invoke(
      { messages: [] },
      { configurable: { thread_id: 'batch-ctx-thread' } }
    )) as { messages: BaseMessage[] };

    // Batch-level context surfaces as a hook-sourced HumanMessage.
    const injected = result.messages.find(
      (m) =>
        m._getType() === 'human' &&
        (m as { additional_kwargs?: { source?: string } }).additional_kwargs
          ?.source === 'hook'
    );
    expect(injected).toBeDefined();
    expect(String(injected!.content)).toContain('format the response as JSON');
  });
});
1152
+
1153
+ describe('ToolNode HITL — per-hook allowedDecisions override', () => {
1154
+ afterEach(() => {
1155
+ jest.restoreAllMocks();
1156
+ });
1157
+
1158
+ it('restricts the interrupt review_configs.allowed_decisions to the hook-supplied subset', async () => {
1159
+ const registry = new HookRegistry();
1160
+ registry.register('PreToolUse', {
1161
+ hooks: [
1162
+ async (): Promise<PreToolUseHookOutput> => ({
1163
+ decision: 'ask',
1164
+ allowedDecisions: ['approve', 'reject'],
1165
+ }),
1166
+ ],
1167
+ });
1168
+
1169
+ const node = new ToolNode({
1170
+ tools: [createSchemaStub('echo')],
1171
+ eventDrivenMode: true,
1172
+ agentId: 'agent-x',
1173
+ toolCallStepIds: new Map([['call_1', 'step_1']]),
1174
+ hookRegistry: registry,
1175
+ humanInTheLoop: { enabled: true },
1176
+ });
1177
+
1178
+ const graph = buildHITLGraph(node, [
1179
+ { id: 'call_1', name: 'echo', args: { command: 'x' } },
1180
+ ]);
1181
+ const interrupted = await graph.invoke(
1182
+ { messages: [] },
1183
+ { configurable: { thread_id: 'allowed-thread' } }
1184
+ );
1185
+ if (!isInterrupted<t.HumanInterruptPayload>(interrupted)) {
1186
+ throw new Error('expected interrupt');
1187
+ }
1188
+ const payload = interrupted.__interrupt__[0].value!;
1189
+ if (payload.type !== 'tool_approval') {
1190
+ throw new Error('expected tool_approval');
1191
+ }
1192
+ expect(payload.review_configs[0].allowed_decisions).toEqual([
1193
+ 'approve',
1194
+ 'reject',
1195
+ ]);
1196
+ });
1197
+ });
1198
+
1199
describe('Run — preventContinuation honored for pre-stream hooks', () => {
  beforeEach(() => {
    jest.restoreAllMocks();
  });
  afterEach(() => {
    jest.restoreAllMocks();
  });

  it('returns undefined without invoking the graph when RunStart hook returns preventContinuation', async () => {
    const { Run } = await import('@/run');
    const { Providers } = await import('@/common');
    const { HumanMessage: HM } = await import('@langchain/core/messages');

    // RunStart fires before any graph work; preventContinuation here must
    // short-circuit processStream entirely.
    const registry = new HookRegistry();
    let runStartFired = false;
    registry.register('RunStart', {
      hooks: [
        async (): Promise<RunStartHookOutput> => {
          runStartFired = true;
          return {
            preventContinuation: true,
            stopReason: 'pre-flight policy halted run',
          };
        },
      ],
    });

    const run = await Run.create<t.IState>({
      runId: 'pc-runstart',
      graphConfig: {
        type: 'standard',
        agents: [
          {
            agentId: 'a',
            provider: Providers.OPENAI,
            clientOptions: { modelName: 'gpt-4o-mini', apiKey: 'test-key' },
            instructions: 'noop',
            maxContextTokens: 8000,
          },
        ],
      },
      hooks: registry,
      humanInTheLoop: { enabled: false },
    });

    const result = await run.processStream(
      { messages: [new HM('hello')] },
      {
        configurable: { thread_id: 'pc-thread-1' },
        version: 'v2',
      }
    );

    expect(runStartFired).toBe(true);
    expect(result).toBeUndefined();
    /** Graph should not have been run — no messages added beyond the input. */
    expect(run.getInterrupt()).toBeUndefined();
  });

  it('returns undefined when UserPromptSubmit hook returns preventContinuation', async () => {
    const { Run } = await import('@/run');
    const { Providers } = await import('@/common');
    const { HumanMessage: HM } = await import('@langchain/core/messages');

    // Same early-return contract, but for the UserPromptSubmit phase.
    const registry = new HookRegistry();
    let promptFired = false;
    registry.register('UserPromptSubmit', {
      hooks: [
        async (): Promise<UserPromptSubmitHookOutput> => {
          promptFired = true;
          return {
            preventContinuation: true,
            stopReason: 'rate limit reached',
          };
        },
      ],
    });

    const run = await Run.create<t.IState>({
      runId: 'pc-prompt',
      graphConfig: {
        type: 'standard',
        agents: [
          {
            agentId: 'a',
            provider: Providers.OPENAI,
            clientOptions: { modelName: 'gpt-4o-mini', apiKey: 'test-key' },
            instructions: 'noop',
            maxContextTokens: 8000,
          },
        ],
      },
      hooks: registry,
      humanInTheLoop: { enabled: false },
    });

    const result = await run.processStream(
      { messages: [new HM('hello')] },
      {
        configurable: { thread_id: 'pc-thread-2' },
        version: 'v2',
      }
    );

    expect(promptFired).toBe(true);
    expect(result).toBeUndefined();
  });
});
1307
+
1308
describe('Mid-flight preventContinuation halts the run after the current step', () => {
  afterEach(() => {
    jest.restoreAllMocks();
  });

  it('PostToolBatch hook with preventContinuation breaks the stream loop and skips Stop', async () => {
    mockEventDispatch([
      { toolCallId: 'call_1', content: 'ok', status: 'success' },
    ]);

    // PostToolBatch signals a halt; the Stop hook tracks whether the run
    // reached its natural completion path (it must not).
    const registry = new HookRegistry();
    let stopFired = false;
    registry.register('PostToolBatch', {
      hooks: [
        async (): Promise<PostToolBatchHookOutput> => ({
          preventContinuation: true,
          stopReason: 'rate-limit policy halt',
        }),
      ],
    });
    registry.register('Stop', {
      hooks: [
        async (): Promise<Record<string, never>> => {
          stopFired = true;
          return {};
        },
      ],
    });

    const node = new ToolNode({
      tools: [createSchemaStub('echo')],
      eventDrivenMode: true,
      agentId: 'agent-x',
      toolCallStepIds: new Map([['call_1', 'step_1']]),
      hookRegistry: registry,
      humanInTheLoop: { enabled: false },
    });

    // Minimal agent→tools graph that emits one tool call.
    const builder = new StateGraph(MessagesAnnotation)
      .addNode('agent', () => ({
        messages: [
          new AIMessage({
            content: '',
            tool_calls: [
              { id: 'call_1', name: 'echo', args: { command: 'x' } },
            ],
          }),
        ],
      }))
      .addNode('tools', node)
      .addEdge(START, 'agent')
      .addEdge('agent', 'tools')
      .addEdge('tools', END);
    const graph = builder.compile({ checkpointer: new MemorySaver() });

    const { Run } = await import('@/run');
    const run = await Run.create<t.IState>({
      runId: 'halt-mid-flight-1',
      graphConfig: {
        type: 'standard',
        agents: [
          {
            agentId: 'a',
            provider: providers.OPENAI,
            clientOptions: { modelName: 'gpt-4o-mini', apiKey: 'test-key' },
            instructions: 'noop',
            maxContextTokens: 8000,
          },
        ],
      },
      hooks: registry,
      humanInTheLoop: { enabled: false },
    });
    /** Replace the SDK-built graph runnable with our handcrafted one so the
     * PostToolBatch hook fires under a real LangGraph stream. */
    run.graphRunnable = graph as unknown as t.CompiledStateWorkflow;

    await run.processStream(
      { messages: [] },
      {
        configurable: { thread_id: 'halt-thread-1' },
        version: 'v2',
      }
    );

    // Halt reason is surfaced to the host; Stop hook was skipped because
    // the stream loop broke mid-flight rather than completing.
    expect(run.getHaltReason()).toBe('rate-limit policy halt');
    expect(stopFired).toBe(false);
  });

  it('clears halt signal between processStream invocations', async () => {
    const registry = new HookRegistry();
    registry.register('RunStart', {
      hooks: [
        async (): Promise<RunStartHookOutput> => ({
          preventContinuation: true,
          stopReason: 'first run halted',
        }),
      ],
    });

    const { Run } = await import('@/run');
    const { HumanMessage: HM } = await import('@langchain/core/messages');

    const run = await Run.create<t.IState>({
      runId: 'halt-clear-1',
      graphConfig: {
        type: 'standard',
        agents: [
          {
            agentId: 'a',
            provider: providers.OPENAI,
            clientOptions: { modelName: 'gpt-4o-mini', apiKey: 'test-key' },
            instructions: 'noop',
            maxContextTokens: 8000,
          },
        ],
      },
      hooks: registry,
      humanInTheLoop: { enabled: false },
    });

    await run.processStream(
      { messages: [new HM('first')] },
      { configurable: { thread_id: 't-1' }, version: 'v2' }
    );
    /** RunStart preventContinuation is a pre-stream early return, but
     * `processStream` should still have cleared the registry signal
     * for this run id so a subsequent call starts fresh. */
    expect(registry.getHaltSignal('halt-clear-1')).toBeUndefined();
  });
});
1439
+
1440
+ describe('Async fire-and-forget hooks ignore decision/context fields', () => {
1441
+ afterEach(() => {
1442
+ jest.restoreAllMocks();
1443
+ });
1444
+
1445
  it('PreToolUse with `async: true` does not block the tool even when decision is `deny`', async () => {
    mockEventDispatch([
      { toolCallId: 'call_1', content: 'ran', status: 'success' },
    ]);

    // An async (fire-and-forget) hook may still do background work, but its
    // decision/context fields must be ignored entirely.
    let bgFired = false;
    const registry = new HookRegistry();
    registry.register('PreToolUse', {
      hooks: [
        async (): Promise<PreToolUseHookOutput> => {
          /** Side effect runs in background; agent doesn't wait. */
          void Promise.resolve().then(() => {
            bgFired = true;
          });
          return {
            async: true,
            decision: 'deny',
            reason: 'this should be ignored',
            additionalContext: 'this should also be ignored',
          };
        },
      ],
    });

    const node = new ToolNode({
      tools: [createSchemaStub('echo')],
      eventDrivenMode: true,
      agentId: 'agent-x',
      toolCallStepIds: new Map([['call_1', 'step_1']]),
      hookRegistry: registry,
      humanInTheLoop: { enabled: false },
    });

    const graph = buildHITLGraph(node, [
      { id: 'call_1', name: 'echo', args: { command: 'x' } },
    ]);
    const result = (await graph.invoke(
      { messages: [] },
      { configurable: { thread_id: 'async-1' } }
    )) as { messages: BaseMessage[] };

    const toolMsg = result.messages.find(
      (m): m is ToolMessage => m._getType() === 'tool'
    );
    expect(toolMsg).toBeDefined();
    /** Tool ran (no Blocked: prefix) — async output's `decision: 'deny'` was
     * ignored as documented. */
    expect(toolMsg!.status).not.toBe('error');
    expect(toolMsg!.content).toBe('ran');
    /** Background work runs even though we ignored the output. */
    await new Promise((r) => setImmediate(r));
    expect(bgFired).toBe(true);
    /** No injected context message — `additionalContext` was also ignored. */
    const injected = result.messages.find(
      (m) =>
        m._getType() === 'human' &&
        (m as { additional_kwargs?: { source?: string } }).additional_kwargs
          ?.source === 'hook'
    );
    expect(injected).toBeUndefined();
  });
1506
+
1507
+ it('PostToolUse with `async: true` does not halt the run even when preventContinuation is set', async () => {
1508
+ mockEventDispatch([
1509
+ { toolCallId: 'call_1', content: 'ran', status: 'success' },
1510
+ ]);
1511
+
1512
+ const registry = new HookRegistry();
1513
+ registry.register('PostToolUse', {
1514
+ hooks: [
1515
+ async (): Promise<PostToolUseHookOutput> => ({
1516
+ async: true,
1517
+ preventContinuation: true,
1518
+ stopReason: 'should not halt',
1519
+ }),
1520
+ ],
1521
+ });
1522
+
1523
+ const node = new ToolNode({
1524
+ tools: [createSchemaStub('echo')],
1525
+ eventDrivenMode: true,
1526
+ agentId: 'agent-x',
1527
+ toolCallStepIds: new Map([['call_1', 'step_1']]),
1528
+ hookRegistry: registry,
1529
+ humanInTheLoop: { enabled: false },
1530
+ });
1531
+
1532
+ const graph = buildHITLGraph(node, [
1533
+ { id: 'call_1', name: 'echo', args: { command: 'x' } },
1534
+ ]);
1535
+ await graph.invoke(
1536
+ { messages: [] },
1537
+ { configurable: { thread_id: 'async-2' } }
1538
+ );
1539
+
1540
+ /** preventContinuation was on an async output → ignored → no halt
1541
+ * signal raised under any session id. The standalone graph here
1542
+ * runs with `runId = ''` (no `config.configurable.run_id` set),
1543
+ * so check that key explicitly. */
1544
+ expect(registry.getHaltSignal('')).toBeUndefined();
1545
+ });
1546
+ });
1547
+
1548
+ describe('Codex review fixes', () => {
1549
+ afterEach(() => {
1550
+ jest.restoreAllMocks();
1551
+ });
1552
+
1553
+ it('preserves session-scoped hooks across HITL interrupt so the policy still fires on resume', async () => {
1554
+ let dispatchCalls = 0;
1555
+ jest
1556
+ .spyOn(events, 'safeDispatchCustomEvent')
1557
+ .mockImplementation(async (event, data) => {
1558
+ if (event !== 'on_tool_execute') {
1559
+ return;
1560
+ }
1561
+ dispatchCalls += 1;
1562
+ const request = data as {
1563
+ toolCalls: t.ToolCallRequest[];
1564
+ resolve: (r: t.ToolExecuteResult[]) => void;
1565
+ };
1566
+ request.resolve(
1567
+ request.toolCalls.map((c) => ({
1568
+ toolCallId: c.id,
1569
+ content: 'host-result',
1570
+ status: 'success' as const,
1571
+ }))
1572
+ );
1573
+ });
1574
+
1575
+ const registry = new HookRegistry();
1576
+ let preCallCount = 0;
1577
+ /**
1578
+ * Register the policy hook against the runId via `registerSession`
1579
+ * (mirrors how a host scopes per-run policy without leaking it to
1580
+ * concurrent runs). The fix under test: this matcher MUST still be
1581
+ * present when `Run.resume()` re-runs the node so the policy
1582
+ * decision applies the second time too.
1583
+ */
1584
+ const runId = 'session-hook-preserve';
1585
+ registry.registerSession(runId, 'PreToolUse', {
1586
+ hooks: [
1587
+ async (): Promise<PreToolUseHookOutput> => {
1588
+ preCallCount += 1;
1589
+ return { decision: 'ask', reason: 'session policy' };
1590
+ },
1591
+ ],
1592
+ });
1593
+
1594
+ const node = new ToolNode({
1595
+ tools: [createSchemaStub('echo')],
1596
+ eventDrivenMode: true,
1597
+ agentId: 'agent-x',
1598
+ toolCallStepIds: new Map([['call_1', 'step_call_1']]),
1599
+ hookRegistry: registry,
1600
+ humanInTheLoop: { enabled: true },
1601
+ });
1602
+
1603
+ const builder = new StateGraph(MessagesAnnotation)
1604
+ .addNode(
1605
+ 'agent',
1606
+ (): MessagesUpdate => ({
1607
+ messages: [
1608
+ new AIMessage({
1609
+ content: '',
1610
+ tool_calls: [
1611
+ { id: 'call_1', name: 'echo', args: { command: 'x' } },
1612
+ ],
1613
+ }),
1614
+ ],
1615
+ })
1616
+ )
1617
+ .addNode('tools', node)
1618
+ .addEdge(START, 'agent')
1619
+ .addEdge('agent', 'tools')
1620
+ .addEdge('tools', END);
1621
+ const graph = builder.compile({ checkpointer: new MemorySaver() });
1622
+
1623
+ const { Run } = await import('@/run');
1624
+ const run = await Run.create<t.IState>({
1625
+ runId,
1626
+ graphConfig: {
1627
+ type: 'standard',
1628
+ agents: [
1629
+ {
1630
+ agentId: 'a',
1631
+ provider: providers.OPENAI,
1632
+ clientOptions: { modelName: 'gpt-4o-mini', apiKey: 'test-key' },
1633
+ instructions: 'noop',
1634
+ maxContextTokens: 8000,
1635
+ },
1636
+ ],
1637
+ },
1638
+ hooks: registry,
1639
+ humanInTheLoop: { enabled: true },
1640
+ });
1641
+ run.graphRunnable = graph as unknown as t.CompiledStateWorkflow;
1642
+
1643
+ const callerConfig = {
1644
+ configurable: { thread_id: 'session-thread-1' },
1645
+ version: 'v2' as const,
1646
+ };
1647
+
1648
+ await run.processStream({ messages: [] }, callerConfig);
1649
+
1650
+ /** Interrupt fired; one hook invocation so far. Session matcher
1651
+ * MUST still be present — the regression was that finally cleared
1652
+ * it, leaving the resume to bypass the policy entirely. */
1653
+ expect(run.getInterrupt()).toBeDefined();
1654
+ expect(preCallCount).toBe(1);
1655
+ expect(registry.hasHookFor('PreToolUse', runId)).toBe(true);
1656
+ expect(dispatchCalls).toBe(0);
1657
+
1658
+ await run.resume([{ type: 'approve' }], callerConfig);
1659
+
1660
+ /** Hook fired AGAIN on resume — policy was actually applied a
1661
+ * second time, not skipped. Tool then executed. */
1662
+ expect(preCallCount).toBe(2);
1663
+ expect(dispatchCalls).toBe(1);
1664
+ /** After natural completion, session matchers ARE cleared so the
1665
+ * next run on this registry starts clean. */
1666
+ expect(registry.hasHookFor('PreToolUse', runId)).toBe(false);
1667
+ });
1668
+
1669
+ it('denied tool in a deny+ask batch dispatches ON_RUN_STEP_COMPLETED exactly once across interrupt + resume', async () => {
1670
+ const stepCompletedDispatches: string[] = [];
1671
+ /** Spy on the underlying custom event dispatcher to capture every
1672
+ * ON_RUN_STEP_COMPLETED event with its tool_call_id. Without the
1673
+ * blockEntry deferral, this would record `call_a` twice for one
1674
+ * logical denial (once before interrupt, once after resume
1675
+ * re-execution). */
1676
+ jest
1677
+ .spyOn(events, 'safeDispatchCustomEvent')
1678
+ .mockImplementation(async (event, data) => {
1679
+ if (event === GraphEvents.ON_RUN_STEP_COMPLETED) {
1680
+ const payload = data as {
1681
+ result?: { tool_call?: { id?: string } };
1682
+ };
1683
+ const id = payload.result?.tool_call?.id;
1684
+ if (id != null) {
1685
+ stepCompletedDispatches.push(id);
1686
+ }
1687
+ return;
1688
+ }
1689
+ if (event !== 'on_tool_execute') {
1690
+ return;
1691
+ }
1692
+ const request = data as {
1693
+ toolCalls: t.ToolCallRequest[];
1694
+ resolve: (r: t.ToolExecuteResult[]) => void;
1695
+ };
1696
+ request.resolve(
1697
+ request.toolCalls.map((c) => ({
1698
+ toolCallId: c.id,
1699
+ content: `ran:${c.name}`,
1700
+ status: 'success' as const,
1701
+ }))
1702
+ );
1703
+ });
1704
+
1705
+ const registry = new HookRegistry();
1706
+ registry.register('PreToolUse', {
1707
+ hooks: [
1708
+ async (input): Promise<PreToolUseHookOutput> => {
1709
+ if (input.toolName === 'tool_a') {
1710
+ return { decision: 'deny', reason: 'policy:a' };
1711
+ }
1712
+ return { decision: 'ask', reason: 'policy:b-needs-review' };
1713
+ },
1714
+ ],
1715
+ });
1716
+
1717
+ const node = new ToolNode({
1718
+ tools: [createSchemaStub('tool_a'), createSchemaStub('tool_b')],
1719
+ eventDrivenMode: true,
1720
+ agentId: 'agent-x',
1721
+ toolCallStepIds: new Map([
1722
+ ['call_a', 'step_a'],
1723
+ ['call_b', 'step_b'],
1724
+ ]),
1725
+ hookRegistry: registry,
1726
+ humanInTheLoop: { enabled: true },
1727
+ });
1728
+
1729
+ const graph = buildHITLGraph(node, [
1730
+ { id: 'call_a', name: 'tool_a', args: { command: 'a' } },
1731
+ { id: 'call_b', name: 'tool_b', args: { command: 'b' } },
1732
+ ]);
1733
+ const config = { configurable: { thread_id: 'dedup-thread' } };
1734
+
1735
+ await graph.invoke({ messages: [] }, config);
1736
+ /** First pass: interrupt() threw, so the deferred denial side
1737
+ * effects were not flushed. Zero step-completed events for the
1738
+ * denied tool yet. */
1739
+ expect(stepCompletedDispatches.filter((id) => id === 'call_a')).toEqual([]);
1740
+
1741
+ await graph.invoke(new Command({ resume: [{ type: 'approve' }] }), config);
1742
+
1743
+ /** After resume: the denied tool dispatches exactly once (deferred
1744
+ * flush on the resume re-execution); the approved tool dispatches
1745
+ * once via the normal execution path. */
1746
+ expect(stepCompletedDispatches.filter((id) => id === 'call_a')).toEqual([
1747
+ 'call_a',
1748
+ ]);
1749
+ expect(stepCompletedDispatches.filter((id) => id === 'call_b')).toEqual([
1750
+ 'call_b',
1751
+ ]);
1752
+ });
1753
+
1754
+ it('enforces allowedDecisions on resume — host-submitted decision outside the allowlist is rejected', async () => {
1755
+ const dispatchedToolNames: string[] = [];
1756
+ jest
1757
+ .spyOn(events, 'safeDispatchCustomEvent')
1758
+ .mockImplementation(async (event, data) => {
1759
+ if (event !== 'on_tool_execute') {
1760
+ return;
1761
+ }
1762
+ const request = data as {
1763
+ toolCalls: t.ToolCallRequest[];
1764
+ resolve: (r: t.ToolExecuteResult[]) => void;
1765
+ };
1766
+ for (const c of request.toolCalls) {
1767
+ dispatchedToolNames.push(c.name);
1768
+ }
1769
+ request.resolve(
1770
+ request.toolCalls.map((c) => ({
1771
+ toolCallId: c.id,
1772
+ content: 'ran',
1773
+ status: 'success' as const,
1774
+ }))
1775
+ );
1776
+ });
1777
+
1778
+ /** Hook restricts to approve/reject only — edit/respond are
1779
+ * forbidden. Even if a buggy or hostile host UI submits an
1780
+ * `edit`, the SDK must fail closed instead of mutating the args
1781
+ * and running the tool. */
1782
+ const registry = new HookRegistry();
1783
+ registry.register('PreToolUse', {
1784
+ hooks: [
1785
+ async (): Promise<PreToolUseHookOutput> => ({
1786
+ decision: 'ask',
1787
+ allowedDecisions: ['approve', 'reject'],
1788
+ }),
1789
+ ],
1790
+ });
1791
+
1792
+ const node = new ToolNode({
1793
+ tools: [createSchemaStub('echo')],
1794
+ eventDrivenMode: true,
1795
+ agentId: 'agent-x',
1796
+ toolCallStepIds: new Map([['call_1', 'step_1']]),
1797
+ hookRegistry: registry,
1798
+ humanInTheLoop: { enabled: true },
1799
+ });
1800
+
1801
+ const graph = buildHITLGraph(node, [
1802
+ { id: 'call_1', name: 'echo', args: { command: 'original' } },
1803
+ ]);
1804
+ const config = { configurable: { thread_id: 'allowed-enforce' } };
1805
+
1806
+ await graph.invoke({ messages: [] }, config);
1807
+
1808
+ /** Submit `edit` — outside the advertised allowlist. */
1809
+ const resumed = (await graph.invoke(
1810
+ new Command({
1811
+ resume: [{ type: 'edit', updatedInput: { command: 'malicious' } }],
1812
+ }),
1813
+ config
1814
+ )) as { messages: BaseMessage[] };
1815
+
1816
+ const toolMessages = resumed.messages.filter(
1817
+ (m): m is ToolMessage => m._getType() === 'tool'
1818
+ );
1819
+ expect(toolMessages).toHaveLength(1);
1820
+ /** Tool was blocked; arg-mutation never reached the host. */
1821
+ expect(toolMessages[0].status).toBe('error');
1822
+ expect(String(toolMessages[0].content)).toContain(
1823
+ 'not in allowedDecisions'
1824
+ );
1825
+ expect(String(toolMessages[0].content)).toContain('approve');
1826
+ expect(String(toolMessages[0].content)).toContain('reject');
1827
+ expect(dispatchedToolNames).toEqual([]);
1828
+ });
1829
+
1830
+ it('enforces allowedDecisions on resume — approved decision passes through when in the allowlist', async () => {
1831
+ const dispatchedArgs: Array<Record<string, unknown>> = [];
1832
+ jest
1833
+ .spyOn(events, 'safeDispatchCustomEvent')
1834
+ .mockImplementation(async (event, data) => {
1835
+ if (event !== 'on_tool_execute') {
1836
+ return;
1837
+ }
1838
+ const request = data as {
1839
+ toolCalls: t.ToolCallRequest[];
1840
+ resolve: (r: t.ToolExecuteResult[]) => void;
1841
+ };
1842
+ for (const c of request.toolCalls) {
1843
+ dispatchedArgs.push(c.args);
1844
+ }
1845
+ request.resolve(
1846
+ request.toolCalls.map((c) => ({
1847
+ toolCallId: c.id,
1848
+ content: 'ran',
1849
+ status: 'success' as const,
1850
+ }))
1851
+ );
1852
+ });
1853
+
1854
+ const registry = new HookRegistry();
1855
+ registry.register('PreToolUse', {
1856
+ hooks: [
1857
+ async (): Promise<PreToolUseHookOutput> => ({
1858
+ decision: 'ask',
1859
+ allowedDecisions: ['approve', 'reject'],
1860
+ }),
1861
+ ],
1862
+ });
1863
+
1864
+ const node = new ToolNode({
1865
+ tools: [createSchemaStub('echo')],
1866
+ eventDrivenMode: true,
1867
+ agentId: 'agent-x',
1868
+ toolCallStepIds: new Map([['call_1', 'step_1']]),
1869
+ hookRegistry: registry,
1870
+ humanInTheLoop: { enabled: true },
1871
+ });
1872
+
1873
+ const graph = buildHITLGraph(node, [
1874
+ { id: 'call_1', name: 'echo', args: { command: 'original' } },
1875
+ ]);
1876
+ const config = { configurable: { thread_id: 'allowed-pass' } };
1877
+
1878
+ await graph.invoke({ messages: [] }, config);
1879
+
1880
+ /** Submit `approve` — explicitly in the allowlist. */
1881
+ await graph.invoke(new Command({ resume: [{ type: 'approve' }] }), config);
1882
+
1883
+ expect(dispatchedArgs).toEqual([{ command: 'original' }]);
1884
+ });
1885
+
1886
+ it('getInterrupt<T>() returns the captured payload typed as the host-asserted shape', async () => {
1887
+ /**
1888
+ * Custom graph node raises an interrupt with a payload shape the
1889
+ * SDK doesn't know about. `run.getInterrupt<MyCustomPayload>()`
1890
+ * returns the payload typed as the host's assertion — the SDK
1891
+ * doesn't validate, it just transports.
1892
+ */
1893
+ interface MyCustomPayload {
1894
+ type: 'custom_review';
1895
+ diff: string;
1896
+ reviewerHints: string[];
1897
+ }
1898
+
1899
+ const langgraph = await import('@langchain/langgraph');
1900
+
1901
+ const builder = new StateGraph(MessagesAnnotation)
1902
+ .addNode('clarifier', () => {
1903
+ langgraph.interrupt({
1904
+ type: 'custom_review',
1905
+ diff: '+ added line',
1906
+ reviewerHints: ['check formatting'],
1907
+ } satisfies MyCustomPayload);
1908
+ return { messages: [] };
1909
+ })
1910
+ .addEdge(START, 'clarifier')
1911
+ .addEdge('clarifier', END);
1912
+ const graph = builder.compile({ checkpointer: new MemorySaver() });
1913
+
1914
+ const { Run } = await import('@/run');
1915
+ const run = await Run.create<t.IState>({
1916
+ runId: 'custom-interrupt',
1917
+ graphConfig: {
1918
+ type: 'standard',
1919
+ agents: [
1920
+ {
1921
+ agentId: 'a',
1922
+ provider: providers.OPENAI,
1923
+ clientOptions: { modelName: 'gpt-4o-mini', apiKey: 'test-key' },
1924
+ instructions: 'noop',
1925
+ maxContextTokens: 8000,
1926
+ },
1927
+ ],
1928
+ },
1929
+ humanInTheLoop: { enabled: true },
1930
+ });
1931
+ run.graphRunnable = graph as unknown as t.CompiledStateWorkflow;
1932
+
1933
+ await run.processStream(
1934
+ { messages: [] },
1935
+ {
1936
+ configurable: { thread_id: 'custom-interrupt-thread' },
1937
+ version: 'v2',
1938
+ }
1939
+ );
1940
+
1941
+ const interrupt = run.getInterrupt<MyCustomPayload>();
1942
+ expect(interrupt).toBeDefined();
1943
+ expect(interrupt!.payload.type).toBe('custom_review');
1944
+ expect(interrupt!.payload.diff).toBe('+ added line');
1945
+ expect(interrupt!.payload.reviewerHints).toEqual(['check formatting']);
1946
+ });
1947
+
1948
+ it('isToolApprovalInterrupt / isAskUserQuestionInterrupt narrow safely from `unknown` (defensive)', async () => {
1949
+ const { isToolApprovalInterrupt, isAskUserQuestionInterrupt } =
1950
+ await import('@/types/hitl');
1951
+
1952
+ /** The guards must accept arbitrary runtime values without throwing,
1953
+ * since hosts can pass anything from custom interrupts. */
1954
+ expect(isToolApprovalInterrupt(null as unknown)).toBe(false);
1955
+ expect(isToolApprovalInterrupt(undefined as unknown)).toBe(false);
1956
+ expect(isToolApprovalInterrupt('string' as unknown)).toBe(false);
1957
+ expect(isToolApprovalInterrupt(42 as unknown)).toBe(false);
1958
+ expect(isToolApprovalInterrupt({} as unknown)).toBe(false);
1959
+ expect(isToolApprovalInterrupt({ type: 'something_else' } as unknown)).toBe(
1960
+ false
1961
+ );
1962
+ expect(
1963
+ isToolApprovalInterrupt({
1964
+ type: 'tool_approval',
1965
+ action_requests: [],
1966
+ review_configs: [],
1967
+ } as unknown)
1968
+ ).toBe(true);
1969
+
1970
+ expect(isAskUserQuestionInterrupt(null as unknown)).toBe(false);
1971
+ expect(
1972
+ isAskUserQuestionInterrupt({ type: 'tool_approval' } as unknown)
1973
+ ).toBe(false);
1974
+ expect(
1975
+ isAskUserQuestionInterrupt({
1976
+ type: 'ask_user_question',
1977
+ question: { question: 'why' },
1978
+ } as unknown)
1979
+ ).toBe(true);
1980
+ });
1981
+
1982
+ it('hook returning ask + updatedInput rewrites args BEFORE the interrupt and BEFORE host execution', async () => {
1983
+ const dispatchedArgs: Array<Record<string, unknown>> = [];
1984
+ jest
1985
+ .spyOn(events, 'safeDispatchCustomEvent')
1986
+ .mockImplementation(async (event, data) => {
1987
+ if (event !== 'on_tool_execute') {
1988
+ return;
1989
+ }
1990
+ const request = data as {
1991
+ toolCalls: t.ToolCallRequest[];
1992
+ resolve: (r: t.ToolExecuteResult[]) => void;
1993
+ };
1994
+ for (const c of request.toolCalls) {
1995
+ dispatchedArgs.push(c.args);
1996
+ }
1997
+ request.resolve(
1998
+ request.toolCalls.map((c) => ({
1999
+ toolCallId: c.id,
2000
+ content: 'ran',
2001
+ status: 'success' as const,
2002
+ }))
2003
+ );
2004
+ });
2005
+
2006
+ /**
2007
+ * Hook returns BOTH a sanitization rewrite AND `ask`. Real-world
2008
+ * pattern: one matcher redacts secrets in the args, another
2009
+ * matcher requires human approval. Both signals must apply.
2010
+ */
2011
+ const registry = new HookRegistry();
2012
+ registry.register('PreToolUse', {
2013
+ hooks: [
2014
+ async (): Promise<PreToolUseHookOutput> => ({
2015
+ decision: 'ask',
2016
+ reason: 'review redacted args',
2017
+ updatedInput: { command: 'redacted-command' },
2018
+ }),
2019
+ ],
2020
+ });
2021
+
2022
+ const node = new ToolNode({
2023
+ tools: [createSchemaStub('echo')],
2024
+ eventDrivenMode: true,
2025
+ agentId: 'agent-x',
2026
+ toolCallStepIds: new Map([['call_1', 'step_1']]),
2027
+ hookRegistry: registry,
2028
+ humanInTheLoop: { enabled: true },
2029
+ });
2030
+
2031
+ const graph = buildHITLGraph(node, [
2032
+ { id: 'call_1', name: 'echo', args: { command: 'original-secret' } },
2033
+ ]);
2034
+ const config = { configurable: { thread_id: 'ask-with-update' } };
2035
+
2036
+ const interrupted = await graph.invoke({ messages: [] }, config);
2037
+ if (!isInterrupted<t.HumanInterruptPayload>(interrupted)) {
2038
+ throw new Error('expected interrupt');
2039
+ }
2040
+ const payload = interrupted.__interrupt__[0].value!;
2041
+ if (payload.type !== 'tool_approval') {
2042
+ throw new Error('expected tool_approval');
2043
+ }
2044
+ /** The interrupt payload surfaces the REWRITTEN args to the
2045
+ * reviewer, not the original. Without the fix, the reviewer
2046
+ * would see the secret. */
2047
+ expect(payload.action_requests[0].arguments).toEqual({
2048
+ command: 'redacted-command',
2049
+ });
2050
+
2051
+ await graph.invoke(new Command({ resume: [{ type: 'approve' }] }), config);
2052
+
2053
+ /** And the host execution dispatches the rewritten args, not
2054
+ * the original. Without the fix, the policy redaction would be
2055
+ * silently dropped after approval. */
2056
+ expect(dispatchedArgs).toEqual([{ command: 'redacted-command' }]);
2057
+ });
2058
+
2059
+ it('captures interrupt even when payload is null (custom node calling interrupt(null))', async () => {
2060
+ const langgraph = await import('@langchain/langgraph');
2061
+
2062
+ let stopFired = false;
2063
+ const registry = new HookRegistry();
2064
+ registry.register('Stop', {
2065
+ hooks: [
2066
+ async (): Promise<Record<string, never>> => {
2067
+ stopFired = true;
2068
+ return {};
2069
+ },
2070
+ ],
2071
+ });
2072
+
2073
+ const builder = new StateGraph(MessagesAnnotation)
2074
+ .addNode('pauser', () => {
2075
+ /** Custom node pauses without payload — valid use case (the
2076
+ * pause itself is the signal; no metadata needed). */
2077
+ langgraph.interrupt(null);
2078
+ return { messages: [] };
2079
+ })
2080
+ .addEdge(START, 'pauser')
2081
+ .addEdge('pauser', END);
2082
+ const graph = builder.compile({ checkpointer: new MemorySaver() });
2083
+
2084
+ const { Run } = await import('@/run');
2085
+ const run = await Run.create<t.IState>({
2086
+ runId: 'null-payload-interrupt',
2087
+ graphConfig: {
2088
+ type: 'standard',
2089
+ agents: [
2090
+ {
2091
+ agentId: 'a',
2092
+ provider: providers.OPENAI,
2093
+ clientOptions: { modelName: 'gpt-4o-mini', apiKey: 'test-key' },
2094
+ instructions: 'noop',
2095
+ maxContextTokens: 8000,
2096
+ },
2097
+ ],
2098
+ },
2099
+ hooks: registry,
2100
+ humanInTheLoop: { enabled: true },
2101
+ });
2102
+ run.graphRunnable = graph as unknown as t.CompiledStateWorkflow;
2103
+
2104
+ await run.processStream(
2105
+ { messages: [] },
2106
+ {
2107
+ configurable: { thread_id: 'null-payload-thread' },
2108
+ version: 'v2',
2109
+ }
2110
+ );
2111
+
2112
+ /** Run was paused, NOT completed — getInterrupt returns a result
2113
+ * (with the null payload preserved) and the Stop hook does not
2114
+ * fire. Without the fix, both inversions held. */
2115
+ const interrupt = run.getInterrupt<unknown>();
2116
+ expect(interrupt).toBeDefined();
2117
+ expect(interrupt!.payload).toBeNull();
2118
+ expect(stopFired).toBe(false);
2119
+ });
2120
+
2121
+ it('halt signal raised by run A does not bleed into a concurrent run B sharing the same registry', async () => {
2122
+ /**
2123
+ * One registry, two runs. RunStart hook for run A raises
2124
+ * preventContinuation; run B has no halt signal. Without
2125
+ * per-session scoping, run B's stream-loop poll would see A's
2126
+ * signal and silently terminate. With scoping, each run reads
2127
+ * only its own halt entry.
2128
+ */
2129
+ const registry = new HookRegistry();
2130
+ let runStartFires = 0;
2131
+ registry.register('RunStart', {
2132
+ hooks: [
2133
+ async (input): Promise<RunStartHookOutput> => {
2134
+ runStartFires += 1;
2135
+ /** Halt only run A, not run B. */
2136
+ if (input.runId === 'run-a') {
2137
+ return {
2138
+ preventContinuation: true,
2139
+ stopReason: 'A halted',
2140
+ };
2141
+ }
2142
+ return {};
2143
+ },
2144
+ ],
2145
+ });
2146
+
2147
+ const { Run } = await import('@/run');
2148
+ const { HumanMessage: HM } = await import('@langchain/core/messages');
2149
+
2150
+ /** No-op graph so we never hit the real model. */
2151
+ const makeNoopGraph = (): t.CompiledStateWorkflow => {
2152
+ const builder = new StateGraph(MessagesAnnotation)
2153
+ .addNode('noop', (): MessagesUpdate => ({ messages: [] }))
2154
+ .addEdge(START, 'noop')
2155
+ .addEdge('noop', END);
2156
+ return builder.compile() as unknown as t.CompiledStateWorkflow;
2157
+ };
2158
+
2159
+ const makeRun = async (
2160
+ runId: string
2161
+ ): Promise<Awaited<ReturnType<typeof Run.create<t.IState>>>> => {
2162
+ const r = await Run.create<t.IState>({
2163
+ runId,
2164
+ graphConfig: {
2165
+ type: 'standard',
2166
+ agents: [
2167
+ {
2168
+ agentId: 'a',
2169
+ provider: providers.OPENAI,
2170
+ clientOptions: { modelName: 'gpt-4o-mini', apiKey: 'test-key' },
2171
+ instructions: 'noop',
2172
+ maxContextTokens: 8000,
2173
+ },
2174
+ ],
2175
+ },
2176
+ hooks: registry,
2177
+ humanInTheLoop: { enabled: false },
2178
+ });
2179
+ r.graphRunnable = makeNoopGraph();
2180
+ return r;
2181
+ };
2182
+
2183
+ const runA = await makeRun('run-a');
2184
+ const runB = await makeRun('run-b');
2185
+
2186
+ /** Run A — its preventContinuation lands in the per-session halt
2187
+ * map under key `'run-a'` and triggers a pre-stream early
2188
+ * return. Note that the early-return path also clears its own
2189
+ * halt signal in the same step, so run B can never observe it
2190
+ * even momentarily. */
2191
+ await runA.processStream(
2192
+ { messages: [new HM('a')] },
2193
+ { configurable: { thread_id: 'thread-a' }, version: 'v2' }
2194
+ );
2195
+ expect(runA.getHaltReason()).toBe('A halted');
2196
+
2197
+ /** Run B's signal must be undefined — A's halt is scoped to A's
2198
+ * session id, and was cleared in A's pre-stream finally path. */
2199
+ expect(registry.getHaltSignal('run-b')).toBeUndefined();
2200
+ expect(registry.getHaltSignal('run-a')).toBeUndefined();
2201
+
2202
+ /** Run B — RunStart returns no halt, so processStream proceeds
2203
+ * past the pre-stream gate, executes the no-op graph, and
2204
+ * completes without halt. */
2205
+ runStartFires = 0;
2206
+ await runB.processStream(
2207
+ { messages: [new HM('b')] },
2208
+ { configurable: { thread_id: 'thread-b' }, version: 'v2' }
2209
+ );
2210
+ expect(runStartFires).toBe(1);
2211
+ expect(runB.getHaltReason()).toBeUndefined();
2212
+ });
2213
+
2214
+ it('review_configs entries carry tool_call_id so duplicate-tool batches map unambiguously', async () => {
2215
+ mockEventDispatch([]);
2216
+
2217
+ const registry = new HookRegistry();
2218
+ registry.register('PreToolUse', {
2219
+ hooks: [
2220
+ async (): Promise<PreToolUseHookOutput> => ({
2221
+ decision: 'ask',
2222
+ reason: 'review',
2223
+ }),
2224
+ ],
2225
+ });
2226
+
2227
+ /** Same tool name called twice in one batch — by-position
2228
+ * mapping breaks down for hosts that reorder; tool_call_id
2229
+ * lets the UI map review_configs → action_requests directly. */
2230
+ const node = new ToolNode({
2231
+ tools: [createSchemaStub('echo')],
2232
+ eventDrivenMode: true,
2233
+ agentId: 'agent-x',
2234
+ toolCallStepIds: new Map([
2235
+ ['call_first', 'step_first'],
2236
+ ['call_second', 'step_second'],
2237
+ ]),
2238
+ hookRegistry: registry,
2239
+ humanInTheLoop: { enabled: true },
2240
+ });
2241
+
2242
+ const graph = buildHITLGraph(node, [
2243
+ { id: 'call_first', name: 'echo', args: { command: 'a' } },
2244
+ { id: 'call_second', name: 'echo', args: { command: 'b' } },
2245
+ ]);
2246
+ const config = { configurable: { thread_id: 'duplicate-tool' } };
2247
+
2248
+ const interrupted = await graph.invoke({ messages: [] }, config);
2249
+ if (!isInterrupted<t.HumanInterruptPayload>(interrupted)) {
2250
+ throw new Error('expected interrupt');
2251
+ }
2252
+ const payload = interrupted.__interrupt__[0].value!;
2253
+ if (payload.type !== 'tool_approval') {
2254
+ throw new Error('expected tool_approval');
2255
+ }
2256
+
2257
+ /** Each review_config carries its own tool_call_id matching the
2258
+ * action_request at the same index. UI can build a Map keyed by
2259
+ * tool_call_id rather than relying on positional order. */
2260
+ expect(payload.review_configs).toEqual([
2261
+ {
2262
+ action_name: 'echo',
2263
+ tool_call_id: 'call_first',
2264
+ allowed_decisions: ['approve', 'reject', 'edit', 'respond'],
2265
+ },
2266
+ {
2267
+ action_name: 'echo',
2268
+ tool_call_id: 'call_second',
2269
+ allowed_decisions: ['approve', 'reject', 'edit', 'respond'],
2270
+ },
2271
+ ]);
2272
+ /** And the action_requests carry the same ids — pairing is
2273
+ * always derivable from id even when names collide. */
2274
+ expect(payload.action_requests.map((r) => r.tool_call_id)).toEqual([
2275
+ 'call_first',
2276
+ 'call_second',
2277
+ ]);
2278
+ });
2279
+
2280
+ it('malformed edit decision (missing updatedInput) is blocked, not approved with garbage args', async () => {
2281
+ let dispatchCount = 0;
2282
+ jest
2283
+ .spyOn(events, 'safeDispatchCustomEvent')
2284
+ .mockImplementation(async (event, data) => {
2285
+ if (event !== 'on_tool_execute') {
2286
+ return;
2287
+ }
2288
+ dispatchCount += 1;
2289
+ const request = data as {
2290
+ toolCalls: t.ToolCallRequest[];
2291
+ resolve: (r: t.ToolExecuteResult[]) => void;
2292
+ };
2293
+ request.resolve([]);
2294
+ });
2295
+
2296
+ const node = new ToolNode({
2297
+ tools: [createSchemaStub('echo')],
2298
+ eventDrivenMode: true,
2299
+ agentId: 'agent-x',
2300
+ toolCallStepIds: new Map([['call_1', 'step_1']]),
2301
+ hookRegistry: makeHookRegistry('ask'),
2302
+ humanInTheLoop: { enabled: true },
2303
+ });
2304
+
2305
+ const graph = buildHITLGraph(node, [
2306
+ { id: 'call_1', name: 'echo', args: { command: 'original' } },
2307
+ ]);
2308
+ const config = { configurable: { thread_id: 'edit-malformed' } };
2309
+
2310
+ await graph.invoke({ messages: [] }, config);
2311
+
2312
+ /** `{ type: 'edit' }` with no updatedInput — same trust-boundary
2313
+ * issue as malformed respond. Must fail closed, NOT pass undefined
2314
+ * into applyInputOverride and approve a tool with garbage args. */
2315
+ const resumed = (await graph.invoke(
2316
+ new Command({
2317
+ resume: [{ type: 'edit' } as unknown as t.ToolApprovalDecision],
2318
+ }),
2319
+ config
2320
+ )) as { messages: BaseMessage[] };
2321
+
2322
+ const toolMessages = resumed.messages.filter(
2323
+ (m): m is ToolMessage => m._getType() === 'tool'
2324
+ );
2325
+ expect(toolMessages).toHaveLength(1);
2326
+ expect(toolMessages[0].status).toBe('error');
2327
+ expect(String(toolMessages[0].content)).toContain(
2328
+ 'missing object updatedInput'
2329
+ );
2330
+ expect(String(toolMessages[0].content)).toContain('<missing>');
2331
+ expect(dispatchCount).toBe(0);
2332
+ });
2333
+
2334
+ it('malformed edit decision (non-object updatedInput) is blocked', async () => {
2335
+ let dispatchCount = 0;
2336
+ jest
2337
+ .spyOn(events, 'safeDispatchCustomEvent')
2338
+ .mockImplementation(async (event, data) => {
2339
+ if (event !== 'on_tool_execute') {
2340
+ return;
2341
+ }
2342
+ dispatchCount += 1;
2343
+ const request = data as {
2344
+ toolCalls: t.ToolCallRequest[];
2345
+ resolve: (r: t.ToolExecuteResult[]) => void;
2346
+ };
2347
+ request.resolve([]);
2348
+ });
2349
+
2350
+ const node = new ToolNode({
2351
+ tools: [createSchemaStub('echo')],
2352
+ eventDrivenMode: true,
2353
+ agentId: 'agent-x',
2354
+ toolCallStepIds: new Map([['call_1', 'step_1']]),
2355
+ hookRegistry: makeHookRegistry('ask'),
2356
+ humanInTheLoop: { enabled: true },
2357
+ });
2358
+
2359
+ const graph = buildHITLGraph(node, [
2360
+ { id: 'call_1', name: 'echo', args: { command: 'original' } },
2361
+ ]);
2362
+ const config = { configurable: { thread_id: 'edit-nonobject' } };
2363
+
2364
+ await graph.invoke({ messages: [] }, config);
2365
+
2366
+ /** `updatedInput: 'string'` — wire deserializer didn't enforce
2367
+ * object shape; SDK must reject. */
2368
+ const resumed = (await graph.invoke(
2369
+ new Command({
2370
+ resume: [
2371
+ {
2372
+ type: 'edit',
2373
+ updatedInput: 'not-an-object' as unknown as Record<string, unknown>,
2374
+ },
2375
+ ],
2376
+ }),
2377
+ config
2378
+ )) as { messages: BaseMessage[] };
2379
+
2380
+ const toolMessages = resumed.messages.filter(
2381
+ (m): m is ToolMessage => m._getType() === 'tool'
2382
+ );
2383
+ expect(toolMessages).toHaveLength(1);
2384
+ expect(toolMessages[0].status).toBe('error');
2385
+ expect(String(toolMessages[0].content)).toContain(
2386
+ 'missing object updatedInput'
2387
+ );
2388
+ expect(String(toolMessages[0].content)).toContain('string');
2389
+ expect(dispatchCount).toBe(0);
2390
+ });
2391
+
2392
+ it('malformed edit decision (array updatedInput) is blocked — arrays are objects but not plain records', async () => {
2393
+ jest
2394
+ .spyOn(events, 'safeDispatchCustomEvent')
2395
+ .mockImplementation(async () => {
2396
+ return;
2397
+ });
2398
+
2399
+ const node = new ToolNode({
2400
+ tools: [createSchemaStub('echo')],
2401
+ eventDrivenMode: true,
2402
+ agentId: 'agent-x',
2403
+ toolCallStepIds: new Map([['call_1', 'step_1']]),
2404
+ hookRegistry: makeHookRegistry('ask'),
2405
+ humanInTheLoop: { enabled: true },
2406
+ });
2407
+
2408
+ const graph = buildHITLGraph(node, [
2409
+ { id: 'call_1', name: 'echo', args: { command: 'original' } },
2410
+ ]);
2411
+ const config = { configurable: { thread_id: 'edit-array' } };
2412
+
2413
+ await graph.invoke({ messages: [] }, config);
2414
+
2415
+ const resumed = (await graph.invoke(
2416
+ new Command({
2417
+ resume: [
2418
+ {
2419
+ type: 'edit',
2420
+ updatedInput: [1, 2, 3] as unknown as Record<string, unknown>,
2421
+ },
2422
+ ],
2423
+ }),
2424
+ config
2425
+ )) as { messages: BaseMessage[] };
2426
+
2427
+ const toolMessages = resumed.messages.filter(
2428
+ (m): m is ToolMessage => m._getType() === 'tool'
2429
+ );
2430
+ expect(toolMessages).toHaveLength(1);
2431
+ expect(toolMessages[0].status).toBe('error');
2432
+ expect(String(toolMessages[0].content)).toContain('array');
2433
+ });
2434
+
2435
+ it('malformed respond decision (missing responseText) is blocked, not crashed', async () => {
2436
+ let dispatchCount = 0;
2437
+ jest
2438
+ .spyOn(events, 'safeDispatchCustomEvent')
2439
+ .mockImplementation(async (event, data) => {
2440
+ if (event !== 'on_tool_execute') {
2441
+ return;
2442
+ }
2443
+ dispatchCount += 1;
2444
+ const request = data as {
2445
+ toolCalls: t.ToolCallRequest[];
2446
+ resolve: (r: t.ToolExecuteResult[]) => void;
2447
+ };
2448
+ request.resolve([]);
2449
+ });
2450
+
2451
+ const node = new ToolNode({
2452
+ tools: [createSchemaStub('echo')],
2453
+ eventDrivenMode: true,
2454
+ agentId: 'agent-x',
2455
+ toolCallStepIds: new Map([['call_1', 'step_1']]),
2456
+ hookRegistry: makeHookRegistry('ask'),
2457
+ humanInTheLoop: { enabled: true },
2458
+ });
2459
+
2460
+ const graph = buildHITLGraph(node, [
2461
+ { id: 'call_1', name: 'echo', args: { command: 'x' } },
2462
+ ]);
2463
+ const config = { configurable: { thread_id: 'respond-malformed' } };
2464
+
2465
+ await graph.invoke({ messages: [] }, config);
2466
+
2467
+ /** Submit a `respond` decision with NO responseText — wire shape
2468
+ * the SDK can't honor. Must fail closed (blockEntry path), NOT
2469
+ * crash truncateToolResultContent on `undefined.length`. */
2470
+ const resumed = (await graph.invoke(
2471
+ new Command({
2472
+ resume: [{ type: 'respond' } as unknown as t.ToolApprovalDecision],
2473
+ }),
2474
+ config
2475
+ )) as { messages: BaseMessage[] };
2476
+
2477
+ const toolMessages = resumed.messages.filter(
2478
+ (m): m is ToolMessage => m._getType() === 'tool'
2479
+ );
2480
+ expect(toolMessages).toHaveLength(1);
2481
+ expect(toolMessages[0].status).toBe('error');
2482
+ expect(String(toolMessages[0].content)).toContain(
2483
+ 'missing string responseText'
2484
+ );
2485
+ expect(String(toolMessages[0].content)).toContain('<missing>');
2486
+ /** Tool was never dispatched — fail-closed worked. */
2487
+ expect(dispatchCount).toBe(0);
2488
+ });
2489
+
2490
+ it('malformed respond decision (non-string responseText) is blocked, not crashed', async () => {
2491
+ jest
2492
+ .spyOn(events, 'safeDispatchCustomEvent')
2493
+ .mockImplementation(async () => {
2494
+ return;
2495
+ });
2496
+
2497
+ const node = new ToolNode({
2498
+ tools: [createSchemaStub('echo')],
2499
+ eventDrivenMode: true,
2500
+ agentId: 'agent-x',
2501
+ toolCallStepIds: new Map([['call_1', 'step_1']]),
2502
+ hookRegistry: makeHookRegistry('ask'),
2503
+ humanInTheLoop: { enabled: true },
2504
+ });
2505
+
2506
+ const graph = buildHITLGraph(node, [
2507
+ { id: 'call_1', name: 'echo', args: { command: 'x' } },
2508
+ ]);
2509
+ const config = { configurable: { thread_id: 'respond-nonstring' } };
2510
+
2511
+ await graph.invoke({ messages: [] }, config);
2512
+
2513
+ /** `responseText: 42` — wire deserializer didn't enforce string;
2514
+ * SDK must reject without crashing. */
2515
+ const resumed = (await graph.invoke(
2516
+ new Command({
2517
+ resume: [
2518
+ {
2519
+ type: 'respond',
2520
+ responseText: 42 as unknown as string,
2521
+ },
2522
+ ],
2523
+ }),
2524
+ config
2525
+ )) as { messages: BaseMessage[] };
2526
+
2527
+ const toolMessages = resumed.messages.filter(
2528
+ (m): m is ToolMessage => m._getType() === 'tool'
2529
+ );
2530
+ expect(toolMessages).toHaveLength(1);
2531
+ expect(toolMessages[0].status).toBe('error');
2532
+ expect(String(toolMessages[0].content)).toContain(
2533
+ 'missing string responseText'
2534
+ );
2535
+ expect(String(toolMessages[0].content)).toContain('number');
2536
+ });
2537
+
2538
+ it('respond decision truncates oversized text the same way real tool output is truncated', async () => {
2539
+ mockEventDispatch([]);
2540
+
2541
+ /** Build a ToolNode with a tiny `maxToolResultChars` so the
2542
+ * truncation kicks in for a 200-char response. Without the fix,
2543
+ * the full string would land in the ToolMessage and PostToolBatch
2544
+ * entry — bypassing the model context budget. */
2545
+ const registry = new HookRegistry();
2546
+ registry.register('PreToolUse', {
2547
+ hooks: [async (): Promise<PreToolUseHookOutput> => ({ decision: 'ask' })],
2548
+ });
2549
+ let captured: PostToolBatchEntry | undefined;
2550
+ registry.register('PostToolBatch', {
2551
+ hooks: [
2552
+ async (input): Promise<PostToolBatchHookOutput> => {
2553
+ captured = (input as PostToolBatchHookInput).entries[0];
2554
+ return {};
2555
+ },
2556
+ ],
2557
+ });
2558
+
2559
+ const node = new ToolNode({
2560
+ tools: [createSchemaStub('echo')],
2561
+ eventDrivenMode: true,
2562
+ agentId: 'agent-x',
2563
+ toolCallStepIds: new Map([['call_1', 'step_1']]),
2564
+ hookRegistry: registry,
2565
+ humanInTheLoop: { enabled: true },
2566
+ maxToolResultChars: 50,
2567
+ });
2568
+
2569
+ const graph = buildHITLGraph(node, [
2570
+ { id: 'call_1', name: 'echo', args: { command: 'x' } },
2571
+ ]);
2572
+ const config = { configurable: { thread_id: 'respond-truncate' } };
2573
+
2574
+ await graph.invoke({ messages: [] }, config);
2575
+
2576
+ /** 200-char response — well over the 50-char cap. */
2577
+ const oversized = 'A'.repeat(200);
2578
+ const resumed = (await graph.invoke(
2579
+ new Command({
2580
+ resume: [{ type: 'respond', responseText: oversized }],
2581
+ }),
2582
+ config
2583
+ )) as { messages: BaseMessage[] };
2584
+
2585
+ const toolMessages = resumed.messages.filter(
2586
+ (m): m is ToolMessage => m._getType() === 'tool'
2587
+ );
2588
+ expect(toolMessages).toHaveLength(1);
2589
+ /** The ToolMessage content is truncated; not the raw 200 chars. */
2590
+ const content = String(toolMessages[0].content);
2591
+ expect(content.length).toBeLessThan(oversized.length);
2592
+ /** And the PostToolBatch entry sees the SAME truncated value
2593
+ * — batch hooks observe what the model will actually see. */
2594
+ expect(captured).toBeDefined();
2595
+ expect(typeof captured!.toolOutput).toBe('string');
2596
+ expect(captured!.toolOutput).toBe(content);
2597
+ });
2598
+
2599
+ it('hook returning both ask + preventContinuation halts cleanly and clears session hooks', async () => {
2600
+ mockEventDispatch([]);
2601
+
2602
+ const registry = new HookRegistry();
2603
+ /** Session-scoped policy hook returns BOTH `ask` (which would
2604
+ * raise an interrupt) AND `preventContinuation: true` (which
2605
+ * raises a halt signal). The halt wins — no resume is expected,
2606
+ * sessions must clear. */
2607
+ const runId = 'ask-and-halt';
2608
+ registry.registerSession(runId, 'PreToolUse', {
2609
+ hooks: [
2610
+ async (): Promise<PreToolUseHookOutput> => ({
2611
+ decision: 'ask',
2612
+ preventContinuation: true,
2613
+ stopReason: 'policy halted ask',
2614
+ }),
2615
+ ],
2616
+ });
2617
+
2618
+ const node = new ToolNode({
2619
+ tools: [createSchemaStub('echo')],
2620
+ eventDrivenMode: true,
2621
+ agentId: 'agent-x',
2622
+ toolCallStepIds: new Map([['call_1', 'step_1']]),
2623
+ hookRegistry: registry,
2624
+ humanInTheLoop: { enabled: true },
2625
+ });
2626
+
2627
+ const builder = new StateGraph(MessagesAnnotation)
2628
+ .addNode(
2629
+ 'agent',
2630
+ (): MessagesUpdate => ({
2631
+ messages: [
2632
+ new AIMessage({
2633
+ content: '',
2634
+ tool_calls: [
2635
+ { id: 'call_1', name: 'echo', args: { command: 'x' } },
2636
+ ],
2637
+ }),
2638
+ ],
2639
+ })
2640
+ )
2641
+ .addNode('tools', node)
2642
+ .addEdge(START, 'agent')
2643
+ .addEdge('agent', 'tools')
2644
+ .addEdge('tools', END);
2645
+ const graph = builder.compile({ checkpointer: new MemorySaver() });
2646
+
2647
+ const { Run } = await import('@/run');
2648
+ const run = await Run.create<t.IState>({
2649
+ runId,
2650
+ graphConfig: {
2651
+ type: 'standard',
2652
+ agents: [
2653
+ {
2654
+ agentId: 'a',
2655
+ provider: providers.OPENAI,
2656
+ clientOptions: { modelName: 'gpt-4o-mini', apiKey: 'test-key' },
2657
+ instructions: 'noop',
2658
+ maxContextTokens: 8000,
2659
+ },
2660
+ ],
2661
+ },
2662
+ hooks: registry,
2663
+ humanInTheLoop: { enabled: true },
2664
+ });
2665
+ run.graphRunnable = graph as unknown as t.CompiledStateWorkflow;
2666
+
2667
+ await run.processStream(
2668
+ { messages: [] },
2669
+ {
2670
+ configurable: { thread_id: 'ask-and-halt-thread' },
2671
+ version: 'v2',
2672
+ }
2673
+ );
2674
+
2675
+ /** Both signals landed: interrupt was captured AND halt fired. */
2676
+ expect(run.getInterrupt()).toBeDefined();
2677
+ expect(run.getHaltReason()).toBe('policy halted ask');
2678
+ /** Session hooks MUST be cleared — no resume is expected on a
2679
+ * halted run, even one that also captured an interrupt. */
2680
+ expect(registry.hasHookFor('PreToolUse', runId)).toBe(false);
2681
+ });
2682
+
2683
+ it('preserves Graph sidecars across HITL interrupt + resume so tool completions keep their step ids', async () => {
2684
+ /**
2685
+ * Regression test for the cleanup-vs-resume bug: previously
2686
+ * `processStream` always called `Graph.clearHeavyState()` in its
2687
+ * `finally` block AND `Graph.resetValues()` on entry, even when
2688
+ * pausing on a HITL interrupt. That wiped `toolCallStepIds`,
2689
+ * `_toolOutputRegistry`, and `sessions` between pause and resume,
2690
+ * so the resumed `ToolNode` could no longer find the original
2691
+ * step id and dispatched `ON_RUN_STEP_COMPLETED` with an empty id
2692
+ * — the host's stream consumer would then drop the result.
2693
+ *
2694
+ * The fix is two gated cleanups:
2695
+ * - `clearHeavyState` skipped when `_interrupt != null && _haltedReason == null && !streamThrew`
2696
+ * - `resetValues` skipped when entering processStream via `Command` (resume)
2697
+ *
2698
+ * To exercise the SDK Graph's actual sidecar state (not a private
2699
+ * test ToolNode), this test wires the custom ToolNode to share
2700
+ * the SDK Graph's `toolCallStepIds` Map by reference. After the
2701
+ * interrupt fires AND after the resume completes, the
2702
+ * pre-populated entry must still be present.
2703
+ */
2704
+ const dispatchedStepIds: string[] = [];
2705
+ jest
2706
+ .spyOn(events, 'safeDispatchCustomEvent')
2707
+ .mockImplementation(async (event, data) => {
2708
+ if (event === GraphEvents.ON_RUN_STEP_COMPLETED) {
2709
+ const payload = data as { result?: { id?: string } };
2710
+ if (payload.result?.id != null) {
2711
+ dispatchedStepIds.push(payload.result.id);
2712
+ }
2713
+ return;
2714
+ }
2715
+ if (event !== 'on_tool_execute') {
2716
+ return;
2717
+ }
2718
+ const request = data as {
2719
+ toolCalls: t.ToolCallRequest[];
2720
+ resolve: (r: t.ToolExecuteResult[]) => void;
2721
+ };
2722
+ request.resolve(
2723
+ request.toolCalls.map((c) => ({
2724
+ toolCallId: c.id,
2725
+ content: 'host-result',
2726
+ status: 'success' as const,
2727
+ }))
2728
+ );
2729
+ });
2730
+
2731
+ const registry = new HookRegistry();
2732
+ registry.register('PreToolUse', {
2733
+ hooks: [
2734
+ async (): Promise<PreToolUseHookOutput> => ({
2735
+ decision: 'ask',
2736
+ reason: 'review',
2737
+ }),
2738
+ ],
2739
+ });
2740
+
2741
+ const { Run } = await import('@/run');
2742
+ const run = await Run.create<t.IState>({
2743
+ runId: 'sidecar-preserve',
2744
+ graphConfig: {
2745
+ type: 'standard',
2746
+ agents: [
2747
+ {
2748
+ agentId: 'a',
2749
+ provider: providers.OPENAI,
2750
+ clientOptions: { modelName: 'gpt-4o-mini', apiKey: 'test-key' },
2751
+ instructions: 'noop',
2752
+ maxContextTokens: 8000,
2753
+ },
2754
+ ],
2755
+ },
2756
+ hooks: registry,
2757
+ humanInTheLoop: { enabled: true },
2758
+ });
2759
+
2760
+ /** Wire the test ToolNode to share the SDK Graph's
2761
+ * `toolCallStepIds` Map by reference — this is how the real
2762
+ * StandardGraph builds its inner ToolNode at Graph.ts:587. */
2763
+ const toolNode = new ToolNode({
2764
+ tools: [createSchemaStub('echo')],
2765
+ eventDrivenMode: true,
2766
+ agentId: 'a',
2767
+ toolCallStepIds: run.Graph!.toolCallStepIds,
2768
+ hookRegistry: registry,
2769
+ humanInTheLoop: { enabled: true },
2770
+ });
2771
+
2772
+ /** The agent node simulates `attemptInvoke`'s sidecar-population
2773
+ * step: in a real run, the model invocation creates a run step
2774
+ * and writes its id into `toolCallStepIds` before tools dispatch.
2775
+ * Doing it here means the entry lands AFTER `processStream`'s
2776
+ * `resetValues` (which fires once on entry) and BEFORE the
2777
+ * ToolNode's hook + interrupt — exactly mirroring the production
2778
+ * timing the cleanup gate has to preserve. */
2779
+ const builder = new StateGraph(MessagesAnnotation)
2780
+ .addNode('agent', (): MessagesUpdate => {
2781
+ run.Graph!.toolCallStepIds.set('call_1', 'step_real_id');
2782
+ return {
2783
+ messages: [
2784
+ new AIMessage({
2785
+ content: '',
2786
+ tool_calls: [
2787
+ { id: 'call_1', name: 'echo', args: { command: 'x' } },
2788
+ ],
2789
+ }),
2790
+ ],
2791
+ };
2792
+ })
2793
+ .addNode('tools', toolNode)
2794
+ .addEdge(START, 'agent')
2795
+ .addEdge('agent', 'tools')
2796
+ .addEdge('tools', END);
2797
+ const graph = builder.compile({ checkpointer: new MemorySaver() });
2798
+ run.graphRunnable = graph as unknown as t.CompiledStateWorkflow;
2799
+
2800
+ const callerConfig = {
2801
+ configurable: { thread_id: 'sidecar-thread' },
2802
+ version: 'v2' as const,
2803
+ };
2804
+
2805
+ await run.processStream({ messages: [] }, callerConfig);
2806
+
2807
+ /** After interrupt: sidecar entry MUST still be present. Without
2808
+ * the fix, `clearHeavyState` in the `finally` block would have
2809
+ * wiped this map. */
2810
+ expect(run.getInterrupt()).toBeDefined();
2811
+ expect(run.Graph!.toolCallStepIds.has('call_1')).toBe(true);
2812
+ expect(run.Graph!.toolCallStepIds.get('call_1')).toBe('step_real_id');
2813
+
2814
+ /** Resume: without the resetValues gate, this would also wipe
2815
+ * the map at the START of the second processStream invocation. */
2816
+ await run.resume([{ type: 'approve' }], callerConfig);
2817
+
2818
+ /** After resume completes naturally: dispatch fired with the real
2819
+ * step id (not an empty string from a wiped map). Without either
2820
+ * fix, `dispatchedStepIds` would contain `''`. */
2821
+ expect(dispatchedStepIds).toContain('step_real_id');
2822
+ expect(dispatchedStepIds).not.toContain('');
2823
+ /** And clearHeavyState DID fire on the natural-completion side
2824
+ * — sidecar map is now empty after the resume settled. */
2825
+ expect(run.Graph!.toolCallStepIds.size).toBe(0);
2826
+ });
2827
+
2828
+ it('clears Graph sidecars on natural completion when no interrupt was raised', async () => {
2829
+ /** Negative case: when no interrupt fires, `clearHeavyState`
2830
+ * MUST run as before. This pins the gate so a future change
2831
+ * doesn't accidentally preserve sidecars on natural completion
2832
+ * (memory leak across runs). */
2833
+ mockEventDispatch([]);
2834
+
2835
+ const { Run } = await import('@/run');
2836
+ const run = await Run.create<t.IState>({
2837
+ runId: 'sidecar-clear-natural',
2838
+ graphConfig: {
2839
+ type: 'standard',
2840
+ agents: [
2841
+ {
2842
+ agentId: 'a',
2843
+ provider: providers.OPENAI,
2844
+ clientOptions: { modelName: 'gpt-4o-mini', apiKey: 'test-key' },
2845
+ instructions: 'noop',
2846
+ maxContextTokens: 8000,
2847
+ },
2848
+ ],
2849
+ },
2850
+ humanInTheLoop: { enabled: false },
2851
+ });
2852
+
2853
+ /** No-op graph — runs to completion without an interrupt. */
2854
+ const builder = new StateGraph(MessagesAnnotation)
2855
+ .addNode('noop', (): MessagesUpdate => ({ messages: [] }))
2856
+ .addEdge(START, 'noop')
2857
+ .addEdge('noop', END);
2858
+ const graph = builder.compile();
2859
+ run.graphRunnable = graph as unknown as t.CompiledStateWorkflow;
2860
+
2861
+ /** Stash an entry so we can verify clearHeavyState wiped it. */
2862
+ run.Graph!.toolCallStepIds.set('stale_call', 'stale_step');
2863
+
2864
+ await run.processStream(
2865
+ { messages: [] },
2866
+ {
2867
+ configurable: { thread_id: 'sidecar-clear-thread' },
2868
+ version: 'v2',
2869
+ }
2870
+ );
2871
+
2872
+ /** No interrupt → clearHeavyState ran → sidecar wiped. */
2873
+ expect(run.getInterrupt()).toBeUndefined();
2874
+ expect(run.Graph!.toolCallStepIds.size).toBe(0);
2875
+ });
2876
+
2877
+ it('clears session hooks when the stream throws AFTER an interrupt is captured (stale interrupt)', async () => {
2878
+ jest
2879
+ .spyOn(events, 'safeDispatchCustomEvent')
2880
+ .mockImplementation(async () => {
2881
+ return;
2882
+ });
2883
+
2884
+ const registry = new HookRegistry();
2885
+ const runId = 'stream-error-after-interrupt';
2886
+ registry.registerSession(runId, 'PreToolUse', {
2887
+ hooks: [
2888
+ async (): Promise<PreToolUseHookOutput> => ({
2889
+ decision: 'ask',
2890
+ reason: 'session policy',
2891
+ }),
2892
+ ],
2893
+ });
2894
+
2895
+ const node = new ToolNode({
2896
+ tools: [createSchemaStub('echo')],
2897
+ eventDrivenMode: true,
2898
+ agentId: 'agent-x',
2899
+ toolCallStepIds: new Map([['call_1', 'step_call_1']]),
2900
+ hookRegistry: registry,
2901
+ humanInTheLoop: { enabled: true },
2902
+ });
2903
+
2904
+ const builder = new StateGraph(MessagesAnnotation)
2905
+ .addNode(
2906
+ 'agent',
2907
+ (): MessagesUpdate => ({
2908
+ messages: [
2909
+ new AIMessage({
2910
+ content: '',
2911
+ tool_calls: [
2912
+ { id: 'call_1', name: 'echo', args: { command: 'x' } },
2913
+ ],
2914
+ }),
2915
+ ],
2916
+ })
2917
+ )
2918
+ .addNode('tools', node)
2919
+ .addEdge(START, 'agent')
2920
+ .addEdge('agent', 'tools')
2921
+ .addEdge('tools', END);
2922
+ const graph = builder.compile({ checkpointer: new MemorySaver() });
2923
+
2924
+ const { Run } = await import('@/run');
2925
+ /**
2926
+ * Holder for forward-referencing the run inside the sentinel
2927
+ * handler closure. The handler is constructed before `Run.create`
2928
+ * runs (it's passed into `customHandlers`) but needs to read
2929
+ * `run.getInterrupt()` at firing time.
2930
+ */
2931
+ const holder: {
2932
+ run: Awaited<ReturnType<typeof Run.create<t.IState>>> | undefined;
2933
+ } = { run: undefined };
2934
+
2935
+ /**
2936
+ * Handler keyed to a chain-stream event that throws ONLY after the
2937
+ * interrupt has been captured. The stream loop captures the
2938
+ * interrupt on the chunk that carries `__interrupt__`, then
2939
+ * dispatches to handlers in the same iteration — so the throw
2940
+ * exits the loop with `_interrupt != null`. Without the
2941
+ * `streamThrew` guard, the `finally` block would preserve session
2942
+ * hooks on this stale interrupt.
2943
+ */
2944
+ const sentinelHandler = {
2945
+ handle: async (): Promise<void> => {
2946
+ if (holder.run?.getInterrupt() != null) {
2947
+ throw new Error('post-interrupt handler failure');
2948
+ }
2949
+ },
2950
+ };
2951
+
2952
+ holder.run = await Run.create<t.IState>({
2953
+ runId,
2954
+ graphConfig: {
2955
+ type: 'standard',
2956
+ agents: [
2957
+ {
2958
+ agentId: 'a',
2959
+ provider: providers.OPENAI,
2960
+ clientOptions: { modelName: 'gpt-4o-mini', apiKey: 'test-key' },
2961
+ instructions: 'noop',
2962
+ maxContextTokens: 8000,
2963
+ },
2964
+ ],
2965
+ },
2966
+ hooks: registry,
2967
+ humanInTheLoop: { enabled: true },
2968
+ customHandlers: {
2969
+ [GraphEvents.CHAIN_STREAM]: sentinelHandler,
2970
+ [GraphEvents.CHAIN_END]: sentinelHandler,
2971
+ },
2972
+ });
2973
+ holder.run.graphRunnable = graph as unknown as t.CompiledStateWorkflow;
2974
+
2975
+ const callerConfig = {
2976
+ configurable: { thread_id: 'stale-interrupt-thread' },
2977
+ version: 'v2' as const,
2978
+ };
2979
+
2980
+ await expect(
2981
+ holder.run.processStream({ messages: [] }, callerConfig)
2982
+ ).rejects.toThrow('post-interrupt handler failure');
2983
+
2984
+ /** Interrupt WAS captured on the run instance, but because the
2985
+ * stream subsequently threw, session hooks must be cleared so the
2986
+ * next run on this registry isn't poisoned by stale state. */
2987
+ expect(holder.run.getInterrupt()).toBeDefined();
2988
+ expect(registry.hasHookFor('PreToolUse', runId)).toBe(false);
2989
+ });
2990
+
2991
+ it('mixed deny/ask/allow batch: deny short-circuits, allow runs immediately, ask interrupts; resume completes the asked tool', async () => {
2992
+ const dispatchedToolNames: string[] = [];
2993
+ jest
2994
+ .spyOn(events, 'safeDispatchCustomEvent')
2995
+ .mockImplementation(async (event, data) => {
2996
+ if (event !== 'on_tool_execute') {
2997
+ return;
2998
+ }
2999
+ const request = data as {
3000
+ toolCalls: t.ToolCallRequest[];
3001
+ resolve: (r: t.ToolExecuteResult[]) => void;
3002
+ };
3003
+ for (const c of request.toolCalls) {
3004
+ dispatchedToolNames.push(c.name);
3005
+ }
3006
+ request.resolve(
3007
+ request.toolCalls.map((c) => ({
3008
+ toolCallId: c.id,
3009
+ content: `ran:${c.name}`,
3010
+ status: 'success' as const,
3011
+ }))
3012
+ );
3013
+ });
3014
+
3015
+ /**
3016
+ * Per-tool policy hook: tool_a denied, tool_b asks, tool_c allowed.
3017
+ * The hook is registered without a pattern so it fires once per
3018
+ * tool call and dispatches by tool name.
3019
+ */
3020
+ const registry = new HookRegistry();
3021
+ registry.register('PreToolUse', {
3022
+ hooks: [
3023
+ async (input): Promise<PreToolUseHookOutput> => {
3024
+ if (input.toolName === 'tool_a') {
3025
+ return { decision: 'deny', reason: 'policy:a' };
3026
+ }
3027
+ if (input.toolName === 'tool_b') {
3028
+ return { decision: 'ask', reason: 'policy:b-needs-review' };
3029
+ }
3030
+ return { decision: 'allow' };
3031
+ },
3032
+ ],
3033
+ });
3034
+ /**
3035
+ * Listen on PostToolBatch to verify the batch entry shape after
3036
+ * resume reflects the final outcomes (deny + run + run), not
3037
+ * stale state from the first pass.
3038
+ */
3039
+ const batchSnapshots: PostToolBatchEntry[][] = [];
3040
+ registry.register('PostToolBatch', {
3041
+ hooks: [
3042
+ async (input): Promise<PostToolBatchHookOutput> => {
3043
+ batchSnapshots.push(
3044
+ (input as PostToolBatchHookInput).entries.map((e) => ({ ...e }))
3045
+ );
3046
+ return {};
3047
+ },
3048
+ ],
3049
+ });
3050
+
3051
+ const node = new ToolNode({
3052
+ tools: [
3053
+ createSchemaStub('tool_a'),
3054
+ createSchemaStub('tool_b'),
3055
+ createSchemaStub('tool_c'),
3056
+ ],
3057
+ eventDrivenMode: true,
3058
+ agentId: 'agent-x',
3059
+ toolCallStepIds: new Map([
3060
+ ['call_a', 'step_a'],
3061
+ ['call_b', 'step_b'],
3062
+ ['call_c', 'step_c'],
3063
+ ]),
3064
+ hookRegistry: registry,
3065
+ humanInTheLoop: { enabled: true },
3066
+ });
3067
+
3068
+ const graph = buildHITLGraph(node, [
3069
+ { id: 'call_a', name: 'tool_a', args: { command: 'a' } },
3070
+ { id: 'call_b', name: 'tool_b', args: { command: 'b' } },
3071
+ { id: 'call_c', name: 'tool_c', args: { command: 'c' } },
3072
+ ]);
3073
+ const config = { configurable: { thread_id: 'mixed-thread' } };
3074
+
3075
+ const interrupted = await graph.invoke({ messages: [] }, config);
3076
+ if (!isInterrupted<t.HumanInterruptPayload>(interrupted)) {
3077
+ throw new Error('expected interrupt');
3078
+ }
3079
+ const payload = interrupted.__interrupt__[0].value!;
3080
+ if (payload.type !== 'tool_approval') {
3081
+ throw new Error('expected tool_approval payload');
3082
+ }
3083
+ /** Only tool_b appears in the interrupt — deny short-circuited
3084
+ * locally, allow was queued for dispatch but never reached it
3085
+ * because `interrupt()` threw inside the same node first. LangGraph
3086
+ * rolls back the entire node's effects on throw, so no host event
3087
+ * fires for any tool until after resume. This is the safe
3088
+ * semantic: partial execution while a human is being asked would
3089
+ * leak side effects ahead of approval. */
3090
+ expect(payload.action_requests).toHaveLength(1);
3091
+ expect(payload.action_requests[0].tool_call_id).toBe('call_b');
3092
+ expect(dispatchedToolNames).toEqual([]);
3093
+
3094
+ const resumed = (await graph.invoke(
3095
+ new Command({ resume: [{ type: 'approve' }] }),
3096
+ config
3097
+ )) as { messages: BaseMessage[] };
3098
+
3099
+ /**
3100
+ * After resume, all three tools have ToolMessages: tool_a blocked
3101
+ * (deny), tool_b ran (host approved), tool_c ran (allow). The
3102
+ * ToolNode re-executed from scratch, so both tool_b and tool_c
3103
+ * dispatch in this pass.
3104
+ */
3105
+ const toolMessages = resumed.messages.filter(
3106
+ (m): m is ToolMessage => m._getType() === 'tool'
3107
+ );
3108
+ expect(toolMessages).toHaveLength(3);
3109
+ const byId = new Map(toolMessages.map((m) => [m.tool_call_id, m]));
3110
+ expect(byId.get('call_a')!.status).toBe('error');
3111
+ expect(String(byId.get('call_a')!.content)).toContain('policy:a');
3112
+ expect(byId.get('call_b')!.status).not.toBe('error');
3113
+ expect(byId.get('call_b')!.content).toBe('ran:tool_b');
3114
+ expect(byId.get('call_c')!.status).not.toBe('error');
3115
+ expect(byId.get('call_c')!.content).toBe('ran:tool_c');
3116
+ /** Both approved tools dispatched on resume; tool_a (deny) never did. */
3117
+ expect(new Set(dispatchedToolNames)).toEqual(new Set(['tool_b', 'tool_c']));
3118
+ expect(dispatchedToolNames).not.toContain('tool_a');
3119
+
3120
+ /**
3121
+ * PostToolBatch is dispatched at the bottom of `dispatchToolEvents`,
3122
+ * after tool execution. On the FIRST pass `interrupt()` throws
3123
+ * before reaching that line, so PostToolBatch does NOT fire for
3124
+ * the interrupted pass. Only the resume pass yields a snapshot —
3125
+ * carrying all three entries with their final outcomes (tool_a
3126
+ * blocked by deny, tool_b approved + ran, tool_c approved + ran).
3127
+ */
3128
+ expect(batchSnapshots).toHaveLength(1);
3129
+ const finalSnapshot = batchSnapshots[0];
3130
+ /**
3131
+ * Order assertion: entries must match the original toolCalls
3132
+ * sequence (`call_a`, `call_b`, `call_c`) regardless of when each
3133
+ * outcome was recorded — `call_a` was denied synchronously in the
3134
+ * hook loop, `call_b` was approved through the resume branch,
3135
+ * `call_c` was approved+executed via the host event path. Hooks
3136
+ * correlating outcomes by position (per the API doc) depend on
3137
+ * this stability.
3138
+ */
3139
+ expect(finalSnapshot.map((e) => e.toolUseId)).toEqual([
3140
+ 'call_a',
3141
+ 'call_b',
3142
+ 'call_c',
3143
+ ]);
3144
+ const byCallId = new Map(finalSnapshot.map((e) => [e.toolUseId, e]));
3145
+ expect(byCallId.size).toBe(3);
3146
+ expect(byCallId.get('call_a')!.status).toBe('error');
3147
+ expect(byCallId.get('call_a')!.error).toContain('policy:a');
3148
+ expect(byCallId.get('call_b')!.status).toBe('success');
3149
+ expect(byCallId.get('call_b')!.toolOutput).toBe('ran:tool_b');
3150
+ expect(byCallId.get('call_c')!.status).toBe('success');
3151
+ expect(byCallId.get('call_c')!.toolOutput).toBe('ran:tool_c');
3152
+ });
3153
+
3154
+ it('mixed respond + reject in the same resume: dispatches once each, batch entries in toolCalls order', async () => {
3155
+ const stepCompletedDispatches: string[] = [];
3156
+ jest
3157
+ .spyOn(events, 'safeDispatchCustomEvent')
3158
+ .mockImplementation(async (event, data) => {
3159
+ if (event === GraphEvents.ON_RUN_STEP_COMPLETED) {
3160
+ const payload = data as {
3161
+ result?: { tool_call?: { id?: string } };
3162
+ };
3163
+ const id = payload.result?.tool_call?.id;
3164
+ if (id != null) {
3165
+ stepCompletedDispatches.push(id);
3166
+ }
3167
+ return;
3168
+ }
3169
+ if (event !== 'on_tool_execute') {
3170
+ return;
3171
+ }
3172
+ const request = data as {
3173
+ toolCalls: t.ToolCallRequest[];
3174
+ resolve: (r: t.ToolExecuteResult[]) => void;
3175
+ };
3176
+ request.resolve([]);
3177
+ });
3178
+
3179
+ const registry = new HookRegistry();
3180
+ /** Both tools `ask`; the resume picks `respond` for one and
3181
+ * `reject` for the other. Exercises the timing interaction
3182
+ * between respond's immediate dispatch and reject's deferred
3183
+ * flush in the same resume pass. */
3184
+ registry.register('PreToolUse', {
3185
+ hooks: [
3186
+ async (): Promise<PreToolUseHookOutput> => ({
3187
+ decision: 'ask',
3188
+ reason: 'review',
3189
+ }),
3190
+ ],
3191
+ });
3192
+ const batchSnapshots: PostToolBatchEntry[][] = [];
3193
+ registry.register('PostToolBatch', {
3194
+ hooks: [
3195
+ async (input): Promise<PostToolBatchHookOutput> => {
3196
+ batchSnapshots.push(
3197
+ (input as PostToolBatchHookInput).entries.map((e) => ({ ...e }))
3198
+ );
3199
+ return {};
3200
+ },
3201
+ ],
3202
+ });
3203
+
3204
+ const node = new ToolNode({
3205
+ tools: [
3206
+ createSchemaStub('respond_tool'),
3207
+ createSchemaStub('reject_tool'),
3208
+ ],
3209
+ eventDrivenMode: true,
3210
+ agentId: 'agent-x',
3211
+ toolCallStepIds: new Map([
3212
+ ['call_respond', 'step_respond'],
3213
+ ['call_reject', 'step_reject'],
3214
+ ]),
3215
+ hookRegistry: registry,
3216
+ humanInTheLoop: { enabled: true },
3217
+ });
3218
+
3219
+ const graph = buildHITLGraph(node, [
3220
+ { id: 'call_respond', name: 'respond_tool', args: { command: 'r' } },
3221
+ { id: 'call_reject', name: 'reject_tool', args: { command: 'j' } },
3222
+ ]);
3223
+ const config = { configurable: { thread_id: 'mixed-respond-reject' } };
3224
+
3225
+ await graph.invoke({ messages: [] }, config);
3226
+ /** First pass: interrupt fires before either dispatch path runs. */
3227
+ expect(stepCompletedDispatches).toEqual([]);
3228
+
3229
+ const resumed = (await graph.invoke(
3230
+ new Command({
3231
+ resume: [
3232
+ { type: 'respond', responseText: 'fake answer' },
3233
+ { type: 'reject', reason: 'no thanks' },
3234
+ ],
3235
+ }),
3236
+ config
3237
+ )) as { messages: BaseMessage[] };
3238
+
3239
+ /** Each tool dispatched ON_RUN_STEP_COMPLETED exactly once on
3240
+ * resume — respond via its immediate path, reject via the
3241
+ * deferred flush. */
3242
+ expect(
3243
+ stepCompletedDispatches.filter((id) => id === 'call_respond')
3244
+ ).toEqual(['call_respond']);
3245
+ expect(
3246
+ stepCompletedDispatches.filter((id) => id === 'call_reject')
3247
+ ).toEqual(['call_reject']);
3248
+
3249
+ /** PostToolBatch fires once on the resume pass, with entries in
3250
+ * the original toolCalls order (respond first, reject second)
3251
+ * regardless of which dispatch path landed first into the Map. */
3252
+ expect(batchSnapshots).toHaveLength(1);
3253
+ expect(batchSnapshots[0].map((e) => e.toolUseId)).toEqual([
3254
+ 'call_respond',
3255
+ 'call_reject',
3256
+ ]);
3257
+ expect(batchSnapshots[0][0].status).toBe('success');
3258
+ expect(batchSnapshots[0][0].toolOutput).toBe('fake answer');
3259
+ expect(batchSnapshots[0][1].status).toBe('error');
3260
+ expect(String(batchSnapshots[0][1].error)).toContain('no thanks');
3261
+
3262
+ /** ToolMessage state matches: success with response text, error with reason. */
3263
+ const toolMessages = resumed.messages.filter(
3264
+ (m): m is ToolMessage => m._getType() === 'tool'
3265
+ );
3266
+ expect(toolMessages).toHaveLength(2);
3267
+ const byId = new Map(toolMessages.map((m) => [m.tool_call_id, m]));
3268
+ expect(byId.get('call_respond')!.status).not.toBe('error');
3269
+ expect(byId.get('call_respond')!.content).toBe('fake answer');
3270
+ expect(byId.get('call_reject')!.status).toBe('error');
3271
+ expect(String(byId.get('call_reject')!.content)).toContain('no thanks');
3272
+ });
3273
+
3274
+ it('PostToolBatch entries preserve toolCalls order even when first call is denied and second is approved', async () => {
3275
+ jest
3276
+ .spyOn(events, 'safeDispatchCustomEvent')
3277
+ .mockImplementation(async (event, data) => {
3278
+ if (event !== 'on_tool_execute') {
3279
+ return;
3280
+ }
3281
+ const request = data as {
3282
+ toolCalls: t.ToolCallRequest[];
3283
+ resolve: (r: t.ToolExecuteResult[]) => void;
3284
+ };
3285
+ request.resolve(
3286
+ request.toolCalls.map((c) => ({
3287
+ toolCallId: c.id,
3288
+ content: `ran:${c.name}`,
3289
+ status: 'success' as const,
3290
+ }))
3291
+ );
3292
+ });
3293
+
3294
+ /**
3295
+ * Two different orderings to verify the asserted order really
3296
+ * tracks the input — not just incidental ordering from one path
3297
+ * landing first.
3298
+ */
3299
+ const cases: Array<{
3300
+ thread: string;
3301
+ input: Array<{ id: string; name: string; args: Record<string, unknown> }>;
3302
+ expected: string[];
3303
+ }> = [
3304
+ {
3305
+ thread: 'order-deny-first',
3306
+ input: [
3307
+ { id: 'call_first', name: 'denied_tool', args: { command: 'a' } },
3308
+ { id: 'call_second', name: 'allowed_tool', args: { command: 'b' } },
3309
+ ],
3310
+ expected: ['call_first', 'call_second'],
3311
+ },
3312
+ {
3313
+ thread: 'order-approve-first',
3314
+ input: [
3315
+ { id: 'call_first', name: 'allowed_tool', args: { command: 'a' } },
3316
+ { id: 'call_second', name: 'denied_tool', args: { command: 'b' } },
3317
+ ],
3318
+ expected: ['call_first', 'call_second'],
3319
+ },
3320
+ ];
3321
+
3322
+ for (const { thread, input, expected } of cases) {
3323
+ const registry = new HookRegistry();
3324
+ registry.register('PreToolUse', {
3325
+ hooks: [
3326
+ async (hookInput): Promise<PreToolUseHookOutput> => {
3327
+ if (hookInput.toolName === 'denied_tool') {
3328
+ return { decision: 'deny', reason: 'no' };
3329
+ }
3330
+ return { decision: 'allow' };
3331
+ },
3332
+ ],
3333
+ });
3334
+ const captured: PostToolBatchEntry[] = [];
3335
+ registry.register('PostToolBatch', {
3336
+ hooks: [
3337
+ async (i): Promise<PostToolBatchHookOutput> => {
3338
+ captured.push(...(i as PostToolBatchHookInput).entries);
3339
+ return {};
3340
+ },
3341
+ ],
3342
+ });
3343
+
3344
+ const node = new ToolNode({
3345
+ tools: [
3346
+ createSchemaStub('denied_tool'),
3347
+ createSchemaStub('allowed_tool'),
3348
+ ],
3349
+ eventDrivenMode: true,
3350
+ agentId: 'agent-x',
3351
+ toolCallStepIds: new Map(input.map((c) => [c.id, `step_${c.id}`])),
3352
+ hookRegistry: registry,
3353
+ humanInTheLoop: { enabled: false },
3354
+ });
3355
+
3356
+ const graph = buildHITLGraph(node, input);
3357
+ await graph.invoke(
3358
+ { messages: [] },
3359
+ { configurable: { thread_id: thread } }
3360
+ );
3361
+
3362
+ expect(captured.map((e) => e.toolUseId)).toEqual(expected);
3363
+ }
3364
+ });
3365
+
3366
+ it('fails closed when the host resume payload carries an unknown decision type', async () => {
3367
+ /** Spy MUST be reachable inside Promise.resolve handlers — must not run after mock is restored. */
3368
+ let dispatchCalls = 0;
3369
+ jest
3370
+ .spyOn(events, 'safeDispatchCustomEvent')
3371
+ .mockImplementation(async (event, data) => {
3372
+ if (event !== 'on_tool_execute') {
3373
+ return;
3374
+ }
3375
+ dispatchCalls += 1;
3376
+ const request = data as {
3377
+ toolCalls: t.ToolCallRequest[];
3378
+ resolve: (r: t.ToolExecuteResult[]) => void;
3379
+ };
3380
+ request.resolve([]);
3381
+ });
3382
+
3383
+ const node = new ToolNode({
3384
+ tools: [createSchemaStub('echo')],
3385
+ eventDrivenMode: true,
3386
+ agentId: 'agent-x',
3387
+ toolCallStepIds: new Map([['call_1', 'step_call_1']]),
3388
+ hookRegistry: makeHookRegistry('ask'),
3389
+ humanInTheLoop: { enabled: true },
3390
+ });
3391
+
3392
+ const graph = buildHITLGraph(node, [
3393
+ { id: 'call_1', name: 'echo', args: { command: 'sensitive' } },
3394
+ ]);
3395
+ const config = { configurable: { thread_id: 'unknown-decision' } };
3396
+
3397
+ await graph.invoke({ messages: [] }, config);
3398
+
3399
+ /** Host sends a typo'd / malformed decision. Must NOT silently approve. */
3400
+ const resumed = (await graph.invoke(
3401
+ new Command({
3402
+ resume: [{ type: 'aproved' as 'approve' }],
3403
+ }),
3404
+ config
3405
+ )) as { messages: BaseMessage[] };
3406
+
3407
+ const toolMessages = resumed.messages.filter(
3408
+ (m): m is ToolMessage => m._getType() === 'tool'
3409
+ );
3410
+ expect(toolMessages).toHaveLength(1);
3411
+ expect(toolMessages[0].status).toBe('error');
3412
+ expect(String(toolMessages[0].content)).toContain(
3413
+ 'Unknown approval decision type'
3414
+ );
3415
+ /** Tool was never dispatched — fail-closed worked. */
3416
+ expect(dispatchCalls).toBe(0);
3417
+ });
3418
+
3419
+ it('PostToolBatch entry sees the PostToolUse-rewritten output, not the original', async () => {
3420
+ mockEventDispatch([
3421
+ { toolCallId: 'call_1', content: 'raw-secret-1234', status: 'success' },
3422
+ ]);
3423
+
3424
+ const registry = new HookRegistry();
3425
+ /** PostToolUse redacts the output before the model sees it. */
3426
+ registry.register('PostToolUse', {
3427
+ hooks: [
3428
+ async (): Promise<PostToolUseHookOutput> => ({
3429
+ updatedOutput: 'raw-secret-[REDACTED]',
3430
+ }),
3431
+ ],
3432
+ });
3433
+ let batchEntries: PostToolBatchEntry[] | undefined;
3434
+ registry.register('PostToolBatch', {
3435
+ hooks: [
3436
+ async (input): Promise<PostToolBatchHookOutput> => {
3437
+ batchEntries = (input as PostToolBatchHookInput).entries;
3438
+ return {};
3439
+ },
3440
+ ],
3441
+ });
3442
+
3443
+ const node = new ToolNode({
3444
+ tools: [createSchemaStub('echo')],
3445
+ eventDrivenMode: true,
3446
+ agentId: 'agent-x',
3447
+ toolCallStepIds: new Map([['call_1', 'step_1']]),
3448
+ hookRegistry: registry,
3449
+ humanInTheLoop: { enabled: false },
3450
+ });
3451
+
3452
+ const graph = buildHITLGraph(node, [
3453
+ { id: 'call_1', name: 'echo', args: { command: 'fetch' } },
3454
+ ]);
3455
+ await graph.invoke(
3456
+ { messages: [] },
3457
+ { configurable: { thread_id: 'batch-rewrite' } }
3458
+ );
3459
+
3460
+ expect(batchEntries).toBeDefined();
3461
+ expect(batchEntries).toHaveLength(1);
3462
+ /** Batch hook sees the redacted value, not the raw secret. */
3463
+ expect(batchEntries![0].toolOutput).toBe('raw-secret-[REDACTED]');
3464
+ expect(batchEntries![0].toolOutput).not.toContain('raw-secret-1234');
3465
+ });
3466
+
3467
+ it('PostToolUseFailure additionalContext is injected for the next model turn', async () => {
3468
+ /** Force the host event dispatch to return an error so the failure path runs. */
3469
+ jest
3470
+ .spyOn(events, 'safeDispatchCustomEvent')
3471
+ .mockImplementation(async (event, data) => {
3472
+ if (event !== 'on_tool_execute') {
3473
+ return;
3474
+ }
3475
+ const request = data as {
3476
+ toolCalls: t.ToolCallRequest[];
3477
+ resolve: (r: t.ToolExecuteResult[]) => void;
3478
+ };
3479
+ request.resolve([
3480
+ {
3481
+ toolCallId: 'call_1',
3482
+ content: '',
3483
+ status: 'error',
3484
+ errorMessage: 'network timeout',
3485
+ },
3486
+ ]);
3487
+ });
3488
+
3489
+ const registry = new HookRegistry();
3490
+ registry.register('PostToolUseFailure', {
3491
+ hooks: [
3492
+ async (): Promise<PostToolUseFailureHookOutput> => ({
3493
+ additionalContext:
3494
+ 'Tool failed — suggest the user retry with a smaller batch size',
3495
+ }),
3496
+ ],
3497
+ });
3498
+
3499
+ const node = new ToolNode({
3500
+ tools: [createSchemaStub('echo')],
3501
+ eventDrivenMode: true,
3502
+ agentId: 'agent-x',
3503
+ toolCallStepIds: new Map([['call_1', 'step_1']]),
3504
+ hookRegistry: registry,
3505
+ humanInTheLoop: { enabled: false },
3506
+ });
3507
+
3508
+ const graph = buildHITLGraph(node, [
3509
+ { id: 'call_1', name: 'echo', args: { command: 'fetch' } },
3510
+ ]);
3511
+ const result = (await graph.invoke(
3512
+ { messages: [] },
3513
+ { configurable: { thread_id: 'failure-ctx' } }
3514
+ )) as { messages: BaseMessage[] };
3515
+
3516
+ const injected = result.messages.find(
3517
+ (m) =>
3518
+ m._getType() === 'human' &&
3519
+ (m as { additional_kwargs?: { source?: string } }).additional_kwargs
3520
+ ?.source === 'hook'
3521
+ );
3522
+ expect(injected).toBeDefined();
3523
+ expect(String(injected!.content)).toContain(
3524
+ 'suggest the user retry with a smaller batch size'
3525
+ );
3526
+ });
3527
+ });
3528
+
3529
+ describe('AskUserQuestion — interrupt + resume', () => {
3530
+ afterEach(() => {
3531
+ jest.restoreAllMocks();
3532
+ });
3533
+
3534
+ it('a node calling askUserQuestion() raises an ask_user_question interrupt and resumes with the answer', async () => {
3535
+ const { askUserQuestion } = await import('@/hitl');
3536
+
3537
+ let resumedAnswer: string | undefined;
3538
+
3539
+ const builder = new StateGraph(MessagesAnnotation)
3540
+ .addNode('clarifier', () => {
3541
+ const resolution = askUserQuestion({
3542
+ question: 'Which environment?',
3543
+ options: [
3544
+ { label: 'Staging', value: 'staging' },
3545
+ { label: 'Production', value: 'production' },
3546
+ ],
3547
+ });
3548
+ resumedAnswer = resolution.answer;
3549
+ return { messages: [] };
3550
+ })
3551
+ .addEdge(START, 'clarifier')
3552
+ .addEdge('clarifier', END);
3553
+ const graph = builder.compile({ checkpointer: new MemorySaver() });
3554
+
3555
+ const config = { configurable: { thread_id: 'ask-q-thread' } };
3556
+
3557
+ const interrupted = (await graph.invoke({ messages: [] }, config)) as {
3558
+ __interrupt__?: Array<{ value?: t.HumanInterruptPayload }>;
3559
+ };
3560
+ expect(interrupted.__interrupt__).toBeDefined();
3561
+ const payload = interrupted.__interrupt__![0].value!;
3562
+ if (payload.type !== 'ask_user_question') {
3563
+ throw new Error('expected ask_user_question');
3564
+ }
3565
+ expect(payload.question.question).toBe('Which environment?');
3566
+ expect(payload.question.options).toHaveLength(2);
3567
+
3568
+ const resolution: t.AskUserQuestionResolution = { answer: 'production' };
3569
+ await graph.invoke(new Command({ resume: resolution }), config);
3570
+
3571
+ expect(resumedAnswer).toBe('production');
3572
+ });
3573
+
3574
+ it('isAskUserQuestionInterrupt narrows the payload union correctly', async () => {
3575
+ const { isAskUserQuestionInterrupt, isToolApprovalInterrupt } =
3576
+ await import('@/types/hitl');
3577
+
3578
+ const askPayload: t.HumanInterruptPayload = {
3579
+ type: 'ask_user_question',
3580
+ question: { question: 'why?' },
3581
+ };
3582
+ const approvalPayload: t.HumanInterruptPayload = {
3583
+ type: 'tool_approval',
3584
+ action_requests: [],
3585
+ review_configs: [],
3586
+ };
3587
+
3588
+ expect(isAskUserQuestionInterrupt(askPayload)).toBe(true);
3589
+ expect(isAskUserQuestionInterrupt(approvalPayload)).toBe(false);
3590
+ expect(isToolApprovalInterrupt(approvalPayload)).toBe(true);
3591
+ expect(isToolApprovalInterrupt(askPayload)).toBe(false);
3592
+ });
3593
+ });