@librechat/agents 3.1.75 → 3.1.77-dev.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/graphs/Graph.cjs +22 -3
- package/dist/cjs/graphs/Graph.cjs.map +1 -1
- package/dist/cjs/hitl/askUserQuestion.cjs +67 -0
- package/dist/cjs/hitl/askUserQuestion.cjs.map +1 -0
- package/dist/cjs/hooks/HookRegistry.cjs +54 -0
- package/dist/cjs/hooks/HookRegistry.cjs.map +1 -1
- package/dist/cjs/hooks/createToolPolicyHook.cjs +115 -0
- package/dist/cjs/hooks/createToolPolicyHook.cjs.map +1 -0
- package/dist/cjs/hooks/executeHooks.cjs +40 -1
- package/dist/cjs/hooks/executeHooks.cjs.map +1 -1
- package/dist/cjs/hooks/types.cjs +1 -0
- package/dist/cjs/hooks/types.cjs.map +1 -1
- package/dist/cjs/langchain/google-common.cjs +3 -0
- package/dist/cjs/langchain/google-common.cjs.map +1 -0
- package/dist/cjs/langchain/index.cjs +86 -0
- package/dist/cjs/langchain/index.cjs.map +1 -0
- package/dist/cjs/langchain/language_models/chat_models.cjs +3 -0
- package/dist/cjs/langchain/language_models/chat_models.cjs.map +1 -0
- package/dist/cjs/langchain/messages/tool.cjs +3 -0
- package/dist/cjs/langchain/messages/tool.cjs.map +1 -0
- package/dist/cjs/langchain/messages.cjs +51 -0
- package/dist/cjs/langchain/messages.cjs.map +1 -0
- package/dist/cjs/langchain/openai.cjs +3 -0
- package/dist/cjs/langchain/openai.cjs.map +1 -0
- package/dist/cjs/langchain/prompts.cjs +11 -0
- package/dist/cjs/langchain/prompts.cjs.map +1 -0
- package/dist/cjs/langchain/runnables.cjs +19 -0
- package/dist/cjs/langchain/runnables.cjs.map +1 -0
- package/dist/cjs/langchain/tools.cjs +23 -0
- package/dist/cjs/langchain/tools.cjs.map +1 -0
- package/dist/cjs/langchain/utils/env.cjs +11 -0
- package/dist/cjs/langchain/utils/env.cjs.map +1 -0
- package/dist/cjs/llm/anthropic/index.cjs +145 -52
- package/dist/cjs/llm/anthropic/index.cjs.map +1 -1
- package/dist/cjs/llm/anthropic/types.cjs.map +1 -1
- package/dist/cjs/llm/anthropic/utils/message_inputs.cjs +21 -14
- package/dist/cjs/llm/anthropic/utils/message_inputs.cjs.map +1 -1
- package/dist/cjs/llm/anthropic/utils/message_outputs.cjs +84 -70
- package/dist/cjs/llm/anthropic/utils/message_outputs.cjs.map +1 -1
- package/dist/cjs/llm/bedrock/index.cjs +1 -1
- package/dist/cjs/llm/bedrock/index.cjs.map +1 -1
- package/dist/cjs/llm/bedrock/utils/message_inputs.cjs +213 -3
- package/dist/cjs/llm/bedrock/utils/message_inputs.cjs.map +1 -1
- package/dist/cjs/llm/bedrock/utils/message_outputs.cjs +2 -1
- package/dist/cjs/llm/bedrock/utils/message_outputs.cjs.map +1 -1
- package/dist/cjs/llm/google/utils/common.cjs +5 -4
- package/dist/cjs/llm/google/utils/common.cjs.map +1 -1
- package/dist/cjs/llm/openai/index.cjs +519 -655
- package/dist/cjs/llm/openai/index.cjs.map +1 -1
- package/dist/cjs/llm/openai/utils/index.cjs +20 -458
- package/dist/cjs/llm/openai/utils/index.cjs.map +1 -1
- package/dist/cjs/llm/openrouter/index.cjs +57 -175
- package/dist/cjs/llm/openrouter/index.cjs.map +1 -1
- package/dist/cjs/llm/vertexai/index.cjs +5 -3
- package/dist/cjs/llm/vertexai/index.cjs.map +1 -1
- package/dist/cjs/main.cjs +112 -3
- package/dist/cjs/main.cjs.map +1 -1
- package/dist/cjs/messages/cache.cjs +2 -1
- package/dist/cjs/messages/cache.cjs.map +1 -1
- package/dist/cjs/messages/core.cjs +7 -6
- package/dist/cjs/messages/core.cjs.map +1 -1
- package/dist/cjs/messages/format.cjs +73 -15
- package/dist/cjs/messages/format.cjs.map +1 -1
- package/dist/cjs/messages/langchain.cjs +26 -0
- package/dist/cjs/messages/langchain.cjs.map +1 -0
- package/dist/cjs/messages/prune.cjs +7 -6
- package/dist/cjs/messages/prune.cjs.map +1 -1
- package/dist/cjs/run.cjs +400 -42
- package/dist/cjs/run.cjs.map +1 -1
- package/dist/cjs/tools/ToolNode.cjs +556 -56
- package/dist/cjs/tools/ToolNode.cjs.map +1 -1
- package/dist/cjs/tools/search/search.cjs +55 -66
- package/dist/cjs/tools/search/search.cjs.map +1 -1
- package/dist/cjs/tools/search/tavily-scraper.cjs +189 -0
- package/dist/cjs/tools/search/tavily-scraper.cjs.map +1 -0
- package/dist/cjs/tools/search/tavily-search.cjs +372 -0
- package/dist/cjs/tools/search/tavily-search.cjs.map +1 -0
- package/dist/cjs/tools/search/tool.cjs +26 -4
- package/dist/cjs/tools/search/tool.cjs.map +1 -1
- package/dist/cjs/tools/search/utils.cjs +10 -3
- package/dist/cjs/tools/search/utils.cjs.map +1 -1
- package/dist/esm/graphs/Graph.mjs +22 -3
- package/dist/esm/graphs/Graph.mjs.map +1 -1
- package/dist/esm/hitl/askUserQuestion.mjs +65 -0
- package/dist/esm/hitl/askUserQuestion.mjs.map +1 -0
- package/dist/esm/hooks/HookRegistry.mjs +54 -0
- package/dist/esm/hooks/HookRegistry.mjs.map +1 -1
- package/dist/esm/hooks/createToolPolicyHook.mjs +113 -0
- package/dist/esm/hooks/createToolPolicyHook.mjs.map +1 -0
- package/dist/esm/hooks/executeHooks.mjs +40 -1
- package/dist/esm/hooks/executeHooks.mjs.map +1 -1
- package/dist/esm/hooks/types.mjs +1 -0
- package/dist/esm/hooks/types.mjs.map +1 -1
- package/dist/esm/langchain/google-common.mjs +2 -0
- package/dist/esm/langchain/google-common.mjs.map +1 -0
- package/dist/esm/langchain/index.mjs +5 -0
- package/dist/esm/langchain/index.mjs.map +1 -0
- package/dist/esm/langchain/language_models/chat_models.mjs +2 -0
- package/dist/esm/langchain/language_models/chat_models.mjs.map +1 -0
- package/dist/esm/langchain/messages/tool.mjs +2 -0
- package/dist/esm/langchain/messages/tool.mjs.map +1 -0
- package/dist/esm/langchain/messages.mjs +2 -0
- package/dist/esm/langchain/messages.mjs.map +1 -0
- package/dist/esm/langchain/openai.mjs +2 -0
- package/dist/esm/langchain/openai.mjs.map +1 -0
- package/dist/esm/langchain/prompts.mjs +2 -0
- package/dist/esm/langchain/prompts.mjs.map +1 -0
- package/dist/esm/langchain/runnables.mjs +2 -0
- package/dist/esm/langchain/runnables.mjs.map +1 -0
- package/dist/esm/langchain/tools.mjs +2 -0
- package/dist/esm/langchain/tools.mjs.map +1 -0
- package/dist/esm/langchain/utils/env.mjs +2 -0
- package/dist/esm/langchain/utils/env.mjs.map +1 -0
- package/dist/esm/llm/anthropic/index.mjs +146 -54
- package/dist/esm/llm/anthropic/index.mjs.map +1 -1
- package/dist/esm/llm/anthropic/types.mjs.map +1 -1
- package/dist/esm/llm/anthropic/utils/message_inputs.mjs +21 -14
- package/dist/esm/llm/anthropic/utils/message_inputs.mjs.map +1 -1
- package/dist/esm/llm/anthropic/utils/message_outputs.mjs +84 -71
- package/dist/esm/llm/anthropic/utils/message_outputs.mjs.map +1 -1
- package/dist/esm/llm/bedrock/index.mjs +1 -1
- package/dist/esm/llm/bedrock/index.mjs.map +1 -1
- package/dist/esm/llm/bedrock/utils/message_inputs.mjs +214 -4
- package/dist/esm/llm/bedrock/utils/message_inputs.mjs.map +1 -1
- package/dist/esm/llm/bedrock/utils/message_outputs.mjs +2 -1
- package/dist/esm/llm/bedrock/utils/message_outputs.mjs.map +1 -1
- package/dist/esm/llm/google/utils/common.mjs +5 -4
- package/dist/esm/llm/google/utils/common.mjs.map +1 -1
- package/dist/esm/llm/openai/index.mjs +520 -656
- package/dist/esm/llm/openai/index.mjs.map +1 -1
- package/dist/esm/llm/openai/utils/index.mjs +23 -459
- package/dist/esm/llm/openai/utils/index.mjs.map +1 -1
- package/dist/esm/llm/openrouter/index.mjs +57 -175
- package/dist/esm/llm/openrouter/index.mjs.map +1 -1
- package/dist/esm/llm/vertexai/index.mjs +5 -3
- package/dist/esm/llm/vertexai/index.mjs.map +1 -1
- package/dist/esm/main.mjs +7 -0
- package/dist/esm/main.mjs.map +1 -1
- package/dist/esm/messages/cache.mjs +2 -1
- package/dist/esm/messages/cache.mjs.map +1 -1
- package/dist/esm/messages/core.mjs +7 -6
- package/dist/esm/messages/core.mjs.map +1 -1
- package/dist/esm/messages/format.mjs +73 -15
- package/dist/esm/messages/format.mjs.map +1 -1
- package/dist/esm/messages/langchain.mjs +23 -0
- package/dist/esm/messages/langchain.mjs.map +1 -0
- package/dist/esm/messages/prune.mjs +7 -6
- package/dist/esm/messages/prune.mjs.map +1 -1
- package/dist/esm/run.mjs +400 -42
- package/dist/esm/run.mjs.map +1 -1
- package/dist/esm/tools/ToolNode.mjs +557 -57
- package/dist/esm/tools/ToolNode.mjs.map +1 -1
- package/dist/esm/tools/search/search.mjs +55 -66
- package/dist/esm/tools/search/search.mjs.map +1 -1
- package/dist/esm/tools/search/tavily-scraper.mjs +186 -0
- package/dist/esm/tools/search/tavily-scraper.mjs.map +1 -0
- package/dist/esm/tools/search/tavily-search.mjs +370 -0
- package/dist/esm/tools/search/tavily-search.mjs.map +1 -0
- package/dist/esm/tools/search/tool.mjs +26 -4
- package/dist/esm/tools/search/tool.mjs.map +1 -1
- package/dist/esm/tools/search/utils.mjs +10 -3
- package/dist/esm/tools/search/utils.mjs.map +1 -1
- package/dist/types/graphs/Graph.d.ts +7 -0
- package/dist/types/hitl/askUserQuestion.d.ts +55 -0
- package/dist/types/hitl/index.d.ts +6 -0
- package/dist/types/hooks/HookRegistry.d.ts +58 -0
- package/dist/types/hooks/createToolPolicyHook.d.ts +87 -0
- package/dist/types/hooks/index.d.ts +4 -1
- package/dist/types/hooks/types.d.ts +109 -3
- package/dist/types/index.d.ts +10 -0
- package/dist/types/langchain/google-common.d.ts +1 -0
- package/dist/types/langchain/index.d.ts +8 -0
- package/dist/types/langchain/language_models/chat_models.d.ts +1 -0
- package/dist/types/langchain/messages/tool.d.ts +1 -0
- package/dist/types/langchain/messages.d.ts +2 -0
- package/dist/types/langchain/openai.d.ts +1 -0
- package/dist/types/langchain/prompts.d.ts +1 -0
- package/dist/types/langchain/runnables.d.ts +2 -0
- package/dist/types/langchain/tools.d.ts +2 -0
- package/dist/types/langchain/utils/env.d.ts +1 -0
- package/dist/types/llm/anthropic/index.d.ts +22 -9
- package/dist/types/llm/anthropic/types.d.ts +5 -1
- package/dist/types/llm/anthropic/utils/message_outputs.d.ts +13 -6
- package/dist/types/llm/anthropic/utils/output_parsers.d.ts +1 -1
- package/dist/types/llm/openai/index.d.ts +21 -24
- package/dist/types/llm/openrouter/index.d.ts +11 -9
- package/dist/types/llm/vertexai/index.d.ts +1 -0
- package/dist/types/messages/cache.d.ts +4 -1
- package/dist/types/messages/format.d.ts +4 -1
- package/dist/types/messages/langchain.d.ts +27 -0
- package/dist/types/run.d.ts +117 -1
- package/dist/types/tools/ToolNode.d.ts +26 -1
- package/dist/types/tools/search/tavily-scraper.d.ts +19 -0
- package/dist/types/tools/search/tavily-search.d.ts +4 -0
- package/dist/types/tools/search/types.d.ts +99 -5
- package/dist/types/tools/search/utils.d.ts +2 -2
- package/dist/types/types/graph.d.ts +23 -37
- package/dist/types/types/hitl.d.ts +272 -0
- package/dist/types/types/index.d.ts +1 -0
- package/dist/types/types/llm.d.ts +3 -3
- package/dist/types/types/run.d.ts +33 -0
- package/dist/types/types/stream.d.ts +1 -1
- package/dist/types/types/tools.d.ts +19 -0
- package/package.json +80 -17
- package/src/graphs/Graph.ts +33 -4
- package/src/graphs/__tests__/composition.smoke.test.ts +188 -0
- package/src/hitl/askUserQuestion.ts +72 -0
- package/src/hitl/index.ts +7 -0
- package/src/hooks/HookRegistry.ts +71 -0
- package/src/hooks/__tests__/createToolPolicyHook.test.ts +259 -0
- package/src/hooks/createToolPolicyHook.ts +184 -0
- package/src/hooks/executeHooks.ts +50 -1
- package/src/hooks/index.ts +6 -0
- package/src/hooks/types.ts +112 -0
- package/src/index.ts +22 -0
- package/src/langchain/google-common.ts +1 -0
- package/src/langchain/index.ts +8 -0
- package/src/langchain/language_models/chat_models.ts +1 -0
- package/src/langchain/messages/tool.ts +5 -0
- package/src/langchain/messages.ts +21 -0
- package/src/langchain/openai.ts +1 -0
- package/src/langchain/prompts.ts +1 -0
- package/src/langchain/runnables.ts +7 -0
- package/src/langchain/tools.ts +8 -0
- package/src/langchain/utils/env.ts +1 -0
- package/src/llm/anthropic/index.ts +252 -84
- package/src/llm/anthropic/llm.spec.ts +751 -102
- package/src/llm/anthropic/types.ts +9 -1
- package/src/llm/anthropic/utils/message_inputs.ts +37 -19
- package/src/llm/anthropic/utils/message_outputs.ts +119 -101
- package/src/llm/bedrock/index.ts +2 -2
- package/src/llm/bedrock/llm.spec.ts +341 -0
- package/src/llm/bedrock/utils/message_inputs.ts +303 -4
- package/src/llm/bedrock/utils/message_outputs.ts +2 -1
- package/src/llm/custom-chat-models.smoke.test.ts +836 -0
- package/src/llm/google/llm.spec.ts +339 -57
- package/src/llm/google/utils/common.ts +53 -48
- package/src/llm/openai/contentBlocks.test.ts +346 -0
- package/src/llm/openai/index.ts +856 -833
- package/src/llm/openai/utils/index.ts +107 -78
- package/src/llm/openai/utils/messages.test.ts +159 -0
- package/src/llm/openrouter/index.ts +124 -247
- package/src/llm/openrouter/reasoning.test.ts +8 -1
- package/src/llm/vertexai/index.ts +11 -5
- package/src/llm/vertexai/llm.spec.ts +28 -1
- package/src/messages/cache.test.ts +4 -3
- package/src/messages/cache.ts +3 -2
- package/src/messages/core.ts +16 -9
- package/src/messages/format.ts +96 -16
- package/src/messages/formatAgentMessages.test.ts +166 -1
- package/src/messages/langchain.ts +39 -0
- package/src/messages/prune.ts +12 -8
- package/src/run.ts +456 -47
- package/src/scripts/caching.ts +2 -3
- package/src/specs/summarization.test.ts +51 -58
- package/src/tools/ToolNode.ts +706 -63
- package/src/tools/__tests__/hitl.test.ts +3593 -0
- package/src/tools/search/search.ts +83 -73
- package/src/tools/search/tavily-scraper.ts +235 -0
- package/src/tools/search/tavily-search.ts +424 -0
- package/src/tools/search/tavily.test.ts +965 -0
- package/src/tools/search/tool.ts +36 -26
- package/src/tools/search/types.ts +133 -8
- package/src/tools/search/utils.ts +13 -5
- package/src/types/graph.ts +32 -87
- package/src/types/hitl.ts +303 -0
- package/src/types/index.ts +1 -0
- package/src/types/llm.ts +3 -3
- package/src/types/run.ts +33 -0
- package/src/types/stream.ts +1 -1
- package/src/types/tools.ts +19 -0
- package/src/utils/llmConfig.ts +1 -6
|
@@ -0,0 +1,3593 @@
|
|
|
1
|
+
import { z } from 'zod';
|
|
2
|
+
import { tool } from '@langchain/core/tools';
|
|
3
|
+
import {
|
|
4
|
+
END,
|
|
5
|
+
START,
|
|
6
|
+
Command,
|
|
7
|
+
StateGraph,
|
|
8
|
+
MemorySaver,
|
|
9
|
+
isInterrupted,
|
|
10
|
+
MessagesAnnotation,
|
|
11
|
+
} from '@langchain/langgraph';
|
|
12
|
+
import { AIMessage, ToolMessage } from '@langchain/core/messages';
|
|
13
|
+
import {
|
|
14
|
+
describe,
|
|
15
|
+
it,
|
|
16
|
+
expect,
|
|
17
|
+
jest,
|
|
18
|
+
afterEach,
|
|
19
|
+
beforeEach,
|
|
20
|
+
} from '@jest/globals';
|
|
21
|
+
import type { StructuredToolInterface } from '@langchain/core/tools';
|
|
22
|
+
import type { BaseMessage } from '@langchain/core/messages';
|
|
23
|
+
import type { Runnable, RunnableConfig } from '@langchain/core/runnables';
|
|
24
|
+
import type {
|
|
25
|
+
PreToolUseHookOutput,
|
|
26
|
+
PostToolUseHookOutput,
|
|
27
|
+
PostToolUseFailureHookOutput,
|
|
28
|
+
PostToolBatchEntry,
|
|
29
|
+
PostToolBatchHookInput,
|
|
30
|
+
PostToolBatchHookOutput,
|
|
31
|
+
RunStartHookOutput,
|
|
32
|
+
UserPromptSubmitHookOutput,
|
|
33
|
+
} from '@/hooks';
|
|
34
|
+
import type * as t from '@/types';
|
|
35
|
+
import * as events from '@/utils/events';
|
|
36
|
+
import { HookRegistry } from '@/hooks';
|
|
37
|
+
import { Providers as providers, GraphEvents } from '@/common';
|
|
38
|
+
import { ToolNode } from '../ToolNode';
|
|
39
|
+
|
|
40
|
+
/**
 * Builds a placeholder tool that carries only a name, a description and a
 * `{ command: string }` schema.
 *
 * In event-driven mode ToolNode uses the schema for binding/discovery and
 * routes execution through the host via `ON_TOOL_EXECUTE`, so the callback
 * registered here (which just resolves to `'unused'`) is never expected to
 * run.
 *
 * @param name - Tool name to register the stub under.
 * @returns The stub, cast to `StructuredToolInterface`.
 */
function createSchemaStub(name: string): StructuredToolInterface {
  const schema = z.object({ command: z.string() });
  const stub = tool(async () => 'unused', {
    name,
    description: 'schema-only stub; host executes via ON_TOOL_EXECUTE',
    schema,
  });
  return stub as unknown as StructuredToolInterface;
}
|
|
52
|
+
|
|
53
|
+
/**
 * Installs a fake host by spying on `events.safeDispatchCustomEvent`.
 *
 * Every `on_tool_execute` dispatch whose payload exposes a `resolve`
 * function is answered immediately with `mockResults`; all other events are
 * ignored. Mirrors the pattern used in `ToolNode.outputReferences.test.ts`
 * so the event-driven path yields ToolMessages without a real host.
 *
 * @param mockResults - Results handed to the request's `resolve` callback.
 */
function mockEventDispatch(mockResults: t.ToolExecuteResult[]): void {
  const dispatchSpy = jest.spyOn(events, 'safeDispatchCustomEvent');
  dispatchSpy.mockImplementation(async (event, data) => {
    if (event === 'on_tool_execute') {
      const payload = data as Record<string, unknown>;
      // Only answer requests that actually carry a resolver callback.
      if (typeof payload.resolve === 'function') {
        (payload.resolve as (r: t.ToolExecuteResult[]) => void)(mockResults);
      }
    }
  });
}
|
|
72
|
+
|
|
73
|
+
/** Shape of the partial state returned by a node in the test graph. */
type MessagesUpdate = { messages: BaseMessage[] };

/**
 * Loosened view of the compiled graph used by these tests: `invoke` accepts
 * any input (initial state or a resume `Command`) and resolves to `unknown`,
 * leaving callers to narrow the result themselves.
 */
type CompiledMessagesGraph = Runnable<unknown, MessagesUpdate> & {
  invoke(input: unknown, config?: RunnableConfig): Promise<unknown>;
};
|
|
77
|
+
|
|
78
|
+
/**
 * Compiles a minimal `START → agent → tools → END` graph around the given
 * ToolNode, checkpointed with a fresh `MemorySaver`.
 *
 * The `agent` node is scripted rather than model-backed: its first
 * invocation emits an empty-content AIMessage carrying `toolCalls` so the
 * ToolNode has work to do; any later invocation emits a plain `'done'`
 * AIMessage with no tool calls.
 *
 * @param toolNode - The ToolNode under test, mounted as the `tools` node.
 * @param toolCalls - Tool calls attached to the first AIMessage.
 * @returns The compiled graph, cast to the loosened test-facing type.
 */
function buildHITLGraph(
  toolNode: ToolNode,
  toolCalls: Array<{ id: string; name: string; args: Record<string, unknown> }>
): CompiledMessagesGraph {
  let agentRuns = 0;

  const scriptedAgent = (): MessagesUpdate => {
    agentRuns += 1;
    const isFirstRun = agentRuns === 1;
    const message = isFirstRun
      ? new AIMessage({ content: '', tool_calls: toolCalls })
      : new AIMessage({ content: 'done' });
    return { messages: [message] };
  };

  const workflow = new StateGraph(MessagesAnnotation)
    .addNode('agent', scriptedAgent)
    .addNode('tools', toolNode)
    .addEdge(START, 'agent')
    .addEdge('agent', 'tools')
    .addEdge('tools', END);

  const checkpointer = new MemorySaver();
  return workflow.compile({ checkpointer }) as unknown as CompiledMessagesGraph;
}
|
|
109
|
+
|
|
110
|
+
/**
 * Creates a `HookRegistry` with a single `PreToolUse` hook that always
 * answers with the given decision.
 *
 * @param decision - Decision the hook returns for every tool call.
 * @param reason - Optional reason; the `reason` key is only included in the
 *   hook output when this is neither `null` nor `undefined`.
 * @returns The registry with the hook registered.
 */
function makeHookRegistry(
  decision: 'allow' | 'deny' | 'ask',
  reason?: string
): HookRegistry {
  const preToolUseHook = async (): Promise<PreToolUseHookOutput> => {
    if (reason == null) {
      return { decision };
    }
    return { decision, reason };
  };

  const registry = new HookRegistry();
  registry.register('PreToolUse', { hooks: [preToolUseHook] });
  return registry;
}
|
|
125
|
+
|
|
126
|
+
describe('ToolNode HITL — `ask` decision raises interrupt() when humanInTheLoop is enabled', () => {
|
|
127
|
+
afterEach(() => {
|
|
128
|
+
jest.restoreAllMocks();
|
|
129
|
+
});
|
|
130
|
+
|
|
131
|
+
it('raises a tool_approval interrupt with the pending tool call payload', async () => {
  // A host reply is wired up, but the run is expected to pause at the
  // approval interrupt instead of completing.
  mockEventDispatch([
    { toolCallId: 'call_1', content: 'should-not-run', status: 'success' },
  ]);
  const toolNode = new ToolNode({
    tools: [createSchemaStub('echo')],
    eventDrivenMode: true,
    agentId: 'agent-x',
    toolCallStepIds: new Map([['call_1', 'step_call_1']]),
    hookRegistry: makeHookRegistry('ask', 'review tool args'),
    humanInTheLoop: { enabled: true },
  });

  const pendingCall = {
    id: 'call_1',
    name: 'echo',
    args: { command: 'list /' },
  };
  const graph = buildHITLGraph(toolNode, [pendingCall]);
  const threadConfig = { configurable: { thread_id: 'thread-hitl-1' } };

  const outcome = await graph.invoke({ messages: [] }, threadConfig);

  expect(isInterrupted<t.HumanInterruptPayload>(outcome)).toBe(true);
  // Repeat the check as a guard so the compiler narrows `outcome`.
  if (!isInterrupted<t.HumanInterruptPayload>(outcome)) {
    throw new Error('expected interrupt');
  }
  const { __interrupt__: pending } = outcome;
  expect(pending).toHaveLength(1);

  const approval = pending[0].value!;
  if (approval.type !== 'tool_approval') {
    throw new Error('expected tool_approval payload');
  }

  // The hook's reason is surfaced as the request description.
  expect(approval.action_requests).toEqual([
    {
      tool_call_id: 'call_1',
      name: 'echo',
      arguments: { command: 'list /' },
      description: 'review tool args',
    },
  ]);
  expect(approval.review_configs).toEqual([
    {
      action_name: 'echo',
      tool_call_id: 'call_1',
      allowed_decisions: ['approve', 'reject', 'edit', 'respond'],
    },
  ]);
});
|
|
177
|
+
|
|
178
|
+
it('resume with approve runs the tool through the host event path', async () => {
  mockEventDispatch([
    { toolCallId: 'call_1', content: 'host-result', status: 'success' },
  ]);
  const toolNode = new ToolNode({
    tools: [createSchemaStub('echo')],
    eventDrivenMode: true,
    agentId: 'agent-x',
    toolCallStepIds: new Map([['call_1', 'step_call_1']]),
    hookRegistry: makeHookRegistry('ask'),
    humanInTheLoop: { enabled: true },
  });
  const graph = buildHITLGraph(toolNode, [
    { id: 'call_1', name: 'echo', args: { command: 'do-it' } },
  ]);
  const threadConfig = { configurable: { thread_id: 'thread-hitl-approve' } };

  // First pass must pause for approval.
  const firstPass = await graph.invoke({ messages: [] }, threadConfig);
  expect(isInterrupted(firstPass)).toBe(true);

  // Second pass resumes the same thread with an approve decision.
  const approval = new Command({ resume: [{ type: 'approve' }] });
  const finalState = (await graph.invoke(approval, threadConfig)) as {
    messages: BaseMessage[];
  };

  const toolReplies = finalState.messages.filter(
    (msg): msg is ToolMessage => msg._getType() === 'tool'
  );
  expect(toolReplies).toHaveLength(1);
  const [reply] = toolReplies;
  expect(reply.tool_call_id).toBe('call_1');
  expect(reply.content).toBe('host-result');
  expect(reply.status).not.toBe('error');
});
|
|
212
|
+
|
|
213
|
+
it('resume with reject blocks the tool and emits an error ToolMessage', async () => {
  // The fake host has no results to hand out.
  mockEventDispatch([]);
  const toolNode = new ToolNode({
    tools: [createSchemaStub('echo')],
    eventDrivenMode: true,
    agentId: 'agent-x',
    toolCallStepIds: new Map([['call_1', 'step_call_1']]),
    hookRegistry: makeHookRegistry('ask'),
    humanInTheLoop: { enabled: true },
  });
  const graph = buildHITLGraph(toolNode, [
    { id: 'call_1', name: 'echo', args: { command: 'rm -rf /' } },
  ]);
  const threadConfig = { configurable: { thread_id: 'thread-hitl-reject' } };

  // First pass; its return value is not inspected here.
  await graph.invoke({ messages: [] }, threadConfig);

  const rejection = new Command({
    resume: [{ type: 'reject', reason: 'destructive command' }],
  });
  const finalState = (await graph.invoke(rejection, threadConfig)) as {
    messages: BaseMessage[];
  };

  const toolReplies = finalState.messages.filter(
    (msg): msg is ToolMessage => msg._getType() === 'tool'
  );
  expect(toolReplies).toHaveLength(1);
  const [reply] = toolReplies;
  expect(reply.status).toBe('error');
  // The reviewer's reason must be visible in the message content.
  expect(String(reply.content)).toContain('destructive command');
});
|
|
245
|
+
|
|
246
|
+
it('resume with edit substitutes the tool input before invocation', async () => {
  // Fake host that records every tool call it is asked to execute and
  // answers each one with a successful 'host-result'.
  const seenCalls: t.ToolCallRequest[] = [];
  const dispatchSpy = jest.spyOn(events, 'safeDispatchCustomEvent');
  dispatchSpy.mockImplementation(async (event, data) => {
    if (event !== 'on_tool_execute') {
      return;
    }
    const request = data as {
      toolCalls: t.ToolCallRequest[];
      resolve: (r: t.ToolExecuteResult[]) => void;
    };
    seenCalls.push(...request.toolCalls);
    const results = request.toolCalls.map((call) => ({
      toolCallId: call.id,
      content: 'host-result',
      status: 'success' as const,
    }));
    request.resolve(results);
  });

  const toolNode = new ToolNode({
    tools: [createSchemaStub('echo')],
    eventDrivenMode: true,
    agentId: 'agent-x',
    toolCallStepIds: new Map([['call_1', 'step_call_1']]),
    hookRegistry: makeHookRegistry('ask'),
    humanInTheLoop: { enabled: true },
  });
  const graph = buildHITLGraph(toolNode, [
    { id: 'call_1', name: 'echo', args: { command: 'original' } },
  ]);
  const threadConfig = { configurable: { thread_id: 'thread-hitl-edit' } };

  // First pass; its return value is not inspected here.
  await graph.invoke({ messages: [] }, threadConfig);

  const edit = new Command({
    resume: [{ type: 'edit', updatedInput: { command: 'patched' } }],
  });
  await graph.invoke(edit, threadConfig);

  // The host must have seen exactly one call, carrying the edited args.
  expect(seenCalls).toHaveLength(1);
  expect(seenCalls[0].args).toEqual({ command: 'patched' });
});
|
|
294
|
+
|
|
295
|
+
it('resume with respond emits the user-supplied text as a successful ToolMessage and skips host execution', async () => {
  // Fake host that answers every execute request with an empty result list.
  const dispatchSpy = jest
    .spyOn(events, 'safeDispatchCustomEvent')
    .mockImplementation(async (event, data) => {
      if (event === 'on_tool_execute') {
        const request = data as {
          toolCalls: t.ToolCallRequest[];
          resolve: (r: t.ToolExecuteResult[]) => void;
        };
        request.resolve([]);
      }
    });
  /** Number of `on_tool_execute` dispatches recorded by the spy so far. */
  const countExecuteDispatches = (): number =>
    dispatchSpy.mock.calls.filter(([event]) => event === 'on_tool_execute')
      .length;

  const toolNode = new ToolNode({
    tools: [createSchemaStub('echo')],
    eventDrivenMode: true,
    agentId: 'agent-x',
    toolCallStepIds: new Map([['call_1', 'step_call_1']]),
    hookRegistry: makeHookRegistry('ask'),
    humanInTheLoop: { enabled: true },
  });
  const graph = buildHITLGraph(toolNode, [
    { id: 'call_1', name: 'echo', args: { command: 'search' } },
  ]);
  const threadConfig = { configurable: { thread_id: 'thread-hitl-respond' } };

  await graph.invoke({ messages: [] }, threadConfig);
  const dispatchesBeforeResume = countExecuteDispatches();

  const response = new Command({
    resume: [{ type: 'respond', responseText: 'no relevant results' }],
  });
  const finalState = (await graph.invoke(response, threadConfig)) as {
    messages: BaseMessage[];
  };
  const dispatchesAfterResume = countExecuteDispatches();

  const toolReplies = finalState.messages.filter(
    (msg): msg is ToolMessage => msg._getType() === 'tool'
  );
  expect(toolReplies).toHaveLength(1);
  const [reply] = toolReplies;
  expect(reply.tool_call_id).toBe('call_1');
  expect(reply.content).toBe('no relevant results');
  expect(reply.status).not.toBe('error');
  // Resuming with `respond` must not trigger any additional host dispatch.
  expect(dispatchesAfterResume).toBe(dispatchesBeforeResume);
});
|
|
349
|
+
|
|
350
|
+
it('advertises respond in review_configs.allowed_decisions', async () => {
  mockEventDispatch([]);
  const toolNode = new ToolNode({
    tools: [createSchemaStub('echo')],
    eventDrivenMode: true,
    agentId: 'agent-x',
    toolCallStepIds: new Map([['call_1', 'step_call_1']]),
    hookRegistry: makeHookRegistry('ask'),
    humanInTheLoop: { enabled: true },
  });
  const graph = buildHITLGraph(toolNode, [
    { id: 'call_1', name: 'echo', args: { command: 'x' } },
  ]);
  const threadConfig = {
    configurable: { thread_id: 'thread-hitl-allowed-decisions' },
  };

  const outcome = await graph.invoke({ messages: [] }, threadConfig);
  // Guards double as type narrowing for the payload accesses below.
  if (!isInterrupted<t.HumanInterruptPayload>(outcome)) {
    throw new Error('expected interrupt');
  }
  const approval = outcome.__interrupt__[0].value!;
  if (approval.type !== 'tool_approval') {
    throw new Error('expected tool_approval payload');
  }

  const [firstReview] = approval.review_configs;
  expect(firstReview.allowed_decisions).toEqual([
    'approve',
    'reject',
    'edit',
    'respond',
  ]);
});
|
|
383
|
+
|
|
384
|
+
it('resume with a record keyed by tool_call_id is accepted', async () => {
  mockEventDispatch([
    { toolCallId: 'call_1', content: 'host-result', status: 'success' },
  ]);
  const toolNode = new ToolNode({
    tools: [createSchemaStub('echo')],
    eventDrivenMode: true,
    agentId: 'agent-x',
    toolCallStepIds: new Map([['call_1', 'step_call_1']]),
    hookRegistry: makeHookRegistry('ask'),
    humanInTheLoop: { enabled: true },
  });
  const graph = buildHITLGraph(toolNode, [
    { id: 'call_1', name: 'echo', args: { command: 'do-it' } },
  ]);
  const threadConfig = { configurable: { thread_id: 'thread-hitl-map' } };

  // First pass; its return value is not inspected here.
  await graph.invoke({ messages: [] }, threadConfig);

  // Resume with an object keyed by tool call id instead of an array.
  const keyedApproval = new Command({
    resume: { call_1: { type: 'approve' } },
  });
  const finalState = (await graph.invoke(keyedApproval, threadConfig)) as {
    messages: BaseMessage[];
  };

  const toolReplies = finalState.messages.filter(
    (msg): msg is ToolMessage => msg._getType() === 'tool'
  );
  expect(toolReplies).toHaveLength(1);
  expect(toolReplies[0].content).toBe('host-result');
});
|
|
415
|
+
});
|
|
416
|
+
|
|
417
|
+
describe('ToolNode HITL — opt-out (`humanInTheLoop: { enabled: false }`) is fail-closed', () => {
|
|
418
|
+
afterEach(() => {
|
|
419
|
+
jest.restoreAllMocks();
|
|
420
|
+
});
|
|
421
|
+
|
|
422
|
+
it('blocks the tool with a ToolMessage error and never raises an interrupt', async () => {
  mockEventDispatch([]);
  const toolNode = new ToolNode({
    tools: [createSchemaStub('echo')],
    eventDrivenMode: true,
    agentId: 'agent-x',
    toolCallStepIds: new Map([['call_1', 'step_call_1']]),
    hookRegistry: makeHookRegistry('ask', 'HITL explicitly disabled'),
    // Explicit opt-out: an `ask` decision must not pause the run.
    humanInTheLoop: { enabled: false },
  });
  const graph = buildHITLGraph(toolNode, [
    { id: 'call_1', name: 'echo', args: { command: 'list /' } },
  ]);
  const threadConfig = { configurable: { thread_id: 'thread-hitl-optout' } };

  const finalState = (await graph.invoke({ messages: [] }, threadConfig)) as {
    messages: BaseMessage[];
  };

  expect(isInterrupted(finalState)).toBe(false);
  const toolReplies = finalState.messages.filter(
    (msg): msg is ToolMessage => msg._getType() === 'tool'
  );
  expect(toolReplies).toHaveLength(1);
  const [reply] = toolReplies;
  expect(reply.status).toBe('error');
  // The hook's reason must be visible in the blocking message.
  expect(String(reply.content)).toContain('HITL explicitly disabled');
});
|
|
452
|
+
|
|
453
|
+
it('blocks the tool when `humanInTheLoop` is omitted (default-off)', async () => {
|
|
454
|
+
/**
|
|
455
|
+
* Default is OFF until host UIs (notably LibreChat) ship the
|
|
456
|
+
* approval-rendering affordances. With HITL omitted, an `ask`
|
|
457
|
+
* decision must collapse into a synchronous block — same fail-
|
|
458
|
+
* closed behavior as the explicit `{ enabled: false }` opt-out.
|
|
459
|
+
* This test guards against accidentally re-enabling the default-on
|
|
460
|
+
* path before the consumer ecosystem is ready.
|
|
461
|
+
*/
|
|
462
|
+
mockEventDispatch([
|
|
463
|
+
{ toolCallId: 'call_1', content: 'host-result', status: 'success' },
|
|
464
|
+
]);
|
|
465
|
+
const node = new ToolNode({
|
|
466
|
+
tools: [createSchemaStub('echo')],
|
|
467
|
+
eventDrivenMode: true,
|
|
468
|
+
agentId: 'agent-x',
|
|
469
|
+
toolCallStepIds: new Map([['call_1', 'step_call_1']]),
|
|
470
|
+
hookRegistry: makeHookRegistry('ask', 'default-off-blocks'),
|
|
471
|
+
// humanInTheLoop intentionally omitted — should default to disabled
|
|
472
|
+
});
|
|
473
|
+
|
|
474
|
+
const graph = buildHITLGraph(node, [
|
|
475
|
+
{ id: 'call_1', name: 'echo', args: { command: 'list /' } },
|
|
476
|
+
]);
|
|
477
|
+
const config = { configurable: { thread_id: 'thread-hitl-default' } };
|
|
478
|
+
|
|
479
|
+
const out = (await graph.invoke({ messages: [] }, config)) as {
|
|
480
|
+
messages: BaseMessage[];
|
|
481
|
+
};
|
|
482
|
+
expect(isInterrupted<t.HumanInterruptPayload>(out)).toBe(false);
|
|
483
|
+
const toolMessages = out.messages.filter(
|
|
484
|
+
(m): m is ToolMessage => m._getType() === 'tool'
|
|
485
|
+
);
|
|
486
|
+
expect(toolMessages).toHaveLength(1);
|
|
487
|
+
expect(toolMessages[0].tool_call_id).toBe('call_1');
|
|
488
|
+
expect(toolMessages[0].status).toBe('error');
|
|
489
|
+
expect(String(toolMessages[0].content)).toContain('default-off-blocks');
|
|
490
|
+
});
|
|
491
|
+
});
|
|
492
|
+
|
|
493
|
+
/**
 * Two tool calls in one batch, both answered `ask` by the PreToolUse hook.
 * The suite checks that a single interrupt lists both calls and that the
 * resume decisions are applied per call: the approved call is dispatched to
 * the host, the rejected call ends as an error tool message.
 */
describe('ToolNode HITL — multi-tool batches', () => {
  afterEach(() => {
    jest.restoreAllMocks();
  });

  it('bundles multiple ask decisions into a single interrupt and resolves per call', async () => {
    /** Every tool call the stubbed host was asked to execute. */
    const dispatched: t.ToolCallRequest[] = [];

    jest
      .spyOn(events, 'safeDispatchCustomEvent')
      .mockImplementation(async (eventName, payload) => {
        // Only tool-execution events are answered; anything else is ignored.
        if (eventName === 'on_tool_execute') {
          const request = payload as {
            toolCalls: t.ToolCallRequest[];
            resolve: (r: t.ToolExecuteResult[]) => void;
          };
          for (const call of request.toolCalls) {
            dispatched.push(call);
          }
          const results = request.toolCalls.map(
            (call): t.ToolExecuteResult => ({
              toolCallId: call.id,
              content: `ran:${call.name}`,
              status: 'success',
            })
          );
          request.resolve(results);
        }
      });

    const askEverything = async (): Promise<PreToolUseHookOutput> => ({
      decision: 'ask',
      reason: 'review',
    });
    const hookRegistry = new HookRegistry();
    hookRegistry.register('PreToolUse', { hooks: [askEverything] });

    const toolNode = new ToolNode({
      tools: [createSchemaStub('echo'), createSchemaStub('cat')],
      eventDrivenMode: true,
      agentId: 'agent-x',
      toolCallStepIds: new Map([
        ['call_1', 'step_call_1'],
        ['call_2', 'step_call_2'],
      ]),
      hookRegistry,
      humanInTheLoop: { enabled: true },
    });

    const hitlGraph = buildHITLGraph(toolNode, [
      { id: 'call_1', name: 'echo', args: { command: 'one' } },
      { id: 'call_2', name: 'cat', args: { command: 'two' } },
    ]);
    const threadConfig = { configurable: { thread_id: 'thread-hitl-batch' } };

    // First pass: the graph must pause with one interrupt covering both calls.
    const firstPass = await hitlGraph.invoke({ messages: [] }, threadConfig);
    expect(isInterrupted<t.HumanInterruptPayload>(firstPass)).toBe(true);
    if (!isInterrupted<t.HumanInterruptPayload>(firstPass)) {
      throw new Error('expected interrupt');
    }
    const approval = firstPass.__interrupt__[0].value!;
    if (approval.type !== 'tool_approval') {
      throw new Error('expected tool_approval payload');
    }
    const requestedIds = approval.action_requests.map(
      (request) => request.tool_call_id
    );
    expect(requestedIds).toEqual(['call_1', 'call_2']);

    // Second pass: approve the first call, reject the second.
    const resumeCommand = new Command({
      resume: [{ type: 'approve' }, { type: 'reject', reason: 'too risky' }],
    });
    const secondPass = (await hitlGraph.invoke(
      resumeCommand,
      threadConfig
    )) as { messages: BaseMessage[] };

    const toolReplies = secondPass.messages.filter(
      (message): message is ToolMessage => message._getType() === 'tool'
    );
    expect(toolReplies).toHaveLength(2);

    const replyByCallId = new Map(
      toolReplies.map((reply) => [reply.tool_call_id, reply])
    );
    const approved = replyByCallId.get('call_1')!;
    expect(approved.content).toBe('ran:echo');
    expect(approved.status).not.toBe('error');

    const rejected = replyByCallId.get('call_2')!;
    expect(rejected.status).toBe('error');
    expect(String(rejected.content)).toContain('too risky');

    // Only the approved call ever reached the host.
    expect(dispatched).toHaveLength(1);
    expect(dispatched[0].id).toBe('call_1');
  });
});
|
|
585
|
+
|
|
586
|
+
/**
 * Run-level integration checks for human-in-the-loop:
 *  - when `Run.create` installs (or leaves alone) a checkpointer,
 *  - that the SDK barrel re-exports the langgraph HITL primitives,
 *  - that `Run.resume()` drives a paused graph to completion,
 *  - what `Run.getHaltReason()` reports when a UserPromptSubmit hook denies
 *    or asks.
 *
 * Every test loads `Run` and `Providers` through dynamic imports so all
 * agent configs in the suite name the provider the same way.
 */
describe('Run integration — HITL fallback checkpointer + resume', () => {
  beforeEach(() => {
    jest.restoreAllMocks();
  });
  afterEach(() => {
    jest.restoreAllMocks();
  });

  it('Run.create does NOT install a MemorySaver fallback by default (HITL is off until host UI ships)', async () => {
    /**
     * Default-off rationale: HITL ships the interrupt machinery but
     * stays opt-in until host UIs (notably LibreChat) can render and
     * resolve `tool_approval` interrupts. With HITL omitted, the SDK
     * must NOT silently install a checkpointer — that would suggest
     * the run can pause/resume when in fact the `ask` path will
     * fail-closed. Plan of record: flip the default to ON in a future
     * minor once the consumer ecosystem is ready.
     */
    const { Run } = await import('@/run');
    const { Providers } = await import('@/common');

    const run = await Run.create<t.IState>({
      runId: 'hitl-default-run',
      graphConfig: {
        type: 'standard',
        agents: [
          {
            agentId: 'a',
            provider: Providers.OPENAI,
            clientOptions: { modelName: 'gpt-4o-mini', apiKey: 'test-key' },
            instructions: 'noop',
            maxContextTokens: 8000,
          },
        ],
      },
      // humanInTheLoop intentionally omitted — default is OFF
    });

    expect(run.Graph?.compileOptions?.checkpointer).toBeUndefined();
  });

  it('Run.create installs a MemorySaver fallback when HITL is explicitly enabled', async () => {
    const { Run } = await import('@/run');
    const { Providers } = await import('@/common');

    const run = await Run.create<t.IState>({
      runId: 'hitl-explicit-run',
      graphConfig: {
        type: 'standard',
        agents: [
          {
            agentId: 'a',
            provider: Providers.OPENAI,
            clientOptions: { modelName: 'gpt-4o-mini', apiKey: 'test-key' },
            instructions: 'noop',
            maxContextTokens: 8000,
          },
        ],
      },
      humanInTheLoop: { enabled: true },
    });

    expect(run.Graph?.compileOptions?.checkpointer).toBeInstanceOf(MemorySaver);
    expect(run.Graph?.humanInTheLoop?.enabled).toBe(true);
  });

  it('Run.create preserves a host-supplied checkpointer when HITL is explicitly enabled', async () => {
    const { Run } = await import('@/run');
    const { Providers } = await import('@/common');

    /** The host's own saver must survive `Run.create` by identity. */
    const hostCheckpointer = new MemorySaver();
    const run = await Run.create<t.IState>({
      runId: 'hitl-host-checkpointer',
      graphConfig: {
        type: 'standard',
        agents: [
          {
            agentId: 'a',
            provider: Providers.OPENAI,
            clientOptions: { modelName: 'gpt-4o-mini', apiKey: 'test-key' },
            instructions: 'noop',
            maxContextTokens: 8000,
          },
        ],
        compileOptions: { checkpointer: hostCheckpointer },
      },
      humanInTheLoop: { enabled: true },
    });

    expect(run.Graph?.compileOptions?.checkpointer).toBe(hostCheckpointer);
  });

  it('re-exports langgraph HITL primitives from the SDK barrel for host use', async () => {
    const indexExports = await import('@/index');
    expect(indexExports.MemorySaver).toBe(MemorySaver);
    expect(indexExports.Command).toBe(Command);
    expect(indexExports.INTERRUPT).toBeDefined();
    expect(typeof indexExports.interrupt).toBe('function');
    expect(typeof indexExports.isInterrupted).toBe('function');
    expect(typeof indexExports.BaseCheckpointSaver).toBe('function');
  });

  it('Run.create does not attach a checkpointer when HITL is explicitly disabled', async () => {
    const { Run } = await import('@/run');
    const { Providers } = await import('@/common');

    const run = await Run.create<t.IState>({
      runId: 'hitl-optout-run',
      graphConfig: {
        type: 'standard',
        agents: [
          {
            agentId: 'a',
            provider: Providers.OPENAI,
            clientOptions: { modelName: 'gpt-4o-mini', apiKey: 'test-key' },
            instructions: 'noop',
            maxContextTokens: 8000,
          },
        ],
      },
      humanInTheLoop: { enabled: false },
    });

    expect(run.Graph?.compileOptions?.checkpointer).toBeUndefined();
  });

  it('Run.resume() drives the host all the way through the resume command path', async () => {
    /** End-to-end on the Run wrapper: build a HITL graph that
     * interrupts on first invoke, then drive resume via the Run's
     * own `resume()` method (not raw graph.invoke + Command).
     * Validates the full Run.resume → processStream(Command) path. */
    let dispatchCount = 0;
    jest
      .spyOn(events, 'safeDispatchCustomEvent')
      .mockImplementation(async (event, data) => {
        if (event !== 'on_tool_execute') {
          return;
        }
        // Count host executions so the test can tell "paused" from "ran".
        dispatchCount += 1;
        const request = data as {
          toolCalls: t.ToolCallRequest[];
          resolve: (r: t.ToolExecuteResult[]) => void;
        };
        request.resolve(
          request.toolCalls.map((c) => ({
            toolCallId: c.id,
            content: 'host-result',
            status: 'success' as const,
          }))
        );
      });

    const registry = new HookRegistry();
    registry.register('PreToolUse', {
      hooks: [
        async (): Promise<PreToolUseHookOutput> => ({
          decision: 'ask',
          reason: 'review',
        }),
      ],
    });

    const node = new ToolNode({
      tools: [createSchemaStub('echo')],
      eventDrivenMode: true,
      agentId: 'agent-x',
      toolCallStepIds: new Map([['call_1', 'step_1']]),
      hookRegistry: registry,
      humanInTheLoop: { enabled: true },
    });

    /** Two-node graph: a canned "agent" emitting one tool call, then the
     * tool node under test. Compiled with its own in-memory checkpointer. */
    const builder = new StateGraph(MessagesAnnotation)
      .addNode(
        'agent',
        (): MessagesUpdate => ({
          messages: [
            new AIMessage({
              content: '',
              tool_calls: [
                { id: 'call_1', name: 'echo', args: { command: 'x' } },
              ],
            }),
          ],
        })
      )
      .addNode('tools', node)
      .addEdge(START, 'agent')
      .addEdge('agent', 'tools')
      .addEdge('tools', END);
    const graph = builder.compile({ checkpointer: new MemorySaver() });

    const { Run } = await import('@/run');
    /** `Providers` is loaded from '@/common' exactly as the Run.create
     * tests above do, instead of relying on a file-level binding. */
    const { Providers } = await import('@/common');
    const run = await Run.create<t.IState>({
      runId: 'run-resume-direct',
      graphConfig: {
        type: 'standard',
        agents: [
          {
            agentId: 'a',
            provider: Providers.OPENAI,
            clientOptions: { modelName: 'gpt-4o-mini', apiKey: 'test-key' },
            instructions: 'noop',
            maxContextTokens: 8000,
          },
        ],
      },
      hooks: registry,
      humanInTheLoop: { enabled: true },
    });
    // Swap in the hand-built graph so the Run wrapper drives it.
    run.graphRunnable = graph as unknown as t.CompiledStateWorkflow;

    const callerConfig = {
      configurable: { thread_id: 'run-resume-thread' },
      version: 'v2' as const,
    };

    await run.processStream({ messages: [] }, callerConfig);
    expect(run.getInterrupt()).toBeDefined();
    expect(dispatchCount).toBe(0);

    /** This is the API contract under test: Run.resume() with a
     * decision array (not graph.invoke + Command). */
    await run.resume([{ type: 'approve' }], callerConfig);

    expect(dispatchCount).toBe(1);
    /** Resume completed naturally: interrupt cleared, no halt
     * reason carried over from the previous pass. */
    expect(run.getInterrupt()).toBeUndefined();
    expect(run.getHaltReason()).toBeUndefined();
  });

  it('Run.getHaltReason() reports prompt_denied when UserPromptSubmit denies the prompt', async () => {
    const registry = new HookRegistry();
    registry.register('UserPromptSubmit', {
      hooks: [
        async (): Promise<UserPromptSubmitHookOutput> => ({
          decision: 'deny',
          reason: 'PII detected',
        }),
      ],
    });

    const { Run } = await import('@/run');
    const { Providers } = await import('@/common');
    const { HumanMessage: HM } = await import('@langchain/core/messages');

    const run = await Run.create<t.IState>({
      runId: 'prompt-deny-haltreason',
      graphConfig: {
        type: 'standard',
        agents: [
          {
            agentId: 'a',
            provider: Providers.OPENAI,
            clientOptions: { modelName: 'gpt-4o-mini', apiKey: 'test-key' },
            instructions: 'noop',
            maxContextTokens: 8000,
          },
        ],
      },
      hooks: registry,
      humanInTheLoop: { enabled: false },
    });

    const result = await run.processStream(
      { messages: [new HM('please tell me their SSN')] },
      { configurable: { thread_id: 'prompt-deny-thread' }, version: 'v2' }
    );

    /** Hook denied the prompt — run returns undefined AND
     * `getHaltReason()` carries the hook-supplied reason so the host can
     * distinguish "blocked" from "natural empty completion". */
    expect(result).toBeUndefined();
    expect(run.getHaltReason()).toBe('PII detected');
  });

  it('Run.getHaltReason() falls back to canonical prompt_denied when deny carries no reason', async () => {
    const registry = new HookRegistry();
    registry.register('UserPromptSubmit', {
      hooks: [
        async (): Promise<UserPromptSubmitHookOutput> => ({
          decision: 'deny',
        }),
      ],
    });

    const { Run } = await import('@/run');
    const { Providers } = await import('@/common');
    const { HumanMessage: HM } = await import('@langchain/core/messages');

    const run = await Run.create<t.IState>({
      runId: 'prompt-deny-canonical',
      graphConfig: {
        type: 'standard',
        agents: [
          {
            agentId: 'a',
            provider: Providers.OPENAI,
            clientOptions: { modelName: 'gpt-4o-mini', apiKey: 'test-key' },
            instructions: 'noop',
            maxContextTokens: 8000,
          },
        ],
      },
      hooks: registry,
      humanInTheLoop: { enabled: false },
    });

    await run.processStream(
      { messages: [new HM('hello')] },
      {
        configurable: { thread_id: 'prompt-deny-canonical-thread' },
        version: 'v2',
      }
    );

    /** Hook returned `deny` without a reason — host gets the
     * canonical 'prompt_denied' string so it can route on a stable
     * discriminator. */
    expect(run.getHaltReason()).toBe('prompt_denied');
  });

  it('Run.getHaltReason() reports prompt_requires_approval when UserPromptSubmit asks', async () => {
    const registry = new HookRegistry();
    registry.register('UserPromptSubmit', {
      hooks: [
        async (): Promise<UserPromptSubmitHookOutput> => ({
          decision: 'ask',
        }),
      ],
    });

    const { Run } = await import('@/run');
    const { Providers } = await import('@/common');
    const { HumanMessage: HM } = await import('@langchain/core/messages');

    const run = await Run.create<t.IState>({
      runId: 'prompt-ask-haltreason',
      graphConfig: {
        type: 'standard',
        agents: [
          {
            agentId: 'a',
            provider: Providers.OPENAI,
            clientOptions: { modelName: 'gpt-4o-mini', apiKey: 'test-key' },
            instructions: 'noop',
            maxContextTokens: 8000,
          },
        ],
      },
      hooks: registry,
      humanInTheLoop: { enabled: false },
    });

    await run.processStream(
      { messages: [new HM('hello')] },
      { configurable: { thread_id: 'prompt-ask-thread' }, version: 'v2' }
    );

    /** Default reason when the hook didn't supply one — host can
     * route on the canonical string. */
    expect(run.getHaltReason()).toBe('prompt_requires_approval');
  });
});
|
|
947
|
+
|
|
948
|
+
/**
 * Checks how `additionalContext` returned by tool hooks reaches the message
 * history: the assertions look for a human-typed message whose
 * `additional_kwargs.source` equals 'hook', and verify it is absent when no
 * hook supplies any context.
 */
describe('ToolNode HITL — additionalContext injection from hooks', () => {
  /** True for a human message tagged as coming from a hook. */
  const isHookInjected = (message: BaseMessage): boolean =>
    message._getType() === 'human' &&
    (message as { additional_kwargs?: { source?: string } }).additional_kwargs
      ?.source === 'hook';

  afterEach(() => {
    jest.restoreAllMocks();
  });

  it('injects PreToolUse + PostToolUse additionalContexts as a single HumanMessage', async () => {
    mockEventDispatch([
      { toolCallId: 'call_1', content: 'host-result', status: 'success' },
    ]);

    const beforeTool = async (): Promise<PreToolUseHookOutput> => ({
      decision: 'allow',
      additionalContext: 'pre-context: be careful',
    });
    const afterTool = async (): Promise<PostToolUseHookOutput> => ({
      additionalContext: 'post-context: tool ran',
    });
    const hookRegistry = new HookRegistry();
    hookRegistry.register('PreToolUse', { hooks: [beforeTool] });
    hookRegistry.register('PostToolUse', { hooks: [afterTool] });

    const toolNode = new ToolNode({
      tools: [createSchemaStub('echo')],
      eventDrivenMode: true,
      agentId: 'agent-x',
      toolCallStepIds: new Map([['call_1', 'step_call_1']]),
      hookRegistry,
      humanInTheLoop: { enabled: false },
    });

    const hitlGraph = buildHITLGraph(toolNode, [
      { id: 'call_1', name: 'echo', args: { command: 'do' } },
    ]);
    const outcome = (await hitlGraph.invoke(
      { messages: [] },
      { configurable: { thread_id: 'ctx-thread-1' } }
    )) as { messages: BaseMessage[] };

    // Both context strings must appear in the same injected message.
    const hookMessage = outcome.messages.find(isHookInjected);
    expect(hookMessage).toBeDefined();

    const hookText = String(hookMessage!.content);
    expect(hookText).toContain('pre-context: be careful');
    expect(hookText).toContain('post-context: tool ran');
  });

  it('does not inject anything when no hook returns additionalContext', async () => {
    mockEventDispatch([
      { toolCallId: 'call_1', content: 'host-result', status: 'success' },
    ]);

    /** Allows the call but contributes no extra context. */
    const allowOnly = async (): Promise<PreToolUseHookOutput> => ({
      decision: 'allow',
    });
    const hookRegistry = new HookRegistry();
    hookRegistry.register('PreToolUse', { hooks: [allowOnly] });

    const toolNode = new ToolNode({
      tools: [createSchemaStub('echo')],
      eventDrivenMode: true,
      agentId: 'agent-x',
      toolCallStepIds: new Map([['call_1', 'step_call_1']]),
      hookRegistry,
      humanInTheLoop: { enabled: false },
    });

    const hitlGraph = buildHITLGraph(toolNode, [
      { id: 'call_1', name: 'echo', args: { command: 'do' } },
    ]);
    const outcome = (await hitlGraph.invoke(
      { messages: [] },
      { configurable: { thread_id: 'ctx-thread-2' } }
    )) as { messages: BaseMessage[] };

    const hookMessage = outcome.messages.find(isHookInjected);
    expect(hookMessage).toBeUndefined();
  });
});
|
|
1041
|
+
|
|
1042
|
+
/**
 * PostToolBatch hook behaviour: the hook receives the batch entries (one per
 * tool call, in call order, mixing success and error), and any
 * `additionalContext` it returns shows up as a hook-tagged human message.
 */
describe('ToolNode HITL — PostToolBatch hook', () => {
  afterEach(() => {
    jest.restoreAllMocks();
  });

  it('fires once per dispatch with all entries (success + error mix), in batch order', async () => {
    // Stubbed host: first call succeeds, second call fails with 'boom'.
    jest
      .spyOn(events, 'safeDispatchCustomEvent')
      .mockImplementation(async (eventName, payload) => {
        if (eventName === 'on_tool_execute') {
          const request = payload as {
            toolCalls: t.ToolCallRequest[];
            resolve: (r: t.ToolExecuteResult[]) => void;
          };
          request.resolve([
            { toolCallId: 'call_1', content: 'ok', status: 'success' },
            {
              toolCallId: 'call_2',
              content: '',
              status: 'error',
              errorMessage: 'boom',
            },
          ]);
        }
      });

    const hookRegistry = new HookRegistry();
    /** Entries handed to the hook; stays undefined if the hook never runs. */
    let captured: PostToolBatchEntry[] | undefined;
    hookRegistry.register('PostToolBatch', {
      hooks: [
        async (input): Promise<PostToolBatchHookOutput> => {
          captured = (input as PostToolBatchHookInput).entries;
          return {};
        },
      ],
    });

    const toolNode = new ToolNode({
      tools: [createSchemaStub('echo'), createSchemaStub('cat')],
      eventDrivenMode: true,
      agentId: 'agent-x',
      toolCallStepIds: new Map([
        ['call_1', 'step_1'],
        ['call_2', 'step_2'],
      ]),
      hookRegistry,
      humanInTheLoop: { enabled: false },
    });

    const hitlGraph = buildHITLGraph(toolNode, [
      { id: 'call_1', name: 'echo', args: { command: 'a' } },
      { id: 'call_2', name: 'cat', args: { command: 'b' } },
    ]);
    await hitlGraph.invoke(
      { messages: [] },
      { configurable: { thread_id: 'batch-thread' } }
    );

    expect(captured).toBeDefined();
    expect(captured!).toHaveLength(2);

    const [succeeded, failed] = captured!;
    expect(succeeded.toolUseId).toBe('call_1');
    expect(succeeded.status).toBe('success');
    expect(succeeded.toolOutput).toBe('ok');
    expect(failed.toolUseId).toBe('call_2');
    expect(failed.status).toBe('error');
    expect(failed.error).toContain('boom');
  });

  it('a PostToolBatch additionalContext gets injected as a HumanMessage', async () => {
    mockEventDispatch([
      { toolCallId: 'call_1', content: 'ok', status: 'success' },
    ]);

    const adviseJson = async (): Promise<PostToolBatchHookOutput> => ({
      additionalContext: 'remember to format the response as JSON',
    });
    const hookRegistry = new HookRegistry();
    hookRegistry.register('PostToolBatch', { hooks: [adviseJson] });

    const toolNode = new ToolNode({
      tools: [createSchemaStub('echo')],
      eventDrivenMode: true,
      agentId: 'agent-x',
      toolCallStepIds: new Map([['call_1', 'step_1']]),
      hookRegistry,
      humanInTheLoop: { enabled: false },
    });

    const hitlGraph = buildHITLGraph(toolNode, [
      { id: 'call_1', name: 'echo', args: { command: 'a' } },
    ]);
    const outcome = (await hitlGraph.invoke(
      { messages: [] },
      { configurable: { thread_id: 'batch-ctx-thread' } }
    )) as { messages: BaseMessage[] };

    // Look for the human message tagged with `source: 'hook'`.
    const hookMessage = outcome.messages.find((message) => {
      if (message._getType() !== 'human') {
        return false;
      }
      const tagged = message as { additional_kwargs?: { source?: string } };
      return tagged.additional_kwargs?.source === 'hook';
    });
    expect(hookMessage).toBeDefined();
    expect(String(hookMessage!.content)).toContain(
      'format the response as JSON'
    );
  });
});
|
|
1152
|
+
|
|
1153
|
+
/**
 * A PreToolUse hook may narrow the decisions a reviewer can take. The suite
 * verifies that the `allowedDecisions` returned by the hook is what the
 * interrupt payload exposes under `review_configs[0].allowed_decisions`.
 */
describe('ToolNode HITL — per-hook allowedDecisions override', () => {
  afterEach(() => {
    jest.restoreAllMocks();
  });

  it('restricts the interrupt review_configs.allowed_decisions to the hook-supplied subset', async () => {
    /** Asks for review and limits the reviewer to approve / reject. */
    const askWithSubset = async (): Promise<PreToolUseHookOutput> => ({
      decision: 'ask',
      allowedDecisions: ['approve', 'reject'],
    });
    const hookRegistry = new HookRegistry();
    hookRegistry.register('PreToolUse', { hooks: [askWithSubset] });

    const toolNode = new ToolNode({
      tools: [createSchemaStub('echo')],
      eventDrivenMode: true,
      agentId: 'agent-x',
      toolCallStepIds: new Map([['call_1', 'step_1']]),
      hookRegistry,
      humanInTheLoop: { enabled: true },
    });

    const hitlGraph = buildHITLGraph(toolNode, [
      { id: 'call_1', name: 'echo', args: { command: 'x' } },
    ]);
    const firstPass = await hitlGraph.invoke(
      { messages: [] },
      { configurable: { thread_id: 'allowed-thread' } }
    );

    // Both guards double as type narrowing for the payload access below.
    if (!isInterrupted<t.HumanInterruptPayload>(firstPass)) {
      throw new Error('expected interrupt');
    }
    const [firstInterrupt] = firstPass.__interrupt__;
    const approval = firstInterrupt.value!;
    if (approval.type !== 'tool_approval') {
      throw new Error('expected tool_approval');
    }

    const [reviewConfig] = approval.review_configs;
    expect(reviewConfig.allowed_decisions).toEqual(['approve', 'reject']);
  });
});
|
|
1198
|
+
|
|
1199
|
+
/**
 * Pre-stream hooks (RunStart, UserPromptSubmit) that return
 * `preventContinuation: true`. In both cases the test confirms the hook ran
 * and that `processStream` resolves to `undefined`.
 */
describe('Run — preventContinuation honored for pre-stream hooks', () => {
  beforeEach(() => {
    jest.restoreAllMocks();
  });
  afterEach(() => {
    jest.restoreAllMocks();
  });

  it('returns undefined without invoking the graph when RunStart hook returns preventContinuation', async () => {
    const { Run } = await import('@/run');
    const { Providers } = await import('@/common');
    const { HumanMessage: HM } = await import('@langchain/core/messages');

    /** Flipped by the hook so the test can prove it actually ran. */
    let runStartFired = false;
    const hookRegistry = new HookRegistry();
    hookRegistry.register('RunStart', {
      hooks: [
        async (): Promise<RunStartHookOutput> => {
          runStartFired = true;
          return {
            preventContinuation: true,
            stopReason: 'pre-flight policy halted run',
          };
        },
      ],
    });

    const run = await Run.create<t.IState>({
      runId: 'pc-runstart',
      graphConfig: {
        type: 'standard',
        agents: [
          {
            agentId: 'a',
            provider: Providers.OPENAI,
            clientOptions: { modelName: 'gpt-4o-mini', apiKey: 'test-key' },
            instructions: 'noop',
            maxContextTokens: 8000,
          },
        ],
      },
      hooks: hookRegistry,
      humanInTheLoop: { enabled: false },
    });

    const streamed = await run.processStream(
      { messages: [new HM('hello')] },
      { configurable: { thread_id: 'pc-thread-1' }, version: 'v2' }
    );

    expect(runStartFired).toBe(true);
    expect(streamed).toBeUndefined();
    /** The run was halted up front, so no interrupt may have been recorded. */
    expect(run.getInterrupt()).toBeUndefined();
  });

  it('returns undefined when UserPromptSubmit hook returns preventContinuation', async () => {
    const { Run } = await import('@/run');
    const { Providers } = await import('@/common');
    const { HumanMessage: HM } = await import('@langchain/core/messages');

    /** Flipped by the hook so the test can prove it actually ran. */
    let promptFired = false;
    const hookRegistry = new HookRegistry();
    hookRegistry.register('UserPromptSubmit', {
      hooks: [
        async (): Promise<UserPromptSubmitHookOutput> => {
          promptFired = true;
          return {
            preventContinuation: true,
            stopReason: 'rate limit reached',
          };
        },
      ],
    });

    const run = await Run.create<t.IState>({
      runId: 'pc-prompt',
      graphConfig: {
        type: 'standard',
        agents: [
          {
            agentId: 'a',
            provider: Providers.OPENAI,
            clientOptions: { modelName: 'gpt-4o-mini', apiKey: 'test-key' },
            instructions: 'noop',
            maxContextTokens: 8000,
          },
        ],
      },
      hooks: hookRegistry,
      humanInTheLoop: { enabled: false },
    });

    const streamed = await run.processStream(
      { messages: [new HM('hello')] },
      { configurable: { thread_id: 'pc-thread-2' }, version: 'v2' }
    );

    expect(promptFired).toBe(true);
    expect(streamed).toBeUndefined();
  });
});
|
|
1307
|
+
|
|
1308
|
+
/**
 * Halting behaviour once a run is under way: a hook returning
 * `preventContinuation` records a halt reason on the run, and the
 * registry's halt signal does not linger after `processStream` returns.
 */
describe('Mid-flight preventContinuation halts the run after the current step', () => {
  afterEach(() => {
    jest.restoreAllMocks();
  });

  // NOTE(review): `providers` (lowercase) is resolved from outside this
  // block, whereas other tests in this file import `Providers` from
  // '@/common' — confirm it is the intended binding.

  it('PostToolBatch hook with preventContinuation breaks the stream loop and skips Stop', async () => {
    mockEventDispatch([
      { toolCallId: 'call_1', content: 'ok', status: 'success' },
    ]);

    let stopFired = false;
    /** Requests a halt as soon as the tool batch has finished. */
    const haltAfterBatch = async (): Promise<PostToolBatchHookOutput> => {
      return {
        preventContinuation: true,
        stopReason: 'rate-limit policy halt',
      };
    };
    /** Only records that the Stop hook was reached. */
    const recordStop = async (): Promise<Record<string, never>> => {
      stopFired = true;
      return {};
    };
    const hookRegistry = new HookRegistry();
    hookRegistry.register('PostToolBatch', { hooks: [haltAfterBatch] });
    hookRegistry.register('Stop', { hooks: [recordStop] });

    const toolNode = new ToolNode({
      tools: [createSchemaStub('echo')],
      eventDrivenMode: true,
      agentId: 'agent-x',
      toolCallStepIds: new Map([['call_1', 'step_1']]),
      hookRegistry,
      humanInTheLoop: { enabled: false },
    });

    /** Two-node graph: a stub agent emits one tool call, then the tool node runs. */
    const graph = new StateGraph(MessagesAnnotation)
      .addNode('agent', () => {
        return {
          messages: [
            new AIMessage({
              content: '',
              tool_calls: [
                { id: 'call_1', name: 'echo', args: { command: 'x' } },
              ],
            }),
          ],
        };
      })
      .addNode('tools', toolNode)
      .addEdge(START, 'agent')
      .addEdge('agent', 'tools')
      .addEdge('tools', END)
      .compile({ checkpointer: new MemorySaver() });

    const { Run } = await import('@/run');
    const agentRun = await Run.create<t.IState>({
      runId: 'halt-mid-flight-1',
      graphConfig: {
        type: 'standard',
        agents: [
          {
            agentId: 'a',
            provider: providers.OPENAI,
            clientOptions: { modelName: 'gpt-4o-mini', apiKey: 'test-key' },
            instructions: 'noop',
            maxContextTokens: 8000,
          },
        ],
      },
      hooks: hookRegistry,
      humanInTheLoop: { enabled: false },
    });
    /** Swap in the handcrafted graph so the PostToolBatch hook fires
     * while `processStream` drives a real LangGraph stream. */
    agentRun.graphRunnable = graph as unknown as t.CompiledStateWorkflow;

    await agentRun.processStream(
      { messages: [] },
      { configurable: { thread_id: 'halt-thread-1' }, version: 'v2' }
    );

    expect(agentRun.getHaltReason()).toBe('rate-limit policy halt');
    expect(stopFired).toBe(false);
  });

  it('clears halt signal between processStream invocations', async () => {
    const haltAtRunStart = async (): Promise<RunStartHookOutput> => {
      return {
        preventContinuation: true,
        stopReason: 'first run halted',
      };
    };
    const hookRegistry = new HookRegistry();
    hookRegistry.register('RunStart', { hooks: [haltAtRunStart] });

    const { Run } = await import('@/run');
    const { HumanMessage: HM } = await import('@langchain/core/messages');

    const agentRun = await Run.create<t.IState>({
      runId: 'halt-clear-1',
      graphConfig: {
        type: 'standard',
        agents: [
          {
            agentId: 'a',
            provider: providers.OPENAI,
            clientOptions: { modelName: 'gpt-4o-mini', apiKey: 'test-key' },
            instructions: 'noop',
            maxContextTokens: 8000,
          },
        ],
      },
      hooks: hookRegistry,
      humanInTheLoop: { enabled: false },
    });

    await agentRun.processStream(
      { messages: [new HM('first')] },
      { configurable: { thread_id: 't-1' }, version: 'v2' }
    );

    /** Even though a RunStart halt returns before any streaming, the
     * registry must hold no halt signal for this run id afterwards, so
     * a later call starts fresh. */
    expect(hookRegistry.getHaltSignal('halt-clear-1')).toBeUndefined();
  });
});
|
|
1439
|
+
|
|
1440
|
+
/**
 * Hook outputs flagged `async: true` are fire-and-forget: their
 * decision, context and halt fields must have no effect on the run.
 */
describe('Async fire-and-forget hooks ignore decision/context fields', () => {
  afterEach(() => {
    jest.restoreAllMocks();
  });

  it('PreToolUse with `async: true` does not block the tool even when decision is `deny`', async () => {
    mockEventDispatch([
      { toolCallId: 'call_1', content: 'ran', status: 'success' },
    ]);

    let bgFired = false;
    /** Schedules a background side effect, then returns an async
     * output whose remaining fields are expected to be ignored. */
    const backgroundDeny = async (): Promise<PreToolUseHookOutput> => {
      void Promise.resolve().then(() => {
        bgFired = true;
      });
      return {
        async: true,
        decision: 'deny',
        reason: 'this should be ignored',
        additionalContext: 'this should also be ignored',
      };
    };
    const hookRegistry = new HookRegistry();
    hookRegistry.register('PreToolUse', { hooks: [backgroundDeny] });

    const toolNode = new ToolNode({
      tools: [createSchemaStub('echo')],
      eventDrivenMode: true,
      agentId: 'agent-x',
      toolCallStepIds: new Map([['call_1', 'step_1']]),
      hookRegistry,
      humanInTheLoop: { enabled: false },
    });

    const graph = buildHITLGraph(toolNode, [
      { id: 'call_1', name: 'echo', args: { command: 'x' } },
    ]);
    const finalState = (await graph.invoke(
      { messages: [] },
      { configurable: { thread_id: 'async-1' } }
    )) as { messages: BaseMessage[] };

    /** The tool ran normally: the async output's `deny` was ignored. */
    const toolMsg = finalState.messages.find(
      (msg): msg is ToolMessage => msg._getType() === 'tool'
    );
    expect(toolMsg).toBeDefined();
    expect(toolMsg?.status).not.toBe('error');
    expect(toolMsg?.content).toBe('ran');

    /** Give the scheduled background work a turn of the event loop. */
    await new Promise((resolve) => setImmediate(resolve));
    expect(bgFired).toBe(true);

    /** `additionalContext` was ignored too: no hook-sourced human message. */
    const injected = finalState.messages.find((msg) => {
      if (msg._getType() !== 'human') {
        return false;
      }
      const kwargs = (msg as { additional_kwargs?: { source?: string } })
        .additional_kwargs;
      return kwargs?.source === 'hook';
    });
    expect(injected).toBeUndefined();
  });

  it('PostToolUse with `async: true` does not halt the run even when preventContinuation is set', async () => {
    mockEventDispatch([
      { toolCallId: 'call_1', content: 'ran', status: 'success' },
    ]);

    const backgroundHalt = async (): Promise<PostToolUseHookOutput> => {
      return {
        async: true,
        preventContinuation: true,
        stopReason: 'should not halt',
      };
    };
    const hookRegistry = new HookRegistry();
    hookRegistry.register('PostToolUse', { hooks: [backgroundHalt] });

    const toolNode = new ToolNode({
      tools: [createSchemaStub('echo')],
      eventDrivenMode: true,
      agentId: 'agent-x',
      toolCallStepIds: new Map([['call_1', 'step_1']]),
      hookRegistry,
      humanInTheLoop: { enabled: false },
    });

    const graph = buildHITLGraph(toolNode, [
      { id: 'call_1', name: 'echo', args: { command: 'x' } },
    ]);
    await graph.invoke(
      { messages: [] },
      { configurable: { thread_id: 'async-2' } }
    );

    /** The async output's `preventContinuation` must not raise a halt
     * signal. This standalone graph runs with `runId = ''` (no
     * `config.configurable.run_id` set), so that key is the one checked. */
    expect(hookRegistry.getHaltSignal('')).toBeUndefined();
  });
});
|
|
1547
|
+
|
|
1548
|
+
describe('Codex review fixes', () => {
|
|
1549
|
+
/** Undo every `jest.spyOn` replacement so spies never leak between tests. */
afterEach((): void => {
  jest.restoreAllMocks();
});
|
|
1552
|
+
|
|
1553
|
+
/**
 * A PreToolUse hook registered per-session (keyed by run id) must still
 * be registered after the run pauses on an interrupt, fire again when
 * the run is resumed, and be gone once the run completes.
 */
it('preserves session-scoped hooks across HITL interrupt so the policy still fires on resume', async () => {
  /** Counts host-side tool executions and answers each with a stub result. */
  let dispatchCalls = 0;
  jest
    .spyOn(events, 'safeDispatchCustomEvent')
    .mockImplementation(async (event, data) => {
      if (event !== 'on_tool_execute') {
        return;
      }
      dispatchCalls += 1;
      const execRequest = data as {
        toolCalls: t.ToolCallRequest[];
        resolve: (r: t.ToolExecuteResult[]) => void;
      };
      const results = execRequest.toolCalls.map((call) => ({
        toolCallId: call.id,
        content: 'host-result',
        status: 'success' as const,
      }));
      execRequest.resolve(results);
    });

  /**
   * The policy hook is attached to this run id through
   * `registerSession`, the way a host scopes policy to a single run
   * without exposing it to concurrent ones. What is under test: the
   * matcher has to survive the interrupt so `Run.resume()` re-applies
   * the policy when the node runs again.
   */
  const runId = 'session-hook-preserve';
  let preCallCount = 0;
  const askEveryTime = async (): Promise<PreToolUseHookOutput> => {
    preCallCount += 1;
    return { decision: 'ask', reason: 'session policy' };
  };
  const hookRegistry = new HookRegistry();
  hookRegistry.registerSession(runId, 'PreToolUse', {
    hooks: [askEveryTime],
  });

  const toolNode = new ToolNode({
    tools: [createSchemaStub('echo')],
    eventDrivenMode: true,
    agentId: 'agent-x',
    toolCallStepIds: new Map([['call_1', 'step_call_1']]),
    hookRegistry,
    humanInTheLoop: { enabled: true },
  });

  const graph = new StateGraph(MessagesAnnotation)
    .addNode('agent', (): MessagesUpdate => {
      return {
        messages: [
          new AIMessage({
            content: '',
            tool_calls: [
              { id: 'call_1', name: 'echo', args: { command: 'x' } },
            ],
          }),
        ],
      };
    })
    .addNode('tools', toolNode)
    .addEdge(START, 'agent')
    .addEdge('agent', 'tools')
    .addEdge('tools', END)
    .compile({ checkpointer: new MemorySaver() });

  const { Run } = await import('@/run');
  const agentRun = await Run.create<t.IState>({
    runId,
    graphConfig: {
      type: 'standard',
      agents: [
        {
          agentId: 'a',
          provider: providers.OPENAI,
          clientOptions: { modelName: 'gpt-4o-mini', apiKey: 'test-key' },
          instructions: 'noop',
          maxContextTokens: 8000,
        },
      ],
    },
    hooks: hookRegistry,
    humanInTheLoop: { enabled: true },
  });
  agentRun.graphRunnable = graph as unknown as t.CompiledStateWorkflow;

  const callerConfig = {
    configurable: { thread_id: 'session-thread-1' },
    version: 'v2' as const,
  };

  await agentRun.processStream({ messages: [] }, callerConfig);

  /** Paused on the interrupt after exactly one hook call, with nothing
   * dispatched to the host yet. The session matcher MUST still be
   * registered — the regression was cleanup in `finally` removing it,
   * which let the resume bypass the policy. */
  expect(agentRun.getInterrupt()).toBeDefined();
  expect(preCallCount).toBe(1);
  expect(hookRegistry.hasHookFor('PreToolUse', runId)).toBe(true);
  expect(dispatchCalls).toBe(0);

  await agentRun.resume([{ type: 'approve' }], callerConfig);

  /** The hook fired a second time on resume, then the tool executed. */
  expect(preCallCount).toBe(2);
  expect(dispatchCalls).toBe(1);
  /** Natural completion clears the session matchers, so the next run
   * on this registry starts clean. */
  expect(hookRegistry.hasHookFor('PreToolUse', runId)).toBe(false);
});
|
|
1668
|
+
|
|
1669
|
+
/**
 * In a batch where one tool is denied and another needs approval, the
 * denied tool's ON_RUN_STEP_COMPLETED event must be emitted once in
 * total — not once before the interrupt and again after the resume.
 */
it('denied tool in a deny+ask batch dispatches ON_RUN_STEP_COMPLETED exactly once across interrupt + resume', async () => {
  /** Tool-call ids seen on ON_RUN_STEP_COMPLETED, in dispatch order. */
  const stepCompletedDispatches: string[] = [];
  const dispatchesFor = (callId: string): string[] =>
    stepCompletedDispatches.filter((id) => id === callId);

  /** The spy records step-completed events and answers tool execution
   * requests. If the denial's side effects were not deferred, `call_a`
   * would be recorded twice for a single logical denial. */
  jest
    .spyOn(events, 'safeDispatchCustomEvent')
    .mockImplementation(async (event, data) => {
      if (event === GraphEvents.ON_RUN_STEP_COMPLETED) {
        const completed = data as {
          result?: { tool_call?: { id?: string } };
        };
        const callId = completed.result?.tool_call?.id;
        if (callId != null) {
          stepCompletedDispatches.push(callId);
        }
      } else if (event === 'on_tool_execute') {
        const execRequest = data as {
          toolCalls: t.ToolCallRequest[];
          resolve: (r: t.ToolExecuteResult[]) => void;
        };
        const results = execRequest.toolCalls.map((call) => ({
          toolCallId: call.id,
          content: `ran:${call.name}`,
          status: 'success' as const,
        }));
        execRequest.resolve(results);
      }
    });

  /** Policy: `tool_a` is denied outright, anything else needs review. */
  const hookRegistry = new HookRegistry();
  hookRegistry.register('PreToolUse', {
    hooks: [
      async (input): Promise<PreToolUseHookOutput> => {
        return input.toolName === 'tool_a'
          ? { decision: 'deny', reason: 'policy:a' }
          : { decision: 'ask', reason: 'policy:b-needs-review' };
      },
    ],
  });

  const toolNode = new ToolNode({
    tools: [createSchemaStub('tool_a'), createSchemaStub('tool_b')],
    eventDrivenMode: true,
    agentId: 'agent-x',
    toolCallStepIds: new Map([
      ['call_a', 'step_a'],
      ['call_b', 'step_b'],
    ]),
    hookRegistry,
    humanInTheLoop: { enabled: true },
  });

  const graph = buildHITLGraph(toolNode, [
    { id: 'call_a', name: 'tool_a', args: { command: 'a' } },
    { id: 'call_b', name: 'tool_b', args: { command: 'b' } },
  ]);
  const threadConfig = { configurable: { thread_id: 'dedup-thread' } };

  await graph.invoke({ messages: [] }, threadConfig);
  /** First pass ended in the interrupt, so the deferred denial side
   * effects have not been flushed: nothing recorded for `call_a`. */
  expect(dispatchesFor('call_a')).toEqual([]);

  await graph.invoke(
    new Command({ resume: [{ type: 'approve' }] }),
    threadConfig
  );

  /** After the resume each tool has exactly one step-completed event:
   * the denied one from the deferred flush, the approved one from
   * normal execution. */
  expect(dispatchesFor('call_a')).toEqual(['call_a']);
  expect(dispatchesFor('call_b')).toEqual(['call_b']);
});
|
|
1753
|
+
|
|
1754
|
+
/**
 * When the hook advertises `allowedDecisions: ['approve', 'reject']`, a
 * resume carrying an `edit` decision must produce an error tool message
 * and must not dispatch the tool to the host.
 */
it('enforces allowedDecisions on resume — host-submitted decision outside the allowlist is rejected', async () => {
  /** Names of every tool the host was actually asked to execute. */
  const dispatchedToolNames: string[] = [];
  jest
    .spyOn(events, 'safeDispatchCustomEvent')
    .mockImplementation(async (event, data) => {
      if (event !== 'on_tool_execute') {
        return;
      }
      const execRequest = data as {
        toolCalls: t.ToolCallRequest[];
        resolve: (r: t.ToolExecuteResult[]) => void;
      };
      dispatchedToolNames.push(
        ...execRequest.toolCalls.map((call) => call.name)
      );
      const results = execRequest.toolCalls.map((call) => ({
        toolCallId: call.id,
        content: 'ran',
        status: 'success' as const,
      }));
      execRequest.resolve(results);
    });

  /** The hook permits approve/reject only. Should a buggy or hostile
   * host UI submit `edit` anyway, the SDK has to fail closed rather
   * than mutate the args and run the tool. */
  const askApproveOrReject = async (): Promise<PreToolUseHookOutput> => {
    return {
      decision: 'ask',
      allowedDecisions: ['approve', 'reject'],
    };
  };
  const hookRegistry = new HookRegistry();
  hookRegistry.register('PreToolUse', { hooks: [askApproveOrReject] });

  const toolNode = new ToolNode({
    tools: [createSchemaStub('echo')],
    eventDrivenMode: true,
    agentId: 'agent-x',
    toolCallStepIds: new Map([['call_1', 'step_1']]),
    hookRegistry,
    humanInTheLoop: { enabled: true },
  });

  const graph = buildHITLGraph(toolNode, [
    { id: 'call_1', name: 'echo', args: { command: 'original' } },
  ]);
  const threadConfig = { configurable: { thread_id: 'allowed-enforce' } };

  await graph.invoke({ messages: [] }, threadConfig);

  /** Resume with `edit`, which the allowlist does not contain. */
  const resumed = (await graph.invoke(
    new Command({
      resume: [{ type: 'edit', updatedInput: { command: 'malicious' } }],
    }),
    threadConfig
  )) as { messages: BaseMessage[] };

  const toolMessages = resumed.messages.filter(
    (msg): msg is ToolMessage => msg._getType() === 'tool'
  );
  expect(toolMessages).toHaveLength(1);

  /** The call was blocked and the mutated args never reached the host. */
  expect(toolMessages[0].status).toBe('error');
  const blockedText = String(toolMessages[0].content);
  expect(blockedText).toContain('not in allowedDecisions');
  expect(blockedText).toContain('approve');
  expect(blockedText).toContain('reject');
  expect(dispatchedToolNames).toEqual([]);
});
|
|
1829
|
+
|
|
1830
|
+
/**
 * Counterpart to the rejection case: an `approve` decision that is in
 * the hook's allowlist lets the tool run with its original arguments.
 */
it('enforces allowedDecisions on resume — approved decision passes through when in the allowlist', async () => {
  /** Arguments of every tool call the host was asked to execute. */
  const dispatchedArgs: Array<Record<string, unknown>> = [];
  jest
    .spyOn(events, 'safeDispatchCustomEvent')
    .mockImplementation(async (event, data) => {
      if (event !== 'on_tool_execute') {
        return;
      }
      const execRequest = data as {
        toolCalls: t.ToolCallRequest[];
        resolve: (r: t.ToolExecuteResult[]) => void;
      };
      dispatchedArgs.push(...execRequest.toolCalls.map((call) => call.args));
      const results = execRequest.toolCalls.map((call) => ({
        toolCallId: call.id,
        content: 'ran',
        status: 'success' as const,
      }));
      execRequest.resolve(results);
    });

  const askApproveOrReject = async (): Promise<PreToolUseHookOutput> => {
    return {
      decision: 'ask',
      allowedDecisions: ['approve', 'reject'],
    };
  };
  const hookRegistry = new HookRegistry();
  hookRegistry.register('PreToolUse', { hooks: [askApproveOrReject] });

  const toolNode = new ToolNode({
    tools: [createSchemaStub('echo')],
    eventDrivenMode: true,
    agentId: 'agent-x',
    toolCallStepIds: new Map([['call_1', 'step_1']]),
    hookRegistry,
    humanInTheLoop: { enabled: true },
  });

  const graph = buildHITLGraph(toolNode, [
    { id: 'call_1', name: 'echo', args: { command: 'original' } },
  ]);
  const threadConfig = { configurable: { thread_id: 'allowed-pass' } };

  await graph.invoke({ messages: [] }, threadConfig);

  /** Resume with `approve`, which the allowlist names explicitly. */
  await graph.invoke(
    new Command({ resume: [{ type: 'approve' }] }),
    threadConfig
  );

  expect(dispatchedArgs).toEqual([{ command: 'original' }]);
});
|
|
1885
|
+
|
|
1886
|
+
/**
 * `run.getInterrupt<T>()` hands back whatever payload a custom node
 * passed to `interrupt()`, typed as the caller-supplied `T`.
 */
it('getInterrupt<T>() returns the captured payload typed as the host-asserted shape', async () => {
  /**
   * Payload shape known only to the host. The test asserts that the
   * values passed to `interrupt()` come back unchanged under this type.
   */
  interface MyCustomPayload {
    type: 'custom_review';
    diff: string;
    reviewerHints: string[];
  }

  const langgraph = await import('@langchain/langgraph');

  /** Single-node graph whose only node pauses with the custom payload. */
  const graph = new StateGraph(MessagesAnnotation)
    .addNode('clarifier', () => {
      langgraph.interrupt({
        type: 'custom_review',
        diff: '+ added line',
        reviewerHints: ['check formatting'],
      } satisfies MyCustomPayload);
      return { messages: [] };
    })
    .addEdge(START, 'clarifier')
    .addEdge('clarifier', END)
    .compile({ checkpointer: new MemorySaver() });

  const { Run } = await import('@/run');
  const agentRun = await Run.create<t.IState>({
    runId: 'custom-interrupt',
    graphConfig: {
      type: 'standard',
      agents: [
        {
          agentId: 'a',
          provider: providers.OPENAI,
          clientOptions: { modelName: 'gpt-4o-mini', apiKey: 'test-key' },
          instructions: 'noop',
          maxContextTokens: 8000,
        },
      ],
    },
    humanInTheLoop: { enabled: true },
  });
  agentRun.graphRunnable = graph as unknown as t.CompiledStateWorkflow;

  await agentRun.processStream(
    { messages: [] },
    { configurable: { thread_id: 'custom-interrupt-thread' }, version: 'v2' }
  );

  const captured = agentRun.getInterrupt<MyCustomPayload>();
  expect(captured).toBeDefined();
  expect(captured?.payload.type).toBe('custom_review');
  expect(captured?.payload.diff).toBe('+ added line');
  expect(captured?.payload.reviewerHints).toEqual(['check formatting']);
});
|
|
1947
|
+
|
|
1948
|
+
/**
 * The interrupt type guards take `unknown` input: they return `false`
 * for anything that is not the matching payload and never throw.
 */
it('isToolApprovalInterrupt / isAskUserQuestionInterrupt narrow safely from `unknown` (defensive)', async () => {
  const { isToolApprovalInterrupt, isAskUserQuestionInterrupt } = await import(
    '@/types/hitl'
  );

  /** Hosts may pass any runtime value from a custom interrupt, so each
   * of these has to be rejected without an exception. */
  const notToolApproval: unknown[] = [
    null,
    undefined,
    'string',
    42,
    {},
    { type: 'something_else' },
  ];
  for (const candidate of notToolApproval) {
    expect(isToolApprovalInterrupt(candidate)).toBe(false);
  }

  const toolApproval: unknown = {
    type: 'tool_approval',
    action_requests: [],
    review_configs: [],
  };
  expect(isToolApprovalInterrupt(toolApproval)).toBe(true);

  const notAskUserQuestion: unknown[] = [null, { type: 'tool_approval' }];
  for (const candidate of notAskUserQuestion) {
    expect(isAskUserQuestionInterrupt(candidate)).toBe(false);
  }

  const askUserQuestion: unknown = {
    type: 'ask_user_question',
    question: { question: 'why' },
  };
  expect(isAskUserQuestionInterrupt(askUserQuestion)).toBe(true);
});
|
|
1981
|
+
|
|
1982
|
+
/**
 * A PreToolUse hook may return `ask` together with `updatedInput`. The
 * rewritten arguments must be what the reviewer sees in the interrupt
 * payload and what the host receives once the call is approved.
 */
it('hook returning ask + updatedInput rewrites args BEFORE the interrupt and BEFORE host execution', async () => {
  /** Arguments of every tool call the host was asked to execute. */
  const dispatchedArgs: Array<Record<string, unknown>> = [];
  jest
    .spyOn(events, 'safeDispatchCustomEvent')
    .mockImplementation(async (event, data) => {
      if (event !== 'on_tool_execute') {
        return;
      }
      const execRequest = data as {
        toolCalls: t.ToolCallRequest[];
        resolve: (r: t.ToolExecuteResult[]) => void;
      };
      dispatchedArgs.push(...execRequest.toolCalls.map((call) => call.args));
      const results = execRequest.toolCalls.map((call) => ({
        toolCallId: call.id,
        content: 'ran',
        status: 'success' as const,
      }));
      execRequest.resolve(results);
    });

  /**
   * One output carries both a sanitising rewrite and `ask`. In practice
   * one matcher redacts secrets from the args while another demands
   * human approval; both signals have to take effect.
   */
  const redactThenAsk = async (): Promise<PreToolUseHookOutput> => {
    return {
      decision: 'ask',
      reason: 'review redacted args',
      updatedInput: { command: 'redacted-command' },
    };
  };
  const hookRegistry = new HookRegistry();
  hookRegistry.register('PreToolUse', { hooks: [redactThenAsk] });

  const toolNode = new ToolNode({
    tools: [createSchemaStub('echo')],
    eventDrivenMode: true,
    agentId: 'agent-x',
    toolCallStepIds: new Map([['call_1', 'step_1']]),
    hookRegistry,
    humanInTheLoop: { enabled: true },
  });

  const graph = buildHITLGraph(toolNode, [
    { id: 'call_1', name: 'echo', args: { command: 'original-secret' } },
  ]);
  const threadConfig = { configurable: { thread_id: 'ask-with-update' } };

  const firstPass = await graph.invoke({ messages: [] }, threadConfig);
  if (!isInterrupted<t.HumanInterruptPayload>(firstPass)) {
    throw new Error('expected interrupt');
  }
  const payload = firstPass.__interrupt__[0].value!;
  if (payload.type !== 'tool_approval') {
    throw new Error('expected tool_approval');
  }

  /** The reviewer is shown the REWRITTEN args, not the original
   * secret-bearing ones. */
  expect(payload.action_requests[0].arguments).toEqual({
    command: 'redacted-command',
  });

  await graph.invoke(
    new Command({ resume: [{ type: 'approve' }] }),
    threadConfig
  );

  /** Host execution receives the rewritten args as well; approval must
   * not silently drop the policy's redaction. */
  expect(dispatchedArgs).toEqual([{ command: 'redacted-command' }]);
});
|
|
2058
|
+
|
|
2059
|
+
it('captures interrupt even when payload is null (custom node calling interrupt(null))', async () => {
  const lg = await import('@langchain/langgraph');

  /** Flipped by the Stop hook. A paused run must leave this `false`. */
  let stopHookRan = false;
  const hookRegistry = new HookRegistry();
  hookRegistry.register('Stop', {
    hooks: [
      async (): Promise<Record<string, never>> => {
        stopHookRan = true;
        return {};
      },
    ],
  });

  /** One-node graph: the node pauses with a `null` payload. That is a
   * legitimate use — the pause itself is the signal and carries no
   * metadata. */
  const pausingGraph = new StateGraph(MessagesAnnotation)
    .addNode('pauser', () => {
      lg.interrupt(null);
      return { messages: [] };
    })
    .addEdge(START, 'pauser')
    .addEdge('pauser', END)
    .compile({ checkpointer: new MemorySaver() });

  const { Run } = await import('@/run');
  const pausedRun = await Run.create<t.IState>({
    runId: 'null-payload-interrupt',
    graphConfig: {
      type: 'standard',
      agents: [
        {
          agentId: 'a',
          provider: providers.OPENAI,
          clientOptions: { modelName: 'gpt-4o-mini', apiKey: 'test-key' },
          instructions: 'noop',
          maxContextTokens: 8000,
        },
      ],
    },
    hooks: hookRegistry,
    humanInTheLoop: { enabled: true },
  });
  /** Swap in the custom graph so the run drives the pausing node. */
  pausedRun.graphRunnable = pausingGraph as unknown as t.CompiledStateWorkflow;

  await pausedRun.processStream(
    { messages: [] },
    {
      configurable: { thread_id: 'null-payload-thread' },
      version: 'v2',
    }
  );

  /** The run paused rather than completed: `getInterrupt` yields a
   * result whose payload is still `null`, and the Stop hook never
   * ran. Before the fix both of those were inverted. */
  const captured = pausedRun.getInterrupt<unknown>();
  expect(captured).toBeDefined();
  expect(captured!.payload).toBeNull();
  expect(stopHookRan).toBe(false);
});
|
|
2120
|
+
|
|
2121
|
+
it('halt signal raised by run A does not bleed into a concurrent run B sharing the same registry', async () => {
  /**
   * A single registry serves two runs. The RunStart hook raises
   * preventContinuation for run A only; run B gets no halt signal.
   * If halt signals were not scoped per session, run B's stream-loop
   * poll would pick up A's signal and quietly terminate. With
   * scoping, every run reads nothing but its own halt entry.
   */
  const sharedRegistry = new HookRegistry();
  let runStartCalls = 0;
  sharedRegistry.register('RunStart', {
    hooks: [
      async (input): Promise<RunStartHookOutput> => {
        runStartCalls += 1;
        /** Only run A is halted; every other run continues. */
        return input.runId === 'run-a'
          ? { preventContinuation: true, stopReason: 'A halted' }
          : {};
      },
    ],
  });

  const { Run } = await import('@/run');
  const { HumanMessage: HM } = await import('@langchain/core/messages');

  /** Creates a run bound to the shared registry and backed by a no-op
   * graph, so the real model is never reached. */
  const createRun = async (
    runId: string
  ): Promise<Awaited<ReturnType<typeof Run.create<t.IState>>>> => {
    const created = await Run.create<t.IState>({
      runId,
      graphConfig: {
        type: 'standard',
        agents: [
          {
            agentId: 'a',
            provider: providers.OPENAI,
            clientOptions: { modelName: 'gpt-4o-mini', apiKey: 'test-key' },
            instructions: 'noop',
            maxContextTokens: 8000,
          },
        ],
      },
      hooks: sharedRegistry,
      humanInTheLoop: { enabled: false },
    });
    created.graphRunnable = new StateGraph(MessagesAnnotation)
      .addNode('noop', (): MessagesUpdate => ({ messages: [] }))
      .addEdge(START, 'noop')
      .addEdge('noop', END)
      .compile() as unknown as t.CompiledStateWorkflow;
    return created;
  };

  const haltedRun = await createRun('run-a');
  const cleanRun = await createRun('run-b');

  /** Run A: its preventContinuation is stored in the per-session halt
   * map under key `'run-a'` and causes a pre-stream early return.
   * That early-return path clears its own halt signal in the same
   * step, so run B cannot observe it even briefly. */
  await haltedRun.processStream(
    { messages: [new HM('a')] },
    { configurable: { thread_id: 'thread-a' }, version: 'v2' }
  );
  expect(haltedRun.getHaltReason()).toBe('A halted');

  /** Neither session has a live signal: A's halt was keyed to A's
   * session id and was cleared in A's pre-stream finally path. */
  expect(sharedRegistry.getHaltSignal('run-b')).toBeUndefined();
  expect(sharedRegistry.getHaltSignal('run-a')).toBeUndefined();

  /** Run B: RunStart returns no halt, so processStream passes the
   * pre-stream gate, runs the no-op graph, and finishes unhalted. */
  runStartCalls = 0;
  await cleanRun.processStream(
    { messages: [new HM('b')] },
    { configurable: { thread_id: 'thread-b' }, version: 'v2' }
  );
  expect(runStartCalls).toBe(1);
  expect(cleanRun.getHaltReason()).toBeUndefined();
});
|
|
2213
|
+
|
|
2214
|
+
it('review_configs entries carry tool_call_id so duplicate-tool batches map unambiguously', async () => {
  mockEventDispatch([]);

  /** Every tool call is sent to human review. */
  const askRegistry = new HookRegistry();
  askRegistry.register('PreToolUse', {
    hooks: [
      async (): Promise<PreToolUseHookOutput> => ({
        decision: 'ask',
        reason: 'review',
      }),
    ],
  });

  /** The same tool name appears twice in one batch. Positional
   * mapping is unreliable for hosts that reorder; tool_call_id lets
   * the UI pair review_configs with action_requests directly. */
  const toolNode = new ToolNode({
    tools: [createSchemaStub('echo')],
    eventDrivenMode: true,
    agentId: 'agent-x',
    toolCallStepIds: new Map([
      ['call_first', 'step_first'],
      ['call_second', 'step_second'],
    ]),
    hookRegistry: askRegistry,
    humanInTheLoop: { enabled: true },
  });

  const hitlGraph = buildHITLGraph(toolNode, [
    { id: 'call_first', name: 'echo', args: { command: 'a' } },
    { id: 'call_second', name: 'echo', args: { command: 'b' } },
  ]);

  const firstPass = await hitlGraph.invoke(
    { messages: [] },
    { configurable: { thread_id: 'duplicate-tool' } }
  );
  if (!isInterrupted<t.HumanInterruptPayload>(firstPass)) {
    throw new Error('expected interrupt');
  }
  const approval = firstPass.__interrupt__[0].value!;
  if (approval.type !== 'tool_approval') {
    throw new Error('expected tool_approval');
  }

  const expectedIds = ['call_first', 'call_second'];

  /** Each review_config has the tool_call_id of the action_request at
   * the same index, so a UI can key a Map by tool_call_id instead of
   * trusting positional order. */
  expect(approval.review_configs).toEqual(
    expectedIds.map((id) => ({
      action_name: 'echo',
      tool_call_id: id,
      allowed_decisions: ['approve', 'reject', 'edit', 'respond'],
    }))
  );
  /** The action_requests expose the same ids, so the pairing can be
   * derived from id alone even when tool names collide. */
  expect(approval.action_requests.map((r) => r.tool_call_id)).toEqual(
    expectedIds
  );
});
|
|
2279
|
+
|
|
2280
|
+
it('malformed edit decision (missing updatedInput) is blocked, not approved with garbage args', async () => {
  /** Number of `on_tool_execute` events seen; must stay at zero. */
  let toolDispatches = 0;
  jest
    .spyOn(events, 'safeDispatchCustomEvent')
    .mockImplementation(async (event, data) => {
      if (event === 'on_tool_execute') {
        toolDispatches += 1;
        const request = data as {
          toolCalls: t.ToolCallRequest[];
          resolve: (r: t.ToolExecuteResult[]) => void;
        };
        request.resolve([]);
      }
    });

  const toolNode = new ToolNode({
    tools: [createSchemaStub('echo')],
    eventDrivenMode: true,
    agentId: 'agent-x',
    toolCallStepIds: new Map([['call_1', 'step_1']]),
    hookRegistry: makeHookRegistry('ask'),
    humanInTheLoop: { enabled: true },
  });

  const hitlGraph = buildHITLGraph(toolNode, [
    { id: 'call_1', name: 'echo', args: { command: 'original' } },
  ]);
  const threadConfig = { configurable: { thread_id: 'edit-malformed' } };

  /** First pass runs up to the approval pause. */
  await hitlGraph.invoke({ messages: [] }, threadConfig);

  /** Resume with `{ type: 'edit' }` and no updatedInput. This is the
   * same trust-boundary problem as a malformed respond: it has to
   * fail closed instead of handing undefined to applyInputOverride
   * and approving the tool with garbage args. */
  const malformedEdit = { type: 'edit' } as unknown as t.ToolApprovalDecision;
  const resumed = (await hitlGraph.invoke(
    new Command({ resume: [malformedEdit] }),
    threadConfig
  )) as { messages: BaseMessage[] };

  const toolMessages = resumed.messages.filter(
    (m): m is ToolMessage => m._getType() === 'tool'
  );
  expect(toolMessages).toHaveLength(1);
  expect(toolMessages[0].status).toBe('error');

  const errorText = String(toolMessages[0].content);
  expect(errorText).toContain('missing object updatedInput');
  expect(errorText).toContain('<missing>');
  expect(toolDispatches).toBe(0);
});
|
|
2333
|
+
|
|
2334
|
+
it('malformed edit decision (non-object updatedInput) is blocked', async () => {
  /** Number of `on_tool_execute` events seen; must stay at zero. */
  let toolDispatches = 0;
  jest
    .spyOn(events, 'safeDispatchCustomEvent')
    .mockImplementation(async (event, data) => {
      if (event === 'on_tool_execute') {
        toolDispatches += 1;
        const request = data as {
          toolCalls: t.ToolCallRequest[];
          resolve: (r: t.ToolExecuteResult[]) => void;
        };
        request.resolve([]);
      }
    });

  const toolNode = new ToolNode({
    tools: [createSchemaStub('echo')],
    eventDrivenMode: true,
    agentId: 'agent-x',
    toolCallStepIds: new Map([['call_1', 'step_1']]),
    hookRegistry: makeHookRegistry('ask'),
    humanInTheLoop: { enabled: true },
  });

  const hitlGraph = buildHITLGraph(toolNode, [
    { id: 'call_1', name: 'echo', args: { command: 'original' } },
  ]);
  const threadConfig = { configurable: { thread_id: 'edit-nonobject' } };

  /** First pass runs up to the approval pause. */
  await hitlGraph.invoke({ messages: [] }, threadConfig);

  /** Resume with a string `updatedInput`. The wire deserializer did
   * not enforce object shape, so the SDK has to reject it. */
  const resumed = (await hitlGraph.invoke(
    new Command({
      resume: [
        {
          type: 'edit',
          updatedInput: 'not-an-object' as unknown as Record<string, unknown>,
        },
      ],
    }),
    threadConfig
  )) as { messages: BaseMessage[] };

  const toolMessages = resumed.messages.filter(
    (m): m is ToolMessage => m._getType() === 'tool'
  );
  expect(toolMessages).toHaveLength(1);
  expect(toolMessages[0].status).toBe('error');

  const errorText = String(toolMessages[0].content);
  expect(errorText).toContain('missing object updatedInput');
  expect(errorText).toContain('string');
  expect(toolDispatches).toBe(0);
});
|
|
2391
|
+
|
|
2392
|
+
it('malformed edit decision (array updatedInput) is blocked — arrays are objects but not plain records', async () => {
  /** Silence event dispatch; this test inspects only the resulting
   * ToolMessage. */
  jest
    .spyOn(events, 'safeDispatchCustomEvent')
    .mockImplementation(async (): Promise<void> => {});

  const toolNode = new ToolNode({
    tools: [createSchemaStub('echo')],
    eventDrivenMode: true,
    agentId: 'agent-x',
    toolCallStepIds: new Map([['call_1', 'step_1']]),
    hookRegistry: makeHookRegistry('ask'),
    humanInTheLoop: { enabled: true },
  });

  const hitlGraph = buildHITLGraph(toolNode, [
    { id: 'call_1', name: 'echo', args: { command: 'original' } },
  ]);
  const threadConfig = { configurable: { thread_id: 'edit-array' } };

  /** First pass runs up to the approval pause. */
  await hitlGraph.invoke({ messages: [] }, threadConfig);

  /** Resume with an array in place of the record-shaped
   * `updatedInput`. */
  const resumed = (await hitlGraph.invoke(
    new Command({
      resume: [
        {
          type: 'edit',
          updatedInput: [1, 2, 3] as unknown as Record<string, unknown>,
        },
      ],
    }),
    threadConfig
  )) as { messages: BaseMessage[] };

  const toolMessages = resumed.messages.filter(
    (m): m is ToolMessage => m._getType() === 'tool'
  );
  expect(toolMessages).toHaveLength(1);
  expect(toolMessages[0].status).toBe('error');
  expect(String(toolMessages[0].content)).toContain('array');
});
|
|
2434
|
+
|
|
2435
|
+
it('malformed respond decision (missing responseText) is blocked, not crashed', async () => {
  /** Number of `on_tool_execute` events seen; must stay at zero. */
  let toolDispatches = 0;
  jest
    .spyOn(events, 'safeDispatchCustomEvent')
    .mockImplementation(async (event, data) => {
      if (event === 'on_tool_execute') {
        toolDispatches += 1;
        const request = data as {
          toolCalls: t.ToolCallRequest[];
          resolve: (r: t.ToolExecuteResult[]) => void;
        };
        request.resolve([]);
      }
    });

  const toolNode = new ToolNode({
    tools: [createSchemaStub('echo')],
    eventDrivenMode: true,
    agentId: 'agent-x',
    toolCallStepIds: new Map([['call_1', 'step_1']]),
    hookRegistry: makeHookRegistry('ask'),
    humanInTheLoop: { enabled: true },
  });

  const hitlGraph = buildHITLGraph(toolNode, [
    { id: 'call_1', name: 'echo', args: { command: 'x' } },
  ]);
  const threadConfig = { configurable: { thread_id: 'respond-malformed' } };

  /** First pass runs up to the approval pause. */
  await hitlGraph.invoke({ messages: [] }, threadConfig);

  /** Resume with a `respond` decision that has NO responseText — a
   * wire shape the SDK cannot honor. It has to fail closed (blockEntry
   * path) instead of crashing truncateToolResultContent on
   * `undefined.length`. */
  const malformedRespond = {
    type: 'respond',
  } as unknown as t.ToolApprovalDecision;
  const resumed = (await hitlGraph.invoke(
    new Command({ resume: [malformedRespond] }),
    threadConfig
  )) as { messages: BaseMessage[] };

  const toolMessages = resumed.messages.filter(
    (m): m is ToolMessage => m._getType() === 'tool'
  );
  expect(toolMessages).toHaveLength(1);
  expect(toolMessages[0].status).toBe('error');

  const errorText = String(toolMessages[0].content);
  expect(errorText).toContain('missing string responseText');
  expect(errorText).toContain('<missing>');
  /** Failing closed means the tool was never dispatched. */
  expect(toolDispatches).toBe(0);
});
|
|
2489
|
+
|
|
2490
|
+
it('malformed respond decision (non-string responseText) is blocked, not crashed', async () => {
  /** Silence event dispatch; this test inspects only the resulting
   * ToolMessage. */
  jest
    .spyOn(events, 'safeDispatchCustomEvent')
    .mockImplementation(async (): Promise<void> => {});

  const toolNode = new ToolNode({
    tools: [createSchemaStub('echo')],
    eventDrivenMode: true,
    agentId: 'agent-x',
    toolCallStepIds: new Map([['call_1', 'step_1']]),
    hookRegistry: makeHookRegistry('ask'),
    humanInTheLoop: { enabled: true },
  });

  const hitlGraph = buildHITLGraph(toolNode, [
    { id: 'call_1', name: 'echo', args: { command: 'x' } },
  ]);
  const threadConfig = { configurable: { thread_id: 'respond-nonstring' } };

  /** First pass runs up to the approval pause. */
  await hitlGraph.invoke({ messages: [] }, threadConfig);

  /** Resume with `responseText: 42`. The wire deserializer did not
   * enforce a string, so the SDK has to reject it without crashing. */
  const resumed = (await hitlGraph.invoke(
    new Command({
      resume: [
        {
          type: 'respond',
          responseText: 42 as unknown as string,
        },
      ],
    }),
    threadConfig
  )) as { messages: BaseMessage[] };

  const toolMessages = resumed.messages.filter(
    (m): m is ToolMessage => m._getType() === 'tool'
  );
  expect(toolMessages).toHaveLength(1);
  expect(toolMessages[0].status).toBe('error');

  const errorText = String(toolMessages[0].content);
  expect(errorText).toContain('missing string responseText');
  expect(errorText).toContain('number');
});
|
|
2537
|
+
|
|
2538
|
+
it('respond decision truncates oversized text the same way real tool output is truncated', async () => {
  mockEventDispatch([]);

  /** The ToolNode below uses a very small `maxToolResultChars`, so a
   * 200-char response triggers truncation. Before the fix the full
   * string reached both the ToolMessage and the PostToolBatch entry,
   * bypassing the model context budget. */
  const hookRegistry = new HookRegistry();
  hookRegistry.register('PreToolUse', {
    hooks: [async (): Promise<PreToolUseHookOutput> => ({ decision: 'ask' })],
  });
  /** First entry handed to the PostToolBatch hook. */
  let batchEntry: PostToolBatchEntry | undefined;
  hookRegistry.register('PostToolBatch', {
    hooks: [
      async (input): Promise<PostToolBatchHookOutput> => {
        batchEntry = (input as PostToolBatchHookInput).entries[0];
        return {};
      },
    ],
  });

  const toolNode = new ToolNode({
    tools: [createSchemaStub('echo')],
    eventDrivenMode: true,
    agentId: 'agent-x',
    toolCallStepIds: new Map([['call_1', 'step_1']]),
    hookRegistry: hookRegistry,
    humanInTheLoop: { enabled: true },
    maxToolResultChars: 50,
  });

  const hitlGraph = buildHITLGraph(toolNode, [
    { id: 'call_1', name: 'echo', args: { command: 'x' } },
  ]);
  const threadConfig = { configurable: { thread_id: 'respond-truncate' } };

  /** First pass runs up to the approval pause. */
  await hitlGraph.invoke({ messages: [] }, threadConfig);

  /** A 200-char response, far above the 50-char cap. */
  const longResponse = 'A'.repeat(200);
  const resumed = (await hitlGraph.invoke(
    new Command({
      resume: [{ type: 'respond', responseText: longResponse }],
    }),
    threadConfig
  )) as { messages: BaseMessage[] };

  const toolMessages = resumed.messages.filter(
    (m): m is ToolMessage => m._getType() === 'tool'
  );
  expect(toolMessages).toHaveLength(1);
  /** The ToolMessage holds truncated content, not the raw 200 chars. */
  const deliveredText = String(toolMessages[0].content);
  expect(deliveredText.length).toBeLessThan(longResponse.length);
  /** The PostToolBatch entry carries the SAME truncated value, so
   * batch hooks observe what the model will actually see. */
  expect(batchEntry).toBeDefined();
  expect(typeof batchEntry!.toolOutput).toBe('string');
  expect(batchEntry!.toolOutput).toBe(deliveredText);
});
|
|
2598
|
+
|
|
2599
|
+
it('hook returning both ask + preventContinuation halts cleanly and clears session hooks', async () => {
  mockEventDispatch([]);

  const hookRegistry = new HookRegistry();
  /** The session-scoped policy hook returns BOTH `ask` (which would
   * raise an interrupt) AND `preventContinuation: true` (which raises
   * a halt signal). The halt takes precedence: no resume is expected,
   * so the session hooks must be cleared. */
  const sessionId = 'ask-and-halt';
  hookRegistry.registerSession(sessionId, 'PreToolUse', {
    hooks: [
      async (): Promise<PreToolUseHookOutput> => ({
        decision: 'ask',
        preventContinuation: true,
        stopReason: 'policy halted ask',
      }),
    ],
  });

  const toolNode = new ToolNode({
    tools: [createSchemaStub('echo')],
    eventDrivenMode: true,
    agentId: 'agent-x',
    toolCallStepIds: new Map([['call_1', 'step_1']]),
    hookRegistry: hookRegistry,
    humanInTheLoop: { enabled: true },
  });

  /** agent → tools graph; the agent node emits one `echo` tool call. */
  const haltingGraph = new StateGraph(MessagesAnnotation)
    .addNode(
      'agent',
      (): MessagesUpdate => ({
        messages: [
          new AIMessage({
            content: '',
            tool_calls: [
              { id: 'call_1', name: 'echo', args: { command: 'x' } },
            ],
          }),
        ],
      })
    )
    .addNode('tools', toolNode)
    .addEdge(START, 'agent')
    .addEdge('agent', 'tools')
    .addEdge('tools', END)
    .compile({ checkpointer: new MemorySaver() });

  const { Run } = await import('@/run');
  const haltedRun = await Run.create<t.IState>({
    runId: sessionId,
    graphConfig: {
      type: 'standard',
      agents: [
        {
          agentId: 'a',
          provider: providers.OPENAI,
          clientOptions: { modelName: 'gpt-4o-mini', apiKey: 'test-key' },
          instructions: 'noop',
          maxContextTokens: 8000,
        },
      ],
    },
    hooks: hookRegistry,
    humanInTheLoop: { enabled: true },
  });
  haltedRun.graphRunnable = haltingGraph as unknown as t.CompiledStateWorkflow;

  await haltedRun.processStream(
    { messages: [] },
    {
      configurable: { thread_id: 'ask-and-halt-thread' },
      version: 'v2',
    }
  );

  /** Both signals landed: the interrupt was captured AND the halt
   * fired. */
  expect(haltedRun.getInterrupt()).toBeDefined();
  expect(haltedRun.getHaltReason()).toBe('policy halted ask');
  /** Session hooks MUST be gone: a halted run is not expected to
   * resume, even when it also captured an interrupt. */
  expect(hookRegistry.hasHookFor('PreToolUse', sessionId)).toBe(false);
});
|
|
2682
|
+
|
|
2683
|
+
it('preserves Graph sidecars across HITL interrupt + resume so tool completions keep their step ids', async () => {
  /**
   * Regression test for the cleanup-vs-resume bug. `processStream`
   * used to call `Graph.clearHeavyState()` in its `finally` block AND
   * `Graph.resetValues()` on entry unconditionally, including when it
   * was pausing on a HITL interrupt. That erased `toolCallStepIds`,
   * `_toolOutputRegistry`, and `sessions` between pause and resume.
   * The resumed `ToolNode` then could not find the original step id
   * and dispatched `ON_RUN_STEP_COMPLETED` with an empty id, which
   * the host's stream consumer would drop.
   *
   * The fix gates both cleanups:
   * - `clearHeavyState` skipped when `_interrupt != null && _haltedReason == null && !streamThrew`
   * - `resetValues` skipped when entering processStream via `Command` (resume)
   *
   * So that the SDK Graph's real sidecar state is exercised (rather
   * than a map private to a test ToolNode), the custom ToolNode
   * shares the SDK Graph's `toolCallStepIds` Map by reference. The
   * entry written during the run must survive the interrupt, and its
   * step id must be the one dispatched on resume.
   */
  const completedStepIds: string[] = [];
  jest
    .spyOn(events, 'safeDispatchCustomEvent')
    .mockImplementation(async (event, data) => {
      if (event === GraphEvents.ON_RUN_STEP_COMPLETED) {
        /** Record the id attached to each completed step. */
        const stepId = (data as { result?: { id?: string } }).result?.id;
        if (stepId != null) {
          completedStepIds.push(stepId);
        }
      } else if (event === 'on_tool_execute') {
        /** Act as the host: answer every requested call with success. */
        const request = data as {
          toolCalls: t.ToolCallRequest[];
          resolve: (r: t.ToolExecuteResult[]) => void;
        };
        request.resolve(
          request.toolCalls.map((call) => ({
            toolCallId: call.id,
            content: 'host-result',
            status: 'success' as const,
          }))
        );
      }
    });

  const hookRegistry = new HookRegistry();
  hookRegistry.register('PreToolUse', {
    hooks: [
      async (): Promise<PreToolUseHookOutput> => ({
        decision: 'ask',
        reason: 'review',
      }),
    ],
  });

  const { Run } = await import('@/run');
  const run = await Run.create<t.IState>({
    runId: 'sidecar-preserve',
    graphConfig: {
      type: 'standard',
      agents: [
        {
          agentId: 'a',
          provider: providers.OPENAI,
          clientOptions: { modelName: 'gpt-4o-mini', apiKey: 'test-key' },
          instructions: 'noop',
          maxContextTokens: 8000,
        },
      ],
    },
    hooks: hookRegistry,
    humanInTheLoop: { enabled: true },
  });

  /** The test ToolNode receives the SDK Graph's own `toolCallStepIds`
   * Map (same reference). Per the original author's note, the real
   * StandardGraph wires its inner ToolNode the same way. */
  const sharedToolNode = new ToolNode({
    tools: [createSchemaStub('echo')],
    eventDrivenMode: true,
    agentId: 'a',
    toolCallStepIds: run.Graph!.toolCallStepIds,
    hookRegistry: hookRegistry,
    humanInTheLoop: { enabled: true },
  });

  /** The agent node stands in for `attemptInvoke`'s sidecar-population
   * step: in a real run the model invocation creates a run step and
   * records its id in `toolCallStepIds` before tools dispatch.
   * Writing it here places the entry AFTER `processStream`'s
   * `resetValues` (which fires once on entry) and BEFORE the
   * ToolNode's hook + interrupt — the production timing the cleanup
   * gate has to preserve. */
  const sidecarGraph = new StateGraph(MessagesAnnotation)
    .addNode('agent', (): MessagesUpdate => {
      run.Graph!.toolCallStepIds.set('call_1', 'step_real_id');
      return {
        messages: [
          new AIMessage({
            content: '',
            tool_calls: [
              { id: 'call_1', name: 'echo', args: { command: 'x' } },
            ],
          }),
        ],
      };
    })
    .addNode('tools', sharedToolNode)
    .addEdge(START, 'agent')
    .addEdge('agent', 'tools')
    .addEdge('tools', END)
    .compile({ checkpointer: new MemorySaver() });
  run.graphRunnable = sidecarGraph as unknown as t.CompiledStateWorkflow;

  const streamConfig = {
    configurable: { thread_id: 'sidecar-thread' },
    version: 'v2' as const,
  };

  await run.processStream({ messages: [] }, streamConfig);

  /** After the interrupt the sidecar entry MUST still exist. Before
   * the fix, `clearHeavyState` in the `finally` block emptied this
   * map. */
  expect(run.getInterrupt()).toBeDefined();
  expect(run.Graph!.toolCallStepIds.has('call_1')).toBe(true);
  expect(run.Graph!.toolCallStepIds.get('call_1')).toBe('step_real_id');

  /** Resume. Without the resetValues gate the map would be wiped
   * again at the START of this second processStream invocation. */
  await run.resume([{ type: 'approve' }], streamConfig);

  /** Once the resume finishes naturally, the dispatch carried the
   * real step id rather than an empty string from a wiped map.
   * Without either fix, `completedStepIds` would contain `''`. */
  expect(completedStepIds).toContain('step_real_id');
  expect(completedStepIds).not.toContain('');
  /** clearHeavyState DID run on the natural-completion side: the
   * sidecar map is empty now that the resume has settled. */
  expect(run.Graph!.toolCallStepIds.size).toBe(0);
});
|
|
2827
|
+
|
|
2828
|
+
it('clears Graph sidecars on natural completion when no interrupt was raised', async () => {
  /** Negative case: with no interrupt, `clearHeavyState` MUST still
   * run as it did before. This pins the gate so a later change cannot
   * accidentally keep sidecars alive after natural completion (a
   * memory leak across runs). */
  mockEventDispatch([]);

  const { Run } = await import('@/run');
  const run = await Run.create<t.IState>({
    runId: 'sidecar-clear-natural',
    graphConfig: {
      type: 'standard',
      agents: [
        {
          agentId: 'a',
          provider: providers.OPENAI,
          clientOptions: { modelName: 'gpt-4o-mini', apiKey: 'test-key' },
          instructions: 'noop',
          maxContextTokens: 8000,
        },
      ],
    },
    humanInTheLoop: { enabled: false },
  });

  /** No-op graph: it runs to completion and never interrupts. */
  const noopGraph = new StateGraph(MessagesAnnotation)
    .addNode('noop', (): MessagesUpdate => ({ messages: [] }))
    .addEdge(START, 'noop')
    .addEdge('noop', END)
    .compile();
  run.graphRunnable = noopGraph as unknown as t.CompiledStateWorkflow;

  /** Seed an entry so the wipe is observable afterwards. */
  run.Graph!.toolCallStepIds.set('stale_call', 'stale_step');

  await run.processStream(
    { messages: [] },
    {
      configurable: { thread_id: 'sidecar-clear-thread' },
      version: 'v2',
    }
  );

  /** No interrupt → clearHeavyState ran → sidecar map is empty. */
  expect(run.getInterrupt()).toBeUndefined();
  expect(run.Graph!.toolCallStepIds.size).toBe(0);
});
|
|
2876
|
+
|
|
2877
|
+
it('clears session hooks when the stream throws AFTER an interrupt is captured (stale interrupt)', async () => {
  /** Host-side event dispatch plays no role in this scenario; stub it out. */
  jest
    .spyOn(events, 'safeDispatchCustomEvent')
    .mockImplementation(async () => {
      return;
    });

  const registry = new HookRegistry();
  const runId = 'stream-error-after-interrupt';
  /** Session-scoped PreToolUse policy that always asks for approval. */
  registry.registerSession(runId, 'PreToolUse', {
    hooks: [
      async (): Promise<PreToolUseHookOutput> => ({
        decision: 'ask',
        reason: 'session policy',
      }),
    ],
  });
  /**
   * Precondition: the session hook must be visible before the run starts.
   * Without this, the final `hasHookFor(...) === false` assertion would
   * pass vacuously if registration had silently failed.
   */
  expect(registry.hasHookFor('PreToolUse', runId)).toBe(true);

  const node = new ToolNode({
    tools: [createSchemaStub('echo')],
    eventDrivenMode: true,
    agentId: 'agent-x',
    toolCallStepIds: new Map([['call_1', 'step_call_1']]),
    hookRegistry: registry,
    humanInTheLoop: { enabled: true },
  });

  /** agent emits one `echo` tool call; tools routes it through the hook. */
  const builder = new StateGraph(MessagesAnnotation)
    .addNode(
      'agent',
      (): MessagesUpdate => ({
        messages: [
          new AIMessage({
            content: '',
            tool_calls: [
              { id: 'call_1', name: 'echo', args: { command: 'x' } },
            ],
          }),
        ],
      })
    )
    .addNode('tools', node)
    .addEdge(START, 'agent')
    .addEdge('agent', 'tools')
    .addEdge('tools', END);
  const graph = builder.compile({ checkpointer: new MemorySaver() });

  const { Run } = await import('@/run');
  /**
   * Holder for forward-referencing the run inside the sentinel
   * handler closure. The handler is constructed before `Run.create`
   * runs (it's passed into `customHandlers`) but needs to read
   * `run.getInterrupt()` at firing time.
   */
  const holder: {
    run: Awaited<ReturnType<typeof Run.create<t.IState>>> | undefined;
  } = { run: undefined };

  /**
   * Handler keyed to chain-stream/chain-end events that throws ONLY
   * after the interrupt has been captured. The stream loop captures the
   * interrupt on the chunk that carries `__interrupt__`, then
   * dispatches to handlers in the same iteration — so the throw
   * exits the loop with `_interrupt != null`. Without the
   * `streamThrew` guard, the `finally` block would preserve session
   * hooks on this stale interrupt.
   */
  const sentinelHandler = {
    handle: async (): Promise<void> => {
      if (holder.run?.getInterrupt() != null) {
        throw new Error('post-interrupt handler failure');
      }
    },
  };

  holder.run = await Run.create<t.IState>({
    runId,
    graphConfig: {
      type: 'standard',
      agents: [
        {
          agentId: 'a',
          provider: providers.OPENAI,
          clientOptions: { modelName: 'gpt-4o-mini', apiKey: 'test-key' },
          instructions: 'noop',
          maxContextTokens: 8000,
        },
      ],
    },
    hooks: registry,
    humanInTheLoop: { enabled: true },
    customHandlers: {
      [GraphEvents.CHAIN_STREAM]: sentinelHandler,
      [GraphEvents.CHAIN_END]: sentinelHandler,
    },
  });
  holder.run.graphRunnable = graph as unknown as t.CompiledStateWorkflow;

  const callerConfig = {
    configurable: { thread_id: 'stale-interrupt-thread' },
    version: 'v2' as const,
  };

  await expect(
    holder.run.processStream({ messages: [] }, callerConfig)
  ).rejects.toThrow('post-interrupt handler failure');

  /** Interrupt WAS captured on the run instance, but because the
   * stream subsequently threw, session hooks must be cleared so the
   * next run on this registry isn't poisoned by stale state. */
  expect(holder.run.getInterrupt()).toBeDefined();
  expect(registry.hasHookFor('PreToolUse', runId)).toBe(false);
});
|
|
2990
|
+
|
|
2991
|
+
it('mixed deny/ask/allow batch: deny short-circuits, allow runs immediately, ask interrupts; resume completes the asked tool', async () => {
  /** Name of every tool the host was asked to execute, in dispatch order. */
  const hostDispatched: string[] = [];
  jest
    .spyOn(events, 'safeDispatchCustomEvent')
    .mockImplementation(async (event, data) => {
      if (event !== 'on_tool_execute') {
        return;
      }
      const request = data as {
        toolCalls: t.ToolCallRequest[];
        resolve: (r: t.ToolExecuteResult[]) => void;
      };
      hostDispatched.push(...request.toolCalls.map((call) => call.name));
      /** Every dispatched call "succeeds" with a recognisable marker. */
      request.resolve(
        request.toolCalls.map((call) => ({
          toolCallId: call.id,
          content: `ran:${call.name}`,
          status: 'success' as const,
        }))
      );
    });

  /**
   * One pattern-less PreToolUse hook decides per tool name:
   * tool_a → deny, tool_b → ask, anything else (tool_c) → allow.
   */
  const registry = new HookRegistry();
  registry.register('PreToolUse', {
    hooks: [
      async (input): Promise<PreToolUseHookOutput> => {
        switch (input.toolName) {
          case 'tool_a':
            return { decision: 'deny', reason: 'policy:a' };
          case 'tool_b':
            return { decision: 'ask', reason: 'policy:b-needs-review' };
          default:
            return { decision: 'allow' };
        }
      },
    ],
  });
  /**
   * Record a shallow copy of every PostToolBatch payload so the test can
   * check that the post-resume batch carries final outcomes (deny + run
   * + run) rather than leftovers from the interrupted pass.
   */
  const postBatchCalls: PostToolBatchEntry[][] = [];
  registry.register('PostToolBatch', {
    hooks: [
      async (input): Promise<PostToolBatchHookOutput> => {
        const { entries } = input as PostToolBatchHookInput;
        postBatchCalls.push(entries.map((entry) => ({ ...entry })));
        return {};
      },
    ],
  });

  const node = new ToolNode({
    tools: [
      createSchemaStub('tool_a'),
      createSchemaStub('tool_b'),
      createSchemaStub('tool_c'),
    ],
    eventDrivenMode: true,
    agentId: 'agent-x',
    toolCallStepIds: new Map([
      ['call_a', 'step_a'],
      ['call_b', 'step_b'],
      ['call_c', 'step_c'],
    ]),
    hookRegistry: registry,
    humanInTheLoop: { enabled: true },
  });

  const graph = buildHITLGraph(node, [
    { id: 'call_a', name: 'tool_a', args: { command: 'a' } },
    { id: 'call_b', name: 'tool_b', args: { command: 'b' } },
    { id: 'call_c', name: 'tool_c', args: { command: 'c' } },
  ]);
  const config = { configurable: { thread_id: 'mixed-thread' } };

  const firstPass = await graph.invoke({ messages: [] }, config);
  if (!isInterrupted<t.HumanInterruptPayload>(firstPass)) {
    throw new Error('expected interrupt');
  }
  const payload = firstPass.__interrupt__[0].value!;
  if (payload.type !== 'tool_approval') {
    throw new Error('expected tool_approval payload');
  }
  /**
   * Only tool_b shows up in the interrupt. The deny was handled
   * locally; the allow was queued but never dispatched because
   * `interrupt()` threw inside the same node first. LangGraph rolls
   * back the whole node on throw, so no host event fires for any tool
   * until after resume — the safe semantic, since partial execution
   * while a human is being asked would leak side effects ahead of
   * approval.
   */
  expect(payload.action_requests).toHaveLength(1);
  expect(payload.action_requests[0].tool_call_id).toBe('call_b');
  expect(hostDispatched).toEqual([]);

  const resumed = (await graph.invoke(
    new Command({ resume: [{ type: 'approve' }] }),
    config
  )) as { messages: BaseMessage[] };

  /**
   * After resume every call has a ToolMessage: tool_a blocked (deny),
   * tool_b ran (host approved), tool_c ran (allow). The ToolNode
   * re-executed from scratch, so tool_b and tool_c both dispatch in
   * this pass.
   */
  const toolMessages = resumed.messages.filter(
    (m): m is ToolMessage => m._getType() === 'tool'
  );
  expect(toolMessages).toHaveLength(3);
  const messageByCall = new Map(toolMessages.map((m) => [m.tool_call_id, m]));
  expect(messageByCall.get('call_a')!.status).toBe('error');
  expect(String(messageByCall.get('call_a')!.content)).toContain('policy:a');
  expect(messageByCall.get('call_b')!.status).not.toBe('error');
  expect(messageByCall.get('call_b')!.content).toBe('ran:tool_b');
  expect(messageByCall.get('call_c')!.status).not.toBe('error');
  expect(messageByCall.get('call_c')!.content).toBe('ran:tool_c');
  /** Both approved tools reached the host on resume; the denied one never did. */
  expect(new Set(hostDispatched)).toEqual(new Set(['tool_b', 'tool_c']));
  expect(hostDispatched).not.toContain('tool_a');

  /**
   * PostToolBatch is dispatched at the bottom of `dispatchToolEvents`,
   * after tool execution. On the FIRST pass `interrupt()` throws before
   * that point, so no PostToolBatch fires for the interrupted pass.
   * Only the resume pass yields a snapshot, carrying all three entries
   * with their final outcomes.
   */
  expect(postBatchCalls).toHaveLength(1);
  const [finalSnapshot] = postBatchCalls;
  /**
   * Order assertion: entries follow the original toolCalls sequence
   * (`call_a`, `call_b`, `call_c`) regardless of when each outcome was
   * recorded — deny in the hook loop, approve via the resume branch,
   * allow via the host event path. Hooks that correlate outcomes by
   * position (per the API doc) rely on this stability.
   */
  expect(finalSnapshot.map((entry) => entry.toolUseId)).toEqual([
    'call_a',
    'call_b',
    'call_c',
  ]);
  const entryByCall = new Map(
    finalSnapshot.map((entry) => [entry.toolUseId, entry])
  );
  expect(entryByCall.size).toBe(3);
  expect(entryByCall.get('call_a')!.status).toBe('error');
  expect(entryByCall.get('call_a')!.error).toContain('policy:a');
  expect(entryByCall.get('call_b')!.status).toBe('success');
  expect(entryByCall.get('call_b')!.toolOutput).toBe('ran:tool_b');
  expect(entryByCall.get('call_c')!.status).toBe('success');
  expect(entryByCall.get('call_c')!.toolOutput).toBe('ran:tool_c');
});
|
|
3153
|
+
|
|
3154
|
+
it('mixed respond + reject in the same resume: dispatches once each, batch entries in toolCalls order', async () => {
  /** Tool-call ids seen on ON_RUN_STEP_COMPLETED events, in arrival order. */
  const completedIds: string[] = [];
  jest
    .spyOn(events, 'safeDispatchCustomEvent')
    .mockImplementation(async (event, data) => {
      if (event === GraphEvents.ON_RUN_STEP_COMPLETED) {
        const completed = data as {
          result?: { tool_call?: { id?: string } };
        };
        const completedId = completed.result?.tool_call?.id;
        if (completedId != null) {
          completedIds.push(completedId);
        }
      } else if (event === 'on_tool_execute') {
        /** Host execution requests are answered with an empty result set. */
        const request = data as {
          toolCalls: t.ToolCallRequest[];
          resolve: (r: t.ToolExecuteResult[]) => void;
        };
        request.resolve([]);
      }
    });

  const registry = new HookRegistry();
  /**
   * Every tool is sent to review (`ask`). The resume then answers one
   * with `respond` and the other with `reject`, exercising the timing
   * interaction between respond's immediate dispatch and reject's
   * deferred flush within a single resume pass.
   */
  registry.register('PreToolUse', {
    hooks: [
      async (): Promise<PreToolUseHookOutput> => ({
        decision: 'ask',
        reason: 'review',
      }),
    ],
  });
  const postBatchCalls: PostToolBatchEntry[][] = [];
  registry.register('PostToolBatch', {
    hooks: [
      async (input): Promise<PostToolBatchHookOutput> => {
        const { entries } = input as PostToolBatchHookInput;
        postBatchCalls.push(entries.map((entry) => ({ ...entry })));
        return {};
      },
    ],
  });

  const node = new ToolNode({
    tools: [
      createSchemaStub('respond_tool'),
      createSchemaStub('reject_tool'),
    ],
    eventDrivenMode: true,
    agentId: 'agent-x',
    toolCallStepIds: new Map([
      ['call_respond', 'step_respond'],
      ['call_reject', 'step_reject'],
    ]),
    hookRegistry: registry,
    humanInTheLoop: { enabled: true },
  });

  const graph = buildHITLGraph(node, [
    { id: 'call_respond', name: 'respond_tool', args: { command: 'r' } },
    { id: 'call_reject', name: 'reject_tool', args: { command: 'j' } },
  ]);
  const config = { configurable: { thread_id: 'mixed-respond-reject' } };

  await graph.invoke({ messages: [] }, config);
  /** First pass: the interrupt fires before either dispatch path runs. */
  expect(completedIds).toEqual([]);

  const resumed = (await graph.invoke(
    new Command({
      resume: [
        { type: 'respond', responseText: 'fake answer' },
        { type: 'reject', reason: 'no thanks' },
      ],
    }),
    config
  )) as { messages: BaseMessage[] };

  /**
   * On resume each tool produces ON_RUN_STEP_COMPLETED exactly once —
   * respond through its immediate path, reject through the deferred
   * flush.
   */
  for (const callId of ['call_respond', 'call_reject']) {
    expect(completedIds.filter((id) => id === callId)).toEqual([callId]);
  }

  /**
   * PostToolBatch fires once, on the resume pass, with entries in the
   * original toolCalls order (respond first, reject second) no matter
   * which dispatch path landed first in the Map.
   */
  expect(postBatchCalls).toHaveLength(1);
  expect(postBatchCalls[0].map((entry) => entry.toolUseId)).toEqual([
    'call_respond',
    'call_reject',
  ]);
  const [respondEntry, rejectEntry] = postBatchCalls[0];
  expect(respondEntry.status).toBe('success');
  expect(respondEntry.toolOutput).toBe('fake answer');
  expect(rejectEntry.status).toBe('error');
  expect(String(rejectEntry.error)).toContain('no thanks');

  /** ToolMessages mirror the batch: success with the response text, error with the reason. */
  const toolMessages = resumed.messages.filter(
    (m): m is ToolMessage => m._getType() === 'tool'
  );
  expect(toolMessages).toHaveLength(2);
  const messageByCall = new Map(toolMessages.map((m) => [m.tool_call_id, m]));
  expect(messageByCall.get('call_respond')!.status).not.toBe('error');
  expect(messageByCall.get('call_respond')!.content).toBe('fake answer');
  expect(messageByCall.get('call_reject')!.status).toBe('error');
  expect(String(messageByCall.get('call_reject')!.content)).toContain(
    'no thanks'
  );
});
|
|
3273
|
+
|
|
3274
|
+
it('PostToolBatch entries preserve toolCalls order even when first call is denied and second is approved', async () => {
  /** Host stub: every dispatched call succeeds with `ran:<tool name>`. */
  jest
    .spyOn(events, 'safeDispatchCustomEvent')
    .mockImplementation(async (event, data) => {
      if (event !== 'on_tool_execute') {
        return;
      }
      const request = data as {
        toolCalls: t.ToolCallRequest[];
        resolve: (r: t.ToolExecuteResult[]) => void;
      };
      request.resolve(
        request.toolCalls.map((call) => ({
          toolCallId: call.id,
          content: `ran:${call.name}`,
          status: 'success' as const,
        }))
      );
    });

  /**
   * The same pair of call ids is run in both deny-first and
   * allow-first arrangements, so the order assertion demonstrably
   * follows the input rather than whichever path happens to finish
   * first.
   */
  const scenarios: Array<{
    thread: string;
    input: Array<{ id: string; name: string; args: Record<string, unknown> }>;
    expected: string[];
  }> = [
    {
      thread: 'order-deny-first',
      input: [
        { id: 'call_first', name: 'denied_tool', args: { command: 'a' } },
        { id: 'call_second', name: 'allowed_tool', args: { command: 'b' } },
      ],
      expected: ['call_first', 'call_second'],
    },
    {
      thread: 'order-approve-first',
      input: [
        { id: 'call_first', name: 'allowed_tool', args: { command: 'a' } },
        { id: 'call_second', name: 'denied_tool', args: { command: 'b' } },
      ],
      expected: ['call_first', 'call_second'],
    },
  ];

  for (const scenario of scenarios) {
    /** Fresh registry per scenario so captures never bleed across runs. */
    const registry = new HookRegistry();
    registry.register('PreToolUse', {
      hooks: [
        async (hookInput): Promise<PreToolUseHookOutput> =>
          hookInput.toolName === 'denied_tool'
            ? { decision: 'deny', reason: 'no' }
            : { decision: 'allow' },
      ],
    });
    const seenEntries: PostToolBatchEntry[] = [];
    registry.register('PostToolBatch', {
      hooks: [
        async (batchInput): Promise<PostToolBatchHookOutput> => {
          seenEntries.push(...(batchInput as PostToolBatchHookInput).entries);
          return {};
        },
      ],
    });

    const node = new ToolNode({
      tools: [
        createSchemaStub('denied_tool'),
        createSchemaStub('allowed_tool'),
      ],
      eventDrivenMode: true,
      agentId: 'agent-x',
      toolCallStepIds: new Map(
        scenario.input.map((call) => [call.id, `step_${call.id}`])
      ),
      hookRegistry: registry,
      humanInTheLoop: { enabled: false },
    });

    const graph = buildHITLGraph(node, scenario.input);
    await graph.invoke(
      { messages: [] },
      { configurable: { thread_id: scenario.thread } }
    );

    expect(seenEntries.map((entry) => entry.toolUseId)).toEqual(
      scenario.expected
    );
  }
});
|
|
3365
|
+
|
|
3366
|
+
it('fails closed when the host resume payload carries an unknown decision type', async () => {
  /**
   * Counts host `on_tool_execute` dispatches. A fail-closed resume must
   * leave this at zero, i.e. the tool is never handed to the host.
   */
  let hostExecuteCount = 0;
  jest
    .spyOn(events, 'safeDispatchCustomEvent')
    .mockImplementation(async (event, data) => {
      if (event !== 'on_tool_execute') {
        return;
      }
      hostExecuteCount += 1;
      const request = data as {
        toolCalls: t.ToolCallRequest[];
        resolve: (r: t.ToolExecuteResult[]) => void;
      };
      request.resolve([]);
    });

  const node = new ToolNode({
    tools: [createSchemaStub('echo')],
    eventDrivenMode: true,
    agentId: 'agent-x',
    toolCallStepIds: new Map([['call_1', 'step_call_1']]),
    hookRegistry: makeHookRegistry('ask'),
    humanInTheLoop: { enabled: true },
  });

  const graph = buildHITLGraph(node, [
    { id: 'call_1', name: 'echo', args: { command: 'sensitive' } },
  ]);
  const config = { configurable: { thread_id: 'unknown-decision' } };

  /** First pass parks the call behind the approval interrupt. */
  await graph.invoke({ messages: [] }, config);

  /**
   * The host answers with a misspelled decision type. The cast only
   * satisfies the compiler; at runtime the value is unrecognised and
   * must NOT be treated as an approval.
   */
  const malformedResume = new Command({
    resume: [{ type: 'aproved' as 'approve' }],
  });
  const resumed = (await graph.invoke(malformedResume, config)) as {
    messages: BaseMessage[];
  };

  const toolMessages = resumed.messages.filter(
    (m): m is ToolMessage => m._getType() === 'tool'
  );
  expect(toolMessages).toHaveLength(1);
  const [onlyMessage] = toolMessages;
  expect(onlyMessage.status).toBe('error');
  expect(String(onlyMessage.content)).toContain(
    'Unknown approval decision type'
  );
  /** Nothing reached the host — the call was rejected, not executed. */
  expect(hostExecuteCount).toBe(0);
});
|
|
3418
|
+
|
|
3419
|
+
it('PostToolBatch entry sees the PostToolUse-rewritten output, not the original', async () => {
  /** The host returns an unredacted value for the single tool call. */
  mockEventDispatch([
    { toolCallId: 'call_1', content: 'raw-secret-1234', status: 'success' },
  ]);

  const registry = new HookRegistry();
  /** PostToolUse swaps the raw output for a redacted one. */
  registry.register('PostToolUse', {
    hooks: [
      async (): Promise<PostToolUseHookOutput> => ({
        updatedOutput: 'raw-secret-[REDACTED]',
      }),
    ],
  });
  /** Whatever the batch hook receives is kept for inspection below. */
  let observedEntries: PostToolBatchEntry[] | undefined;
  registry.register('PostToolBatch', {
    hooks: [
      async (input): Promise<PostToolBatchHookOutput> => {
        observedEntries = (input as PostToolBatchHookInput).entries;
        return {};
      },
    ],
  });

  const node = new ToolNode({
    tools: [createSchemaStub('echo')],
    eventDrivenMode: true,
    agentId: 'agent-x',
    toolCallStepIds: new Map([['call_1', 'step_1']]),
    hookRegistry: registry,
    humanInTheLoop: { enabled: false },
  });

  const graph = buildHITLGraph(node, [
    { id: 'call_1', name: 'echo', args: { command: 'fetch' } },
  ]);
  const config = { configurable: { thread_id: 'batch-rewrite' } };
  await graph.invoke({ messages: [] }, config);

  expect(observedEntries).toBeDefined();
  expect(observedEntries).toHaveLength(1);
  /** The batch hook must observe the redacted text, never the raw secret. */
  expect(observedEntries![0].toolOutput).toBe('raw-secret-[REDACTED]');
  expect(observedEntries![0].toolOutput).not.toContain('raw-secret-1234');
});
|
|
3466
|
+
|
|
3467
|
+
it('PostToolUseFailure additionalContext is injected for the next model turn', async () => {
  /** The host reports an error for the call so the failure path is taken. */
  jest
    .spyOn(events, 'safeDispatchCustomEvent')
    .mockImplementation(async (event, data) => {
      if (event !== 'on_tool_execute') {
        return;
      }
      const request = data as {
        toolCalls: t.ToolCallRequest[];
        resolve: (r: t.ToolExecuteResult[]) => void;
      };
      request.resolve([
        {
          toolCallId: 'call_1',
          content: '',
          status: 'error',
          errorMessage: 'network timeout',
        },
      ]);
    });

  /** The failure hook contributes guidance text via `additionalContext`. */
  const registry = new HookRegistry();
  registry.register('PostToolUseFailure', {
    hooks: [
      async (): Promise<PostToolUseFailureHookOutput> => ({
        additionalContext:
          'Tool failed — suggest the user retry with a smaller batch size',
      }),
    ],
  });

  const node = new ToolNode({
    tools: [createSchemaStub('echo')],
    eventDrivenMode: true,
    agentId: 'agent-x',
    toolCallStepIds: new Map([['call_1', 'step_1']]),
    hookRegistry: registry,
    humanInTheLoop: { enabled: false },
  });

  const graph = buildHITLGraph(node, [
    { id: 'call_1', name: 'echo', args: { command: 'fetch' } },
  ]);
  const config = { configurable: { thread_id: 'failure-ctx' } };
  const result = (await graph.invoke({ messages: [] }, config)) as {
    messages: BaseMessage[];
  };

  /** Find the human-typed message whose additional_kwargs mark it as hook-sourced. */
  const injected = result.messages.find((m) => {
    if (m._getType() !== 'human') {
      return false;
    }
    const kwargs = (m as { additional_kwargs?: { source?: string } })
      .additional_kwargs;
    return kwargs?.source === 'hook';
  });
  expect(injected).toBeDefined();
  expect(String(injected!.content)).toContain(
    'suggest the user retry with a smaller batch size'
  );
});
|
|
3527
|
+
});
|
|
3528
|
+
|
|
3529
|
+
describe('AskUserQuestion — interrupt + resume', () => {
  /** Undo any spies installed by a test before the next one runs. */
  afterEach(() => {
    jest.restoreAllMocks();
  });

  it('a node calling askUserQuestion() raises an ask_user_question interrupt and resumes with the answer', async () => {
    const { askUserQuestion } = await import('@/hitl');

    /** Set by the node only once `askUserQuestion` returns a resolution. */
    let resumedAnswer: string | undefined;

    /** One-node graph whose node asks the question, then records the answer. */
    const graph = new StateGraph(MessagesAnnotation)
      .addNode('clarifier', () => {
        const { answer } = askUserQuestion({
          question: 'Which environment?',
          options: [
            { label: 'Staging', value: 'staging' },
            { label: 'Production', value: 'production' },
          ],
        });
        resumedAnswer = answer;
        return { messages: [] };
      })
      .addEdge(START, 'clarifier')
      .addEdge('clarifier', END)
      .compile({ checkpointer: new MemorySaver() });

    const config = { configurable: { thread_id: 'ask-q-thread' } };

    /** First invocation: expected to stop with an ask_user_question payload. */
    const firstPass = (await graph.invoke({ messages: [] }, config)) as {
      __interrupt__?: Array<{ value?: t.HumanInterruptPayload }>;
    };
    expect(firstPass.__interrupt__).toBeDefined();
    const payload = firstPass.__interrupt__![0].value!;
    if (payload.type !== 'ask_user_question') {
      throw new Error('expected ask_user_question');
    }
    expect(payload.question.question).toBe('Which environment?');
    expect(payload.question.options).toHaveLength(2);

    /** Second invocation: feed the answer back through `Command.resume`. */
    const resolution: t.AskUserQuestionResolution = { answer: 'production' };
    await graph.invoke(new Command({ resume: resolution }), config);

    expect(resumedAnswer).toBe('production');
  });

  it('isAskUserQuestionInterrupt narrows the payload union correctly', async () => {
    const { isAskUserQuestionInterrupt, isToolApprovalInterrupt } =
      await import('@/types/hitl');

    /** One minimal payload per union member. */
    const questionPayload: t.HumanInterruptPayload = {
      type: 'ask_user_question',
      question: { question: 'why?' },
    };
    const toolApprovalPayload: t.HumanInterruptPayload = {
      type: 'tool_approval',
      action_requests: [],
      review_configs: [],
    };

    /** Each guard accepts its own variant and rejects the other. */
    expect(isAskUserQuestionInterrupt(questionPayload)).toBe(true);
    expect(isAskUserQuestionInterrupt(toolApprovalPayload)).toBe(false);
    expect(isToolApprovalInterrupt(toolApprovalPayload)).toBe(true);
    expect(isToolApprovalInterrupt(questionPayload)).toBe(false);
  });
});
|