@librechat/agents 3.1.77-dev.1 → 3.1.78-dev.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/common/enum.cjs +54 -0
- package/dist/cjs/common/enum.cjs.map +1 -1
- package/dist/cjs/graphs/Graph.cjs +148 -4
- package/dist/cjs/graphs/Graph.cjs.map +1 -1
- package/dist/cjs/hooks/createWorkspacePolicyHook.cjs +291 -0
- package/dist/cjs/hooks/createWorkspacePolicyHook.cjs.map +1 -0
- package/dist/cjs/llm/openai/index.cjs +317 -1
- package/dist/cjs/llm/openai/index.cjs.map +1 -1
- package/dist/cjs/main.cjs +90 -0
- package/dist/cjs/main.cjs.map +1 -1
- package/dist/cjs/messages/anthropicToolCache.cjs +102 -0
- package/dist/cjs/messages/anthropicToolCache.cjs.map +1 -0
- package/dist/cjs/messages/prune.cjs +27 -0
- package/dist/cjs/messages/prune.cjs.map +1 -1
- package/dist/cjs/messages/recency.cjs +99 -0
- package/dist/cjs/messages/recency.cjs.map +1 -0
- package/dist/cjs/run.cjs +30 -0
- package/dist/cjs/run.cjs.map +1 -1
- package/dist/cjs/summarization/node.cjs +100 -6
- package/dist/cjs/summarization/node.cjs.map +1 -1
- package/dist/cjs/tools/ToolNode.cjs +635 -23
- package/dist/cjs/tools/ToolNode.cjs.map +1 -1
- package/dist/cjs/tools/local/CompileCheckTool.cjs +227 -0
- package/dist/cjs/tools/local/CompileCheckTool.cjs.map +1 -0
- package/dist/cjs/tools/local/FileCheckpointer.cjs +90 -0
- package/dist/cjs/tools/local/FileCheckpointer.cjs.map +1 -0
- package/dist/cjs/tools/local/LocalCodingTools.cjs +1098 -0
- package/dist/cjs/tools/local/LocalCodingTools.cjs.map +1 -0
- package/dist/cjs/tools/local/LocalExecutionEngine.cjs +1042 -0
- package/dist/cjs/tools/local/LocalExecutionEngine.cjs.map +1 -0
- package/dist/cjs/tools/local/LocalExecutionTools.cjs +122 -0
- package/dist/cjs/tools/local/LocalExecutionTools.cjs.map +1 -0
- package/dist/cjs/tools/local/LocalProgrammaticToolCalling.cjs +453 -0
- package/dist/cjs/tools/local/LocalProgrammaticToolCalling.cjs.map +1 -0
- package/dist/cjs/tools/local/attachments.cjs +183 -0
- package/dist/cjs/tools/local/attachments.cjs.map +1 -0
- package/dist/cjs/tools/local/bashAst.cjs +129 -0
- package/dist/cjs/tools/local/bashAst.cjs.map +1 -0
- package/dist/cjs/tools/local/editStrategies.cjs +188 -0
- package/dist/cjs/tools/local/editStrategies.cjs.map +1 -0
- package/dist/cjs/tools/local/resolveLocalExecutionTools.cjs +141 -0
- package/dist/cjs/tools/local/resolveLocalExecutionTools.cjs.map +1 -0
- package/dist/cjs/tools/local/syntaxCheck.cjs +182 -0
- package/dist/cjs/tools/local/syntaxCheck.cjs.map +1 -0
- package/dist/cjs/tools/local/textEncoding.cjs +30 -0
- package/dist/cjs/tools/local/textEncoding.cjs.map +1 -0
- package/dist/cjs/tools/local/workspaceFS.cjs +51 -0
- package/dist/cjs/tools/local/workspaceFS.cjs.map +1 -0
- package/dist/cjs/tools/subagent/SubagentExecutor.cjs +1 -0
- package/dist/cjs/tools/subagent/SubagentExecutor.cjs.map +1 -1
- package/dist/esm/common/enum.mjs +53 -1
- package/dist/esm/common/enum.mjs.map +1 -1
- package/dist/esm/graphs/Graph.mjs +149 -5
- package/dist/esm/graphs/Graph.mjs.map +1 -1
- package/dist/esm/hooks/createWorkspacePolicyHook.mjs +289 -0
- package/dist/esm/hooks/createWorkspacePolicyHook.mjs.map +1 -0
- package/dist/esm/llm/openai/index.mjs +318 -2
- package/dist/esm/llm/openai/index.mjs.map +1 -1
- package/dist/esm/main.mjs +17 -2
- package/dist/esm/main.mjs.map +1 -1
- package/dist/esm/messages/anthropicToolCache.mjs +99 -0
- package/dist/esm/messages/anthropicToolCache.mjs.map +1 -0
- package/dist/esm/messages/prune.mjs +26 -1
- package/dist/esm/messages/prune.mjs.map +1 -1
- package/dist/esm/messages/recency.mjs +97 -0
- package/dist/esm/messages/recency.mjs.map +1 -0
- package/dist/esm/run.mjs +30 -0
- package/dist/esm/run.mjs.map +1 -1
- package/dist/esm/summarization/node.mjs +100 -6
- package/dist/esm/summarization/node.mjs.map +1 -1
- package/dist/esm/tools/ToolNode.mjs +635 -23
- package/dist/esm/tools/ToolNode.mjs.map +1 -1
- package/dist/esm/tools/local/CompileCheckTool.mjs +223 -0
- package/dist/esm/tools/local/CompileCheckTool.mjs.map +1 -0
- package/dist/esm/tools/local/FileCheckpointer.mjs +87 -0
- package/dist/esm/tools/local/FileCheckpointer.mjs.map +1 -0
- package/dist/esm/tools/local/LocalCodingTools.mjs +1075 -0
- package/dist/esm/tools/local/LocalCodingTools.mjs.map +1 -0
- package/dist/esm/tools/local/LocalExecutionEngine.mjs +1022 -0
- package/dist/esm/tools/local/LocalExecutionEngine.mjs.map +1 -0
- package/dist/esm/tools/local/LocalExecutionTools.mjs +117 -0
- package/dist/esm/tools/local/LocalExecutionTools.mjs.map +1 -0
- package/dist/esm/tools/local/LocalProgrammaticToolCalling.mjs +448 -0
- package/dist/esm/tools/local/LocalProgrammaticToolCalling.mjs.map +1 -0
- package/dist/esm/tools/local/attachments.mjs +180 -0
- package/dist/esm/tools/local/attachments.mjs.map +1 -0
- package/dist/esm/tools/local/bashAst.mjs +126 -0
- package/dist/esm/tools/local/bashAst.mjs.map +1 -0
- package/dist/esm/tools/local/editStrategies.mjs +185 -0
- package/dist/esm/tools/local/editStrategies.mjs.map +1 -0
- package/dist/esm/tools/local/resolveLocalExecutionTools.mjs +137 -0
- package/dist/esm/tools/local/resolveLocalExecutionTools.mjs.map +1 -0
- package/dist/esm/tools/local/syntaxCheck.mjs +179 -0
- package/dist/esm/tools/local/syntaxCheck.mjs.map +1 -0
- package/dist/esm/tools/local/textEncoding.mjs +27 -0
- package/dist/esm/tools/local/textEncoding.mjs.map +1 -0
- package/dist/esm/tools/local/workspaceFS.mjs +49 -0
- package/dist/esm/tools/local/workspaceFS.mjs.map +1 -0
- package/dist/esm/tools/subagent/SubagentExecutor.mjs +1 -0
- package/dist/esm/tools/subagent/SubagentExecutor.mjs.map +1 -1
- package/dist/types/common/enum.d.ts +39 -1
- package/dist/types/graphs/Graph.d.ts +34 -0
- package/dist/types/hooks/createWorkspacePolicyHook.d.ts +95 -0
- package/dist/types/hooks/index.d.ts +2 -0
- package/dist/types/index.d.ts +1 -0
- package/dist/types/llm/openai/index.d.ts +17 -0
- package/dist/types/messages/anthropicToolCache.d.ts +51 -0
- package/dist/types/messages/index.d.ts +2 -0
- package/dist/types/messages/prune.d.ts +11 -0
- package/dist/types/messages/recency.d.ts +64 -0
- package/dist/types/run.d.ts +21 -0
- package/dist/types/tools/ToolNode.d.ts +145 -2
- package/dist/types/tools/local/CompileCheckTool.d.ts +31 -0
- package/dist/types/tools/local/FileCheckpointer.d.ts +39 -0
- package/dist/types/tools/local/LocalCodingTools.d.ts +57 -0
- package/dist/types/tools/local/LocalExecutionEngine.d.ts +149 -0
- package/dist/types/tools/local/LocalExecutionTools.d.ts +9 -0
- package/dist/types/tools/local/LocalProgrammaticToolCalling.d.ts +21 -0
- package/dist/types/tools/local/attachments.d.ts +84 -0
- package/dist/types/tools/local/bashAst.d.ts +11 -0
- package/dist/types/tools/local/editStrategies.d.ts +28 -0
- package/dist/types/tools/local/index.d.ts +12 -0
- package/dist/types/tools/local/resolveLocalExecutionTools.d.ts +38 -0
- package/dist/types/tools/local/syntaxCheck.d.ts +42 -0
- package/dist/types/tools/local/textEncoding.d.ts +21 -0
- package/dist/types/tools/local/workspaceFS.d.ts +49 -0
- package/dist/types/types/hitl.d.ts +56 -27
- package/dist/types/types/run.d.ts +8 -1
- package/dist/types/types/summarize.d.ts +30 -0
- package/dist/types/types/tools.d.ts +341 -6
- package/package.json +21 -2
- package/src/common/enum.ts +54 -0
- package/src/graphs/Graph.ts +164 -6
- package/src/hooks/__tests__/compactHooks.test.ts +38 -2
- package/src/hooks/__tests__/createWorkspacePolicyHook.test.ts +393 -0
- package/src/hooks/createWorkspacePolicyHook.ts +355 -0
- package/src/hooks/index.ts +6 -0
- package/src/index.ts +1 -0
- package/src/llm/openai/deepseek.test.ts +479 -0
- package/src/llm/openai/index.ts +484 -1
- package/src/messages/__tests__/anthropicToolCache.test.ts +125 -0
- package/src/messages/__tests__/recency.test.ts +267 -0
- package/src/messages/anthropicToolCache.ts +116 -0
- package/src/messages/index.ts +2 -0
- package/src/messages/prune.ts +27 -1
- package/src/messages/recency.ts +155 -0
- package/src/run.ts +31 -0
- package/src/scripts/compare_pi_vs_ours.ts +840 -0
- package/src/scripts/local_engine.ts +166 -0
- package/src/scripts/local_engine_checkpointer.ts +205 -0
- package/src/scripts/local_engine_compile.ts +263 -0
- package/src/scripts/local_engine_hooks.ts +226 -0
- package/src/scripts/local_engine_image.ts +201 -0
- package/src/scripts/local_engine_ptc.ts +151 -0
- package/src/scripts/local_engine_workspace.ts +258 -0
- package/src/scripts/summarization-recency.ts +462 -0
- package/src/specs/prune.test.ts +39 -0
- package/src/summarization/__tests__/node.test.ts +499 -3
- package/src/summarization/node.ts +124 -7
- package/src/tools/ToolNode.ts +769 -20
- package/src/tools/__tests__/LocalExecutionTools.test.ts +2647 -0
- package/src/tools/__tests__/ProgrammaticToolCalling.test.ts +175 -0
- package/src/tools/__tests__/ToolNode.outputReferences.test.ts +114 -0
- package/src/tools/__tests__/ToolNode.session.test.ts +84 -0
- package/src/tools/__tests__/directToolHITLResumeScope.test.ts +467 -0
- package/src/tools/__tests__/directToolHooks.test.ts +411 -0
- package/src/tools/__tests__/localToolNames.test.ts +73 -0
- package/src/tools/__tests__/workspaceSeam.test.ts +134 -0
- package/src/tools/local/CompileCheckTool.ts +278 -0
- package/src/tools/local/FileCheckpointer.ts +93 -0
- package/src/tools/local/LocalCodingTools.ts +1342 -0
- package/src/tools/local/LocalExecutionEngine.ts +1329 -0
- package/src/tools/local/LocalExecutionTools.ts +167 -0
- package/src/tools/local/LocalProgrammaticToolCalling.ts +594 -0
- package/src/tools/local/__tests__/FileCheckpointer.test.ts +120 -0
- package/src/tools/local/__tests__/editStrategies.test.ts +134 -0
- package/src/tools/local/attachments.ts +251 -0
- package/src/tools/local/bashAst.ts +151 -0
- package/src/tools/local/editStrategies.ts +188 -0
- package/src/tools/local/index.ts +12 -0
- package/src/tools/local/resolveLocalExecutionTools.ts +208 -0
- package/src/tools/local/syntaxCheck.ts +243 -0
- package/src/tools/local/textEncoding.ts +37 -0
- package/src/tools/local/workspaceFS.ts +89 -0
- package/src/types/hitl.ts +56 -27
- package/src/types/run.ts +12 -1
- package/src/types/summarize.ts +31 -0
- package/src/types/tools.ts +359 -7
|
@@ -0,0 +1,467 @@
|
|
|
1
|
+
import { z } from 'zod';
|
|
2
|
+
import { tool } from '@langchain/core/tools';
|
|
3
|
+
import {
|
|
4
|
+
END,
|
|
5
|
+
START,
|
|
6
|
+
StateGraph,
|
|
7
|
+
MemorySaver,
|
|
8
|
+
isInterrupted,
|
|
9
|
+
MessagesAnnotation,
|
|
10
|
+
Command,
|
|
11
|
+
} from '@langchain/langgraph';
|
|
12
|
+
import { AIMessage, ToolMessage } from '@langchain/core/messages';
|
|
13
|
+
import { describe, it, expect, jest, afterEach } from '@jest/globals';
|
|
14
|
+
import type { StructuredToolInterface } from '@langchain/core/tools';
|
|
15
|
+
import type { BaseMessage } from '@langchain/core/messages';
|
|
16
|
+
import type { Runnable, RunnableConfig } from '@langchain/core/runnables';
|
|
17
|
+
import type { PreToolUseHookOutput } from '@/hooks';
|
|
18
|
+
import type * as t from '@/types';
|
|
19
|
+
import { HookRegistry } from '@/hooks';
|
|
20
|
+
import { ToolNode } from '../ToolNode';
|
|
21
|
+
|
|
22
|
+
/**
|
|
23
|
+
* Pins the resume-scope behaviour for direct-path interrupts. The
|
|
24
|
+
* existing JSDoc on `HumanInTheLoopConfig` warned that mixed
|
|
25
|
+
* direct+event batches re-execute the direct half on resume because
|
|
26
|
+
* LangGraph rolls back the entire ToolNode on `interrupt()`. After
|
|
27
|
+
* lifting HITL into the direct path, the same rollback applies — but
|
|
28
|
+
* for direct-only batches too, since `interrupt()` always rewinds to
|
|
29
|
+
* the start of the suspending node.
|
|
30
|
+
*
|
|
31
|
+
* This test makes that concrete: a direct tool whose call is gated
|
|
32
|
+
* by a PreToolUse 'ask' hook fires its execute callback exactly N
|
|
33
|
+
* times, where N == number of resume passes that lead to an
|
|
34
|
+
* 'approve'. Side-effect-bearing tools should be designed
|
|
35
|
+
* idempotent regardless of whether they're direct or event-dispatched.
|
|
36
|
+
*/
|
|
37
|
+
|
|
38
|
+
/**
 * Creates an assistant message that carries a single tool call and no text.
 *
 * @param callId - Identifier stored on the tool call.
 * @param name - Name of the tool being requested.
 * @param args - Arguments attached to the tool call.
 * @returns An `AIMessage` with empty content and one entry in `tool_calls`.
 */
function aiCall(
  callId: string,
  name: string,
  args: Record<string, unknown>
): AIMessage {
  const toolCall = { id: callId, name, args };
  const message = new AIMessage({ content: '', tool_calls: [toolCall] });
  return message;
}
|
|
48
|
+
|
|
49
|
+
/** State update shape returned by the stub agent node in these tests. */
type MessagesUpdate = { messages: BaseMessage[] };

/**
 * Narrow view of a compiled graph as these tests use it: a runnable whose
 * output carries `messages`, with an `invoke` that accepts any input (initial
 * state or a resume command) and resolves to an untyped result the test
 * inspects itself.
 */
type CompiledMessagesGraph = Runnable<unknown, MessagesUpdate> & {
  invoke(input: unknown, config?: RunnableConfig): Promise<unknown>;
};
|
|
53
|
+
|
|
54
|
+
/**
 * Compiles a two-node `agent -> tools` graph with an in-memory checkpointer.
 *
 * The stub agent emits one assistant message, built from `toolCalls[0]`, the
 * first time it runs; every later run contributes no messages. Only the first
 * entry of `toolCalls` is read.
 *
 * @param toolNode - The `ToolNode` under test, registered as the `tools` node.
 * @param toolCalls - Tool calls to request; only index 0 is used.
 * @returns The compiled graph, viewed through `CompiledMessagesGraph`.
 */
function buildGraph(
  toolNode: ToolNode,
  toolCalls: Array<{ id: string; name: string; args: Record<string, unknown> }>
): CompiledMessagesGraph {
  let agentRuns = 0;

  const agent = (): MessagesUpdate => {
    agentRuns += 1;
    if (agentRuns !== 1) {
      return { messages: [] };
    }
    const { id, name, args } = toolCalls[0];
    return { messages: [aiCall(id, name, args)] };
  };

  const workflow = new StateGraph(MessagesAnnotation)
    .addNode('agent', agent)
    .addNode('tools', toolNode)
    .addEdge(START, 'agent')
    .addEdge('agent', 'tools')
    .addEdge('tools', END);

  const checkpointer = new MemorySaver();
  return workflow.compile({ checkpointer }) as unknown as CompiledMessagesGraph;
}
|
|
75
|
+
|
|
76
|
+
describe('direct-path HITL: resume scope', () => {
|
|
77
|
+
// Restore every jest mock/spy after each test so state does not leak between cases.
afterEach((): void => {
  jest.restoreAllMocks();
});
|
|
80
|
+
|
|
81
|
+
it('re-executes the direct tool body on resume when interrupt() fires from the direct path', async () => {
  // Spy standing in for the tool's side effect so body invocations can be counted.
  const sideEffect = jest.fn(() => 'EXECUTED');
  const directTool = tool(async () => sideEffect(), {
    name: 'echo',
    description: 'direct tool that records every body invocation',
    schema: z.object({ command: z.string().optional() }).passthrough(),
  }) as unknown as StructuredToolInterface;

  const registry = new HookRegistry();
  let hookInvocations = 0;
  // Realistic shape: ask the FIRST time the hook sees a tool call,
  // allow on subsequent invocations. A real policy hook would key
  // off persistent state (an "approved paths" set, a session
  // approval token, etc.); we just count.
  registry.register('PreToolUse', {
    hooks: [
      async (): Promise<PreToolUseHookOutput> => {
        hookInvocations += 1;
        if (hookInvocations === 1) {
          return { decision: 'ask', reason: 'first-time-ask' };
        }
        return { decision: 'allow' };
      },
    ],
  });

  const node = new ToolNode({
    tools: [directTool],
    eventDrivenMode: true,
    hookRegistry: registry,
    directToolNames: new Set(['echo']),
    humanInTheLoop: { enabled: true },
  });

  const graph = buildGraph(node, [
    { id: 'call_1', name: 'echo', args: { command: 'go' } },
  ]);
  const config = { configurable: { thread_id: 'thread-resume-1' } };

  const first = await graph.invoke({ messages: [] }, config);
  expect(isInterrupted<t.HumanInterruptPayload>(first)).toBe(true);

  // Body should NOT have run yet — the hook intercepted before
  // the tool executed.
  expect(sideEffect).not.toHaveBeenCalled();
  // Hook fires once per attempt; the first interrupt is attempt #1.
  expect(hookInvocations).toBe(1);

  // Resume with approve. The decision is wrapped in a `Command`, the same
  // way the other resume tests in this file do it: a plain `{ resume }`
  // object is not a `Command`, so it would be handled as ordinary graph
  // input instead of a resume value and the resume path would go untested.
  const second = await graph.invoke(
    new Command({ resume: [{ tool_call_id: 'call_1', type: 'approve' }] }),
    config
  );

  // PreToolUse fired a SECOND time on the resume re-entry.
  expect(hookInvocations).toBe(2);
  // Body executed exactly once — only on the resume pass, after the hook
  // returned 'allow'. The interrupted first pass never reached the body,
  // so "re-executes" in the title refers to the ToolNode re-entry, not to
  // a second body invocation.
  expect(sideEffect).toHaveBeenCalledTimes(1);

  // Result should carry the executed output. Assert the message exists
  // before reading it so a missing ToolMessage fails with a clear
  // expectation error rather than a TypeError.
  const messages = (second as { messages: BaseMessage[] }).messages;
  const toolMsg = messages.find(
    (m): m is ToolMessage => m instanceof ToolMessage
  );
  expect(toolMsg).toBeDefined();
  expect(String(toolMsg?.content)).toBe('EXECUTED');
});
|
|
155
|
+
|
|
156
|
+
it('re-runs sibling tools that already executed in the same batch when a later tool interrupts', async () => {
  // Two direct tools in the same batch. Tool A is a no-op the
  // hook always allows; tool B asks the first time. On resume,
  // the entire ToolNode is re-entered — meaning tool A's body
  // runs twice (once per pass). This pins the side-effect
  // caveat: tools called BEFORE an interrupting sibling MUST be
  // idempotent regardless of whether they're direct or
  // event-dispatched.
  const aSideEffect = jest.fn(() => 'A-OK');
  const bSideEffect = jest.fn(() => 'B-OK');
  const a = tool(async () => aSideEffect(), {
    name: 'tool_a',
    description: 'allowed direct tool',
    schema: z.object({}).passthrough(),
  }) as unknown as StructuredToolInterface;
  const b = tool(async () => bSideEffect(), {
    name: 'tool_b',
    description: 'asks first time, allows after',
    schema: z.object({}).passthrough(),
  }) as unknown as StructuredToolInterface;

  // Counts only the hook invocations that concern tool_b.
  let bHookInvocations = 0;
  const registry = new HookRegistry();
  registry.register('PreToolUse', {
    hooks: [
      async ({ toolName }): Promise<PreToolUseHookOutput> => {
        if (toolName === 'tool_b') {
          bHookInvocations += 1;
          if (bHookInvocations === 1) {
            return { decision: 'ask', reason: 'b-first-ask' };
          }
        }
        return { decision: 'allow' };
      },
    ],
  });

  const node = new ToolNode({
    tools: [a, b],
    eventDrivenMode: true,
    hookRegistry: registry,
    directToolNames: new Set(['tool_a', 'tool_b']),
    humanInTheLoop: { enabled: true },
  });

  // Built inline because the agent must emit BOTH tool calls in one message.
  const builder = new StateGraph(MessagesAnnotation)
    .addNode('agent', (): MessagesUpdate => ({
      messages: [
        new AIMessage({
          content: '',
          tool_calls: [
            { id: 'a1', name: 'tool_a', args: {} },
            { id: 'b1', name: 'tool_b', args: {} },
          ],
        }),
      ],
    }))
    .addNode('tools', node)
    .addEdge(START, 'agent')
    .addEdge('agent', 'tools')
    .addEdge('tools', END);
  const graph = builder.compile({
    checkpointer: new MemorySaver(),
  }) as unknown as CompiledMessagesGraph;

  const config = { configurable: { thread_id: 'thread-mixed-batch' } };
  const first = await graph.invoke({ messages: [] }, config);
  expect(isInterrupted<t.HumanInterruptPayload>(first)).toBe(true);

  // First pass: A ran (allowed), B asked.
  expect(aSideEffect).toHaveBeenCalledTimes(1);
  expect(bSideEffect).not.toHaveBeenCalled();
  expect(bHookInvocations).toBe(1);

  // Resume through a `Command`, as the other resume tests in this file do.
  // A plain `{ resume }` object is not a `Command`; it would be handled as
  // ordinary input, re-running the agent node and emitting a second batch,
  // so the counts below would pass without ever exercising a real resume.
  await graph.invoke(
    new Command({ resume: [{ tool_call_id: 'b1', type: 'approve' }] }),
    config
  );

  // Resume: the ToolNode is re-entered from the start. A's body runs
  // AGAIN. B's body runs once now that the hook allowed.
  expect(aSideEffect).toHaveBeenCalledTimes(2);
  expect(bSideEffect).toHaveBeenCalledTimes(1);
  expect(bHookInvocations).toBe(2);
});
|
|
242
|
+
|
|
243
|
+
describe('edit decision (Codex P1 #16)', () => {
|
|
244
|
+
it('applies decision.updatedInput (the documented field) to the executed tool args', async () => {
  // Every argument object the tool body actually ran with, in call order.
  const seenInputs: Array<Record<string, unknown>> = [];
  const echoTool = tool(
    async (input) => {
      seenInputs.push(input as Record<string, unknown>);
      return JSON.stringify(input);
    },
    {
      name: 'echo',
      description: 'records the args it actually executed with',
      schema: z.object({ command: z.string() }),
    }
  ) as unknown as StructuredToolInterface;

  // The hook asks on every pass; the host's answer arrives only through
  // the resume value, which carries the approve/edit/reject decision.
  const alwaysAsk = async (): Promise<PreToolUseHookOutput> => ({
    decision: 'ask',
    allowedDecisions: ['approve', 'edit'],
  });
  const hooks = new HookRegistry();
  hooks.register('PreToolUse', { hooks: [alwaysAsk] });

  const toolNode = new ToolNode({
    tools: [echoTool],
    eventDrivenMode: true,
    hookRegistry: hooks,
    directToolNames: new Set(['echo']),
    humanInTheLoop: { enabled: true },
  });

  const requestedCall = {
    id: 'call_1',
    name: 'echo',
    args: { command: 'original' },
  };
  const graph = buildGraph(toolNode, [requestedCall]);
  const runConfig = { configurable: { thread_id: 'thread-edit-1' } };

  // Pass 1: the ask surfaces as an interrupt before the body runs.
  const interrupted = await graph.invoke({ messages: [] }, runConfig);
  expect(isInterrupted<t.HumanInterruptPayload>(interrupted)).toBe(true);
  expect(seenInputs).toEqual([]);

  // Pass 2: the host answers with an edit that replaces the input.
  const editDecision = {
    type: 'edit',
    updatedInput: { command: 'edited-by-host' },
  };
  const resumed = await graph.invoke(
    new Command({ resume: [editDecision] }),
    runConfig
  );

  // The edited input, not `{ command: 'original' }`, must reach the body.
  // The regression guarded here: reading `decision.args` instead of
  // `decision.updatedInput` silently dropped the edit.
  expect(seenInputs).toHaveLength(1);
  expect(seenInputs[0]).toEqual({ command: 'edited-by-host' });

  const { messages } = resumed as { messages: ToolMessage[] };
  const toolMsg = messages.find(
    (candidate) => candidate instanceof ToolMessage
  ) as ToolMessage;
  expect(String(toolMsg.content)).toContain('edited-by-host');
});
|
|
312
|
+
|
|
313
|
+
it('fails closed when updatedInput is missing or wrong-shaped', async () => {
  // Spy on the tool body so "did not execute" is asserted directly,
  // not only inferred from the text of the resulting ToolMessage.
  const sideEffect = jest.fn(() => 'should-not-execute');
  const directTool = tool(async () => sideEffect(), {
    name: 'echo',
    description: 'must not execute on malformed edit',
    schema: z.object({ command: z.string().optional() }).passthrough(),
  }) as unknown as StructuredToolInterface;

  // Hook always asks; the resume value carries the host's decision.
  const registry = new HookRegistry();
  registry.register('PreToolUse', {
    hooks: [
      async (): Promise<PreToolUseHookOutput> => ({
        decision: 'ask',
        allowedDecisions: ['approve', 'edit'],
      }),
    ],
  });

  const node = new ToolNode({
    tools: [directTool],
    eventDrivenMode: true,
    hookRegistry: registry,
    directToolNames: new Set(['echo']),
    humanInTheLoop: { enabled: true },
  });

  const graph = buildGraph(node, [
    { id: 'call_1', name: 'echo', args: { command: 'original' } },
  ]);
  const config = { configurable: { thread_id: 'thread-edit-2' } };

  // The first pass must actually suspend; otherwise the resume below
  // would not be answering an interrupt at all.
  const first = await graph.invoke({ messages: [] }, config);
  expect(isInterrupted<t.HumanInterruptPayload>(first)).toBe(true);
  expect(sideEffect).not.toHaveBeenCalled();

  // Send `{ type: 'edit' }` with no updatedInput at all (simulates
  // a host that misnamed the field, e.g. used `args` like the old
  // bug expected). Must fail closed instead of executing.
  const second = await graph.invoke(
    new Command({
      resume: [
        {
          type: 'edit',
          args: { command: 'this-field-name-is-wrong' },
        } as unknown as t.ToolApprovalDecision,
      ],
    }),
    config
  );

  // Assert the message exists before reading it so a missing ToolMessage
  // fails with a clear expectation error rather than a TypeError.
  const messages = (second as { messages: BaseMessage[] }).messages;
  const toolMsg = messages.find(
    (m): m is ToolMessage => m instanceof ToolMessage
  );
  expect(toolMsg).toBeDefined();
  expect(toolMsg?.status).toBe('error');
  expect(String(toolMsg?.content)).toContain(
    'Decision "edit" missing object updatedInput'
  );
  expect(String(toolMsg?.content)).not.toContain('should-not-execute');
  // The body itself must never have been invoked on the malformed edit.
  expect(sideEffect).not.toHaveBeenCalled();
});
|
|
373
|
+
});
|
|
374
|
+
|
|
375
|
+
describe('usage counter stability across resume (Codex P2 #30)', () => {
|
|
376
|
+
it('turn stays the same across an interrupt + resume — does not double-increment', async () => {
  // Regression guard: the turn counter used to be bumped before the hook
  // fired and was never rolled back on `ask`. Because the ToolNode is
  // re-entered from the start on resume, a call that asked once ended up
  // with turn=1 instead of turn=0. Turns are expected to be remembered
  // per call id and reused on re-entry.
  const turnsSeen: number[] = [];
  const turnRecorder = tool(
    async (_, config) => {
      const toolCall = (
        config as { toolCall?: { turn?: number } } | undefined
      )?.toolCall;
      const turn = toolCall?.turn;
      if (typeof turn === 'number') {
        turnsSeen.push(turn);
      }
      return 'EXECUTED';
    },
    {
      name: 'echo',
      description: 'records the turn it ran under',
      schema: z.object({ command: z.string() }),
    }
  ) as unknown as StructuredToolInterface;

  // The hook asks on every pass; only the resume value unblocks the call.
  const alwaysAsk = async (): Promise<PreToolUseHookOutput> => ({
    decision: 'ask',
    allowedDecisions: ['approve'],
  });
  const hooks = new HookRegistry();
  hooks.register('PreToolUse', { hooks: [alwaysAsk] });

  const toolNode = new ToolNode({
    tools: [turnRecorder],
    eventDrivenMode: true,
    hookRegistry: hooks,
    directToolNames: new Set(['echo']),
    humanInTheLoop: { enabled: true },
  });

  const requestedCall = { id: 'call_1', name: 'echo', args: { command: 'go' } };
  const graph = buildGraph(toolNode, [requestedCall]);
  const runConfig = { configurable: { thread_id: 'thread-turn-stable' } };

  const interrupted = await graph.invoke({ messages: [] }, runConfig);
  expect(isInterrupted<t.HumanInterruptPayload>(interrupted)).toBe(true);

  const approval = new Command({ resume: [{ type: 'approve' }] });
  await graph.invoke(approval, runConfig);

  // The body ran once and must have observed turn 0 — the slot assigned
  // on the first entry and reused on resume — not 1, which a second
  // increment on re-entry would have produced.
  expect(turnsSeen).toEqual([0]);
});
|
|
436
|
+
|
|
437
|
+
it('clearDirectPathTurns() empties the per-Run cache (Codex P2 #33)', () => {
  // The resume-stable map must be cleared at end-of-Run so it
  // doesn't grow unbounded across long runs and doesn't return
  // stale slots if a provider reuses call IDs across turns.
  // Graph.clearHeavyState is expected to call this on every compiled
  // ToolNode; pin the method directly so a regression here
  // doesn't slip past the integration boundary.
  const echo = tool(async () => 'EXECUTED', {
    name: 'echo',
    description: 'noop',
    schema: z.object({}).passthrough(),
  }) as unknown as StructuredToolInterface;
  const node = new ToolNode({
    tools: [echo],
    eventDrivenMode: true,
    directToolNames: new Set(['echo']),
  });

  // Narrow structural view of the private cache instead of `any`, so only
  // the one field this test touches escapes type checking.
  // NOTE(review): assumes `directPathTurns` is a Map keyed by call id with
  // numeric turn values — confirm against ToolNode.
  const internal = node as unknown as {
    directPathTurns: Map<string, number>;
  };

  // Sanity: clearing a freshly constructed node is a harmless no-op.
  node.clearDirectPathTurns();
  expect(internal.directPathTurns.size).toBe(0);

  // Populate one entry by hand, then verify the clear empties it.
  internal.directPathTurns.set('call_x', 7);
  expect(internal.directPathTurns.size).toBe(1);
  node.clearDirectPathTurns();
  expect(internal.directPathTurns.size).toBe(0);
});
|
|
466
|
+
});
|
|
467
|
+
});
|