illuma-agents 1.0.37 → 1.0.39
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/agents/AgentContext.cjs +112 -14
- package/dist/cjs/agents/AgentContext.cjs.map +1 -1
- package/dist/cjs/common/enum.cjs +5 -1
- package/dist/cjs/common/enum.cjs.map +1 -1
- package/dist/cjs/graphs/Graph.cjs +148 -8
- package/dist/cjs/graphs/Graph.cjs.map +1 -1
- package/dist/cjs/graphs/MultiAgentGraph.cjs +277 -11
- package/dist/cjs/graphs/MultiAgentGraph.cjs.map +1 -1
- package/dist/cjs/llm/bedrock/index.cjs +128 -61
- package/dist/cjs/llm/bedrock/index.cjs.map +1 -1
- package/dist/cjs/main.cjs +22 -7
- package/dist/cjs/main.cjs.map +1 -1
- package/dist/cjs/messages/cache.cjs +140 -46
- package/dist/cjs/messages/cache.cjs.map +1 -1
- package/dist/cjs/messages/core.cjs +1 -1
- package/dist/cjs/messages/core.cjs.map +1 -1
- package/dist/cjs/messages/tools.cjs +2 -2
- package/dist/cjs/messages/tools.cjs.map +1 -1
- package/dist/cjs/schemas/validate.cjs +173 -0
- package/dist/cjs/schemas/validate.cjs.map +1 -0
- package/dist/cjs/stream.cjs +4 -2
- package/dist/cjs/stream.cjs.map +1 -1
- package/dist/cjs/tools/BrowserTools.cjs.map +1 -1
- package/dist/cjs/tools/CodeExecutor.cjs +22 -21
- package/dist/cjs/tools/CodeExecutor.cjs.map +1 -1
- package/dist/cjs/tools/ProgrammaticToolCalling.cjs +14 -11
- package/dist/cjs/tools/ProgrammaticToolCalling.cjs.map +1 -1
- package/dist/cjs/tools/ToolNode.cjs +101 -2
- package/dist/cjs/tools/ToolNode.cjs.map +1 -1
- package/dist/cjs/tools/ToolSearch.cjs +862 -0
- package/dist/cjs/tools/ToolSearch.cjs.map +1 -0
- package/dist/esm/agents/AgentContext.mjs +112 -14
- package/dist/esm/agents/AgentContext.mjs.map +1 -1
- package/dist/esm/common/enum.mjs +5 -1
- package/dist/esm/common/enum.mjs.map +1 -1
- package/dist/esm/graphs/Graph.mjs +149 -9
- package/dist/esm/graphs/Graph.mjs.map +1 -1
- package/dist/esm/graphs/MultiAgentGraph.mjs +278 -12
- package/dist/esm/graphs/MultiAgentGraph.mjs.map +1 -1
- package/dist/esm/llm/bedrock/index.mjs +127 -60
- package/dist/esm/llm/bedrock/index.mjs.map +1 -1
- package/dist/esm/main.mjs +2 -1
- package/dist/esm/main.mjs.map +1 -1
- package/dist/esm/messages/cache.mjs +140 -46
- package/dist/esm/messages/cache.mjs.map +1 -1
- package/dist/esm/messages/core.mjs +1 -1
- package/dist/esm/messages/core.mjs.map +1 -1
- package/dist/esm/messages/tools.mjs +2 -2
- package/dist/esm/messages/tools.mjs.map +1 -1
- package/dist/esm/schemas/validate.mjs +167 -0
- package/dist/esm/schemas/validate.mjs.map +1 -0
- package/dist/esm/stream.mjs +4 -2
- package/dist/esm/stream.mjs.map +1 -1
- package/dist/esm/tools/BrowserTools.mjs.map +1 -1
- package/dist/esm/tools/CodeExecutor.mjs +22 -21
- package/dist/esm/tools/CodeExecutor.mjs.map +1 -1
- package/dist/esm/tools/ProgrammaticToolCalling.mjs +14 -11
- package/dist/esm/tools/ProgrammaticToolCalling.mjs.map +1 -1
- package/dist/esm/tools/ToolNode.mjs +102 -3
- package/dist/esm/tools/ToolNode.mjs.map +1 -1
- package/dist/esm/tools/ToolSearch.mjs +827 -0
- package/dist/esm/tools/ToolSearch.mjs.map +1 -0
- package/dist/types/agents/AgentContext.d.ts +51 -1
- package/dist/types/common/enum.d.ts +6 -2
- package/dist/types/graphs/Graph.d.ts +12 -0
- package/dist/types/graphs/MultiAgentGraph.d.ts +16 -0
- package/dist/types/index.d.ts +2 -1
- package/dist/types/llm/bedrock/index.d.ts +89 -11
- package/dist/types/llm/bedrock/types.d.ts +27 -0
- package/dist/types/llm/bedrock/utils/index.d.ts +5 -0
- package/dist/types/llm/bedrock/utils/message_inputs.d.ts +31 -0
- package/dist/types/llm/bedrock/utils/message_outputs.d.ts +33 -0
- package/dist/types/messages/cache.d.ts +4 -1
- package/dist/types/schemas/index.d.ts +1 -0
- package/dist/types/schemas/validate.d.ts +36 -0
- package/dist/types/tools/CodeExecutor.d.ts +0 -3
- package/dist/types/tools/ProgrammaticToolCalling.d.ts +0 -3
- package/dist/types/tools/ToolNode.d.ts +3 -1
- package/dist/types/tools/ToolSearch.d.ts +148 -0
- package/dist/types/types/graph.d.ts +71 -0
- package/dist/types/types/llm.d.ts +3 -1
- package/dist/types/types/tools.d.ts +42 -2
- package/package.json +13 -6
- package/src/agents/AgentContext.test.ts +312 -0
- package/src/agents/AgentContext.ts +144 -16
- package/src/common/enum.ts +5 -1
- package/src/graphs/Graph.ts +214 -13
- package/src/graphs/MultiAgentGraph.ts +350 -13
- package/src/index.ts +4 -1
- package/src/llm/bedrock/index.ts +221 -99
- package/src/llm/bedrock/llm.spec.ts +616 -0
- package/src/llm/bedrock/types.ts +51 -0
- package/src/llm/bedrock/utils/index.ts +18 -0
- package/src/llm/bedrock/utils/message_inputs.ts +563 -0
- package/src/llm/bedrock/utils/message_outputs.ts +310 -0
- package/src/messages/__tests__/tools.test.ts +21 -21
- package/src/messages/cache.test.ts +304 -0
- package/src/messages/cache.ts +183 -53
- package/src/messages/core.ts +1 -1
- package/src/messages/tools.ts +2 -2
- package/src/schemas/index.ts +2 -0
- package/src/schemas/validate.test.ts +358 -0
- package/src/schemas/validate.ts +238 -0
- package/src/scripts/caching.ts +27 -19
- package/src/scripts/code_exec_files.ts +58 -15
- package/src/scripts/code_exec_multi_session.ts +241 -0
- package/src/scripts/code_exec_session.ts +282 -0
- package/src/scripts/multi-agent-conditional.ts +1 -0
- package/src/scripts/multi-agent-supervisor.ts +1 -0
- package/src/scripts/programmatic_exec_agent.ts +4 -4
- package/src/scripts/test-handoff-preamble.ts +277 -0
- package/src/scripts/test-parallel-handoffs.ts +291 -0
- package/src/scripts/test-tools-before-handoff.ts +8 -4
- package/src/scripts/test_code_api.ts +361 -0
- package/src/scripts/thinking-bedrock.ts +159 -0
- package/src/scripts/thinking.ts +39 -18
- package/src/scripts/{tool_search_regex.ts → tool_search.ts} +5 -5
- package/src/scripts/tools.ts +7 -3
- package/src/specs/cache.simple.test.ts +396 -0
- package/src/stream.ts +4 -2
- package/src/tools/BrowserTools.ts +39 -17
- package/src/tools/CodeExecutor.ts +26 -23
- package/src/tools/ProgrammaticToolCalling.ts +18 -14
- package/src/tools/ToolNode.ts +114 -1
- package/src/tools/ToolSearch.ts +1041 -0
- package/src/tools/__tests__/ProgrammaticToolCalling.test.ts +0 -2
- package/src/tools/__tests__/{ToolSearchRegex.integration.test.ts → ToolSearch.integration.test.ts} +6 -6
- package/src/tools/__tests__/ToolSearch.test.ts +1003 -0
- package/src/types/graph.test.ts +183 -0
- package/src/types/graph.ts +73 -0
- package/src/types/llm.ts +3 -1
- package/src/types/tools.ts +51 -2
- package/dist/cjs/tools/ToolSearchRegex.cjs +0 -455
- package/dist/cjs/tools/ToolSearchRegex.cjs.map +0 -1
- package/dist/esm/tools/ToolSearchRegex.mjs +0 -448
- package/dist/esm/tools/ToolSearchRegex.mjs.map +0 -1
- package/dist/types/tools/ToolSearchRegex.d.ts +0 -80
- package/src/tools/ToolSearchRegex.ts +0 -535
- package/src/tools/__tests__/ToolSearchRegex.test.ts +0 -232
|
@@ -0,0 +1,396 @@
|
|
|
1
|
+
/* eslint-disable no-console */
|
|
2
|
+
/* eslint-disable @typescript-eslint/no-explicit-any */
|
|
3
|
+
import { config } from 'dotenv';
|
|
4
|
+
config();
|
|
5
|
+
import { Calculator } from '@/tools/Calculator';
|
|
6
|
+
import {
|
|
7
|
+
AIMessage,
|
|
8
|
+
BaseMessage,
|
|
9
|
+
HumanMessage,
|
|
10
|
+
UsageMetadata,
|
|
11
|
+
} from '@langchain/core/messages';
|
|
12
|
+
import type * as t from '@/types';
|
|
13
|
+
import { ChatModelStreamHandler, createContentAggregator } from '@/stream';
|
|
14
|
+
import { ModelEndHandler, ToolEndHandler } from '@/events';
|
|
15
|
+
import { capitalizeFirstLetter } from './spec.utils';
|
|
16
|
+
import { GraphEvents, Providers } from '@/common';
|
|
17
|
+
import { getLLMConfig } from '@/utils/llmConfig';
|
|
18
|
+
import { getArgs } from '@/scripts/args';
|
|
19
|
+
import { Run } from '@/run';
|
|
20
|
+
|
|
21
|
+
/**
|
|
22
|
+
* These tests verify that prompt caching works correctly across multi-turn
|
|
23
|
+
* conversations and that messages are not mutated in place.
|
|
24
|
+
*/
|
|
25
|
+
describe('Prompt Caching Integration Tests', () => {
|
|
26
|
+
jest.setTimeout(120000);
|
|
27
|
+
|
|
28
|
+
const setupTest = (): {
|
|
29
|
+
collectedUsage: UsageMetadata[];
|
|
30
|
+
contentParts: Array<t.MessageContentComplex | undefined>;
|
|
31
|
+
customHandlers: Record<string | GraphEvents, t.EventHandler>;
|
|
32
|
+
} => {
|
|
33
|
+
const collectedUsage: UsageMetadata[] = [];
|
|
34
|
+
const { contentParts, aggregateContent } = createContentAggregator();
|
|
35
|
+
|
|
36
|
+
const customHandlers: Record<string | GraphEvents, t.EventHandler> = {
|
|
37
|
+
[GraphEvents.TOOL_END]: new ToolEndHandler(),
|
|
38
|
+
[GraphEvents.CHAT_MODEL_END]: new ModelEndHandler(collectedUsage),
|
|
39
|
+
[GraphEvents.CHAT_MODEL_STREAM]: new ChatModelStreamHandler(),
|
|
40
|
+
[GraphEvents.ON_RUN_STEP_COMPLETED]: {
|
|
41
|
+
handle: (
|
|
42
|
+
event: GraphEvents.ON_RUN_STEP_COMPLETED,
|
|
43
|
+
data: t.StreamEventData
|
|
44
|
+
): void => {
|
|
45
|
+
aggregateContent({
|
|
46
|
+
event,
|
|
47
|
+
data: data as unknown as { result: t.ToolEndEvent },
|
|
48
|
+
});
|
|
49
|
+
},
|
|
50
|
+
},
|
|
51
|
+
[GraphEvents.ON_RUN_STEP]: {
|
|
52
|
+
handle: (
|
|
53
|
+
event: GraphEvents.ON_RUN_STEP,
|
|
54
|
+
data: t.StreamEventData
|
|
55
|
+
): void => {
|
|
56
|
+
aggregateContent({ event, data: data as t.RunStep });
|
|
57
|
+
},
|
|
58
|
+
},
|
|
59
|
+
[GraphEvents.ON_RUN_STEP_DELTA]: {
|
|
60
|
+
handle: (
|
|
61
|
+
event: GraphEvents.ON_RUN_STEP_DELTA,
|
|
62
|
+
data: t.StreamEventData
|
|
63
|
+
): void => {
|
|
64
|
+
aggregateContent({ event, data: data as t.RunStepDeltaEvent });
|
|
65
|
+
},
|
|
66
|
+
},
|
|
67
|
+
[GraphEvents.ON_MESSAGE_DELTA]: {
|
|
68
|
+
handle: (
|
|
69
|
+
event: GraphEvents.ON_MESSAGE_DELTA,
|
|
70
|
+
data: t.StreamEventData
|
|
71
|
+
): void => {
|
|
72
|
+
aggregateContent({ event, data: data as t.MessageDeltaEvent });
|
|
73
|
+
},
|
|
74
|
+
},
|
|
75
|
+
};
|
|
76
|
+
|
|
77
|
+
return { collectedUsage, contentParts, customHandlers };
|
|
78
|
+
};
|
|
79
|
+
|
|
80
|
+
const streamConfig = {
|
|
81
|
+
configurable: { thread_id: 'cache-test-thread' },
|
|
82
|
+
streamMode: 'values',
|
|
83
|
+
version: 'v2' as const,
|
|
84
|
+
};
|
|
85
|
+
|
|
86
|
+
describe('Anthropic Prompt Caching', () => {
|
|
87
|
+
const provider = Providers.ANTHROPIC;
|
|
88
|
+
|
|
89
|
+
test(`${capitalizeFirstLetter(provider)}: multi-turn conversation with caching should not corrupt messages`, async () => {
|
|
90
|
+
const { userName, location } = await getArgs();
|
|
91
|
+
const llmConfig = getLLMConfig(provider);
|
|
92
|
+
const { collectedUsage, customHandlers } = setupTest();
|
|
93
|
+
|
|
94
|
+
const run = await Run.create<t.IState>({
|
|
95
|
+
runId: 'cache-test-anthropic',
|
|
96
|
+
graphConfig: {
|
|
97
|
+
type: 'standard',
|
|
98
|
+
llmConfig: { ...llmConfig, promptCache: true } as t.LLMConfig,
|
|
99
|
+
tools: [new Calculator()],
|
|
100
|
+
instructions: 'You are a helpful assistant.',
|
|
101
|
+
additional_instructions: `User: ${userName}, Location: ${location}`,
|
|
102
|
+
},
|
|
103
|
+
returnContent: true,
|
|
104
|
+
customHandlers,
|
|
105
|
+
});
|
|
106
|
+
|
|
107
|
+
// Turn 1
|
|
108
|
+
const turn1Messages: BaseMessage[] = [
|
|
109
|
+
new HumanMessage('Hello, what is 2+2?'),
|
|
110
|
+
];
|
|
111
|
+
const turn1ContentSnapshot = JSON.stringify(turn1Messages[0].content);
|
|
112
|
+
|
|
113
|
+
const turn1Result = await run.processStream(
|
|
114
|
+
{ messages: turn1Messages },
|
|
115
|
+
streamConfig
|
|
116
|
+
);
|
|
117
|
+
expect(turn1Result).toBeDefined();
|
|
118
|
+
|
|
119
|
+
// Verify original message was NOT mutated
|
|
120
|
+
expect(JSON.stringify(turn1Messages[0].content)).toBe(
|
|
121
|
+
turn1ContentSnapshot
|
|
122
|
+
);
|
|
123
|
+
expect((turn1Messages[0] as any).content).not.toContain('cache_control');
|
|
124
|
+
|
|
125
|
+
const turn1RunMessages = run.getRunMessages();
|
|
126
|
+
expect(turn1RunMessages).toBeDefined();
|
|
127
|
+
expect(turn1RunMessages!.length).toBeGreaterThan(0);
|
|
128
|
+
|
|
129
|
+
// Turn 2 - build on conversation
|
|
130
|
+
const turn2Messages: BaseMessage[] = [
|
|
131
|
+
...turn1Messages,
|
|
132
|
+
...turn1RunMessages!,
|
|
133
|
+
new HumanMessage('Now multiply that by 10'),
|
|
134
|
+
];
|
|
135
|
+
const turn2HumanContentSnapshot = JSON.stringify(
|
|
136
|
+
turn2Messages[turn2Messages.length - 1].content
|
|
137
|
+
);
|
|
138
|
+
|
|
139
|
+
const run2 = await Run.create<t.IState>({
|
|
140
|
+
runId: 'cache-test-anthropic-2',
|
|
141
|
+
graphConfig: {
|
|
142
|
+
type: 'standard',
|
|
143
|
+
llmConfig: { ...llmConfig, promptCache: true } as t.LLMConfig,
|
|
144
|
+
tools: [new Calculator()],
|
|
145
|
+
instructions: 'You are a helpful assistant.',
|
|
146
|
+
additional_instructions: `User: ${userName}, Location: ${location}`,
|
|
147
|
+
},
|
|
148
|
+
returnContent: true,
|
|
149
|
+
customHandlers,
|
|
150
|
+
});
|
|
151
|
+
|
|
152
|
+
const turn2Result = await run2.processStream(
|
|
153
|
+
{ messages: turn2Messages },
|
|
154
|
+
streamConfig
|
|
155
|
+
);
|
|
156
|
+
expect(turn2Result).toBeDefined();
|
|
157
|
+
|
|
158
|
+
// Verify messages were NOT mutated
|
|
159
|
+
expect(
|
|
160
|
+
JSON.stringify(turn2Messages[turn2Messages.length - 1].content)
|
|
161
|
+
).toBe(turn2HumanContentSnapshot);
|
|
162
|
+
|
|
163
|
+
// Check that we got cache read tokens (indicating caching worked)
|
|
164
|
+
console.log(`${provider} Usage:`, collectedUsage);
|
|
165
|
+
expect(collectedUsage.length).toBeGreaterThan(0);
|
|
166
|
+
|
|
167
|
+
console.log(
|
|
168
|
+
`${capitalizeFirstLetter(provider)} multi-turn caching test passed - messages not mutated`
|
|
169
|
+
);
|
|
170
|
+
});
|
|
171
|
+
|
|
172
|
+
test(`${capitalizeFirstLetter(provider)}: tool calls should work with caching enabled`, async () => {
|
|
173
|
+
const llmConfig = getLLMConfig(provider);
|
|
174
|
+
const { customHandlers } = setupTest();
|
|
175
|
+
|
|
176
|
+
const run = await Run.create<t.IState>({
|
|
177
|
+
runId: 'cache-test-anthropic-tools',
|
|
178
|
+
graphConfig: {
|
|
179
|
+
type: 'standard',
|
|
180
|
+
llmConfig: { ...llmConfig, promptCache: true } as t.LLMConfig,
|
|
181
|
+
tools: [new Calculator()],
|
|
182
|
+
instructions:
|
|
183
|
+
'You are a math assistant. Use the calculator tool for all calculations.',
|
|
184
|
+
},
|
|
185
|
+
returnContent: true,
|
|
186
|
+
customHandlers,
|
|
187
|
+
});
|
|
188
|
+
|
|
189
|
+
const messages: BaseMessage[] = [
|
|
190
|
+
new HumanMessage('Calculate 123 * 456 using the calculator'),
|
|
191
|
+
];
|
|
192
|
+
|
|
193
|
+
const result = await run.processStream({ messages }, streamConfig);
|
|
194
|
+
expect(result).toBeDefined();
|
|
195
|
+
|
|
196
|
+
const runMessages = run.getRunMessages();
|
|
197
|
+
expect(runMessages).toBeDefined();
|
|
198
|
+
|
|
199
|
+
// Should have used the calculator tool
|
|
200
|
+
const hasToolUse = runMessages?.some(
|
|
201
|
+
(msg) =>
|
|
202
|
+
msg._getType() === 'ai' &&
|
|
203
|
+
((msg as AIMessage).tool_calls?.length ?? 0) > 0
|
|
204
|
+
);
|
|
205
|
+
expect(hasToolUse).toBe(true);
|
|
206
|
+
|
|
207
|
+
console.log(
|
|
208
|
+
`${capitalizeFirstLetter(provider)} tool call with caching test passed`
|
|
209
|
+
);
|
|
210
|
+
});
|
|
211
|
+
});
|
|
212
|
+
|
|
213
|
+
describe('Bedrock Prompt Caching', () => {
|
|
214
|
+
const provider = Providers.BEDROCK;
|
|
215
|
+
|
|
216
|
+
test(`${capitalizeFirstLetter(provider)}: multi-turn conversation with caching should not corrupt messages`, async () => {
|
|
217
|
+
const { userName, location } = await getArgs();
|
|
218
|
+
const llmConfig = getLLMConfig(provider);
|
|
219
|
+
const { collectedUsage, customHandlers } = setupTest();
|
|
220
|
+
|
|
221
|
+
const run = await Run.create<t.IState>({
|
|
222
|
+
runId: 'cache-test-bedrock',
|
|
223
|
+
graphConfig: {
|
|
224
|
+
type: 'standard',
|
|
225
|
+
llmConfig: { ...llmConfig, promptCache: true } as t.LLMConfig,
|
|
226
|
+
tools: [new Calculator()],
|
|
227
|
+
instructions: 'You are a helpful assistant.',
|
|
228
|
+
additional_instructions: `User: ${userName}, Location: ${location}`,
|
|
229
|
+
},
|
|
230
|
+
returnContent: true,
|
|
231
|
+
customHandlers,
|
|
232
|
+
});
|
|
233
|
+
|
|
234
|
+
// Turn 1
|
|
235
|
+
const turn1Messages: BaseMessage[] = [
|
|
236
|
+
new HumanMessage('Hello, what is 5+5?'),
|
|
237
|
+
];
|
|
238
|
+
const turn1ContentSnapshot = JSON.stringify(turn1Messages[0].content);
|
|
239
|
+
|
|
240
|
+
const turn1Result = await run.processStream(
|
|
241
|
+
{ messages: turn1Messages },
|
|
242
|
+
streamConfig
|
|
243
|
+
);
|
|
244
|
+
expect(turn1Result).toBeDefined();
|
|
245
|
+
|
|
246
|
+
// Verify original message was NOT mutated
|
|
247
|
+
expect(JSON.stringify(turn1Messages[0].content)).toBe(
|
|
248
|
+
turn1ContentSnapshot
|
|
249
|
+
);
|
|
250
|
+
|
|
251
|
+
const turn1RunMessages = run.getRunMessages();
|
|
252
|
+
expect(turn1RunMessages).toBeDefined();
|
|
253
|
+
expect(turn1RunMessages!.length).toBeGreaterThan(0);
|
|
254
|
+
|
|
255
|
+
// Turn 2
|
|
256
|
+
const turn2Messages: BaseMessage[] = [
|
|
257
|
+
...turn1Messages,
|
|
258
|
+
...turn1RunMessages!,
|
|
259
|
+
new HumanMessage('Multiply that by 3'),
|
|
260
|
+
];
|
|
261
|
+
const turn2HumanContentSnapshot = JSON.stringify(
|
|
262
|
+
turn2Messages[turn2Messages.length - 1].content
|
|
263
|
+
);
|
|
264
|
+
|
|
265
|
+
const run2 = await Run.create<t.IState>({
|
|
266
|
+
runId: 'cache-test-bedrock-2',
|
|
267
|
+
graphConfig: {
|
|
268
|
+
type: 'standard',
|
|
269
|
+
llmConfig: { ...llmConfig, promptCache: true } as t.LLMConfig,
|
|
270
|
+
tools: [new Calculator()],
|
|
271
|
+
instructions: 'You are a helpful assistant.',
|
|
272
|
+
additional_instructions: `User: ${userName}, Location: ${location}`,
|
|
273
|
+
},
|
|
274
|
+
returnContent: true,
|
|
275
|
+
customHandlers,
|
|
276
|
+
});
|
|
277
|
+
|
|
278
|
+
const turn2Result = await run2.processStream(
|
|
279
|
+
{ messages: turn2Messages },
|
|
280
|
+
streamConfig
|
|
281
|
+
);
|
|
282
|
+
expect(turn2Result).toBeDefined();
|
|
283
|
+
|
|
284
|
+
// Verify messages were NOT mutated
|
|
285
|
+
expect(
|
|
286
|
+
JSON.stringify(turn2Messages[turn2Messages.length - 1].content)
|
|
287
|
+
).toBe(turn2HumanContentSnapshot);
|
|
288
|
+
|
|
289
|
+
console.log(`${provider} Usage:`, collectedUsage);
|
|
290
|
+
expect(collectedUsage.length).toBeGreaterThan(0);
|
|
291
|
+
|
|
292
|
+
console.log(
|
|
293
|
+
`${capitalizeFirstLetter(provider)} multi-turn caching test passed - messages not mutated`
|
|
294
|
+
);
|
|
295
|
+
});
|
|
296
|
+
|
|
297
|
+
test(`${capitalizeFirstLetter(provider)}: tool calls should work with caching enabled`, async () => {
|
|
298
|
+
const llmConfig = getLLMConfig(provider);
|
|
299
|
+
const { customHandlers } = setupTest();
|
|
300
|
+
|
|
301
|
+
const run = await Run.create<t.IState>({
|
|
302
|
+
runId: 'cache-test-bedrock-tools',
|
|
303
|
+
graphConfig: {
|
|
304
|
+
type: 'standard',
|
|
305
|
+
llmConfig: { ...llmConfig, promptCache: true } as t.LLMConfig,
|
|
306
|
+
tools: [new Calculator()],
|
|
307
|
+
instructions:
|
|
308
|
+
'You are a math assistant. Use the calculator tool for all calculations.',
|
|
309
|
+
},
|
|
310
|
+
returnContent: true,
|
|
311
|
+
customHandlers,
|
|
312
|
+
});
|
|
313
|
+
|
|
314
|
+
const messages: BaseMessage[] = [
|
|
315
|
+
new HumanMessage('Calculate 789 * 123 using the calculator'),
|
|
316
|
+
];
|
|
317
|
+
|
|
318
|
+
const result = await run.processStream({ messages }, streamConfig);
|
|
319
|
+
expect(result).toBeDefined();
|
|
320
|
+
|
|
321
|
+
const runMessages = run.getRunMessages();
|
|
322
|
+
expect(runMessages).toBeDefined();
|
|
323
|
+
|
|
324
|
+
// Should have used the calculator tool
|
|
325
|
+
const hasToolUse = runMessages?.some(
|
|
326
|
+
(msg) =>
|
|
327
|
+
msg._getType() === 'ai' &&
|
|
328
|
+
((msg as AIMessage).tool_calls?.length ?? 0) > 0
|
|
329
|
+
);
|
|
330
|
+
expect(hasToolUse).toBe(true);
|
|
331
|
+
|
|
332
|
+
console.log(
|
|
333
|
+
`${capitalizeFirstLetter(provider)} tool call with caching test passed`
|
|
334
|
+
);
|
|
335
|
+
});
|
|
336
|
+
});
|
|
337
|
+
|
|
338
|
+
describe('Cross-provider message isolation', () => {
|
|
339
|
+
test('Messages processed by Anthropic should not affect Bedrock processing', async () => {
|
|
340
|
+
const anthropicConfig = getLLMConfig(Providers.ANTHROPIC);
|
|
341
|
+
const bedrockConfig = getLLMConfig(Providers.BEDROCK);
|
|
342
|
+
const { customHandlers: handlers1 } = setupTest();
|
|
343
|
+
const { customHandlers: handlers2 } = setupTest();
|
|
344
|
+
|
|
345
|
+
// Create a shared message array
|
|
346
|
+
const sharedMessages: BaseMessage[] = [
|
|
347
|
+
new HumanMessage('Hello, what is the capital of France?'),
|
|
348
|
+
];
|
|
349
|
+
const originalContent = JSON.stringify(sharedMessages[0].content);
|
|
350
|
+
|
|
351
|
+
// Process with Anthropic first
|
|
352
|
+
const anthropicRun = await Run.create<t.IState>({
|
|
353
|
+
runId: 'cross-provider-anthropic',
|
|
354
|
+
graphConfig: {
|
|
355
|
+
type: 'standard',
|
|
356
|
+
llmConfig: { ...anthropicConfig, promptCache: true } as t.LLMConfig,
|
|
357
|
+
instructions: 'You are a helpful assistant.',
|
|
358
|
+
},
|
|
359
|
+
returnContent: true,
|
|
360
|
+
customHandlers: handlers1,
|
|
361
|
+
});
|
|
362
|
+
|
|
363
|
+
const anthropicResult = await anthropicRun.processStream(
|
|
364
|
+
{ messages: sharedMessages },
|
|
365
|
+
streamConfig
|
|
366
|
+
);
|
|
367
|
+
expect(anthropicResult).toBeDefined();
|
|
368
|
+
|
|
369
|
+
// Verify message not mutated
|
|
370
|
+
expect(JSON.stringify(sharedMessages[0].content)).toBe(originalContent);
|
|
371
|
+
|
|
372
|
+
// Now process with Bedrock using the SAME messages
|
|
373
|
+
const bedrockRun = await Run.create<t.IState>({
|
|
374
|
+
runId: 'cross-provider-bedrock',
|
|
375
|
+
graphConfig: {
|
|
376
|
+
type: 'standard',
|
|
377
|
+
llmConfig: { ...bedrockConfig, promptCache: true } as t.LLMConfig,
|
|
378
|
+
instructions: 'You are a helpful assistant.',
|
|
379
|
+
},
|
|
380
|
+
returnContent: true,
|
|
381
|
+
customHandlers: handlers2,
|
|
382
|
+
});
|
|
383
|
+
|
|
384
|
+
const bedrockResult = await bedrockRun.processStream(
|
|
385
|
+
{ messages: sharedMessages },
|
|
386
|
+
streamConfig
|
|
387
|
+
);
|
|
388
|
+
expect(bedrockResult).toBeDefined();
|
|
389
|
+
|
|
390
|
+
// Verify message STILL not mutated after both providers processed
|
|
391
|
+
expect(JSON.stringify(sharedMessages[0].content)).toBe(originalContent);
|
|
392
|
+
|
|
393
|
+
console.log('Cross-provider message isolation test passed');
|
|
394
|
+
});
|
|
395
|
+
});
|
|
396
|
+
});
|
package/src/stream.ts
CHANGED
|
@@ -339,7 +339,8 @@ hasToolCallChunks: ${hasToolCallChunks}
|
|
|
339
339
|
(c) =>
|
|
340
340
|
(c.type?.startsWith(ContentTypes.THINKING) ?? false) ||
|
|
341
341
|
(c.type?.startsWith(ContentTypes.REASONING) ?? false) ||
|
|
342
|
-
(c.type?.startsWith(ContentTypes.REASONING_CONTENT) ?? false)
|
|
342
|
+
(c.type?.startsWith(ContentTypes.REASONING_CONTENT) ?? false) ||
|
|
343
|
+
c.type === 'redacted_thinking'
|
|
343
344
|
)
|
|
344
345
|
) {
|
|
345
346
|
await graph.dispatchReasoningDelta(stepId, {
|
|
@@ -365,7 +366,8 @@ hasToolCallChunks: ${hasToolCallChunks}
|
|
|
365
366
|
Array.isArray(chunk.content) &&
|
|
366
367
|
(chunk.content[0]?.type === ContentTypes.THINKING ||
|
|
367
368
|
chunk.content[0]?.type === ContentTypes.REASONING ||
|
|
368
|
-
chunk.content[0]?.type === ContentTypes.REASONING_CONTENT
|
|
369
|
+
chunk.content[0]?.type === ContentTypes.REASONING_CONTENT ||
|
|
370
|
+
chunk.content[0]?.type === 'redacted_thinking')
|
|
369
371
|
) {
|
|
370
372
|
reasoning_content = 'valid';
|
|
371
373
|
} else if (
|
|
@@ -83,12 +83,16 @@ const BrowserClickSchema = z.object({
|
|
|
83
83
|
const BrowserTypeSchema = z.object({
|
|
84
84
|
index: z
|
|
85
85
|
.number()
|
|
86
|
-
.describe(
|
|
86
|
+
.describe(
|
|
87
|
+
'The [index] of the INPUT element to type into. Target <input> or <textarea> elements. Check fieldLabel to identify the correct field.'
|
|
88
|
+
),
|
|
87
89
|
text: z.string().describe('The text to type into the element'),
|
|
88
90
|
pressEnter: z
|
|
89
91
|
.boolean()
|
|
90
92
|
.optional()
|
|
91
|
-
.describe(
|
|
93
|
+
.describe(
|
|
94
|
+
'Whether to press Enter after typing (useful for search forms and submitting)'
|
|
95
|
+
),
|
|
92
96
|
});
|
|
93
97
|
|
|
94
98
|
const BrowserNavigateSchema = z.object({
|
|
@@ -134,13 +138,17 @@ const BrowserGetPageStateSchema = z.object({});
|
|
|
134
138
|
const BrowserKeypressSchema = z.object({
|
|
135
139
|
keys: z
|
|
136
140
|
.string()
|
|
137
|
-
.describe(
|
|
141
|
+
.describe(
|
|
142
|
+
'Keyboard keys to press. Use "+" to combine modifiers (e.g., "Control+Enter", "Control+a", "Escape", "Tab", "Enter"). Common shortcuts: Control+Enter (submit forms/send), Escape (close dialogs), Tab (next field).'
|
|
143
|
+
),
|
|
138
144
|
});
|
|
139
145
|
|
|
140
146
|
const BrowserSwitchTabSchema = z.object({
|
|
141
147
|
tabId: z
|
|
142
148
|
.number()
|
|
143
|
-
.describe(
|
|
149
|
+
.describe(
|
|
150
|
+
'The tab ID to switch to. Use the tab IDs shown in the tabs list from page state.'
|
|
151
|
+
),
|
|
144
152
|
});
|
|
145
153
|
|
|
146
154
|
/**
|
|
@@ -187,7 +195,9 @@ function formatResultForLLM(
|
|
|
187
195
|
}
|
|
188
196
|
if (result.elementList != null && result.elementList !== '') {
|
|
189
197
|
// Add hint about fieldLabel and targeting inputs for form interactions
|
|
190
|
-
parts.push(
|
|
198
|
+
parts.push(
|
|
199
|
+
`\n**Interactive Elements** (for typing: target <input> elements with fieldLabel, NOT parent <div> containers):\n${result.elementList}`
|
|
200
|
+
);
|
|
191
201
|
}
|
|
192
202
|
if (result.screenshot != null && result.screenshot !== '') {
|
|
193
203
|
parts.push('\n[Screenshot captured and displayed to user]');
|
|
@@ -263,7 +273,8 @@ export function createBrowserTools(
|
|
|
263
273
|
tools.push(
|
|
264
274
|
tool(createToolFunction('click'), {
|
|
265
275
|
name: EBrowserTools.CLICK,
|
|
266
|
-
description:
|
|
276
|
+
description:
|
|
277
|
+
'Click element by [index]. Use fieldLabel attribute to identify correct element. For form fields, target <input> elements NOT parent <div> containers.',
|
|
267
278
|
schema: BrowserClickSchema,
|
|
268
279
|
})
|
|
269
280
|
);
|
|
@@ -272,7 +283,8 @@ export function createBrowserTools(
|
|
|
272
283
|
tools.push(
|
|
273
284
|
tool(createToolFunction('type'), {
|
|
274
285
|
name: EBrowserTools.TYPE,
|
|
275
|
-
description:
|
|
286
|
+
description:
|
|
287
|
+
'Type text into <input> element by [index]. CRITICAL: Always target <input> or <textarea> tags (NOT parent <div> containers). Use fieldLabel to identify correct field (e.g., fieldLabel="To recipients" for To field).',
|
|
276
288
|
schema: BrowserTypeSchema,
|
|
277
289
|
})
|
|
278
290
|
);
|
|
@@ -281,7 +293,8 @@ export function createBrowserTools(
|
|
|
281
293
|
tools.push(
|
|
282
294
|
tool(createToolFunction('navigate'), {
|
|
283
295
|
name: EBrowserTools.NAVIGATE,
|
|
284
|
-
description:
|
|
296
|
+
description:
|
|
297
|
+
'Navigate to URL (include https://). Returns new page element list.',
|
|
285
298
|
schema: BrowserNavigateSchema,
|
|
286
299
|
})
|
|
287
300
|
);
|
|
@@ -290,7 +303,8 @@ export function createBrowserTools(
|
|
|
290
303
|
tools.push(
|
|
291
304
|
tool(createToolFunction('scroll'), {
|
|
292
305
|
name: EBrowserTools.SCROLL,
|
|
293
|
-
description:
|
|
306
|
+
description:
|
|
307
|
+
'Scroll page (up/down/left/right). Returns updated element list.',
|
|
294
308
|
schema: BrowserScrollSchema,
|
|
295
309
|
})
|
|
296
310
|
);
|
|
@@ -299,7 +313,8 @@ export function createBrowserTools(
|
|
|
299
313
|
tools.push(
|
|
300
314
|
tool(createToolFunction('extract'), {
|
|
301
315
|
name: EBrowserTools.EXTRACT,
|
|
302
|
-
description:
|
|
316
|
+
description:
|
|
317
|
+
'Extract page content. Returns URL, title, and element list.',
|
|
303
318
|
schema: BrowserExtractSchema,
|
|
304
319
|
})
|
|
305
320
|
);
|
|
@@ -308,7 +323,8 @@ export function createBrowserTools(
|
|
|
308
323
|
tools.push(
|
|
309
324
|
tool(createToolFunction('hover'), {
|
|
310
325
|
name: EBrowserTools.HOVER,
|
|
311
|
-
description:
|
|
326
|
+
description:
|
|
327
|
+
'Hover element by [index] to reveal menus/tooltips. Returns updated element list.',
|
|
312
328
|
schema: BrowserHoverSchema,
|
|
313
329
|
})
|
|
314
330
|
);
|
|
@@ -317,7 +333,8 @@ export function createBrowserTools(
|
|
|
317
333
|
tools.push(
|
|
318
334
|
tool(createToolFunction('wait'), {
|
|
319
335
|
name: EBrowserTools.WAIT,
|
|
320
|
-
description:
|
|
336
|
+
description:
|
|
337
|
+
'Wait for async content to load. Returns updated element list.',
|
|
321
338
|
schema: BrowserWaitSchema,
|
|
322
339
|
})
|
|
323
340
|
);
|
|
@@ -326,7 +343,8 @@ export function createBrowserTools(
|
|
|
326
343
|
tools.push(
|
|
327
344
|
tool(createToolFunction('back'), {
|
|
328
345
|
name: EBrowserTools.BACK,
|
|
329
|
-
description:
|
|
346
|
+
description:
|
|
347
|
+
'Go back in browser history. Returns previous page element list.',
|
|
330
348
|
schema: BrowserBackSchema,
|
|
331
349
|
})
|
|
332
350
|
);
|
|
@@ -335,7 +353,8 @@ export function createBrowserTools(
|
|
|
335
353
|
tools.push(
|
|
336
354
|
tool(createToolFunction('screenshot'), {
|
|
337
355
|
name: EBrowserTools.SCREENSHOT,
|
|
338
|
-
description:
|
|
356
|
+
description:
|
|
357
|
+
'Capture screenshot. Displayed to user. Use get_page_state for automation.',
|
|
339
358
|
schema: BrowserScreenshotSchema,
|
|
340
359
|
})
|
|
341
360
|
);
|
|
@@ -344,7 +363,8 @@ export function createBrowserTools(
|
|
|
344
363
|
tools.push(
|
|
345
364
|
tool(createToolFunction('get_page_state'), {
|
|
346
365
|
name: EBrowserTools.GET_PAGE_STATE,
|
|
347
|
-
description:
|
|
366
|
+
description:
|
|
367
|
+
'Get page URL, title, and interactive elements with [index] for actions. Start here.',
|
|
348
368
|
schema: BrowserGetPageStateSchema,
|
|
349
369
|
})
|
|
350
370
|
);
|
|
@@ -353,7 +373,8 @@ export function createBrowserTools(
|
|
|
353
373
|
tools.push(
|
|
354
374
|
tool(createToolFunction('keypress'), {
|
|
355
375
|
name: EBrowserTools.KEYPRESS,
|
|
356
|
-
description:
|
|
376
|
+
description:
|
|
377
|
+
'Send keyboard shortcut or key press. Use for: Control+Enter (send email/submit), Escape (close dialog/cancel), Tab (next field), Enter (confirm). The keys are sent to the currently focused element.',
|
|
357
378
|
schema: BrowserKeypressSchema,
|
|
358
379
|
})
|
|
359
380
|
);
|
|
@@ -362,7 +383,8 @@ export function createBrowserTools(
|
|
|
362
383
|
tools.push(
|
|
363
384
|
tool(createToolFunction('switch_tab'), {
|
|
364
385
|
name: EBrowserTools.SWITCH_TAB,
|
|
365
|
-
description:
|
|
386
|
+
description:
|
|
387
|
+
'Switch to a different browser tab by its ID. Tab IDs are shown in the page state. Use this to work with existing open tabs (e.g., use existing Gmail tab instead of opening a new one).',
|
|
366
388
|
schema: BrowserSwitchTabSchema,
|
|
367
389
|
})
|
|
368
390
|
);
|
|
@@ -17,7 +17,7 @@ export const getCodeBaseURL = (): string =>
|
|
|
17
17
|
const imageMessage = 'Image is already displayed to the user';
|
|
18
18
|
const otherMessage = 'File is already downloaded by the user';
|
|
19
19
|
const accessMessage =
|
|
20
|
-
'Note: Files
|
|
20
|
+
'Note: Files from previous executions are automatically available and can be modified.';
|
|
21
21
|
const emptyOutputMessage =
|
|
22
22
|
'stdout: Empty. Ensure you\'re writing output explicitly.\n';
|
|
23
23
|
|
|
@@ -41,7 +41,8 @@ const CodeExecutionToolSchema = z.object({
|
|
|
41
41
|
code: z.string()
|
|
42
42
|
.describe(`The complete, self-contained code to execute, without any truncation or minimization.
|
|
43
43
|
- The environment is stateless; variables and imports don't persist between executions.
|
|
44
|
-
-
|
|
44
|
+
- Generated files from previous executions are automatically available in "/mnt/data/".
|
|
45
|
+
- Files from previous executions are automatically available and can be modified in place.
|
|
45
46
|
- Input code **IS ALREADY** displayed to the user, so **DO NOT** repeat it in your response unless asked.
|
|
46
47
|
- Output code **IS NOT** displayed to the user, so **DO** write all desired output explicitly.
|
|
47
48
|
- IMPORTANT: You MUST explicitly print/output ALL results you want the user to see.
|
|
@@ -50,17 +51,6 @@ const CodeExecutionToolSchema = z.object({
|
|
|
50
51
|
- js: use the \`console\` or \`process\` methods for all outputs.
|
|
51
52
|
- r: IMPORTANT: No X11 display available. ALL graphics MUST use Cairo library (library(Cairo)).
|
|
52
53
|
- Other languages: use appropriate output functions.`),
|
|
53
|
-
session_id: z
|
|
54
|
-
.string()
|
|
55
|
-
.optional()
|
|
56
|
-
.describe(
|
|
57
|
-
`Session ID from a previous response to access generated files.
|
|
58
|
-
- Files load into the current working directory ("/mnt/data/")
|
|
59
|
-
- Use relative paths ONLY
|
|
60
|
-
- Files are READ-ONLY and cannot be modified in-place
|
|
61
|
-
- To modify: read original file, write to NEW filename
|
|
62
|
-
`.trim()
|
|
63
|
-
),
|
|
64
54
|
args: z
|
|
65
55
|
.array(z.string())
|
|
66
56
|
.optional()
|
|
@@ -107,15 +97,33 @@ Rules:
|
|
|
107
97
|
`.trim();
|
|
108
98
|
|
|
109
99
|
return tool<typeof CodeExecutionToolSchema>(
|
|
110
|
-
async ({ lang, code,
|
|
111
|
-
|
|
100
|
+
async ({ lang, code, ...rest }, config) => {
|
|
101
|
+
/**
|
|
102
|
+
* Extract session context from config.toolCall (injected by ToolNode).
|
|
103
|
+
* - session_id: For API to associate with previous session
|
|
104
|
+
* - _injected_files: File refs to pass directly (avoids /files endpoint race condition)
|
|
105
|
+
*/
|
|
106
|
+
const { session_id, _injected_files } = (config.toolCall ?? {}) as {
|
|
107
|
+
session_id?: string;
|
|
108
|
+
_injected_files?: t.CodeEnvFile[];
|
|
109
|
+
};
|
|
110
|
+
|
|
111
|
+
const postData: Record<string, unknown> = {
|
|
112
112
|
lang,
|
|
113
113
|
code,
|
|
114
114
|
...rest,
|
|
115
115
|
...params,
|
|
116
116
|
};
|
|
117
117
|
|
|
118
|
-
|
|
118
|
+
/**
|
|
119
|
+
* File injection priority:
|
|
120
|
+
* 1. Use _injected_files from ToolNode (avoids /files endpoint race condition)
|
|
121
|
+
* 2. Fall back to fetching from /files endpoint if session_id provided but no injected files
|
|
122
|
+
*/
|
|
123
|
+
if (_injected_files && _injected_files.length > 0) {
|
|
124
|
+
postData.files = _injected_files;
|
|
125
|
+
} else if (session_id != null && session_id.length > 0) {
|
|
126
|
+
/** Fallback: fetch from /files endpoint (may have race condition issues) */
|
|
119
127
|
try {
|
|
120
128
|
const filesEndpoint = `${baseEndpoint}/files/${session_id}?detail=full`;
|
|
121
129
|
const fetchOptions: RequestInit = {
|
|
@@ -140,7 +148,6 @@ Rules:
|
|
|
140
148
|
const files = await response.json();
|
|
141
149
|
if (Array.isArray(files) && files.length > 0) {
|
|
142
150
|
const fileReferences: t.CodeEnvFile[] = files.map((file) => {
|
|
143
|
-
// Extract the ID from the file name (part after session ID prefix and before extension)
|
|
144
151
|
const nameParts = file.name.split('/');
|
|
145
152
|
const id = nameParts.length > 1 ? nameParts[1].split('.')[0] : '';
|
|
146
153
|
|
|
@@ -151,11 +158,7 @@ Rules:
|
|
|
151
158
|
};
|
|
152
159
|
});
|
|
153
160
|
|
|
154
|
-
|
|
155
|
-
postData.files = fileReferences;
|
|
156
|
-
} else if (Array.isArray(postData.files)) {
|
|
157
|
-
postData.files = [...postData.files, ...fileReferences];
|
|
158
|
-
}
|
|
161
|
+
postData.files = fileReferences;
|
|
159
162
|
}
|
|
160
163
|
} catch {
|
|
161
164
|
// eslint-disable-next-line no-console
|
|
@@ -204,7 +207,7 @@ Rules:
|
|
|
204
207
|
}
|
|
205
208
|
}
|
|
206
209
|
|
|
207
|
-
formattedOutput += `\
|
|
210
|
+
formattedOutput += `\n\n${accessMessage}`;
|
|
208
211
|
return [
|
|
209
212
|
formattedOutput.trim(),
|
|
210
213
|
{
|