@aj-archipelago/cortex 1.4.32 → 1.4.33
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config.js +72 -0
- package/helper-apps/cortex-file-handler/Dockerfile +1 -1
- package/lib/fileUtils.js +24 -5
- package/lib/pathwayManager.js +6 -6
- package/lib/pathwayTools.js +21 -1
- package/lib/requestExecutor.js +49 -5
- package/package.json +1 -1
- package/pathways/system/entity/sys_compress_context.js +82 -0
- package/pathways/system/entity/sys_entity_agent.js +65 -15
- package/pathways/transcribe_gemini.js +1 -1
- package/server/modelExecutor.js +4 -0
- package/server/pathwayResolver.js +102 -12
- package/server/plugins/claudeAnthropicPlugin.js +84 -0
- package/server/plugins/gemini15ChatPlugin.js +17 -0
- package/server/plugins/gemini15VisionPlugin.js +51 -5
- package/server/plugins/grokResponsesPlugin.js +2 -0
- package/server/plugins/openAiVisionPlugin.js +4 -2
- package/test.log +42834 -0
- package/tests/integration/rest/vendors/claude_anthropic_direct.test.js +197 -0
- package/tests/unit/plugins/claudeAnthropicPlugin.test.js +236 -0
- package/tests/unit/sys_entity_agent_errors.test.js +792 -0
|
@@ -0,0 +1,792 @@
|
|
|
1
|
+
import test from 'ava';
|
|
2
|
+
import sysEntityAgent from '../../pathways/system/entity/sys_entity_agent.js';
|
|
3
|
+
import { config } from '../../config.js';
|
|
4
|
+
import { getToolsForEntity } from '../../pathways/system/entity/tools/shared/sys_entity_tools.js';
|
|
5
|
+
import { withTimeout } from '../../lib/pathwayTools.js';
|
|
6
|
+
|
|
7
|
+
/**
 * Builds a custom tool entry for the test entity configuration.
 *
 * @param {string} name - Function name placed in the tool definition.
 * @param {string} pathwayName - Stored as the entry's `pathwayName`.
 * @param {object} [overrides={}] - Keys merged over the top level of `definition`.
 * @returns {{pathwayName: string, definition: object}} The tool entry.
 */
const buildToolDefinition = (name, pathwayName, overrides = {}) => {
  const parameters = {
    type: 'object',
    properties: { userMessage: { type: 'string' } },
    required: [],
  };
  const fn = { name, description: `Test tool for ${name}`, parameters };

  // Overrides are applied last so they win over the default definition keys.
  const definition = Object.assign({ type: 'function', icon: '🧪', function: fn }, overrides);

  return { pathwayName, definition };
};
|
|
26
|
+
|
|
27
|
+
/**
 * Builds a function-style tool call as it would appear in a model message.
 *
 * @param {string} name - Name of the function being called.
 * @param {object} [args] - Call arguments; serialized to a JSON string.
 * @param {string} [id='call-1'] - Tool call identifier.
 * @returns {{id: string, type: string, function: {name: string, arguments: string}}}
 */
const buildToolCall = (name, args = { userMessage: 'run test' }, id = 'call-1') => {
  const serializedArgs = JSON.stringify(args);
  return {
    id,
    type: 'function',
    function: { name, arguments: serializedArgs },
  };
};
|
|
35
|
+
|
|
36
|
+
/**
 * Builds a minimal stand-in resolver for the agent pathway under test.
 *
 * The default plugin returns messages untouched from
 * `truncateMessagesToTargetLength`, and the default `promptAndParse`
 * resolves to 'final-response'.
 *
 * @param {object} [overrides={}] - Properties that replace or extend the defaults.
 * @returns {object} The stub resolver.
 */
const buildResolver = (overrides = {}) => {
  const passthroughPlugin = {
    truncateMessagesToTargetLength: (messages) => messages,
  };

  const defaults = {
    errors: [],
    requestId: 'req-test',
    rootRequestId: 'root-req-test',
    pathway: sysEntityAgent,
    modelExecutor: { plugin: passthroughPlugin },
    promptAndParse: async () => 'final-response',
  };

  return { ...defaults, ...overrides };
};
|
|
49
|
+
|
|
50
|
+
/**
 * Installs the test fixtures into the shared `config` object.
 *
 * Loads stub pathways (an error generator plus four test tools) on top of the
 * existing pathways, clears `entityTools`, and replaces `config.get` so that
 * `'entityConfig'` returns a single test entity owning those tools.
 *
 * @returns {{entityId: string, originalGet: Function, originalPathways: object,
 *   originalEntityTools: object}} Values needed by `restoreConfig`.
 */
const setupConfig = () => {
  // Capture the originals before anything is mutated.
  const originalGet = config.get.bind(config);
  const originalPathways = config.get('pathways') || {};
  const originalEntityTools = config.get('entityTools') || {};

  const entityId = 'entity-test-errors';

  // [tool key, function name, backing pathway]
  const toolSpecs = [
    ['errorjson', 'ErrorJson', 'test_tool_error_json'],
    ['throws500', 'Throws500', 'test_tool_500'],
    ['timeouttool', 'TimeoutTool', 'test_tool_timeout'],
    ['nullresult', 'NullResult', 'test_tool_null'],
  ];
  const tools = Object.fromEntries(
    toolSpecs.map(([key, fnName, pathwayName]) => [key, buildToolDefinition(fnName, pathwayName)]),
  );

  const entityConfig = {
    [entityId]: {
      id: entityId,
      isDefault: true,
      tools: Object.keys(tools),
      customTools: tools,
    },
  };

  // Small factories for the stub root resolvers.
  const resolvesWith = (result) => async () => ({ result });
  const rejectsWith = (message) => async () => {
    throw new Error(message);
  };

  const pathways = {
    ...originalPathways,
    sys_generator_error: {
      rootResolver: async (_parent, args) => ({
        result: `ERROR_RESPONSE: ${args.text}`,
      }),
    },
    test_tool_error_json: {
      rootResolver: resolvesWith(JSON.stringify({ error: true, message: '400 Bad Request' })),
    },
    test_tool_500: {
      rootResolver: rejectsWith('500 Internal Server Error'),
    },
    test_tool_timeout: {
      rootResolver: rejectsWith('ETIMEDOUT'),
    },
    test_tool_null: {
      rootResolver: resolvesWith(null),
    },
  };

  config.load({ pathways, entityTools: {} });

  // convict schema does not expose entityConfig; override config.get for tests
  config.get = (key) => (key === 'entityConfig' ? entityConfig : originalGet(key));

  return { entityId, originalGet, originalPathways, originalEntityTools };
};
|
|
121
|
+
|
|
122
|
+
/**
 * Reverts the changes made by `setupConfig`.
 *
 * @param {object} originals - The object returned by `setupConfig`.
 */
const restoreConfig = (originals) => {
  const { originalPathways, originalEntityTools, originalGet } = originals;

  config.load({ pathways: originalPathways, entityTools: originalEntityTools });

  // Put back the getter that setupConfig replaced.
  config.get = originalGet;
};
|
|
130
|
+
|
|
131
|
+
test.serial('executePathway returns sys_generator_error output on 500 base model error', async (t) => {
  const originals = setupConfig();
  t.teardown(() => restoreConfig(originals));

  // The base model call fails outright.
  const runAllPrompts = async () => {
    throw new Error('HTTP 500 from model');
  };

  const result = await sysEntityAgent.executePathway({
    args: {
      text: 'trigger base model error',
      chatHistory: [{ role: 'user', content: 'hi' }],
      agentContext: [],
      entityId: originals.entityId,
    },
    runAllPrompts,
    resolver: buildResolver(),
  });

  // The stubbed sys_generator_error output must carry the original error text.
  for (const fragment of ['ERROR_RESPONSE', 'HTTP 500 from model']) {
    t.true(result.includes(fragment));
  }
});
|
|
151
|
+
|
|
152
|
+
test.serial('executePathway falls back when sys_generator_error fails after null model response', async (t) => {
  const originals = setupConfig();
  t.teardown(() => restoreConfig(originals));

  // Replace the error generator with one that itself throws.
  const failingErrorGenerator = {
    rootResolver: async () => {
      throw new Error('sys_generator_error failed');
    },
  };
  config.load({
    pathways: {
      ...config.get('pathways'),
      sys_generator_error: failingErrorGenerator,
    },
  });

  const result = await sysEntityAgent.executePathway({
    args: {
      text: 'trigger null response',
      chatHistory: [{ role: 'user', content: 'hi' }],
      agentContext: [],
      entityId: originals.entityId,
    },
    // The model yields nothing at all.
    runAllPrompts: async () => null,
    resolver: buildResolver(),
  });

  const expectedFragments = [
    'I apologize, but I encountered an error while processing your request',
    'Model execution returned null',
  ];
  for (const fragment of expectedFragments) {
    t.true(result.includes(fragment));
  }
});
|
|
180
|
+
|
|
181
|
+
test.serial('toolCallback surfaces 400 error JSON from tool result', async (t) => {
  const originals = setupConfig();
  t.teardown(() => restoreConfig(originals));

  const { entityTools, entityToolsOpenAiFormat } = getToolsForEntity(
    config.get('entityConfig')[originals.entityId],
  );

  // Record what the agent passes to promptAndParse after the tool has run.
  let promptArgs;
  const promptAndParse = async (received) => {
    promptArgs = received;
    return 'tool-handled';
  };
  const resolver = buildResolver({ promptAndParse });

  const args = {
    chatHistory: [{ role: 'user', content: 'use tool' }],
    entityTools,
    entityToolsOpenAiFormat,
  };
  const message = { tool_calls: [buildToolCall('ErrorJson')] };

  const result = await sysEntityAgent.toolCallback(args, message, resolver);
  t.is(result, 'tool-handled');

  const expectedText = '400 Bad Request';
  const carriesError = (entry) => entry.role === 'tool' && entry.content.includes(expectedText);

  // The error text must show up both in the caller's history and in the follow-up prompt.
  const toolMessage = args.chatHistory.find(({ role }) => role === 'tool');
  t.truthy(toolMessage);
  t.true(toolMessage.content.includes(expectedText));
  t.truthy(promptArgs);
  t.true(promptArgs.chatHistory.some(carriesError));
});
|
|
214
|
+
|
|
215
|
+
test.serial('toolCallback captures 500 error thrown by tool pathway', async (t) => {
  const originals = setupConfig();
  t.teardown(() => restoreConfig(originals));

  const { entityTools, entityToolsOpenAiFormat } = getToolsForEntity(
    config.get('entityConfig')[originals.entityId],
  );

  // Record what the agent passes to promptAndParse after the tool has run.
  let promptArgs;
  const promptAndParse = async (received) => {
    promptArgs = received;
    return 'tool-handled';
  };
  const resolver = buildResolver({ promptAndParse });

  const args = {
    chatHistory: [{ role: 'user', content: 'use tool' }],
    entityTools,
    entityToolsOpenAiFormat,
  };
  const message = { tool_calls: [buildToolCall('Throws500')] };

  const result = await sysEntityAgent.toolCallback(args, message, resolver);
  t.is(result, 'tool-handled');

  const expectedText = '500 Internal Server Error';
  const carriesError = (entry) => entry.role === 'tool' && entry.content.includes(expectedText);

  // The thrown error's message must be recorded as the tool's result.
  const toolMessage = args.chatHistory.find(({ role }) => role === 'tool');
  t.truthy(toolMessage);
  t.true(toolMessage.content.includes(expectedText));
  t.truthy(promptArgs);
  t.true(promptArgs.chatHistory.some(carriesError));
});
|
|
248
|
+
|
|
249
|
+
test.serial('toolCallback captures tool null result as error', async (t) => {
  const originals = setupConfig();
  t.teardown(() => restoreConfig(originals));

  const { entityTools, entityToolsOpenAiFormat } = getToolsForEntity(
    config.get('entityConfig')[originals.entityId],
  );

  // Record what the agent passes to promptAndParse after the tool has run.
  let promptArgs;
  const promptAndParse = async (received) => {
    promptArgs = received;
    return 'tool-handled';
  };
  const resolver = buildResolver({ promptAndParse });

  const args = {
    chatHistory: [{ role: 'user', content: 'use tool' }],
    entityTools,
    entityToolsOpenAiFormat,
  };
  const message = { tool_calls: [buildToolCall('NullResult')] };

  const result = await sysEntityAgent.toolCallback(args, message, resolver);
  t.is(result, 'tool-handled');

  const expectedText = 'returned null result';
  const carriesError = (entry) => entry.role === 'tool' && entry.content.includes(expectedText);

  // A null tool result is expected to be reported as an error in the tool message.
  const toolMessage = args.chatHistory.find(({ role }) => role === 'tool');
  t.truthy(toolMessage);
  t.true(toolMessage.content.includes(expectedText));
  t.truthy(promptArgs);
  t.true(promptArgs.chatHistory.some(carriesError));
});
|
|
282
|
+
|
|
283
|
+
test.serial('toolCallback reports invalid tool call arguments', async (t) => {
  const originals = setupConfig();
  t.teardown(() => restoreConfig(originals));

  const { entityTools, entityToolsOpenAiFormat } = getToolsForEntity(
    config.get('entityConfig')[originals.entityId],
  );

  // Record what the agent passes to promptAndParse after the tool has run.
  let promptArgs;
  const promptAndParse = async (received) => {
    promptArgs = received;
    return 'tool-handled';
  };
  const resolver = buildResolver({ promptAndParse });

  const args = {
    chatHistory: [{ role: 'user', content: 'use tool' }],
    entityTools,
    entityToolsOpenAiFormat,
  };

  // Built by hand (not via buildToolCall) so that `function.arguments` is absent.
  const malformedCall = {
    id: 'bad-tool-call',
    type: 'function',
    function: { name: 'ErrorJson' },
  };
  const message = { tool_calls: [malformedCall] };

  const result = await sysEntityAgent.toolCallback(args, message, resolver);
  t.is(result, 'tool-handled');

  const toolMessage = args.chatHistory.find(({ role }) => role === 'tool');
  t.truthy(toolMessage);
  t.true(toolMessage.content.includes('Invalid tool call structure: missing function arguments'));

  t.truthy(promptArgs);
  const reportsInvalidStructure = (entry) => (
    entry.role === 'tool' && entry.content.includes('Invalid tool call structure')
  );
  t.true(promptArgs.chatHistory.some(reportsInvalidStructure));
});
|
|
322
|
+
|
|
323
|
+
test.serial('toolCallback returns error response when promptAndParse throws', async (t) => {
  const originals = setupConfig();
  t.teardown(() => restoreConfig(originals));

  const { entityTools, entityToolsOpenAiFormat } = getToolsForEntity(
    config.get('entityConfig')[originals.entityId],
  );

  // The follow-up model call fails after the tool itself has completed.
  const promptAndParse = async () => {
    throw new Error('Model crashed after tool calls');
  };
  const resolver = buildResolver({ promptAndParse });

  const args = {
    chatHistory: [{ role: 'user', content: 'use tool' }],
    entityTools,
    entityToolsOpenAiFormat,
  };
  const message = { tool_calls: [buildToolCall('ErrorJson')] };

  const result = await sysEntityAgent.toolCallback(args, message, resolver);

  for (const fragment of ['ERROR_RESPONSE', 'Model crashed after tool calls']) {
    t.true(result.includes(fragment));
  }
});
|
|
348
|
+
|
|
349
|
+
test.serial('executePathway returns error response when tool recursion times out', async (t) => {
  const originals = setupConfig();
  t.teardown(() => restoreConfig(originals));

  const { entityToolsOpenAiFormat } = getToolsForEntity(
    config.get('entityConfig')[originals.entityId],
  );

  // The follow-up prompt issued after the tool call is what fails here.
  const promptAndParse = async () => {
    throw new Error('Tool recursion timeout');
  };

  // The first model response asks for a tool call.
  const runAllPrompts = async () => ({
    tool_calls: [buildToolCall('TimeoutTool')],
  });

  const result = await sysEntityAgent.executePathway({
    args: {
      text: 'trigger tool recursion',
      chatHistory: [{ role: 'user', content: 'hi' }],
      agentContext: [],
      entityId: originals.entityId,
      entityToolsOpenAiFormat,
    },
    runAllPrompts,
    resolver: buildResolver({ promptAndParse }),
  });

  for (const fragment of ['ERROR_RESPONSE', 'Tool recursion timeout']) {
    t.true(result.includes(fragment));
  }
});
|
|
378
|
+
|
|
379
|
+
test.serial('toolCallback injects max tool call message once limit reached', async (t) => {
  const originals = setupConfig();
  t.teardown(() => restoreConfig(originals));

  const entityConfig = config.get('entityConfig')[originals.entityId];
  const { entityTools, entityToolsOpenAiFormat } = getToolsForEntity(entityConfig);

  // Record what the agent passes to promptAndParse after the tool has run.
  let promptArgs;
  const resolver = buildResolver({
    // Start the resolver with a count of 50 so the limit message is expected on this call.
    toolCallCount: 50,
    promptAndParse: async (args) => {
      promptArgs = args;
      return 'tool-handled';
    },
  });

  const args = {
    chatHistory: [{ role: 'user', content: 'use tool' }],
    entityTools,
    entityToolsOpenAiFormat,
  };

  const message = { tool_calls: [buildToolCall('ErrorJson')] };
  await sysEntityAgent.toolCallback(args, message, resolver);

  // Fail with an assertion (not a TypeError) when promptAndParse was never invoked.
  t.truthy(promptArgs, 'promptAndParse should have been called');
  const history = promptArgs ? promptArgs.chatHistory : [];

  // The limit notice is looked up as a string-valued 'user' message.
  const limitNotice = history.find((entry) => (
    entry.role === 'user' &&
    typeof entry.content === 'string' &&
    entry.content.includes('Maximum tool call limit reached')
  ));

  t.truthy(limitNotice);
});
|
|
412
|
+
|
|
413
|
+
// === NEW TESTS FOR ROBUSTNESS FEATURES ===
|
|
414
|
+
|
|
415
|
+
test('withTimeout resolves when promise completes before timeout', async (t) => {
  // An already-resolved promise settles well inside the 1000 ms budget.
  const settled = Promise.resolve('success');
  const result = await withTimeout(settled, 1000, 'Should not timeout');

  t.is(result, 'success');
});
|
|
423
|
+
|
|
424
|
+
test('withTimeout rejects when promise takes longer than timeout', async (t) => {
  // Settles after 200 ms, well past the 50 ms limit passed below.
  const slowPromise = new Promise((resolve) => {
    setTimeout(() => resolve('too late'), 200);
  });

  const pending = withTimeout(slowPromise, 50, 'Operation timed out after 50ms');
  const error = await t.throwsAsync(pending);

  // The rejection carries the message handed to withTimeout.
  t.is(error.message, 'Operation timed out after 50ms');
});
|
|
433
|
+
|
|
434
|
+
test('withTimeout clears timeout when promise resolves', async (t) => {
  // Intended to guard against dangling timers. Only the resolved value is
  // asserted here; the timer itself is not inspected.
  const longTimeoutMs = 10000; // Long timeout that should be cleared
  const result = await withTimeout(Promise.resolve('quick'), longTimeoutMs, 'Should not timeout');

  t.is(result, 'quick');
});
|
|
443
|
+
|
|
444
|
+
test('withTimeout clears timeout when promise rejects', async (t) => {
  // The wrapped promise's own rejection must surface, not the timeout message.
  const wrapped = withTimeout(
    Promise.reject(new Error('Original error')),
    10000,
    'Should not timeout',
  );

  const error = await t.throwsAsync(wrapped);
  t.is(error.message, 'Original error');
});
|
|
454
|
+
|
|
455
|
+
test.serial('toolCallback truncates oversized tool results', async (t) => {
  const originals = setupConfig();
  t.teardown(() => restoreConfig(originals));

  // Register a tool pathway whose result is larger than MAX_TOOL_RESULT_LENGTH (150000).
  const oversizedResult = JSON.stringify({ data: 'x'.repeat(160000) });
  config.load({
    pathways: {
      ...config.get('pathways'),
      test_tool_large_result: {
        rootResolver: async () => ({ result: oversizedResult }),
      },
    },
  });

  // Extend the test entity with the extra tool.
  const baseEntity = config.get('entityConfig')[originals.entityId];
  const entityConfig = {
    [originals.entityId]: {
      ...baseEntity,
      tools: [...baseEntity.tools, 'largeresult'],
      customTools: {
        ...baseEntity.customTools,
        largeresult: buildToolDefinition('LargeResult', 'test_tool_large_result'),
      },
    },
  };

  // Serve the extended entity; restoreConfig reinstates the original getter on teardown.
  config.get = (key) => {
    if (key === 'entityConfig') {
      return entityConfig;
    }
    return originals.originalGet(key);
  };

  const { entityTools, entityToolsOpenAiFormat } = getToolsForEntity(entityConfig[originals.entityId]);

  // Record what the agent passes to promptAndParse after the tool has run.
  let promptArgs;
  const resolver = buildResolver({
    promptAndParse: async (args) => {
      promptArgs = args;
      return 'tool-handled';
    },
  });

  const args = {
    chatHistory: [{ role: 'user', content: 'use large tool' }],
    entityTools,
    entityToolsOpenAiFormat,
  };

  const message = { tool_calls: [buildToolCall('LargeResult')] };
  await sysEntityAgent.toolCallback(args, message, resolver);

  // Fail with an assertion (not a TypeError) when promptAndParse was never invoked.
  t.truthy(promptArgs, 'promptAndParse should have been called');
  const history = promptArgs ? promptArgs.chatHistory : [];

  // Find the tool result message in chatHistory
  const toolMessage = history.find((entry) => entry.role === 'tool');
  t.truthy(toolMessage);

  // Verify the content was truncated (should be less than 160000 chars)
  t.true(toolMessage.content.length < 160000);

  // Verify truncation message was added
  t.true(toolMessage.content.includes('[Content truncated due to length]'));
});
|
|
520
|
+
|
|
521
|
+
test('findSafeSplitPoint preserves tool call/result pairs', (t) => {
  // NOTE(review): this exercises a local copy of the split logic, not an
  // imported helper; keep it in sync with the production implementation.

  /**
   * Returns the index at which `messages` can be split so that no kept tool
   * result (role 'tool') is separated from the assistant message that issued
   * its tool call.
   */
  const findSafeSplitPoint = (messages, keepRecentCount = 6) => {
    // tool call id -> index of the message that issued it
    const toolCallIndexMap = new Map();
    messages.forEach((msg, index) => {
      for (const tc of msg.tool_calls || []) {
        if (tc.id) toolCallIndexMap.set(tc.id, index);
      }
    });

    let splitIndex = Math.max(0, messages.length - keepRecentCount);

    // Move the split back until every kept tool result has its call kept too.
    let adjusted = true;
    while (adjusted && splitIndex > 0) {
      adjusted = false;
      for (let i = splitIndex; i < messages.length; i++) {
        const msg = messages[i];
        if (msg.role === 'tool' && msg.tool_call_id) {
          const callIndex = toolCallIndexMap.get(msg.tool_call_id);
          if (callIndex !== undefined && callIndex < splitIndex) {
            splitIndex = callIndex;
            adjusted = true;
            break;
          }
        }
      }
    }

    return splitIndex;
  };

  const messages = [
    { role: 'user', content: 'query 1' },
    { role: 'assistant', content: '', tool_calls: [{ id: 'tc1', function: { name: 'search' } }] },
    { role: 'tool', tool_call_id: 'tc1', content: 'result 1' },
    { role: 'assistant', content: 'response 1' },
    { role: 'user', content: 'query 2' },
    { role: 'assistant', content: '', tool_calls: [{ id: 'tc2', function: { name: 'search' } }] },
    { role: 'tool', tool_call_id: 'tc2', content: 'result 2' },
    { role: 'assistant', content: 'response 2' },
  ];

  // Every tool result at or after `splitIndex` must have its call kept as well.
  const assertNoOrphanedResults = (splitIndex) => {
    const keptMessages = messages.slice(splitIndex);
    const keptToolCallIds = new Set(
      keptMessages.flatMap((msg) => (msg.tool_calls || []).map((tc) => tc.id)),
    );
    for (const msg of keptMessages) {
      if (msg.role === 'tool' && msg.tool_call_id) {
        t.true(keptToolCallIds.has(msg.tool_call_id),
          `Tool result ${msg.tool_call_id} should have its call in kept messages`);
      }
    }
  };

  // keepRecentCount=4: the naive split (index 4) already keeps the tc2 call
  // (index 5) with its result (index 6), so no adjustment is needed.
  const unadjustedSplit = findSafeSplitPoint(messages, 4);
  t.true(unadjustedSplit <= 4, 'Split should be at or before index 4');
  assertNoOrphanedResults(unadjustedSplit);

  // keepRecentCount=2: the naive split (index 6) would keep the tc2 result
  // without its call, so the split must move back to the call at index 5.
  const adjustedSplit = findSafeSplitPoint(messages, 2);
  t.is(adjustedSplit, 5, 'Split should move back to the tc2 call');
  assertNoOrphanedResults(adjustedSplit);

  // keepRecentCount=6: the naive split (index 2) lands on the tc1 result, so
  // the split must move back to the tc1 call at index 1.
  const earlySplit = findSafeSplitPoint(messages, 6);
  t.is(earlySplit, 1, 'Split should move back to the tc1 call');
  assertNoOrphanedResults(earlySplit);
});
|
|
597
|
+
|
|
598
|
+
test.serial('toolCallback handles tool timeout error correctly', async (t) => {
  const originals = setupConfig();
  t.teardown(() => restoreConfig(originals));

  // Register a pathway that takes 100 ms to answer.
  const slowResolver = async () => {
    await new Promise((resolve) => setTimeout(resolve, 100));
    return { result: 'completed' };
  };
  config.load({
    pathways: {
      ...config.get('pathways'),
      test_tool_slow: { rootResolver: slowResolver },
    },
  });

  // Extend the test entity with a tool whose definition carries a 10 ms
  // timeout, much shorter than the pathway's delay.
  const baseEntity = config.get('entityConfig')[originals.entityId];
  const entityConfig = {
    [originals.entityId]: {
      ...baseEntity,
      tools: [...baseEntity.tools, 'slowtool'],
      customTools: {
        ...baseEntity.customTools,
        slowtool: buildToolDefinition('SlowTool', 'test_tool_slow', { timeout: 10 }),
      },
    },
  };

  // Serve the extended entity; restoreConfig reinstates the original getter on teardown.
  config.get = (key) => (key === 'entityConfig' ? entityConfig : originals.originalGet(key));

  const { entityTools, entityToolsOpenAiFormat } = getToolsForEntity(entityConfig[originals.entityId]);

  // Record what the agent passes to promptAndParse after the tool has run.
  let promptArgs;
  const promptAndParse = async (received) => {
    promptArgs = received;
    return 'tool-handled';
  };
  const resolver = buildResolver({ promptAndParse });

  const args = {
    chatHistory: [{ role: 'user', content: 'use slow tool' }],
    entityTools,
    entityToolsOpenAiFormat,
  };
  const message = { tool_calls: [buildToolCall('SlowTool')] };

  const result = await sysEntityAgent.toolCallback(args, message, resolver);
  t.is(result, 'tool-handled');

  // The tool message handed to the model should describe the timeout.
  const toolMessage = promptArgs.chatHistory.find(({ role }) => role === 'tool');
  t.truthy(toolMessage);
  t.true(toolMessage.content.includes('timed out'));
});
|
|
668
|
+
|
|
669
|
+
// Test the logic that prevents non-streaming responses from killing parent streams
|
|
670
|
+
// The fix was: only publish completion if receivedSSEData is true
|
|
671
|
+
test('non-streaming tool response should not trigger parent stream completion', (t) => {
  // Checks the boolean pattern only. NOTE(review): described as mirroring
  // pathwayResolver.handleStream, but that code is not exercised here.

  // State after a non-streaming response closes: no SSE events, no completion
  // signal, no stream error.
  const state = {
    receivedSSEData: false,
    completionSent: false,
    streamErrorOccurred: false,
  };

  // Previous rule: publish on error or whenever no completion had been sent.
  const legacyRule = ({ completionSent, streamErrorOccurred }) => streamErrorOccurred || !completionSent;
  // Current rule: same, but only when SSE data was actually received.
  const currentRule = (s) => s.receivedSSEData && legacyRule(s);

  t.true(legacyRule(state), 'Old logic would incorrectly publish completion');
  t.false(currentRule(state), 'New logic correctly skips completion for non-streaming');
});
|
|
689
|
+
|
|
690
|
+
test('streaming response with incomplete data should trigger completion', (t) => {
  // SSE events arrived, but the stream closed without a completion signal:
  // a completion should be published.
  const state = {
    receivedSSEData: true,
    completionSent: false,
    streamErrorOccurred: false,
  };

  const shouldPublish = ({ receivedSSEData, completionSent, streamErrorOccurred }) => (
    receivedSSEData && (streamErrorOccurred || !completionSent)
  );

  t.true(shouldPublish(state), 'Should publish completion when streaming response has no completion signal');
});
|
|
701
|
+
|
|
702
|
+
test('streaming response with error should trigger completion with error', (t) => {
  // A stream that received data and then errored must still publish a completion.
  const state = {
    receivedSSEData: true,
    completionSent: false,
    streamErrorOccurred: true,
  };

  const shouldPublish = ({ receivedSSEData, completionSent, streamErrorOccurred }) => (
    receivedSSEData && (streamErrorOccurred || !completionSent)
  );

  t.true(shouldPublish(state), 'Should publish completion when stream has error');
});
|
|
712
|
+
|
|
713
|
+
test('normal streaming completion should not double-send', (t) => {
  // The stream already sent its completion and no error occurred: nothing more to publish.
  const state = {
    receivedSSEData: true,
    completionSent: true,
    streamErrorOccurred: false,
  };

  const shouldPublish = ({ receivedSSEData, completionSent, streamErrorOccurred }) => (
    receivedSSEData && (streamErrorOccurred || !completionSent)
  );

  t.false(shouldPublish(state), 'Should not double-send completion');
});
|
|
723
|
+
|
|
724
|
+
// Test that actually exercises the SSE parser behavior
|
|
725
|
+
test('SSE parser only sets receivedSSEData for actual event types', async (t) => {
  const { createParser } = await import('eventsource-parser');

  // Feeds one chunk into a fresh parser and reports whether the handler saw an
  // 'event'. A fresh parser per input stops the unterminated JSON chunk from
  // staying buffered and being prepended to the SSE chunk's first line.
  const detectsSSEEvent = (chunk) => {
    let receivedSSEData = false;
    const parser = createParser((event) => {
      // Only parsed events count; other types (like 'reconnect-interval') do not.
      if (event.type === 'event') {
        receivedSSEData = true;
      }
    });
    parser.feed(chunk);
    return receivedSSEData;
  };

  // Non-SSE JSON data (like a Grok non-streaming response)
  const jsonResponse = JSON.stringify({
    id: 'resp_123',
    output: [{ type: 'message', content: [{ text: 'Hello' }] }],
  });
  t.false(detectsSSEEvent(jsonResponse), 'Non-SSE JSON should not set receivedSSEData');

  // Actual SSE data (proper SSE format with event type)
  const sseChunk = 'event: message\ndata: {"content":"hello"}\n\n';
  t.true(detectsSSEEvent(sseChunk), 'Actual SSE event should set receivedSSEData');
});
|
|
755
|
+
|
|
756
|
+
test('SSE parser with reconnect-interval should not set receivedSSEData', async (t) => {
  const { createParser } = await import('eventsource-parser');

  let receivedSSEData = false;
  const parser = createParser(({ type }) => {
    // Only 'event' items flip the flag.
    if (type === 'event') {
      receivedSSEData = true;
    }
  });

  // A retry directive is valid SSE but is not an 'event'.
  parser.feed('retry: 3000\n\n');

  t.false(receivedSSEData, 'reconnect-interval should not set receivedSSEData');
});
|
|
774
|
+
|
|
775
|
+
test('tool callback invoked should not trigger stream warning or completion', (t) => {
  // Scenario: the stream closed because the model returned tool calls. SSE data
  // was received, no completion was sent and no error occurred, but a tool
  // callback ran, so neither a warning nor a completion is expected.
  const state = {
    receivedSSEData: true,
    completionSent: false,
    streamErrorOccurred: false,
    toolCallbackInvoked: true,
  };

  // Warning condition
  const shouldWarn = (s) => (
    s.receivedSSEData && !s.completionSent && !s.streamErrorOccurred && !s.toolCallbackInvoked
  );
  t.false(shouldWarn(state), 'Should not warn when tool callback invoked');

  // Completion condition
  const shouldPublishCompletion = (s) => (
    s.receivedSSEData && !s.toolCallbackInvoked && (s.streamErrorOccurred || !s.completionSent)
  );
  t.false(shouldPublishCompletion(state), 'Should not publish completion when tool callback invoked');
});
|