@machina.ai/cell-cli-core 1.8.2-rc1 → 1.10.0-rc1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +1 -1
- package/dist/index.js +1 -1
- package/dist/index.js.map +1 -1
- package/dist/package.json +1 -1
- package/dist/src/agents/codebase-investigator.d.ts +36 -1
- package/dist/src/agents/codebase-investigator.js +93 -31
- package/dist/src/agents/codebase-investigator.js.map +1 -1
- package/dist/src/agents/executor.d.ts +15 -11
- package/dist/src/agents/executor.js +265 -103
- package/dist/src/agents/executor.js.map +1 -1
- package/dist/src/agents/executor.test.js +493 -232
- package/dist/src/agents/executor.test.js.map +1 -1
- package/dist/src/agents/invocation.d.ts +5 -2
- package/dist/src/agents/invocation.js +4 -2
- package/dist/src/agents/invocation.js.map +1 -1
- package/dist/src/agents/invocation.test.js +9 -0
- package/dist/src/agents/invocation.test.js.map +1 -1
- package/dist/src/agents/registry.d.ts +2 -1
- package/dist/src/agents/registry.js +24 -1
- package/dist/src/agents/registry.js.map +1 -1
- package/dist/src/agents/subagent-tool-wrapper.d.ts +3 -1
- package/dist/src/agents/subagent-tool-wrapper.js +4 -3
- package/dist/src/agents/subagent-tool-wrapper.js.map +1 -1
- package/dist/src/agents/subagent-tool-wrapper.test.js +8 -1
- package/dist/src/agents/subagent-tool-wrapper.test.js.map +1 -1
- package/dist/src/agents/types.d.ts +35 -6
- package/dist/src/agents/types.js +1 -0
- package/dist/src/agents/types.js.map +1 -1
- package/dist/src/code_assist/oauth-credential-storage.js +1 -1
- package/dist/src/code_assist/oauth-credential-storage.js.map +1 -1
- package/dist/src/code_assist/oauth-credential-storage.test.js +1 -1
- package/dist/src/code_assist/oauth-credential-storage.test.js.map +1 -1
- package/dist/src/code_assist/oauth2.test.js +14 -13
- package/dist/src/code_assist/oauth2.test.js.map +1 -1
- package/dist/src/code_assist/setup.js +4 -2
- package/dist/src/code_assist/setup.js.map +1 -1
- package/dist/src/config/config.d.ts +48 -9
- package/dist/src/config/config.js +91 -16
- package/dist/src/config/config.js.map +1 -1
- package/dist/src/config/config.test.js +67 -3
- package/dist/src/config/config.test.js.map +1 -1
- package/dist/src/config/storage.d.ts +0 -1
- package/dist/src/config/storage.js +2 -2
- package/dist/src/config/storage.js.map +1 -1
- package/dist/src/config/storage.test.js +7 -6
- package/dist/src/config/storage.test.js.map +1 -1
- package/dist/src/core/client.d.ts +3 -1
- package/dist/src/core/client.js +67 -17
- package/dist/src/core/client.js.map +1 -1
- package/dist/src/core/client.test.js +199 -5
- package/dist/src/core/client.test.js.map +1 -1
- package/dist/src/core/contentGenerator.js +3 -1
- package/dist/src/core/contentGenerator.js.map +1 -1
- package/dist/src/core/coreToolScheduler.js +12 -12
- package/dist/src/core/coreToolScheduler.js.map +1 -1
- package/dist/src/core/coreToolScheduler.test.js +227 -0
- package/dist/src/core/coreToolScheduler.test.js.map +1 -1
- package/dist/src/core/geminiChat.d.ts +7 -11
- package/dist/src/core/geminiChat.js +32 -70
- package/dist/src/core/geminiChat.js.map +1 -1
- package/dist/src/core/geminiChat.test.js +93 -228
- package/dist/src/core/geminiChat.test.js.map +1 -1
- package/dist/src/core/logger.test.js +2 -2
- package/dist/src/core/logger.test.js.map +1 -1
- package/dist/src/core/nonInteractiveToolExecutor.d.ts +3 -2
- package/dist/src/core/nonInteractiveToolExecutor.js +2 -2
- package/dist/src/core/nonInteractiveToolExecutor.js.map +1 -1
- package/dist/src/core/nonInteractiveToolExecutor.test.js +8 -8
- package/dist/src/core/nonInteractiveToolExecutor.test.js.map +1 -1
- package/dist/src/core/prompts.d.ts +2 -1
- package/dist/src/core/prompts.js +51 -110
- package/dist/src/core/prompts.js.map +1 -1
- package/dist/src/core/prompts.test.js +83 -29
- package/dist/src/core/prompts.test.js.map +1 -1
- package/dist/src/core/subagent.js +1 -1
- package/dist/src/core/subagent.js.map +1 -1
- package/dist/src/core/subagent.test.js +38 -12
- package/dist/src/core/subagent.test.js.map +1 -1
- package/dist/src/core/turn.d.ts +14 -2
- package/dist/src/core/turn.js +12 -1
- package/dist/src/core/turn.js.map +1 -1
- package/dist/src/core/turn.test.js +14 -2
- package/dist/src/core/turn.test.js.map +1 -1
- package/dist/src/generated/git-commit.d.ts +2 -2
- package/dist/src/generated/git-commit.js +2 -2
- package/dist/src/generated/git-commit.js.map +1 -1
- package/dist/src/ide/detect-ide.test.js +11 -0
- package/dist/src/ide/detect-ide.test.js.map +1 -1
- package/dist/src/ide/ide-client.js +3 -3
- package/dist/src/ide/ide-client.test.js +4 -4
- package/dist/src/ide/ide-installer.js +1 -1
- package/dist/src/ide/ide-installer.js.map +1 -1
- package/dist/src/ide/ide-installer.test.js +13 -1
- package/dist/src/ide/ide-installer.test.js.map +1 -1
- package/dist/src/ide/process-utils.js +85 -75
- package/dist/src/ide/process-utils.js.map +1 -1
- package/dist/src/ide/process-utils.test.js +83 -90
- package/dist/src/ide/process-utils.test.js.map +1 -1
- package/dist/src/index.d.ts +2 -0
- package/dist/src/index.js +2 -0
- package/dist/src/index.js.map +1 -1
- package/dist/src/mcp/token-storage/file-token-storage.js +2 -1
- package/dist/src/mcp/token-storage/file-token-storage.js.map +1 -1
- package/dist/src/mcp/token-storage/file-token-storage.test.js +4 -3
- package/dist/src/mcp/token-storage/file-token-storage.test.js.map +1 -1
- package/dist/src/services/chatRecordingService.d.ts +2 -1
- package/dist/src/services/chatRecordingService.js +2 -1
- package/dist/src/services/chatRecordingService.js.map +1 -1
- package/dist/src/services/shellExecutionService.d.ts +1 -0
- package/dist/src/services/shellExecutionService.js +144 -69
- package/dist/src/services/shellExecutionService.js.map +1 -1
- package/dist/src/services/shellExecutionService.test.js +61 -1
- package/dist/src/services/shellExecutionService.test.js.map +1 -1
- package/dist/src/telemetry/clearcut-logger/clearcut-logger.d.ts +14 -2
- package/dist/src/telemetry/clearcut-logger/clearcut-logger.js +104 -8
- package/dist/src/telemetry/clearcut-logger/clearcut-logger.js.map +1 -1
- package/dist/src/telemetry/clearcut-logger/clearcut-logger.test.js +81 -1
- package/dist/src/telemetry/clearcut-logger/clearcut-logger.test.js.map +1 -1
- package/dist/src/telemetry/clearcut-logger/event-metadata-key.d.ts +13 -3
- package/dist/src/telemetry/clearcut-logger/event-metadata-key.js +32 -5
- package/dist/src/telemetry/clearcut-logger/event-metadata-key.js.map +1 -1
- package/dist/src/telemetry/constants.d.ts +0 -28
- package/dist/src/telemetry/constants.js +0 -29
- package/dist/src/telemetry/constants.js.map +1 -1
- package/dist/src/telemetry/index.d.ts +5 -3
- package/dist/src/telemetry/index.js +11 -4
- package/dist/src/telemetry/index.js.map +1 -1
- package/dist/src/telemetry/loggers.d.ts +8 -2
- package/dist/src/telemetry/loggers.js +165 -299
- package/dist/src/telemetry/loggers.js.map +1 -1
- package/dist/src/telemetry/loggers.test.js +195 -7
- package/dist/src/telemetry/loggers.test.js.map +1 -1
- package/dist/src/telemetry/memory-monitor.d.ts +149 -0
- package/dist/src/telemetry/memory-monitor.js +335 -0
- package/dist/src/telemetry/memory-monitor.js.map +1 -0
- package/dist/src/telemetry/memory-monitor.test.d.ts +6 -0
- package/dist/src/telemetry/memory-monitor.test.js +472 -0
- package/dist/src/telemetry/memory-monitor.test.js.map +1 -0
- package/dist/src/telemetry/metrics.d.ts +131 -4
- package/dist/src/telemetry/metrics.js +182 -6
- package/dist/src/telemetry/metrics.js.map +1 -1
- package/dist/src/telemetry/metrics.test.js +360 -1
- package/dist/src/telemetry/metrics.test.js.map +1 -1
- package/dist/src/telemetry/telemetryAttributes.d.ts +8 -0
- package/dist/src/telemetry/telemetryAttributes.js +18 -0
- package/dist/src/telemetry/telemetryAttributes.js.map +1 -0
- package/dist/src/telemetry/types.d.ts +150 -3
- package/dist/src/telemetry/types.js +664 -33
- package/dist/src/telemetry/types.js.map +1 -1
- package/dist/src/telemetry/uiTelemetry.d.ts +1 -1
- package/dist/src/telemetry/uiTelemetry.js +1 -1
- package/dist/src/telemetry/uiTelemetry.js.map +1 -1
- package/dist/src/telemetry/uiTelemetry.test.js +1 -1
- package/dist/src/telemetry/uiTelemetry.test.js.map +1 -1
- package/dist/src/tools/glob.js +2 -1
- package/dist/src/tools/glob.js.map +1 -1
- package/dist/src/tools/mcp-client.d.ts +3 -2
- package/dist/src/tools/mcp-client.js +29 -33
- package/dist/src/tools/mcp-client.js.map +1 -1
- package/dist/src/tools/mcp-client.test.js +168 -5
- package/dist/src/tools/mcp-client.test.js.map +1 -1
- package/dist/src/tools/memoryTool.d.ts +1 -1
- package/dist/src/tools/memoryTool.js +1 -2
- package/dist/src/tools/memoryTool.js.map +1 -1
- package/dist/src/tools/memoryTool.test.js +9 -8
- package/dist/src/tools/memoryTool.test.js.map +1 -1
- package/dist/src/tools/shell.js +55 -2
- package/dist/src/tools/shell.js.map +1 -1
- package/dist/src/tools/shell.test.js +2 -1
- package/dist/src/tools/shell.test.js.map +1 -1
- package/dist/src/tools/smart-edit.d.ts +1 -20
- package/dist/src/tools/smart-edit.js +57 -55
- package/dist/src/tools/smart-edit.js.map +1 -1
- package/dist/src/tools/smart-edit.test.js +70 -86
- package/dist/src/tools/smart-edit.test.js.map +1 -1
- package/dist/src/tools/tool-error.d.ts +21 -0
- package/dist/src/tools/tool-error.js +27 -0
- package/dist/src/tools/tool-error.js.map +1 -1
- package/dist/src/tools/tool-names.d.ts +9 -0
- package/dist/src/tools/tool-names.js +18 -0
- package/dist/src/tools/tool-names.js.map +1 -0
- package/dist/src/tools/web-fetch.d.ts +7 -0
- package/dist/src/tools/web-fetch.js +42 -10
- package/dist/src/tools/web-fetch.js.map +1 -1
- package/dist/src/tools/web-fetch.test.js +127 -8
- package/dist/src/tools/web-fetch.test.js.map +1 -1
- package/dist/src/tools/web-search.js +2 -1
- package/dist/src/tools/web-search.js.map +1 -1
- package/dist/src/tools/write-file.js +2 -1
- package/dist/src/tools/write-file.js.map +1 -1
- package/dist/src/tools/write-todos.d.ts +1 -1
- package/dist/src/tools/write-todos.js +4 -3
- package/dist/src/tools/write-todos.js.map +1 -1
- package/dist/src/utils/editCorrector.js +2 -2
- package/dist/src/utils/editCorrector.js.map +1 -1
- package/dist/src/utils/editor.js +1 -0
- package/dist/src/utils/editor.js.map +1 -1
- package/dist/src/utils/editor.test.js +1 -0
- package/dist/src/utils/editor.test.js.map +1 -1
- package/dist/src/utils/formatters.d.ts +1 -0
- package/dist/src/utils/formatters.js +2 -1
- package/dist/src/utils/formatters.js.map +1 -1
- package/dist/src/utils/formatters.test.d.ts +6 -0
- package/dist/src/utils/formatters.test.js +26 -0
- package/dist/src/utils/formatters.test.js.map +1 -0
- package/dist/src/utils/getFolderStructure.test.js +7 -6
- package/dist/src/utils/getFolderStructure.test.js.map +1 -1
- package/dist/src/utils/installationManager.test.js +2 -1
- package/dist/src/utils/installationManager.test.js.map +1 -1
- package/dist/src/utils/llm-edit-fixer.js +3 -3
- package/dist/src/utils/memoryDiscovery.d.ts +1 -0
- package/dist/src/utils/memoryDiscovery.js +2 -1
- package/dist/src/utils/memoryDiscovery.js.map +1 -1
- package/dist/src/utils/memoryDiscovery.test.js +99 -21
- package/dist/src/utils/memoryDiscovery.test.js.map +1 -1
- package/dist/src/utils/pathCorrector.d.ts +25 -0
- package/dist/src/utils/pathCorrector.js +33 -0
- package/dist/src/utils/pathCorrector.js.map +1 -0
- package/dist/src/utils/pathCorrector.test.d.ts +6 -0
- package/dist/src/utils/pathCorrector.test.js +83 -0
- package/dist/src/utils/pathCorrector.test.js.map +1 -0
- package/dist/src/utils/retry.d.ts +2 -1
- package/dist/src/utils/retry.js +22 -14
- package/dist/src/utils/retry.js.map +1 -1
- package/dist/src/utils/retry.test.js +83 -39
- package/dist/src/utils/retry.test.js.map +1 -1
- package/dist/src/utils/shell-utils.d.ts +1 -0
- package/dist/src/utils/shell-utils.js +1 -1
- package/dist/src/utils/shell-utils.js.map +1 -1
- package/dist/src/utils/tool-utils.js +2 -2
- package/dist/src/utils/tool-utils.js.map +1 -1
- package/dist/src/utils/tool-utils.test.js +8 -0
- package/dist/src/utils/tool-utils.test.js.map +1 -1
- package/dist/src/utils/userAccountManager.test.js +2 -1
- package/dist/src/utils/userAccountManager.test.js.map +1 -1
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/package.json +1 -1
|
@@ -3,16 +3,21 @@
|
|
|
3
3
|
* Copyright 2025 Google LLC
|
|
4
4
|
* SPDX-License-Identifier: Apache-2.0
|
|
5
5
|
*/
|
|
6
|
-
import { describe, it, expect, vi, beforeEach, afterEach
|
|
6
|
+
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
|
|
7
7
|
import { AgentExecutor } from './executor.js';
|
|
8
|
-
import { AgentTerminateMode } from './types.js';
|
|
9
8
|
import { makeFakeConfig } from '../test-utils/config.js';
|
|
10
9
|
import { ToolRegistry } from '../tools/tool-registry.js';
|
|
11
10
|
import { LSTool } from '../tools/ls.js';
|
|
12
11
|
import { ReadFileTool } from '../tools/read-file.js';
|
|
13
12
|
import { GeminiChat, StreamEventType, } from '../core/geminiChat.js';
|
|
13
|
+
import {} from '@google/genai';
|
|
14
14
|
import { MockTool } from '../test-utils/mock-tool.js';
|
|
15
15
|
import { getDirectoryContextString } from '../utils/environmentContext.js';
|
|
16
|
+
import { z } from 'zod';
|
|
17
|
+
import { promptIdContext } from '../utils/promptIdContext.js';
|
|
18
|
+
import { logAgentStart, logAgentFinish } from '../telemetry/loggers.js';
|
|
19
|
+
import { AgentStartEvent, AgentFinishEvent } from '../telemetry/types.js';
|
|
20
|
+
import { AgentTerminateMode } from './types.js';
|
|
16
21
|
const { mockSendMessageStream, mockExecuteToolCall } = vi.hoisted(() => ({
|
|
17
22
|
mockSendMessageStream: vi.fn(),
|
|
18
23
|
mockExecuteToolCall: vi.fn(),
|
|
@@ -30,13 +35,40 @@ vi.mock('../core/nonInteractiveToolExecutor.js', () => ({
|
|
|
30
35
|
executeToolCall: mockExecuteToolCall,
|
|
31
36
|
}));
|
|
32
37
|
vi.mock('../utils/environmentContext.js');
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
38
|
+
vi.mock('../telemetry/loggers.js', () => ({
|
|
39
|
+
logAgentStart: vi.fn(),
|
|
40
|
+
logAgentFinish: vi.fn(),
|
|
41
|
+
}));
|
|
42
|
+
vi.mock('../utils/promptIdContext.js', async (importOriginal) => {
|
|
43
|
+
const actual = await importOriginal();
|
|
44
|
+
return {
|
|
45
|
+
...actual,
|
|
46
|
+
promptIdContext: {
|
|
47
|
+
...actual.promptIdContext,
|
|
48
|
+
getStore: vi.fn(),
|
|
49
|
+
run: vi.fn((_id, fn) => fn()),
|
|
50
|
+
},
|
|
51
|
+
};
|
|
52
|
+
});
|
|
53
|
+
const MockedGeminiChat = vi.mocked(GeminiChat);
|
|
54
|
+
const mockedGetDirectoryContextString = vi.mocked(getDirectoryContextString);
|
|
55
|
+
const mockedPromptIdContext = vi.mocked(promptIdContext);
|
|
56
|
+
const mockedLogAgentStart = vi.mocked(logAgentStart);
|
|
57
|
+
const mockedLogAgentFinish = vi.mocked(logAgentFinish);
|
|
58
|
+
// Constants for testing
|
|
59
|
+
const TASK_COMPLETE_TOOL_NAME = 'complete_task';
|
|
60
|
+
const MOCK_TOOL_NOT_ALLOWED = new MockTool({ name: 'write_file_interactive' });
|
|
61
|
+
/**
|
|
62
|
+
* Helper to create a mock API response chunk.
|
|
63
|
+
* Uses conditional spread to handle readonly functionCalls property safely.
|
|
64
|
+
*/
|
|
36
65
|
const createMockResponseChunk = (parts, functionCalls) => ({
|
|
37
66
|
candidates: [{ index: 0, content: { role: 'model', parts } }],
|
|
38
|
-
functionCalls,
|
|
67
|
+
...(functionCalls && functionCalls.length > 0 ? { functionCalls } : {}),
|
|
39
68
|
});
|
|
69
|
+
/**
|
|
70
|
+
* Helper to mock a single turn of model response in the stream.
|
|
71
|
+
*/
|
|
40
72
|
const mockModelResponse = (functionCalls, thought, text) => {
|
|
41
73
|
const parts = [];
|
|
42
74
|
if (thought) {
|
|
@@ -47,9 +79,7 @@ const mockModelResponse = (functionCalls, thought, text) => {
|
|
|
47
79
|
}
|
|
48
80
|
if (text)
|
|
49
81
|
parts.push({ text });
|
|
50
|
-
const responseChunk = createMockResponseChunk(parts,
|
|
51
|
-
// Ensure functionCalls is undefined if the array is empty, matching API behavior
|
|
52
|
-
functionCalls.length > 0 ? functionCalls : undefined);
|
|
82
|
+
const responseChunk = createMockResponseChunk(parts, functionCalls);
|
|
53
83
|
mockSendMessageStream.mockImplementationOnce(async () => (async function* () {
|
|
54
84
|
yield {
|
|
55
85
|
type: StreamEventType.CHUNK,
|
|
@@ -57,30 +87,59 @@ const mockModelResponse = (functionCalls, thought, text) => {
|
|
|
57
87
|
};
|
|
58
88
|
})());
|
|
59
89
|
};
|
|
90
|
+
/**
|
|
91
|
+
* Helper to extract the message parameters sent to sendMessageStream.
|
|
92
|
+
* Provides type safety for inspecting mock calls.
|
|
93
|
+
*/
|
|
94
|
+
const getMockMessageParams = (callIndex) => {
|
|
95
|
+
const call = mockSendMessageStream.mock.calls[callIndex];
|
|
96
|
+
expect(call).toBeDefined();
|
|
97
|
+
// Arg 1 of sendMessageStream is the message parameters
|
|
98
|
+
return call[1];
|
|
99
|
+
};
|
|
60
100
|
let mockConfig;
|
|
61
101
|
let parentToolRegistry;
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
}
|
|
102
|
+
/**
|
|
103
|
+
* Type-safe helper to create agent definitions for tests.
|
|
104
|
+
*/
|
|
105
|
+
const createTestDefinition = (tools = [LSTool.Name], runConfigOverrides = {}, outputConfigMode = 'default', schema = z.string()) => {
|
|
106
|
+
let outputConfig;
|
|
107
|
+
if (outputConfigMode === 'default') {
|
|
108
|
+
outputConfig = {
|
|
109
|
+
outputName: 'finalResult',
|
|
110
|
+
description: 'The final result.',
|
|
111
|
+
schema,
|
|
112
|
+
};
|
|
113
|
+
}
|
|
114
|
+
return {
|
|
115
|
+
name: 'TestAgent',
|
|
116
|
+
description: 'An agent for testing.',
|
|
117
|
+
inputConfig: {
|
|
118
|
+
inputs: { goal: { type: 'string', required: true, description: 'goal' } },
|
|
119
|
+
},
|
|
120
|
+
modelConfig: { model: 'gemini-test-model', temp: 0, top_p: 1 },
|
|
121
|
+
runConfig: { max_time_minutes: 5, max_turns: 5, ...runConfigOverrides },
|
|
122
|
+
promptConfig: { systemPrompt: 'Achieve the goal: ${goal}.' },
|
|
123
|
+
toolConfig: { tools },
|
|
124
|
+
outputConfig,
|
|
125
|
+
};
|
|
126
|
+
};
|
|
74
127
|
describe('AgentExecutor', () => {
|
|
75
128
|
let activities;
|
|
76
129
|
let onActivity;
|
|
77
130
|
let abortController;
|
|
78
131
|
let signal;
|
|
79
132
|
beforeEach(async () => {
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
133
|
+
vi.resetAllMocks();
|
|
134
|
+
mockSendMessageStream.mockReset();
|
|
135
|
+
mockExecuteToolCall.mockReset();
|
|
136
|
+
mockedLogAgentStart.mockReset();
|
|
137
|
+
mockedLogAgentFinish.mockReset();
|
|
138
|
+
mockedPromptIdContext.getStore.mockReset();
|
|
139
|
+
mockedPromptIdContext.run.mockImplementation((_id, fn) => fn());
|
|
140
|
+
MockedGeminiChat.mockImplementation(() => ({
|
|
141
|
+
sendMessageStream: mockSendMessageStream,
|
|
142
|
+
}));
|
|
84
143
|
vi.useFakeTimers();
|
|
85
144
|
mockConfig = makeFakeConfig();
|
|
86
145
|
parentToolRegistry = new ToolRegistry(mockConfig);
|
|
@@ -88,7 +147,7 @@ describe('AgentExecutor', () => {
|
|
|
88
147
|
parentToolRegistry.registerTool(new ReadFileTool(mockConfig));
|
|
89
148
|
parentToolRegistry.registerTool(MOCK_TOOL_NOT_ALLOWED);
|
|
90
149
|
vi.spyOn(mockConfig, 'getToolRegistry').mockResolvedValue(parentToolRegistry);
|
|
91
|
-
|
|
150
|
+
mockedGetDirectoryContextString.mockResolvedValue('Mocked Environment Context');
|
|
92
151
|
activities = [];
|
|
93
152
|
onActivity = (activity) => activities.push(activity);
|
|
94
153
|
abortController = new AbortController();
|
|
@@ -105,313 +164,515 @@ describe('AgentExecutor', () => {
|
|
|
105
164
|
});
|
|
106
165
|
it('SECURITY: should throw if a tool is not on the non-interactive allowlist', async () => {
|
|
107
166
|
const definition = createTestDefinition([MOCK_TOOL_NOT_ALLOWED.name]);
|
|
108
|
-
await expect(AgentExecutor.create(definition, mockConfig, onActivity)).rejects.toThrow(
|
|
167
|
+
await expect(AgentExecutor.create(definition, mockConfig, onActivity)).rejects.toThrow(/not on the allow-list for non-interactive execution/);
|
|
109
168
|
});
|
|
110
169
|
it('should create an isolated ToolRegistry for the agent', async () => {
|
|
111
170
|
const definition = createTestDefinition([LSTool.Name, ReadFileTool.Name]);
|
|
112
171
|
const executor = await AgentExecutor.create(definition, mockConfig, onActivity);
|
|
113
|
-
|
|
114
|
-
const agentRegistry = executor.toolRegistry;
|
|
172
|
+
const agentRegistry = executor['toolRegistry'];
|
|
115
173
|
expect(agentRegistry).not.toBe(parentToolRegistry);
|
|
116
174
|
expect(agentRegistry.getAllToolNames()).toEqual(expect.arrayContaining([LSTool.Name, ReadFileTool.Name]));
|
|
117
175
|
expect(agentRegistry.getAllToolNames()).toHaveLength(2);
|
|
118
176
|
expect(agentRegistry.getTool(MOCK_TOOL_NOT_ALLOWED.name)).toBeUndefined();
|
|
119
177
|
});
|
|
178
|
+
it('should use parentPromptId from context to create agentId', async () => {
|
|
179
|
+
const parentId = 'parent-id';
|
|
180
|
+
mockedPromptIdContext.getStore.mockReturnValue(parentId);
|
|
181
|
+
const definition = createTestDefinition();
|
|
182
|
+
const executor = await AgentExecutor.create(definition, mockConfig, onActivity);
|
|
183
|
+
expect(executor['agentId']).toMatch(new RegExp(`^${parentId}-${definition.name}-`));
|
|
184
|
+
});
|
|
120
185
|
});
|
|
121
186
|
describe('run (Execution Loop and Logic)', () => {
|
|
122
|
-
it('should
|
|
187
|
+
it('should log AgentFinish with error if run throws', async () => {
|
|
188
|
+
const definition = createTestDefinition();
|
|
189
|
+
// Make the definition invalid to cause an error during run
|
|
190
|
+
definition.inputConfig.inputs = {
|
|
191
|
+
goal: { type: 'string', required: true, description: 'goal' },
|
|
192
|
+
};
|
|
193
|
+
const executor = await AgentExecutor.create(definition, mockConfig, onActivity);
|
|
194
|
+
// Run without inputs to trigger validation error
|
|
195
|
+
await expect(executor.run({}, signal)).rejects.toThrow(/Missing required input parameters/);
|
|
196
|
+
expect(mockedLogAgentStart).toHaveBeenCalledTimes(1);
|
|
197
|
+
expect(mockedLogAgentFinish).toHaveBeenCalledTimes(1);
|
|
198
|
+
expect(mockedLogAgentFinish).toHaveBeenCalledWith(mockConfig, expect.objectContaining({
|
|
199
|
+
terminate_reason: AgentTerminateMode.ERROR,
|
|
200
|
+
}));
|
|
201
|
+
});
|
|
202
|
+
it('should execute successfully when model calls complete_task with output (Happy Path with Output)', async () => {
|
|
123
203
|
const definition = createTestDefinition();
|
|
124
204
|
const executor = await AgentExecutor.create(definition, mockConfig, onActivity);
|
|
125
205
|
const inputs = { goal: 'Find files' };
|
|
126
206
|
// Turn 1: Model calls ls
|
|
127
207
|
mockModelResponse([{ name: LSTool.Name, args: { path: '.' }, id: 'call1' }], 'T1: Listing');
|
|
128
208
|
mockExecuteToolCall.mockResolvedValueOnce({
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
209
|
+
status: 'success',
|
|
210
|
+
request: {
|
|
211
|
+
callId: 'call1',
|
|
212
|
+
name: LSTool.Name,
|
|
213
|
+
args: { path: '.' },
|
|
214
|
+
isClientInitiated: false,
|
|
215
|
+
prompt_id: 'test-prompt',
|
|
216
|
+
},
|
|
217
|
+
tool: {},
|
|
218
|
+
invocation: {},
|
|
219
|
+
response: {
|
|
220
|
+
callId: 'call1',
|
|
221
|
+
resultDisplay: 'file1.txt',
|
|
222
|
+
responseParts: [
|
|
223
|
+
{
|
|
224
|
+
functionResponse: {
|
|
225
|
+
name: LSTool.Name,
|
|
226
|
+
response: { result: 'file1.txt' },
|
|
227
|
+
id: 'call1',
|
|
228
|
+
},
|
|
137
229
|
},
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
230
|
+
],
|
|
231
|
+
error: undefined,
|
|
232
|
+
errorType: undefined,
|
|
233
|
+
contentLength: undefined,
|
|
234
|
+
},
|
|
141
235
|
});
|
|
142
|
-
// Turn 2: Model
|
|
143
|
-
mockModelResponse([
|
|
144
|
-
|
|
145
|
-
|
|
236
|
+
// Turn 2: Model calls complete_task with required output
|
|
237
|
+
mockModelResponse([
|
|
238
|
+
{
|
|
239
|
+
name: TASK_COMPLETE_TOOL_NAME,
|
|
240
|
+
args: { finalResult: 'Found file1.txt' },
|
|
241
|
+
id: 'call2',
|
|
242
|
+
},
|
|
243
|
+
], 'T2: Done');
|
|
146
244
|
const output = await executor.run(inputs, signal);
|
|
147
|
-
expect(mockSendMessageStream).toHaveBeenCalledTimes(
|
|
148
|
-
expect(mockExecuteToolCall).toHaveBeenCalledTimes(1);
|
|
149
|
-
// Verify System Prompt Templating
|
|
245
|
+
expect(mockSendMessageStream).toHaveBeenCalledTimes(2);
|
|
150
246
|
const chatConstructorArgs = MockedGeminiChat.mock.calls[0];
|
|
151
247
|
const chatConfig = chatConstructorArgs[1];
|
|
152
|
-
expect(chatConfig?.systemInstruction).toContain(
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
expect(
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
}), expect.stringContaining('#extraction'));
|
|
169
|
-
expect(output.result).toBe('Result: file1.txt.');
|
|
248
|
+
expect(chatConfig?.systemInstruction).toContain(`MUST call the \`${TASK_COMPLETE_TOOL_NAME}\` tool`);
|
|
249
|
+
const turn1Params = getMockMessageParams(0);
|
|
250
|
+
const firstToolGroup = turn1Params.config?.tools?.[0];
|
|
251
|
+
expect(firstToolGroup).toBeDefined();
|
|
252
|
+
if (!firstToolGroup || !('functionDeclarations' in firstToolGroup)) {
|
|
253
|
+
throw new Error('Test expectation failed: Config does not contain functionDeclarations.');
|
|
254
|
+
}
|
|
255
|
+
const sentTools = firstToolGroup.functionDeclarations;
|
|
256
|
+
expect(sentTools).toBeDefined();
|
|
257
|
+
expect(sentTools).toEqual(expect.arrayContaining([
|
|
258
|
+
expect.objectContaining({ name: LSTool.Name }),
|
|
259
|
+
expect.objectContaining({ name: TASK_COMPLETE_TOOL_NAME }),
|
|
260
|
+
]));
|
|
261
|
+
const completeToolDef = sentTools.find((t) => t.name === TASK_COMPLETE_TOOL_NAME);
|
|
262
|
+
expect(completeToolDef?.parameters?.required).toContain('finalResult');
|
|
263
|
+
expect(output.result).toBe('Found file1.txt');
|
|
170
264
|
expect(output.terminate_reason).toBe(AgentTerminateMode.GOAL);
|
|
171
|
-
//
|
|
265
|
+
// Telemetry checks
|
|
266
|
+
expect(mockedLogAgentStart).toHaveBeenCalledTimes(1);
|
|
267
|
+
expect(mockedLogAgentStart).toHaveBeenCalledWith(mockConfig, expect.any(AgentStartEvent));
|
|
268
|
+
expect(mockedLogAgentFinish).toHaveBeenCalledTimes(1);
|
|
269
|
+
expect(mockedLogAgentFinish).toHaveBeenCalledWith(mockConfig, expect.any(AgentFinishEvent));
|
|
270
|
+
const finishEvent = mockedLogAgentFinish.mock.calls[0][1];
|
|
271
|
+
expect(finishEvent.terminate_reason).toBe(AgentTerminateMode.GOAL);
|
|
272
|
+
// Context checks
|
|
273
|
+
expect(mockedPromptIdContext.run).toHaveBeenCalledTimes(2); // Two turns
|
|
274
|
+
const agentId = executor['agentId'];
|
|
275
|
+
expect(mockedPromptIdContext.run).toHaveBeenNthCalledWith(1, `${agentId}#0`, expect.any(Function));
|
|
276
|
+
expect(mockedPromptIdContext.run).toHaveBeenNthCalledWith(2, `${agentId}#1`, expect.any(Function));
|
|
172
277
|
expect(activities).toEqual(expect.arrayContaining([
|
|
173
|
-
// Thought subjects are extracted by the executor (parseThought)
|
|
174
278
|
expect.objectContaining({
|
|
175
279
|
type: 'THOUGHT_CHUNK',
|
|
176
280
|
data: { text: 'T1: Listing' },
|
|
177
281
|
}),
|
|
178
|
-
expect.objectContaining({
|
|
179
|
-
type: 'TOOL_CALL_START',
|
|
180
|
-
data: { name: LSTool.Name, args: { path: '.' } },
|
|
181
|
-
}),
|
|
182
282
|
expect.objectContaining({
|
|
183
283
|
type: 'TOOL_CALL_END',
|
|
184
284
|
data: { name: LSTool.Name, output: 'file1.txt' },
|
|
185
285
|
}),
|
|
186
286
|
expect.objectContaining({
|
|
187
|
-
type: '
|
|
188
|
-
data: {
|
|
287
|
+
type: 'TOOL_CALL_START',
|
|
288
|
+
data: {
|
|
289
|
+
name: TASK_COMPLETE_TOOL_NAME,
|
|
290
|
+
args: { finalResult: 'Found file1.txt' },
|
|
291
|
+
},
|
|
292
|
+
}),
|
|
293
|
+
expect.objectContaining({
|
|
294
|
+
type: 'TOOL_CALL_END',
|
|
295
|
+
data: {
|
|
296
|
+
name: TASK_COMPLETE_TOOL_NAME,
|
|
297
|
+
output: expect.stringContaining('Output submitted'),
|
|
298
|
+
},
|
|
189
299
|
}),
|
|
190
300
|
]));
|
|
191
301
|
});
|
|
192
|
-
it('should execute
|
|
193
|
-
const definition = createTestDefinition([LSTool.Name,
|
|
302
|
+
it('should execute successfully when model calls complete_task without output (Happy Path No Output)', async () => {
|
|
303
|
+
const definition = createTestDefinition([LSTool.Name], {}, 'none');
|
|
194
304
|
const executor = await AgentExecutor.create(definition, mockConfig, onActivity);
|
|
195
|
-
|
|
196
|
-
name: LSTool.Name,
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
activeCalls++;
|
|
213
|
-
maxActiveCalls = Math.max(maxActiveCalls, activeCalls);
|
|
214
|
-
// Simulate latency. We must advance the fake timers for this to resolve.
|
|
215
|
-
await new Promise((resolve) => setTimeout(resolve, 100));
|
|
216
|
-
activeCalls--;
|
|
217
|
-
return {
|
|
218
|
-
callId: reqInfo.callId,
|
|
219
|
-
resultDisplay: `Result for ${reqInfo.name}`,
|
|
305
|
+
mockModelResponse([
|
|
306
|
+
{ name: LSTool.Name, args: { path: '.' }, id: 'call1' },
|
|
307
|
+
]);
|
|
308
|
+
mockExecuteToolCall.mockResolvedValueOnce({
|
|
309
|
+
status: 'success',
|
|
310
|
+
request: {
|
|
311
|
+
callId: 'call1',
|
|
312
|
+
name: LSTool.Name,
|
|
313
|
+
args: { path: '.' },
|
|
314
|
+
isClientInitiated: false,
|
|
315
|
+
prompt_id: 'test-prompt',
|
|
316
|
+
},
|
|
317
|
+
tool: {},
|
|
318
|
+
invocation: {},
|
|
319
|
+
response: {
|
|
320
|
+
callId: 'call1',
|
|
321
|
+
resultDisplay: 'ok',
|
|
220
322
|
responseParts: [
|
|
221
323
|
{
|
|
222
324
|
functionResponse: {
|
|
223
|
-
name:
|
|
325
|
+
name: LSTool.Name,
|
|
224
326
|
response: {},
|
|
225
|
-
id:
|
|
327
|
+
id: 'call1',
|
|
226
328
|
},
|
|
227
329
|
},
|
|
228
330
|
],
|
|
229
331
|
error: undefined,
|
|
230
|
-
|
|
332
|
+
errorType: undefined,
|
|
333
|
+
contentLength: undefined,
|
|
334
|
+
},
|
|
231
335
|
});
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
expect(
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
expect(turn2Parts.length).toBe(2);
|
|
248
|
-
expect(turn2Parts[0]).toEqual(expect.objectContaining({
|
|
249
|
-
functionResponse: expect.objectContaining({ id: 'call1' }),
|
|
250
|
-
}));
|
|
251
|
-
expect(turn2Parts[1]).toEqual(expect.objectContaining({
|
|
252
|
-
functionResponse: expect.objectContaining({ id: 'call2' }),
|
|
253
|
-
}));
|
|
336
|
+
mockModelResponse([{ name: TASK_COMPLETE_TOOL_NAME, args: {}, id: 'call2' }], 'Task finished.');
|
|
337
|
+
const output = await executor.run({ goal: 'Do work' }, signal);
|
|
338
|
+
const turn1Params = getMockMessageParams(0);
|
|
339
|
+
const firstToolGroup = turn1Params.config?.tools?.[0];
|
|
340
|
+
expect(firstToolGroup).toBeDefined();
|
|
341
|
+
if (!firstToolGroup || !('functionDeclarations' in firstToolGroup)) {
|
|
342
|
+
throw new Error('Test expectation failed: Config does not contain functionDeclarations.');
|
|
343
|
+
}
|
|
344
|
+
const sentTools = firstToolGroup.functionDeclarations;
|
|
345
|
+
expect(sentTools).toBeDefined();
|
|
346
|
+
const completeToolDef = sentTools.find((t) => t.name === TASK_COMPLETE_TOOL_NAME);
|
|
347
|
+
expect(completeToolDef?.parameters?.required).toEqual([]);
|
|
348
|
+
expect(completeToolDef?.description).toContain('signal that you have completed');
|
|
349
|
+
expect(output.result).toBe('Task completed successfully.');
|
|
350
|
+
expect(output.terminate_reason).toBe(AgentTerminateMode.GOAL);
|
|
254
351
|
});
|
|
255
|
-
it('should
|
|
256
|
-
const definition = createTestDefinition(
|
|
352
|
+
it('should error immediately if the model stops tools without calling complete_task (Protocol Violation)', async () => {
|
|
353
|
+
const definition = createTestDefinition();
|
|
257
354
|
const executor = await AgentExecutor.create(definition, mockConfig, onActivity);
|
|
258
|
-
// Turn 1: Model calls ls, but it fails
|
|
259
355
|
mockModelResponse([
|
|
260
|
-
{ name: LSTool.Name, args: { path: '
|
|
356
|
+
{ name: LSTool.Name, args: { path: '.' }, id: 'call1' },
|
|
261
357
|
]);
|
|
262
|
-
const errorMessage = 'Internal failure.';
|
|
263
358
|
mockExecuteToolCall.mockResolvedValueOnce({
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
359
|
+
status: 'success',
|
|
360
|
+
request: {
|
|
361
|
+
callId: 'call1',
|
|
362
|
+
name: LSTool.Name,
|
|
363
|
+
args: { path: '.' },
|
|
364
|
+
isClientInitiated: false,
|
|
365
|
+
prompt_id: 'test-prompt',
|
|
366
|
+
},
|
|
367
|
+
tool: {},
|
|
368
|
+
invocation: {},
|
|
369
|
+
response: {
|
|
370
|
+
callId: 'call1',
|
|
371
|
+
resultDisplay: 'ok',
|
|
372
|
+
responseParts: [
|
|
373
|
+
{
|
|
374
|
+
functionResponse: {
|
|
375
|
+
name: LSTool.Name,
|
|
376
|
+
response: {},
|
|
377
|
+
id: 'call1',
|
|
378
|
+
},
|
|
379
|
+
},
|
|
380
|
+
],
|
|
381
|
+
error: undefined,
|
|
382
|
+
errorType: undefined,
|
|
383
|
+
contentLength: undefined,
|
|
384
|
+
},
|
|
268
385
|
});
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
386
|
+
mockModelResponse([], 'I think I am done.');
|
|
387
|
+
const output = await executor.run({ goal: 'Strict test' }, signal);
|
|
388
|
+
expect(mockSendMessageStream).toHaveBeenCalledTimes(2);
|
|
389
|
+
const expectedError = `Agent stopped calling tools but did not call '${TASK_COMPLETE_TOOL_NAME}' to finalize the session.`;
|
|
390
|
+
expect(output.terminate_reason).toBe(AgentTerminateMode.ERROR);
|
|
391
|
+
expect(output.result).toBe(expectedError);
|
|
392
|
+
// Telemetry check for error
|
|
393
|
+
expect(mockedLogAgentFinish).toHaveBeenCalledWith(mockConfig, expect.objectContaining({
|
|
394
|
+
terminate_reason: AgentTerminateMode.ERROR,
|
|
395
|
+
}));
|
|
396
|
+
expect(activities).toContainEqual(expect.objectContaining({
|
|
397
|
+
type: 'ERROR',
|
|
398
|
+
data: expect.objectContaining({
|
|
399
|
+
context: 'protocol_violation',
|
|
400
|
+
error: expectedError,
|
|
401
|
+
}),
|
|
402
|
+
}));
|
|
403
|
+
});
|
|
404
|
+
it('should report an error if complete_task is called with missing required arguments', async () => {
|
|
405
|
+
const definition = createTestDefinition();
|
|
406
|
+
const executor = await AgentExecutor.create(definition, mockConfig, onActivity);
|
|
407
|
+
// Turn 1: Missing arg
|
|
408
|
+
mockModelResponse([
|
|
409
|
+
{
|
|
410
|
+
name: TASK_COMPLETE_TOOL_NAME,
|
|
411
|
+
args: { wrongArg: 'oops' },
|
|
412
|
+
id: 'call1',
|
|
413
|
+
},
|
|
414
|
+
]);
|
|
415
|
+
// Turn 2: Corrected
|
|
416
|
+
mockModelResponse([
|
|
417
|
+
{
|
|
418
|
+
name: TASK_COMPLETE_TOOL_NAME,
|
|
419
|
+
args: { finalResult: 'Corrected result' },
|
|
420
|
+
id: 'call2',
|
|
421
|
+
},
|
|
422
|
+
]);
|
|
423
|
+
const output = await executor.run({ goal: 'Error test' }, signal);
|
|
424
|
+
expect(mockSendMessageStream).toHaveBeenCalledTimes(2);
|
|
425
|
+
const expectedError = "Missing required argument 'finalResult' for completion.";
|
|
274
426
|
expect(activities).toContainEqual(expect.objectContaining({
|
|
275
427
|
type: 'ERROR',
|
|
276
428
|
data: {
|
|
277
|
-
error: errorMessage,
|
|
278
429
|
context: 'tool_call',
|
|
279
|
-
name:
|
|
430
|
+
name: TASK_COMPLETE_TOOL_NAME,
|
|
431
|
+
error: expectedError,
|
|
280
432
|
},
|
|
281
433
|
}));
|
|
282
|
-
|
|
283
|
-
const
|
|
284
|
-
|
|
285
|
-
expect(turn2Parts).
|
|
434
|
+
const turn2Params = getMockMessageParams(1);
|
|
435
|
+
const turn2Parts = turn2Params.message;
|
|
436
|
+
expect(turn2Parts).toBeDefined();
|
|
437
|
+
expect(turn2Parts).toHaveLength(1);
|
|
438
|
+
expect(turn2Parts[0]).toEqual(expect.objectContaining({
|
|
439
|
+
functionResponse: expect.objectContaining({
|
|
440
|
+
name: TASK_COMPLETE_TOOL_NAME,
|
|
441
|
+
response: { error: expectedError },
|
|
442
|
+
id: 'call1',
|
|
443
|
+
}),
|
|
444
|
+
}));
|
|
445
|
+
expect(output.result).toBe('Corrected result');
|
|
446
|
+
expect(output.terminate_reason).toBe(AgentTerminateMode.GOAL);
|
|
447
|
+
});
|
|
448
|
+
it('should handle multiple calls to complete_task in the same turn (accept first, block rest)', async () => {
|
|
449
|
+
const definition = createTestDefinition([], {}, 'none');
|
|
450
|
+
const executor = await AgentExecutor.create(definition, mockConfig, onActivity);
|
|
451
|
+
// Turn 1: Duplicate calls
|
|
452
|
+
mockModelResponse([
|
|
453
|
+
{ name: TASK_COMPLETE_TOOL_NAME, args: {}, id: 'call1' },
|
|
454
|
+
{ name: TASK_COMPLETE_TOOL_NAME, args: {}, id: 'call2' },
|
|
455
|
+
]);
|
|
456
|
+
const output = await executor.run({ goal: 'Dup test' }, signal);
|
|
457
|
+
expect(mockSendMessageStream).toHaveBeenCalledTimes(1);
|
|
458
|
+
expect(output.terminate_reason).toBe(AgentTerminateMode.GOAL);
|
|
459
|
+
const completions = activities.filter((a) => a.type === 'TOOL_CALL_END' &&
|
|
460
|
+
a.data['name'] === TASK_COMPLETE_TOOL_NAME);
|
|
461
|
+
const errors = activities.filter((a) => a.type === 'ERROR' && a.data['name'] === TASK_COMPLETE_TOOL_NAME);
|
|
462
|
+
expect(completions).toHaveLength(1);
|
|
463
|
+
expect(errors).toHaveLength(1);
|
|
464
|
+
expect(errors[0].data['error']).toContain('Task already marked complete in this turn');
|
|
465
|
+
});
|
|
466
|
+
it('should execute parallel tool calls and then complete', async () => {
|
|
467
|
+
const definition = createTestDefinition([LSTool.Name]);
|
|
468
|
+
const executor = await AgentExecutor.create(definition, mockConfig, onActivity);
|
|
469
|
+
const call1 = {
|
|
470
|
+
name: LSTool.Name,
|
|
471
|
+
args: { path: '/a' },
|
|
472
|
+
id: 'c1',
|
|
473
|
+
};
|
|
474
|
+
const call2 = {
|
|
475
|
+
name: LSTool.Name,
|
|
476
|
+
args: { path: '/b' },
|
|
477
|
+
id: 'c2',
|
|
478
|
+
};
|
|
479
|
+
// Turn 1: Parallel calls
|
|
480
|
+
mockModelResponse([call1, call2]);
|
|
481
|
+
// Concurrency mock
|
|
482
|
+
let callsStarted = 0;
|
|
483
|
+
let resolveCalls;
|
|
484
|
+
const bothStarted = new Promise((r) => {
|
|
485
|
+
resolveCalls = r;
|
|
486
|
+
});
|
|
487
|
+
mockExecuteToolCall.mockImplementation(async (_ctx, reqInfo) => {
|
|
488
|
+
callsStarted++;
|
|
489
|
+
if (callsStarted === 2)
|
|
490
|
+
resolveCalls();
|
|
491
|
+
await vi.advanceTimersByTimeAsync(100);
|
|
492
|
+
return {
|
|
493
|
+
status: 'success',
|
|
494
|
+
request: reqInfo,
|
|
495
|
+
tool: {},
|
|
496
|
+
invocation: {},
|
|
497
|
+
response: {
|
|
498
|
+
callId: reqInfo.callId,
|
|
499
|
+
resultDisplay: 'ok',
|
|
500
|
+
responseParts: [
|
|
501
|
+
{
|
|
502
|
+
functionResponse: {
|
|
503
|
+
name: reqInfo.name,
|
|
504
|
+
response: {},
|
|
505
|
+
id: reqInfo.callId,
|
|
506
|
+
},
|
|
507
|
+
},
|
|
508
|
+
],
|
|
509
|
+
error: undefined,
|
|
510
|
+
errorType: undefined,
|
|
511
|
+
contentLength: undefined,
|
|
512
|
+
},
|
|
513
|
+
};
|
|
514
|
+
});
|
|
515
|
+
// Turn 2: Completion
|
|
516
|
+
mockModelResponse([
|
|
286
517
|
{
|
|
287
|
-
|
|
518
|
+
name: TASK_COMPLETE_TOOL_NAME,
|
|
519
|
+
args: { finalResult: 'done' },
|
|
520
|
+
id: 'c3',
|
|
288
521
|
},
|
|
289
522
|
]);
|
|
523
|
+
const runPromise = executor.run({ goal: 'Parallel' }, signal);
|
|
524
|
+
await vi.advanceTimersByTimeAsync(1);
|
|
525
|
+
await bothStarted;
|
|
526
|
+
await vi.advanceTimersByTimeAsync(150);
|
|
527
|
+
await vi.advanceTimersByTimeAsync(1);
|
|
528
|
+
const output = await runPromise;
|
|
529
|
+
expect(mockExecuteToolCall).toHaveBeenCalledTimes(2);
|
|
530
|
+
expect(output.terminate_reason).toBe(AgentTerminateMode.GOAL);
|
|
531
|
+
// Safe access to message parts
|
|
532
|
+
const turn2Params = getMockMessageParams(1);
|
|
533
|
+
const parts = turn2Params.message;
|
|
534
|
+
expect(parts).toBeDefined();
|
|
535
|
+
expect(parts).toHaveLength(2);
|
|
536
|
+
expect(parts).toEqual(expect.arrayContaining([
|
|
537
|
+
expect.objectContaining({
|
|
538
|
+
functionResponse: expect.objectContaining({ id: 'c1' }),
|
|
539
|
+
}),
|
|
540
|
+
expect.objectContaining({
|
|
541
|
+
functionResponse: expect.objectContaining({ id: 'c2' }),
|
|
542
|
+
}),
|
|
543
|
+
]));
|
|
290
544
|
});
|
|
291
|
-
it('SECURITY: should block
|
|
292
|
-
// Agent definition only includes LSTool
|
|
545
|
+
it('SECURITY: should block unauthorized tools and provide explicit failure to model', async () => {
|
|
293
546
|
const definition = createTestDefinition([LSTool.Name]);
|
|
294
547
|
const executor = await AgentExecutor.create(definition, mockConfig, onActivity);
|
|
295
|
-
// Turn 1: Model
|
|
296
|
-
|
|
548
|
+
// Turn 1: Model tries to use a tool not in its config
|
|
549
|
+
const badCallId = 'bad_call_1';
|
|
297
550
|
mockModelResponse([
|
|
298
551
|
{
|
|
299
552
|
name: ReadFileTool.Name,
|
|
300
|
-
args: { path: '
|
|
301
|
-
id:
|
|
553
|
+
args: { path: 'secret.txt' },
|
|
554
|
+
id: badCallId,
|
|
555
|
+
},
|
|
556
|
+
]);
|
|
557
|
+
// Turn 2: Model gives up and completes
|
|
558
|
+
mockModelResponse([
|
|
559
|
+
{
|
|
560
|
+
name: TASK_COMPLETE_TOOL_NAME,
|
|
561
|
+
args: { finalResult: 'Could not read file.' },
|
|
562
|
+
id: 'c2',
|
|
302
563
|
},
|
|
303
564
|
]);
|
|
304
|
-
// Turn 2: Model stops
|
|
305
|
-
mockModelResponse([]);
|
|
306
|
-
// Extraction
|
|
307
|
-
mockModelResponse([], undefined, 'Done.');
|
|
308
565
|
const consoleWarnSpy = vi
|
|
309
566
|
.spyOn(console, 'warn')
|
|
310
567
|
.mockImplementation(() => { });
|
|
311
|
-
await executor.run({ goal: '
|
|
312
|
-
// Verify
|
|
568
|
+
await executor.run({ goal: 'Sec test' }, signal);
|
|
569
|
+
// Verify external executor was not called (Security held)
|
|
313
570
|
expect(mockExecuteToolCall).not.toHaveBeenCalled();
|
|
314
|
-
|
|
571
|
+
// 2. Verify console warning
|
|
572
|
+
expect(consoleWarnSpy).toHaveBeenCalledWith(expect.stringContaining(`[AgentExecutor] Blocked call:`));
|
|
315
573
|
consoleWarnSpy.mockRestore();
|
|
316
|
-
// Verify
|
|
317
|
-
const
|
|
318
|
-
const
|
|
319
|
-
expect(
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
//
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
expect(extractionText).toContain('Be sure you have addressed:');
|
|
338
|
-
expect(extractionText).toContain('- Must include file names');
|
|
339
|
-
expect(extractionText).toContain('- Must be concise');
|
|
574
|
+
// Verify specific error was sent back to model
|
|
575
|
+
const turn2Params = getMockMessageParams(1);
|
|
576
|
+
const parts = turn2Params.message;
|
|
577
|
+
expect(parts).toBeDefined();
|
|
578
|
+
expect(parts[0]).toEqual(expect.objectContaining({
|
|
579
|
+
functionResponse: expect.objectContaining({
|
|
580
|
+
id: badCallId,
|
|
581
|
+
name: ReadFileTool.Name,
|
|
582
|
+
response: {
|
|
583
|
+
error: expect.stringContaining('Unauthorized tool call'),
|
|
584
|
+
},
|
|
585
|
+
}),
|
|
586
|
+
}));
|
|
587
|
+
// Verify Activity Stream reported the error
|
|
588
|
+
expect(activities).toContainEqual(expect.objectContaining({
|
|
589
|
+
type: 'ERROR',
|
|
590
|
+
data: expect.objectContaining({
|
|
591
|
+
context: 'tool_call_unauthorized',
|
|
592
|
+
name: ReadFileTool.Name,
|
|
593
|
+
}),
|
|
594
|
+
}));
|
|
340
595
|
});
|
|
341
596
|
});
|
|
342
597
|
describe('run (Termination Conditions)', () => {
|
|
343
|
-
const
|
|
344
|
-
mockModelResponse([{ name: LSTool.Name, args: { path: '.' }, id
|
|
345
|
-
mockExecuteToolCall.
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
598
|
+
const mockWorkResponse = (id) => {
|
|
599
|
+
mockModelResponse([{ name: LSTool.Name, args: { path: '.' }, id }]);
|
|
600
|
+
mockExecuteToolCall.mockResolvedValueOnce({
|
|
601
|
+
status: 'success',
|
|
602
|
+
request: {
|
|
603
|
+
callId: id,
|
|
604
|
+
name: LSTool.Name,
|
|
605
|
+
args: { path: '.' },
|
|
606
|
+
isClientInitiated: false,
|
|
607
|
+
prompt_id: 'test-prompt',
|
|
608
|
+
},
|
|
609
|
+
tool: {},
|
|
610
|
+
invocation: {},
|
|
611
|
+
response: {
|
|
612
|
+
callId: id,
|
|
613
|
+
resultDisplay: 'ok',
|
|
614
|
+
responseParts: [
|
|
615
|
+
{ functionResponse: { name: LSTool.Name, response: {}, id } },
|
|
616
|
+
],
|
|
617
|
+
error: undefined,
|
|
618
|
+
errorType: undefined,
|
|
619
|
+
contentLength: undefined,
|
|
620
|
+
},
|
|
352
621
|
});
|
|
353
622
|
};
|
|
354
623
|
it('should terminate when max_turns is reached', async () => {
|
|
355
|
-
const
|
|
624
|
+
const MAX = 2;
|
|
356
625
|
const definition = createTestDefinition([LSTool.Name], {
|
|
357
|
-
max_turns:
|
|
626
|
+
max_turns: MAX,
|
|
358
627
|
});
|
|
359
|
-
const executor = await AgentExecutor.create(definition, mockConfig
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
mockKeepAliveResponse();
|
|
364
|
-
const output = await executor.run({ goal: 'Termination test' }, signal);
|
|
628
|
+
const executor = await AgentExecutor.create(definition, mockConfig);
|
|
629
|
+
mockWorkResponse('t1');
|
|
630
|
+
mockWorkResponse('t2');
|
|
631
|
+
const output = await executor.run({ goal: 'Turns test' }, signal);
|
|
365
632
|
expect(output.terminate_reason).toBe(AgentTerminateMode.MAX_TURNS);
|
|
366
|
-
expect(mockSendMessageStream).toHaveBeenCalledTimes(
|
|
367
|
-
// Extraction phase should be skipped when termination is forced
|
|
368
|
-
expect(mockSendMessageStream).not.toHaveBeenCalledWith(expect.any(String), expect.any(Object), expect.stringContaining('#extraction'));
|
|
633
|
+
expect(mockSendMessageStream).toHaveBeenCalledTimes(MAX);
|
|
369
634
|
});
|
|
370
635
|
it('should terminate if timeout is reached', async () => {
|
|
371
636
|
const definition = createTestDefinition([LSTool.Name], {
|
|
372
|
-
max_time_minutes:
|
|
373
|
-
max_turns: 100,
|
|
637
|
+
max_time_minutes: 1,
|
|
374
638
|
});
|
|
375
|
-
const executor = await AgentExecutor.create(definition, mockConfig
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
// Advance time past the 5-minute limit during the tool call execution
|
|
381
|
-
await vi.advanceTimersByTimeAsync(5 * 60 * 1000 + 1);
|
|
639
|
+
const executor = await AgentExecutor.create(definition, mockConfig);
|
|
640
|
+
mockModelResponse([{ name: LSTool.Name, args: { path: '.' }, id: 't1' }]);
|
|
641
|
+
// Long running tool
|
|
642
|
+
mockExecuteToolCall.mockImplementationOnce(async (_ctx, reqInfo) => {
|
|
643
|
+
await vi.advanceTimersByTimeAsync(61 * 1000);
|
|
382
644
|
return {
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
645
|
+
status: 'success',
|
|
646
|
+
request: reqInfo,
|
|
647
|
+
tool: {},
|
|
648
|
+
invocation: {},
|
|
649
|
+
response: {
|
|
650
|
+
callId: 't1',
|
|
651
|
+
resultDisplay: 'ok',
|
|
652
|
+
responseParts: [],
|
|
653
|
+
error: undefined,
|
|
654
|
+
errorType: undefined,
|
|
655
|
+
contentLength: undefined,
|
|
656
|
+
},
|
|
391
657
|
};
|
|
392
658
|
});
|
|
393
|
-
const output = await executor.run({ goal: '
|
|
659
|
+
const output = await executor.run({ goal: 'Timeout test' }, signal);
|
|
394
660
|
expect(output.terminate_reason).toBe(AgentTerminateMode.TIMEOUT);
|
|
395
|
-
// Should only have called the model once before the timeout check stopped it
|
|
396
661
|
expect(mockSendMessageStream).toHaveBeenCalledTimes(1);
|
|
397
662
|
});
|
|
398
|
-
it('should terminate when AbortSignal is triggered
|
|
663
|
+
it('should terminate when AbortSignal is triggered', async () => {
|
|
399
664
|
const definition = createTestDefinition();
|
|
400
|
-
const executor = await AgentExecutor.create(definition, mockConfig
|
|
401
|
-
|
|
402
|
-
mockSendMessageStream.mockImplementation(async () => (async function* () {
|
|
403
|
-
// Yield the first chunk
|
|
665
|
+
const executor = await AgentExecutor.create(definition, mockConfig);
|
|
666
|
+
mockSendMessageStream.mockImplementationOnce(async () => (async function* () {
|
|
404
667
|
yield {
|
|
405
668
|
type: StreamEventType.CHUNK,
|
|
406
669
|
value: createMockResponseChunk([
|
|
407
|
-
{ text: '
|
|
670
|
+
{ text: 'Thinking...', thought: true },
|
|
408
671
|
]),
|
|
409
672
|
};
|
|
410
|
-
// Simulate abort happening mid-stream
|
|
411
673
|
abortController.abort();
|
|
412
|
-
// The loop in callModel should break immediately due to signal check.
|
|
413
674
|
})());
|
|
414
|
-
const output = await executor.run({ goal: '
|
|
675
|
+
const output = await executor.run({ goal: 'Abort test' }, signal);
|
|
415
676
|
expect(output.terminate_reason).toBe(AgentTerminateMode.ABORTED);
|
|
416
677
|
});
|
|
417
678
|
});
|