@google/gemini-cli-core 0.9.0-nightly.20251002.aa8b2abe → 0.9.0-nightly.20251004.7db79e14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91) hide show
  1. package/README.md +2 -1
  2. package/dist/index.d.ts +2 -0
  3. package/dist/index.js +1 -0
  4. package/dist/index.js.map +1 -1
  5. package/dist/src/agents/codebase-investigator.d.ts +36 -1
  6. package/dist/src/agents/codebase-investigator.js +58 -86
  7. package/dist/src/agents/codebase-investigator.js.map +1 -1
  8. package/dist/src/agents/executor.d.ts +7 -11
  9. package/dist/src/agents/executor.js +225 -94
  10. package/dist/src/agents/executor.js.map +1 -1
  11. package/dist/src/agents/executor.test.js +327 -287
  12. package/dist/src/agents/executor.test.js.map +1 -1
  13. package/dist/src/agents/invocation.d.ts +3 -2
  14. package/dist/src/agents/invocation.js +1 -0
  15. package/dist/src/agents/invocation.js.map +1 -1
  16. package/dist/src/agents/invocation.test.js +1 -0
  17. package/dist/src/agents/invocation.test.js.map +1 -1
  18. package/dist/src/agents/registry.d.ts +2 -1
  19. package/dist/src/agents/registry.js +2 -0
  20. package/dist/src/agents/registry.js.map +1 -1
  21. package/dist/src/agents/types.d.ts +28 -6
  22. package/dist/src/agents/types.js +1 -0
  23. package/dist/src/agents/types.js.map +1 -1
  24. package/dist/src/config/config.d.ts +4 -0
  25. package/dist/src/config/config.js +8 -0
  26. package/dist/src/config/config.js.map +1 -1
  27. package/dist/src/core/client.js +2 -2
  28. package/dist/src/core/client.js.map +1 -1
  29. package/dist/src/core/client.test.js +1 -1
  30. package/dist/src/core/client.test.js.map +1 -1
  31. package/dist/src/core/geminiChat.js +1 -14
  32. package/dist/src/core/geminiChat.js.map +1 -1
  33. package/dist/src/core/geminiChat.test.js +14 -18
  34. package/dist/src/core/geminiChat.test.js.map +1 -1
  35. package/dist/src/core/prompts.d.ts +2 -1
  36. package/dist/src/core/prompts.js +81 -8
  37. package/dist/src/core/prompts.js.map +1 -1
  38. package/dist/src/core/prompts.test.js +73 -24
  39. package/dist/src/core/prompts.test.js.map +1 -1
  40. package/dist/src/generated/git-commit.d.ts +2 -2
  41. package/dist/src/generated/git-commit.js +2 -2
  42. package/dist/src/telemetry/clearcut-logger/clearcut-logger.js +0 -8
  43. package/dist/src/telemetry/clearcut-logger/clearcut-logger.js.map +1 -1
  44. package/dist/src/telemetry/clearcut-logger/event-metadata-key.d.ts +2 -2
  45. package/dist/src/telemetry/clearcut-logger/event-metadata-key.js +4 -4
  46. package/dist/src/telemetry/clearcut-logger/event-metadata-key.js.map +1 -1
  47. package/dist/src/telemetry/constants.d.ts +1 -0
  48. package/dist/src/telemetry/constants.js +1 -0
  49. package/dist/src/telemetry/constants.js.map +1 -1
  50. package/dist/src/telemetry/loggers.js +2 -2
  51. package/dist/src/telemetry/loggers.js.map +1 -1
  52. package/dist/src/telemetry/loggers.test.js +2 -2
  53. package/dist/src/telemetry/loggers.test.js.map +1 -1
  54. package/dist/src/tools/mcp-client.d.ts +3 -2
  55. package/dist/src/tools/mcp-client.js +28 -30
  56. package/dist/src/tools/mcp-client.js.map +1 -1
  57. package/dist/src/tools/mcp-client.test.js +168 -5
  58. package/dist/src/tools/mcp-client.test.js.map +1 -1
  59. package/dist/src/utils/errorParsing.d.ts +1 -1
  60. package/dist/src/utils/errorParsing.js +5 -33
  61. package/dist/src/utils/errorParsing.js.map +1 -1
  62. package/dist/src/utils/errorParsing.test.js +0 -88
  63. package/dist/src/utils/errorParsing.test.js.map +1 -1
  64. package/dist/src/utils/flashFallback.test.js +26 -45
  65. package/dist/src/utils/flashFallback.test.js.map +1 -1
  66. package/dist/src/utils/googleErrors.d.ts +104 -0
  67. package/dist/src/utils/googleErrors.js +108 -0
  68. package/dist/src/utils/googleErrors.js.map +1 -0
  69. package/dist/src/utils/googleErrors.test.d.ts +6 -0
  70. package/dist/src/utils/googleErrors.test.js +212 -0
  71. package/dist/src/utils/googleErrors.test.js.map +1 -0
  72. package/dist/src/utils/googleQuotaErrors.d.ts +35 -0
  73. package/dist/src/utils/googleQuotaErrors.js +108 -0
  74. package/dist/src/utils/googleQuotaErrors.js.map +1 -0
  75. package/dist/src/utils/googleQuotaErrors.test.d.ts +6 -0
  76. package/dist/src/utils/googleQuotaErrors.test.js +189 -0
  77. package/dist/src/utils/googleQuotaErrors.test.js.map +1 -0
  78. package/dist/src/utils/memoryDiscovery.d.ts +1 -0
  79. package/dist/src/utils/memoryDiscovery.js +2 -1
  80. package/dist/src/utils/memoryDiscovery.js.map +1 -1
  81. package/dist/src/utils/memoryDiscovery.test.js +99 -21
  82. package/dist/src/utils/memoryDiscovery.test.js.map +1 -1
  83. package/dist/src/utils/quotaErrorDetection.d.ts +0 -2
  84. package/dist/src/utils/quotaErrorDetection.js +0 -46
  85. package/dist/src/utils/quotaErrorDetection.js.map +1 -1
  86. package/dist/src/utils/retry.js +40 -157
  87. package/dist/src/utils/retry.js.map +1 -1
  88. package/dist/src/utils/retry.test.js +85 -144
  89. package/dist/src/utils/retry.test.js.map +1 -1
  90. package/dist/tsconfig.tsbuildinfo +1 -1
  91. package/package.json +1 -1
@@ -3,7 +3,7 @@
3
3
  * Copyright 2025 Google LLC
4
4
  * SPDX-License-Identifier: Apache-2.0
5
5
  */
6
- import { describe, it, expect, vi, beforeEach, afterEach, } from 'vitest';
6
+ import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
7
7
  import { AgentExecutor } from './executor.js';
8
8
  import { AgentTerminateMode } from './types.js';
9
9
  import { makeFakeConfig } from '../test-utils/config.js';
@@ -11,8 +11,10 @@ import { ToolRegistry } from '../tools/tool-registry.js';
11
11
  import { LSTool } from '../tools/ls.js';
12
12
  import { ReadFileTool } from '../tools/read-file.js';
13
13
  import { GeminiChat, StreamEventType, } from '../core/geminiChat.js';
14
+ import {} from '@google/genai';
14
15
  import { MockTool } from '../test-utils/mock-tool.js';
15
16
  import { getDirectoryContextString } from '../utils/environmentContext.js';
17
+ import { z } from 'zod';
16
18
  const { mockSendMessageStream, mockExecuteToolCall } = vi.hoisted(() => ({
17
19
  mockSendMessageStream: vi.fn(),
18
20
  mockExecuteToolCall: vi.fn(),
@@ -30,13 +32,22 @@ vi.mock('../core/nonInteractiveToolExecutor.js', () => ({
30
32
  executeToolCall: mockExecuteToolCall,
31
33
  }));
32
34
  vi.mock('../utils/environmentContext.js');
33
- const MockedGeminiChat = GeminiChat;
34
- // A mock tool that is NOT on the NON_INTERACTIVE_TOOL_ALLOWLIST
35
- const MOCK_TOOL_NOT_ALLOWED = new MockTool({ name: 'write_file' });
35
+ const MockedGeminiChat = vi.mocked(GeminiChat);
36
+ const mockedGetDirectoryContextString = vi.mocked(getDirectoryContextString);
37
+ // Constants for testing
38
+ const TASK_COMPLETE_TOOL_NAME = 'complete_task';
39
+ const MOCK_TOOL_NOT_ALLOWED = new MockTool({ name: 'write_file_interactive' });
40
+ /**
41
+ * Helper to create a mock API response chunk.
42
+ * Uses conditional spread to handle readonly functionCalls property safely.
43
+ */
36
44
  const createMockResponseChunk = (parts, functionCalls) => ({
37
45
  candidates: [{ index: 0, content: { role: 'model', parts } }],
38
- functionCalls,
46
+ ...(functionCalls && functionCalls.length > 0 ? { functionCalls } : {}),
39
47
  });
48
+ /**
49
+ * Helper to mock a single turn of model response in the stream.
50
+ */
40
51
  const mockModelResponse = (functionCalls, thought, text) => {
41
52
  const parts = [];
42
53
  if (thought) {
@@ -47,9 +58,7 @@ const mockModelResponse = (functionCalls, thought, text) => {
47
58
  }
48
59
  if (text)
49
60
  parts.push({ text });
50
- const responseChunk = createMockResponseChunk(parts,
51
- // Ensure functionCalls is undefined if the array is empty, matching API behavior
52
- functionCalls.length > 0 ? functionCalls : undefined);
61
+ const responseChunk = createMockResponseChunk(parts, functionCalls);
53
62
  mockSendMessageStream.mockImplementationOnce(async () => (async function* () {
54
63
  yield {
55
64
  type: StreamEventType.CHUNK,
@@ -57,33 +66,55 @@ const mockModelResponse = (functionCalls, thought, text) => {
57
66
  };
58
67
  })());
59
68
  };
69
+ /**
70
+ * Helper to extract the message parameters sent to sendMessageStream.
71
+ * Provides type safety for inspecting mock calls.
72
+ */
73
+ const getMockMessageParams = (callIndex) => {
74
+ const call = mockSendMessageStream.mock.calls[callIndex];
75
+ expect(call).toBeDefined();
76
+ // Arg 1 of sendMessageStream is the message parameters
77
+ return call[1];
78
+ };
60
79
  let mockConfig;
61
80
  let parentToolRegistry;
62
- const createTestDefinition = (tools = [LSTool.Name], runConfigOverrides = {}, outputConfigOverrides = {}, promptConfigOverrides = {}) => ({
63
- name: 'TestAgent',
64
- description: 'An agent for testing.',
65
- inputConfig: {
66
- inputs: { goal: { type: 'string', required: true, description: 'goal' } },
67
- },
68
- modelConfig: { model: 'gemini-test-model', temp: 0, top_p: 1 },
69
- runConfig: { max_time_minutes: 5, max_turns: 5, ...runConfigOverrides },
70
- promptConfig: {
71
- systemPrompt: 'Achieve the goal: ${goal}.',
72
- ...promptConfigOverrides,
73
- },
74
- toolConfig: { tools },
75
- outputConfig: { description: 'The final result.', ...outputConfigOverrides },
76
- });
81
+ /**
82
+ * Type-safe helper to create agent definitions for tests.
83
+ */
84
+ const createTestDefinition = (tools = [LSTool.Name], runConfigOverrides = {}, outputConfigMode = 'default', schema = z.string()) => {
85
+ let outputConfig;
86
+ if (outputConfigMode === 'default') {
87
+ outputConfig = {
88
+ outputName: 'finalResult',
89
+ description: 'The final result.',
90
+ schema,
91
+ };
92
+ }
93
+ return {
94
+ name: 'TestAgent',
95
+ description: 'An agent for testing.',
96
+ inputConfig: {
97
+ inputs: { goal: { type: 'string', required: true, description: 'goal' } },
98
+ },
99
+ modelConfig: { model: 'gemini-test-model', temp: 0, top_p: 1 },
100
+ runConfig: { max_time_minutes: 5, max_turns: 5, ...runConfigOverrides },
101
+ promptConfig: { systemPrompt: 'Achieve the goal: ${goal}.' },
102
+ toolConfig: { tools },
103
+ outputConfig,
104
+ };
105
+ };
77
106
  describe('AgentExecutor', () => {
78
107
  let activities;
79
108
  let onActivity;
80
109
  let abortController;
81
110
  let signal;
82
111
  beforeEach(async () => {
83
- mockSendMessageStream.mockClear();
84
- mockExecuteToolCall.mockClear();
85
- vi.clearAllMocks();
86
- // Use fake timers for timeout and concurrency testing
112
+ vi.resetAllMocks();
113
+ mockSendMessageStream.mockReset();
114
+ mockExecuteToolCall.mockReset();
115
+ MockedGeminiChat.mockImplementation(() => ({
116
+ sendMessageStream: mockSendMessageStream,
117
+ }));
87
118
  vi.useFakeTimers();
88
119
  mockConfig = makeFakeConfig();
89
120
  parentToolRegistry = new ToolRegistry(mockConfig);
@@ -91,7 +122,7 @@ describe('AgentExecutor', () => {
91
122
  parentToolRegistry.registerTool(new ReadFileTool(mockConfig));
92
123
  parentToolRegistry.registerTool(MOCK_TOOL_NOT_ALLOWED);
93
124
  vi.spyOn(mockConfig, 'getToolRegistry').mockResolvedValue(parentToolRegistry);
94
- vi.mocked(getDirectoryContextString).mockResolvedValue('Mocked Environment Context');
125
+ mockedGetDirectoryContextString.mockResolvedValue('Mocked Environment Context');
95
126
  activities = [];
96
127
  onActivity = (activity) => activities.push(activity);
97
128
  abortController = new AbortController();
@@ -108,13 +139,12 @@ describe('AgentExecutor', () => {
108
139
  });
109
140
  it('SECURITY: should throw if a tool is not on the non-interactive allowlist', async () => {
110
141
  const definition = createTestDefinition([MOCK_TOOL_NOT_ALLOWED.name]);
111
- await expect(AgentExecutor.create(definition, mockConfig, onActivity)).rejects.toThrow(`Tool "${MOCK_TOOL_NOT_ALLOWED.name}" is not on the allow-list for non-interactive execution`);
142
+ await expect(AgentExecutor.create(definition, mockConfig, onActivity)).rejects.toThrow(/not on the allow-list for non-interactive execution/);
112
143
  });
113
144
  it('should create an isolated ToolRegistry for the agent', async () => {
114
145
  const definition = createTestDefinition([LSTool.Name, ReadFileTool.Name]);
115
146
  const executor = await AgentExecutor.create(definition, mockConfig, onActivity);
116
- // @ts-expect-error - accessing private property for test validation
117
- const agentRegistry = executor.toolRegistry;
147
+ const agentRegistry = executor['toolRegistry'];
118
148
  expect(agentRegistry).not.toBe(parentToolRegistry);
119
149
  expect(agentRegistry.getAllToolNames()).toEqual(expect.arrayContaining([LSTool.Name, ReadFileTool.Name]));
120
150
  expect(agentRegistry.getAllToolNames()).toHaveLength(2);
@@ -122,7 +152,7 @@ describe('AgentExecutor', () => {
122
152
  });
123
153
  });
124
154
  describe('run (Execution Loop and Logic)', () => {
125
- it('should execute a successful work and extraction phase (Happy Path) and emit activities', async () => {
155
+ it('should execute successfully when model calls complete_task with output (Happy Path with Output)', async () => {
126
156
  const definition = createTestDefinition();
127
157
  const executor = await AgentExecutor.create(definition, mockConfig, onActivity);
128
158
  const inputs = { goal: 'Find files' };
@@ -142,84 +172,211 @@ describe('AgentExecutor', () => {
142
172
  ],
143
173
  error: undefined,
144
174
  });
145
- // Turn 2: Model stops
146
- mockModelResponse([], 'T2: Done');
147
- // Extraction Phase
148
- mockModelResponse([], undefined, 'Result: file1.txt.');
175
+ // Turn 2: Model calls complete_task with required output
176
+ mockModelResponse([
177
+ {
178
+ name: TASK_COMPLETE_TOOL_NAME,
179
+ args: { finalResult: 'Found file1.txt' },
180
+ id: 'call2',
181
+ },
182
+ ], 'T2: Done');
149
183
  const output = await executor.run(inputs, signal);
150
- expect(mockSendMessageStream).toHaveBeenCalledTimes(3);
151
- expect(mockExecuteToolCall).toHaveBeenCalledTimes(1);
152
- // Verify System Prompt Templating
184
+ expect(mockSendMessageStream).toHaveBeenCalledTimes(2);
153
185
  const chatConstructorArgs = MockedGeminiChat.mock.calls[0];
154
186
  const chatConfig = chatConstructorArgs[1];
155
- expect(chatConfig?.systemInstruction).toContain('Achieve the goal: Find files.');
156
- // Verify environment context is appended
157
- expect(chatConfig?.systemInstruction).toContain('# Environment Context\nMocked Environment Context');
158
- // Verify standard rules are appended
159
- expect(chatConfig?.systemInstruction).toContain('You are running in a non-interactive mode.');
160
- // Verify absolute path rule is appended
161
- expect(chatConfig?.systemInstruction).toContain('Always use absolute paths for file operations.');
162
- // Verify Extraction Phase Call (Specific arguments)
163
- expect(mockSendMessageStream).toHaveBeenCalledWith('gemini-test-model', expect.objectContaining({
164
- // Extraction message should be based on outputConfig.description
165
- message: expect.arrayContaining([
166
- {
167
- text: expect.stringContaining('Based on your work so far, provide: The final result.'),
168
- },
169
- ]),
170
- config: expect.objectContaining({ tools: undefined }), // No tools in extraction
171
- }), expect.stringContaining('#extraction'));
172
- expect(output.result).toBe('Result: file1.txt.');
187
+ expect(chatConfig?.systemInstruction).toContain(`MUST call the \`${TASK_COMPLETE_TOOL_NAME}\` tool`);
188
+ const turn1Params = getMockMessageParams(0);
189
+ const firstToolGroup = turn1Params.config?.tools?.[0];
190
+ expect(firstToolGroup).toBeDefined();
191
+ if (!firstToolGroup || !('functionDeclarations' in firstToolGroup)) {
192
+ throw new Error('Test expectation failed: Config does not contain functionDeclarations.');
193
+ }
194
+ const sentTools = firstToolGroup.functionDeclarations;
195
+ expect(sentTools).toBeDefined();
196
+ expect(sentTools).toEqual(expect.arrayContaining([
197
+ expect.objectContaining({ name: LSTool.Name }),
198
+ expect.objectContaining({ name: TASK_COMPLETE_TOOL_NAME }),
199
+ ]));
200
+ const completeToolDef = sentTools.find((t) => t.name === TASK_COMPLETE_TOOL_NAME);
201
+ expect(completeToolDef?.parameters?.required).toContain('finalResult');
202
+ expect(output.result).toBe('Found file1.txt');
173
203
  expect(output.terminate_reason).toBe(AgentTerminateMode.GOAL);
174
- // Verify Activity Stream (Observability)
175
204
  expect(activities).toEqual(expect.arrayContaining([
176
- // Thought subjects are extracted by the executor (parseThought)
177
205
  expect.objectContaining({
178
206
  type: 'THOUGHT_CHUNK',
179
207
  data: { text: 'T1: Listing' },
180
208
  }),
181
- expect.objectContaining({
182
- type: 'TOOL_CALL_START',
183
- data: { name: LSTool.Name, args: { path: '.' } },
184
- }),
185
209
  expect.objectContaining({
186
210
  type: 'TOOL_CALL_END',
187
211
  data: { name: LSTool.Name, output: 'file1.txt' },
188
212
  }),
189
213
  expect.objectContaining({
190
- type: 'THOUGHT_CHUNK',
191
- data: { text: 'T2: Done' },
214
+ type: 'TOOL_CALL_START',
215
+ data: {
216
+ name: TASK_COMPLETE_TOOL_NAME,
217
+ args: { finalResult: 'Found file1.txt' },
218
+ },
219
+ }),
220
+ expect.objectContaining({
221
+ type: 'TOOL_CALL_END',
222
+ data: {
223
+ name: TASK_COMPLETE_TOOL_NAME,
224
+ output: expect.stringContaining('Output submitted'),
225
+ },
192
226
  }),
193
227
  ]));
194
228
  });
195
- it('should execute parallel tool calls concurrently', async () => {
196
- const definition = createTestDefinition([LSTool.Name, ReadFileTool.Name]);
229
+ it('should execute successfully when model calls complete_task without output (Happy Path No Output)', async () => {
230
+ const definition = createTestDefinition([LSTool.Name], {}, 'none');
231
+ const executor = await AgentExecutor.create(definition, mockConfig, onActivity);
232
+ mockModelResponse([
233
+ { name: LSTool.Name, args: { path: '.' }, id: 'call1' },
234
+ ]);
235
+ mockExecuteToolCall.mockResolvedValueOnce({
236
+ callId: 'call1',
237
+ resultDisplay: 'ok',
238
+ responseParts: [
239
+ {
240
+ functionResponse: { name: LSTool.Name, response: {}, id: 'call1' },
241
+ },
242
+ ],
243
+ });
244
+ mockModelResponse([{ name: TASK_COMPLETE_TOOL_NAME, args: {}, id: 'call2' }], 'Task finished.');
245
+ const output = await executor.run({ goal: 'Do work' }, signal);
246
+ const turn1Params = getMockMessageParams(0);
247
+ const firstToolGroup = turn1Params.config?.tools?.[0];
248
+ expect(firstToolGroup).toBeDefined();
249
+ if (!firstToolGroup || !('functionDeclarations' in firstToolGroup)) {
250
+ throw new Error('Test expectation failed: Config does not contain functionDeclarations.');
251
+ }
252
+ const sentTools = firstToolGroup.functionDeclarations;
253
+ expect(sentTools).toBeDefined();
254
+ const completeToolDef = sentTools.find((t) => t.name === TASK_COMPLETE_TOOL_NAME);
255
+ expect(completeToolDef?.parameters?.required).toEqual([]);
256
+ expect(completeToolDef?.description).toContain('signal that you have completed');
257
+ expect(output.result).toBe('Task completed successfully.');
258
+ expect(output.terminate_reason).toBe(AgentTerminateMode.GOAL);
259
+ });
260
+ it('should error immediately if the model stops tools without calling complete_task (Protocol Violation)', async () => {
261
+ const definition = createTestDefinition();
262
+ const executor = await AgentExecutor.create(definition, mockConfig, onActivity);
263
+ mockModelResponse([
264
+ { name: LSTool.Name, args: { path: '.' }, id: 'call1' },
265
+ ]);
266
+ mockExecuteToolCall.mockResolvedValueOnce({
267
+ callId: 'call1',
268
+ resultDisplay: 'ok',
269
+ responseParts: [
270
+ {
271
+ functionResponse: { name: LSTool.Name, response: {}, id: 'call1' },
272
+ },
273
+ ],
274
+ });
275
+ mockModelResponse([], 'I think I am done.');
276
+ const output = await executor.run({ goal: 'Strict test' }, signal);
277
+ expect(mockSendMessageStream).toHaveBeenCalledTimes(2);
278
+ const expectedError = `Agent stopped calling tools but did not call '${TASK_COMPLETE_TOOL_NAME}' to finalize the session.`;
279
+ expect(output.terminate_reason).toBe(AgentTerminateMode.ERROR);
280
+ expect(output.result).toBe(expectedError);
281
+ expect(activities).toContainEqual(expect.objectContaining({
282
+ type: 'ERROR',
283
+ data: expect.objectContaining({
284
+ context: 'protocol_violation',
285
+ error: expectedError,
286
+ }),
287
+ }));
288
+ });
289
+ it('should report an error if complete_task is called with missing required arguments', async () => {
290
+ const definition = createTestDefinition();
291
+ const executor = await AgentExecutor.create(definition, mockConfig, onActivity);
292
+ // Turn 1: Missing arg
293
+ mockModelResponse([
294
+ {
295
+ name: TASK_COMPLETE_TOOL_NAME,
296
+ args: { wrongArg: 'oops' },
297
+ id: 'call1',
298
+ },
299
+ ]);
300
+ // Turn 2: Corrected
301
+ mockModelResponse([
302
+ {
303
+ name: TASK_COMPLETE_TOOL_NAME,
304
+ args: { finalResult: 'Corrected result' },
305
+ id: 'call2',
306
+ },
307
+ ]);
308
+ const output = await executor.run({ goal: 'Error test' }, signal);
309
+ expect(mockSendMessageStream).toHaveBeenCalledTimes(2);
310
+ const expectedError = "Missing required argument 'finalResult' for completion.";
311
+ expect(activities).toContainEqual(expect.objectContaining({
312
+ type: 'ERROR',
313
+ data: {
314
+ context: 'tool_call',
315
+ name: TASK_COMPLETE_TOOL_NAME,
316
+ error: expectedError,
317
+ },
318
+ }));
319
+ const turn2Params = getMockMessageParams(1);
320
+ const turn2Parts = turn2Params.message;
321
+ expect(turn2Parts).toBeDefined();
322
+ expect(turn2Parts).toHaveLength(1);
323
+ expect(turn2Parts[0]).toEqual(expect.objectContaining({
324
+ functionResponse: expect.objectContaining({
325
+ name: TASK_COMPLETE_TOOL_NAME,
326
+ response: { error: expectedError },
327
+ id: 'call1',
328
+ }),
329
+ }));
330
+ expect(output.result).toBe('Corrected result');
331
+ expect(output.terminate_reason).toBe(AgentTerminateMode.GOAL);
332
+ });
333
+ it('should handle multiple calls to complete_task in the same turn (accept first, block rest)', async () => {
334
+ const definition = createTestDefinition([], {}, 'none');
335
+ const executor = await AgentExecutor.create(definition, mockConfig, onActivity);
336
+ // Turn 1: Duplicate calls
337
+ mockModelResponse([
338
+ { name: TASK_COMPLETE_TOOL_NAME, args: {}, id: 'call1' },
339
+ { name: TASK_COMPLETE_TOOL_NAME, args: {}, id: 'call2' },
340
+ ]);
341
+ const output = await executor.run({ goal: 'Dup test' }, signal);
342
+ expect(mockSendMessageStream).toHaveBeenCalledTimes(1);
343
+ expect(output.terminate_reason).toBe(AgentTerminateMode.GOAL);
344
+ const completions = activities.filter((a) => a.type === 'TOOL_CALL_END' &&
345
+ a.data['name'] === TASK_COMPLETE_TOOL_NAME);
346
+ const errors = activities.filter((a) => a.type === 'ERROR' && a.data['name'] === TASK_COMPLETE_TOOL_NAME);
347
+ expect(completions).toHaveLength(1);
348
+ expect(errors).toHaveLength(1);
349
+ expect(errors[0].data['error']).toContain('Task already marked complete in this turn');
350
+ });
351
+ it('should execute parallel tool calls and then complete', async () => {
352
+ const definition = createTestDefinition([LSTool.Name]);
197
353
  const executor = await AgentExecutor.create(definition, mockConfig, onActivity);
198
354
  const call1 = {
199
355
  name: LSTool.Name,
200
- args: { path: '/dir1' },
201
- id: 'call1',
356
+ args: { path: '/a' },
357
+ id: 'c1',
202
358
  };
203
- // Using LSTool twice for simplicity in mocking standardized responses.
204
359
  const call2 = {
205
360
  name: LSTool.Name,
206
- args: { path: '/dir2' },
207
- id: 'call2',
361
+ args: { path: '/b' },
362
+ id: 'c2',
208
363
  };
209
- // Turn 1: Model calls two tools simultaneously
210
- mockModelResponse([call1, call2], 'T1: Listing both');
211
- // Use concurrency tracking to ensure parallelism
212
- let activeCalls = 0;
213
- let maxActiveCalls = 0;
364
+ // Turn 1: Parallel calls
365
+ mockModelResponse([call1, call2]);
366
+ // Concurrency mock
367
+ let callsStarted = 0;
368
+ let resolveCalls;
369
+ const bothStarted = new Promise((r) => {
370
+ resolveCalls = r;
371
+ });
214
372
  mockExecuteToolCall.mockImplementation(async (_ctx, reqInfo) => {
215
- activeCalls++;
216
- maxActiveCalls = Math.max(maxActiveCalls, activeCalls);
217
- // Simulate latency. We must advance the fake timers for this to resolve.
218
- await new Promise((resolve) => setTimeout(resolve, 100));
219
- activeCalls--;
373
+ callsStarted++;
374
+ if (callsStarted === 2)
375
+ resolveCalls();
376
+ await vi.advanceTimersByTimeAsync(100);
220
377
  return {
221
378
  callId: reqInfo.callId,
222
- resultDisplay: `Result for ${reqInfo.name}`,
379
+ resultDisplay: 'ok',
223
380
  responseParts: [
224
381
  {
225
382
  functionResponse: {
@@ -229,262 +386,145 @@ describe('AgentExecutor', () => {
229
386
  },
230
387
  },
231
388
  ],
232
- error: undefined,
233
389
  };
234
390
  });
235
- // Turn 2: Model stops
236
- mockModelResponse([]);
237
- // Extraction
238
- mockModelResponse([], undefined, 'Done.');
239
- const runPromise = executor.run({ goal: 'Parallel test' }, signal);
240
- // Advance timers while the parallel calls (Promise.all + setTimeout) are running
391
+ // Turn 2: Completion
392
+ mockModelResponse([
393
+ {
394
+ name: TASK_COMPLETE_TOOL_NAME,
395
+ args: { finalResult: 'done' },
396
+ id: 'c3',
397
+ },
398
+ ]);
399
+ const runPromise = executor.run({ goal: 'Parallel' }, signal);
400
+ await vi.advanceTimersByTimeAsync(1);
401
+ await bothStarted;
241
402
  await vi.advanceTimersByTimeAsync(150);
242
- await runPromise;
403
+ await vi.advanceTimersByTimeAsync(1);
404
+ const output = await runPromise;
243
405
  expect(mockExecuteToolCall).toHaveBeenCalledTimes(2);
244
- expect(maxActiveCalls).toBe(2);
245
- // Verify the input to the next model call (Turn 2) contains both responses
246
- // sendMessageStream calls: [0] Turn 1, [1] Turn 2, [2] Extraction
247
- const turn2Input = mockSendMessageStream.mock.calls[1][1];
248
- const turn2Parts = turn2Input.message;
249
- // Promise.all preserves the order of the input array.
250
- expect(turn2Parts.length).toBe(2);
251
- expect(turn2Parts[0]).toEqual(expect.objectContaining({
252
- functionResponse: expect.objectContaining({ id: 'call1' }),
253
- }));
254
- expect(turn2Parts[1]).toEqual(expect.objectContaining({
255
- functionResponse: expect.objectContaining({ id: 'call2' }),
256
- }));
257
- });
258
- it('should use the templated query from promptConfig.query when provided', async () => {
259
- const customQuery = 'Please achieve the goal: ${goal}';
260
- const definition = createTestDefinition([], // No tools needed for this test
261
- {}, {}, { query: customQuery, systemPrompt: 'You are a helpful agent.' });
262
- const executor = await AgentExecutor.create(definition, mockConfig, onActivity);
263
- const inputs = { goal: 'test custom query' };
264
- // Model stops immediately
265
- mockModelResponse([]);
266
- // Extraction
267
- mockModelResponse([], undefined, 'Done.');
268
- await executor.run(inputs, signal);
269
- // Verify the first call to sendMessageStream (the work phase)
270
- const workPhaseCallArgs = mockSendMessageStream.mock.calls[0][1];
271
- const workPhaseMessageParts = workPhaseCallArgs.message;
272
- expect(workPhaseMessageParts).toEqual([
273
- { text: 'Please achieve the goal: test custom query' },
274
- ]);
275
- });
276
- it('should default to "Get Started!" when promptConfig.query is not provided', async () => {
277
- const definition = createTestDefinition([], // No tools needed for this test
278
- {}, {}, { query: undefined, systemPrompt: 'You are a helpful agent.' });
279
- const executor = await AgentExecutor.create(definition, mockConfig, onActivity);
280
- const inputs = { goal: 'test default query' };
281
- // Model stops immediately
282
- mockModelResponse([]);
283
- // Extraction
284
- mockModelResponse([], undefined, 'Done.');
285
- await executor.run(inputs, signal);
286
- // Verify the first call to sendMessageStream (the work phase)
287
- const workPhaseCallArgs = mockSendMessageStream.mock.calls[0][1];
288
- const workPhaseMessageParts = workPhaseCallArgs.message;
289
- expect(workPhaseMessageParts).toEqual([{ text: 'Get Started!' }]);
406
+ expect(output.terminate_reason).toBe(AgentTerminateMode.GOAL);
407
+ // Safe access to message parts
408
+ const turn2Params = getMockMessageParams(1);
409
+ const parts = turn2Params.message;
410
+ expect(parts).toBeDefined();
411
+ expect(parts).toHaveLength(2);
412
+ expect(parts).toEqual(expect.arrayContaining([
413
+ expect.objectContaining({
414
+ functionResponse: expect.objectContaining({ id: 'c1' }),
415
+ }),
416
+ expect.objectContaining({
417
+ functionResponse: expect.objectContaining({ id: 'c2' }),
418
+ }),
419
+ ]));
290
420
  });
291
- it('should handle tool execution failure gracefully and report error', async () => {
421
+ it('SECURITY: should block unauthorized tools and provide explicit failure to model', async () => {
292
422
  const definition = createTestDefinition([LSTool.Name]);
293
423
  const executor = await AgentExecutor.create(definition, mockConfig, onActivity);
294
- // Turn 1: Model calls ls, but it fails
424
+ // Turn 1: Model tries to use a tool not in its config
425
+ const badCallId = 'bad_call_1';
295
426
  mockModelResponse([
296
- { name: LSTool.Name, args: { path: '/invalid' }, id: 'call1' },
297
- ]);
298
- const errorMessage = 'Internal failure.';
299
- mockExecuteToolCall.mockResolvedValueOnce({
300
- callId: 'call1',
301
- resultDisplay: `Error: ${errorMessage}`,
302
- responseParts: undefined, // Failed tools might return undefined parts
303
- error: { message: errorMessage },
304
- });
305
- // Turn 2: Model stops
306
- mockModelResponse([]);
307
- mockModelResponse([], undefined, 'Failed.');
308
- await executor.run({ goal: 'Failure test' }, signal);
309
- // Verify that the error was reported in the activity stream
310
- expect(activities).toContainEqual(expect.objectContaining({
311
- type: 'ERROR',
312
- data: {
313
- error: errorMessage,
314
- context: 'tool_call',
315
- name: LSTool.Name,
316
- },
317
- }));
318
- // Verify the input to the next model call (Turn 2) contains the fallback error message
319
- const turn2Input = mockSendMessageStream.mock.calls[1][1];
320
- const turn2Parts = turn2Input.message;
321
- expect(turn2Parts).toEqual([
322
427
  {
323
- text: 'All tool calls failed. Please analyze the errors and try an alternative approach.',
428
+ name: ReadFileTool.Name,
429
+ args: { path: 'secret.txt' },
430
+ id: badCallId,
324
431
  },
325
432
  ]);
326
- });
327
- it('SECURITY: should block calls to tools not registered for the agent at runtime', async () => {
328
- // Agent definition only includes LSTool
329
- const definition = createTestDefinition([LSTool.Name]);
330
- const executor = await AgentExecutor.create(definition, mockConfig, onActivity);
331
- // Turn 1: Model hallucinates a call to ReadFileTool
332
- // (ReadFileTool exists in the parent registry but not the agent's isolated registry)
433
+ // Turn 2: Model gives up and completes
333
434
  mockModelResponse([
334
435
  {
335
- name: ReadFileTool.Name,
336
- args: { path: 'config.txt' },
337
- id: 'call_blocked',
436
+ name: TASK_COMPLETE_TOOL_NAME,
437
+ args: { finalResult: 'Could not read file.' },
438
+ id: 'c2',
338
439
  },
339
440
  ]);
340
- // Turn 2: Model stops
341
- mockModelResponse([]);
342
- // Extraction
343
- mockModelResponse([], undefined, 'Done.');
344
441
  const consoleWarnSpy = vi
345
442
  .spyOn(console, 'warn')
346
443
  .mockImplementation(() => { });
347
- await executor.run({ goal: 'Security test' }, signal);
348
- // Verify executeToolCall was NEVER called because the tool was unauthorized
444
+ await executor.run({ goal: 'Sec test' }, signal);
445
+ // Verify external executor was not called (Security held)
349
446
  expect(mockExecuteToolCall).not.toHaveBeenCalled();
350
- expect(consoleWarnSpy).toHaveBeenCalledWith(expect.stringContaining(`attempted to call unauthorized tool '${ReadFileTool.Name}'`));
447
+ // 2. Verify console warning
448
+ expect(consoleWarnSpy).toHaveBeenCalledWith(expect.stringContaining(`[AgentExecutor] Blocked call:`));
351
449
  consoleWarnSpy.mockRestore();
352
- // Verify the input to the next model call (Turn 2) indicates failure (as the only call was blocked)
353
- const turn2Input = mockSendMessageStream.mock.calls[1][1];
354
- const turn2Parts = turn2Input.message;
355
- expect(turn2Parts[0].text).toContain('All tool calls failed');
356
- });
357
- it('should use OutputConfig completion_criteria in the extraction message', async () => {
358
- const definition = createTestDefinition([LSTool.Name], {}, {
359
- description: 'A summary.',
360
- completion_criteria: ['Must include file names', 'Must be concise'],
361
- });
362
- const executor = await AgentExecutor.create(definition, mockConfig, onActivity);
363
- // Turn 1: Model stops immediately
364
- mockModelResponse([]);
365
- // Extraction Phase
366
- mockModelResponse([], undefined, 'Result: Done.');
367
- await executor.run({ goal: 'Extraction test' }, signal);
368
- // Verify the extraction call (the second call)
369
- const extractionCallArgs = mockSendMessageStream.mock.calls[1][1];
370
- const extractionMessageParts = extractionCallArgs.message;
371
- const extractionText = extractionMessageParts[0].text;
372
- expect(extractionText).toContain('Based on your work so far, provide: A summary.');
373
- expect(extractionText).toContain('Be sure you have addressed:');
374
- expect(extractionText).toContain('- Must include file names');
375
- expect(extractionText).toContain('- Must be concise');
376
- });
377
- it('should apply templating to initialMessages', async () => {
378
- const definition = createTestDefinition([], // No tools needed
379
- {}, {}, {
380
- // Override systemPrompt to be undefined and provide initialMessages
381
- systemPrompt: undefined,
382
- initialMessages: [
383
- {
384
- role: 'user',
385
- parts: [{ text: 'The user wants to ${goal}.' }],
386
- },
387
- {
388
- role: 'model',
389
- parts: [{ text: 'Okay, I will start working on ${goal}.' }],
450
+ // Verify specific error was sent back to model
451
+ const turn2Params = getMockMessageParams(1);
452
+ const parts = turn2Params.message;
453
+ expect(parts).toBeDefined();
454
+ expect(parts[0]).toEqual(expect.objectContaining({
455
+ functionResponse: expect.objectContaining({
456
+ id: badCallId,
457
+ name: ReadFileTool.Name,
458
+ response: {
459
+ error: expect.stringContaining('Unauthorized tool call'),
390
460
  },
391
- ],
392
- });
393
- const executor = await AgentExecutor.create(definition, mockConfig, onActivity);
394
- const inputs = { goal: 'find the file' };
395
- // Model stops immediately
396
- mockModelResponse([]);
397
- // Extraction
398
- mockModelResponse([], undefined, 'Done.');
399
- await executor.run(inputs, signal);
400
- // Verify that the initialMessages were templated correctly
401
- const chatConstructorArgs = MockedGeminiChat.mock.calls[0];
402
- const startHistory = chatConstructorArgs[2]; // 3rd argument is startHistory
403
- expect(startHistory).toEqual([
404
- {
405
- role: 'user',
406
- parts: [{ text: 'The user wants to find the file.' }],
407
- },
408
- {
409
- role: 'model',
410
- parts: [{ text: 'Okay, I will start working on find the file.' }],
411
- },
412
- ]);
461
+ }),
462
+ }));
463
+ // Verify Activity Stream reported the error
464
+ expect(activities).toContainEqual(expect.objectContaining({
465
+ type: 'ERROR',
466
+ data: expect.objectContaining({
467
+ context: 'tool_call_unauthorized',
468
+ name: ReadFileTool.Name,
469
+ }),
470
+ }));
413
471
  });
414
472
  });
415
473
  describe('run (Termination Conditions)', () => {
416
- const mockKeepAliveResponse = () => {
417
- mockModelResponse([{ name: LSTool.Name, args: { path: '.' }, id: 'loop' }], 'Looping');
418
- mockExecuteToolCall.mockResolvedValue({
419
- callId: 'loop',
474
+ const mockWorkResponse = (id) => {
475
+ mockModelResponse([{ name: LSTool.Name, args: { path: '.' }, id }]);
476
+ mockExecuteToolCall.mockResolvedValueOnce({
477
+ callId: id,
420
478
  resultDisplay: 'ok',
421
479
  responseParts: [
422
- { functionResponse: { name: LSTool.Name, response: {}, id: 'loop' } },
480
+ { functionResponse: { name: LSTool.Name, response: {}, id } },
423
481
  ],
424
- error: undefined,
425
482
  });
426
483
  };
427
484
  it('should terminate when max_turns is reached', async () => {
428
- const MAX_TURNS = 2;
485
+ const MAX = 2;
429
486
  const definition = createTestDefinition([LSTool.Name], {
430
- max_turns: MAX_TURNS,
487
+ max_turns: MAX,
431
488
  });
432
- const executor = await AgentExecutor.create(definition, mockConfig, onActivity);
433
- // Turn 1
434
- mockKeepAliveResponse();
435
- // Turn 2
436
- mockKeepAliveResponse();
437
- const output = await executor.run({ goal: 'Termination test' }, signal);
489
+ const executor = await AgentExecutor.create(definition, mockConfig);
490
+ mockWorkResponse('t1');
491
+ mockWorkResponse('t2');
492
+ const output = await executor.run({ goal: 'Turns test' }, signal);
438
493
  expect(output.terminate_reason).toBe(AgentTerminateMode.MAX_TURNS);
439
- expect(mockSendMessageStream).toHaveBeenCalledTimes(MAX_TURNS);
440
- // Extraction phase should be skipped when termination is forced
441
- expect(mockSendMessageStream).not.toHaveBeenCalledWith(expect.any(String), expect.any(Object), expect.stringContaining('#extraction'));
494
+ expect(mockSendMessageStream).toHaveBeenCalledTimes(MAX);
442
495
  });
443
496
  it('should terminate if timeout is reached', async () => {
444
497
  const definition = createTestDefinition([LSTool.Name], {
445
- max_time_minutes: 5,
446
- max_turns: 100,
498
+ max_time_minutes: 1,
447
499
  });
448
- const executor = await AgentExecutor.create(definition, mockConfig, onActivity);
449
- // Turn 1 setup
450
- mockModelResponse([{ name: LSTool.Name, args: { path: '.' }, id: 'loop' }], 'Looping');
451
- // Mock a tool call that takes a long time, causing the overall timeout
452
- mockExecuteToolCall.mockImplementation(async () => {
453
- // Advance time past the 5-minute limit during the tool call execution
454
- await vi.advanceTimersByTimeAsync(5 * 60 * 1000 + 1);
500
+ const executor = await AgentExecutor.create(definition, mockConfig);
501
+ mockModelResponse([{ name: LSTool.Name, args: { path: '.' }, id: 't1' }]);
502
+ // Long running tool
503
+ mockExecuteToolCall.mockImplementationOnce(async () => {
504
+ await vi.advanceTimersByTimeAsync(61 * 1000);
455
505
  return {
456
- callId: 'loop',
506
+ callId: 't1',
457
507
  resultDisplay: 'ok',
458
- responseParts: [
459
- {
460
- functionResponse: { name: LSTool.Name, response: {}, id: 'loop' },
461
- },
462
- ],
463
- error: undefined,
508
+ responseParts: [],
464
509
  };
465
510
  });
466
- const output = await executor.run({ goal: 'Termination test' }, signal);
511
+ const output = await executor.run({ goal: 'Timeout test' }, signal);
467
512
  expect(output.terminate_reason).toBe(AgentTerminateMode.TIMEOUT);
468
- // Should only have called the model once before the timeout check stopped it
469
513
  expect(mockSendMessageStream).toHaveBeenCalledTimes(1);
470
514
  });
471
- it('should terminate when AbortSignal is triggered mid-stream', async () => {
515
+ it('should terminate when AbortSignal is triggered', async () => {
472
516
  const definition = createTestDefinition();
473
- const executor = await AgentExecutor.create(definition, mockConfig, onActivity);
474
- // Mock the model response stream
475
- mockSendMessageStream.mockImplementation(async () => (async function* () {
476
- // Yield the first chunk
517
+ const executor = await AgentExecutor.create(definition, mockConfig);
518
+ mockSendMessageStream.mockImplementationOnce(async () => (async function* () {
477
519
  yield {
478
520
  type: StreamEventType.CHUNK,
479
521
  value: createMockResponseChunk([
480
- { text: '**Thinking** Step 1', thought: true },
522
+ { text: 'Thinking...', thought: true },
481
523
  ]),
482
524
  };
483
- // Simulate abort happening mid-stream
484
525
  abortController.abort();
485
- // The loop in callModel should break immediately due to signal check.
486
526
  })());
487
- const output = await executor.run({ goal: 'Termination test' }, signal);
527
+ const output = await executor.run({ goal: 'Abort test' }, signal);
488
528
  expect(output.terminate_reason).toBe(AgentTerminateMode.ABORTED);
489
529
  });
490
530
  });