@machina.ai/cell-cli-core 1.8.2-rc1 → 1.10.0-rc1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (237) hide show
  1. package/dist/index.d.ts +1 -1
  2. package/dist/index.js +1 -1
  3. package/dist/index.js.map +1 -1
  4. package/dist/package.json +1 -1
  5. package/dist/src/agents/codebase-investigator.d.ts +36 -1
  6. package/dist/src/agents/codebase-investigator.js +93 -31
  7. package/dist/src/agents/codebase-investigator.js.map +1 -1
  8. package/dist/src/agents/executor.d.ts +15 -11
  9. package/dist/src/agents/executor.js +265 -103
  10. package/dist/src/agents/executor.js.map +1 -1
  11. package/dist/src/agents/executor.test.js +493 -232
  12. package/dist/src/agents/executor.test.js.map +1 -1
  13. package/dist/src/agents/invocation.d.ts +5 -2
  14. package/dist/src/agents/invocation.js +4 -2
  15. package/dist/src/agents/invocation.js.map +1 -1
  16. package/dist/src/agents/invocation.test.js +9 -0
  17. package/dist/src/agents/invocation.test.js.map +1 -1
  18. package/dist/src/agents/registry.d.ts +2 -1
  19. package/dist/src/agents/registry.js +24 -1
  20. package/dist/src/agents/registry.js.map +1 -1
  21. package/dist/src/agents/subagent-tool-wrapper.d.ts +3 -1
  22. package/dist/src/agents/subagent-tool-wrapper.js +4 -3
  23. package/dist/src/agents/subagent-tool-wrapper.js.map +1 -1
  24. package/dist/src/agents/subagent-tool-wrapper.test.js +8 -1
  25. package/dist/src/agents/subagent-tool-wrapper.test.js.map +1 -1
  26. package/dist/src/agents/types.d.ts +35 -6
  27. package/dist/src/agents/types.js +1 -0
  28. package/dist/src/agents/types.js.map +1 -1
  29. package/dist/src/code_assist/oauth-credential-storage.js +1 -1
  30. package/dist/src/code_assist/oauth-credential-storage.js.map +1 -1
  31. package/dist/src/code_assist/oauth-credential-storage.test.js +1 -1
  32. package/dist/src/code_assist/oauth-credential-storage.test.js.map +1 -1
  33. package/dist/src/code_assist/oauth2.test.js +14 -13
  34. package/dist/src/code_assist/oauth2.test.js.map +1 -1
  35. package/dist/src/code_assist/setup.js +4 -2
  36. package/dist/src/code_assist/setup.js.map +1 -1
  37. package/dist/src/config/config.d.ts +48 -9
  38. package/dist/src/config/config.js +91 -16
  39. package/dist/src/config/config.js.map +1 -1
  40. package/dist/src/config/config.test.js +67 -3
  41. package/dist/src/config/config.test.js.map +1 -1
  42. package/dist/src/config/storage.d.ts +0 -1
  43. package/dist/src/config/storage.js +2 -2
  44. package/dist/src/config/storage.js.map +1 -1
  45. package/dist/src/config/storage.test.js +7 -6
  46. package/dist/src/config/storage.test.js.map +1 -1
  47. package/dist/src/core/client.d.ts +3 -1
  48. package/dist/src/core/client.js +67 -17
  49. package/dist/src/core/client.js.map +1 -1
  50. package/dist/src/core/client.test.js +199 -5
  51. package/dist/src/core/client.test.js.map +1 -1
  52. package/dist/src/core/contentGenerator.js +3 -1
  53. package/dist/src/core/contentGenerator.js.map +1 -1
  54. package/dist/src/core/coreToolScheduler.js +12 -12
  55. package/dist/src/core/coreToolScheduler.js.map +1 -1
  56. package/dist/src/core/coreToolScheduler.test.js +227 -0
  57. package/dist/src/core/coreToolScheduler.test.js.map +1 -1
  58. package/dist/src/core/geminiChat.d.ts +7 -11
  59. package/dist/src/core/geminiChat.js +32 -70
  60. package/dist/src/core/geminiChat.js.map +1 -1
  61. package/dist/src/core/geminiChat.test.js +93 -228
  62. package/dist/src/core/geminiChat.test.js.map +1 -1
  63. package/dist/src/core/logger.test.js +2 -2
  64. package/dist/src/core/logger.test.js.map +1 -1
  65. package/dist/src/core/nonInteractiveToolExecutor.d.ts +3 -2
  66. package/dist/src/core/nonInteractiveToolExecutor.js +2 -2
  67. package/dist/src/core/nonInteractiveToolExecutor.js.map +1 -1
  68. package/dist/src/core/nonInteractiveToolExecutor.test.js +8 -8
  69. package/dist/src/core/nonInteractiveToolExecutor.test.js.map +1 -1
  70. package/dist/src/core/prompts.d.ts +2 -1
  71. package/dist/src/core/prompts.js +51 -110
  72. package/dist/src/core/prompts.js.map +1 -1
  73. package/dist/src/core/prompts.test.js +83 -29
  74. package/dist/src/core/prompts.test.js.map +1 -1
  75. package/dist/src/core/subagent.js +1 -1
  76. package/dist/src/core/subagent.js.map +1 -1
  77. package/dist/src/core/subagent.test.js +38 -12
  78. package/dist/src/core/subagent.test.js.map +1 -1
  79. package/dist/src/core/turn.d.ts +14 -2
  80. package/dist/src/core/turn.js +12 -1
  81. package/dist/src/core/turn.js.map +1 -1
  82. package/dist/src/core/turn.test.js +14 -2
  83. package/dist/src/core/turn.test.js.map +1 -1
  84. package/dist/src/generated/git-commit.d.ts +2 -2
  85. package/dist/src/generated/git-commit.js +2 -2
  86. package/dist/src/generated/git-commit.js.map +1 -1
  87. package/dist/src/ide/detect-ide.test.js +11 -0
  88. package/dist/src/ide/detect-ide.test.js.map +1 -1
  89. package/dist/src/ide/ide-client.js +3 -3
  90. package/dist/src/ide/ide-client.test.js +4 -4
  91. package/dist/src/ide/ide-installer.js +1 -1
  92. package/dist/src/ide/ide-installer.js.map +1 -1
  93. package/dist/src/ide/ide-installer.test.js +13 -1
  94. package/dist/src/ide/ide-installer.test.js.map +1 -1
  95. package/dist/src/ide/process-utils.js +85 -75
  96. package/dist/src/ide/process-utils.js.map +1 -1
  97. package/dist/src/ide/process-utils.test.js +83 -90
  98. package/dist/src/ide/process-utils.test.js.map +1 -1
  99. package/dist/src/index.d.ts +2 -0
  100. package/dist/src/index.js +2 -0
  101. package/dist/src/index.js.map +1 -1
  102. package/dist/src/mcp/token-storage/file-token-storage.js +2 -1
  103. package/dist/src/mcp/token-storage/file-token-storage.js.map +1 -1
  104. package/dist/src/mcp/token-storage/file-token-storage.test.js +4 -3
  105. package/dist/src/mcp/token-storage/file-token-storage.test.js.map +1 -1
  106. package/dist/src/services/chatRecordingService.d.ts +2 -1
  107. package/dist/src/services/chatRecordingService.js +2 -1
  108. package/dist/src/services/chatRecordingService.js.map +1 -1
  109. package/dist/src/services/shellExecutionService.d.ts +1 -0
  110. package/dist/src/services/shellExecutionService.js +144 -69
  111. package/dist/src/services/shellExecutionService.js.map +1 -1
  112. package/dist/src/services/shellExecutionService.test.js +61 -1
  113. package/dist/src/services/shellExecutionService.test.js.map +1 -1
  114. package/dist/src/telemetry/clearcut-logger/clearcut-logger.d.ts +14 -2
  115. package/dist/src/telemetry/clearcut-logger/clearcut-logger.js +104 -8
  116. package/dist/src/telemetry/clearcut-logger/clearcut-logger.js.map +1 -1
  117. package/dist/src/telemetry/clearcut-logger/clearcut-logger.test.js +81 -1
  118. package/dist/src/telemetry/clearcut-logger/clearcut-logger.test.js.map +1 -1
  119. package/dist/src/telemetry/clearcut-logger/event-metadata-key.d.ts +13 -3
  120. package/dist/src/telemetry/clearcut-logger/event-metadata-key.js +32 -5
  121. package/dist/src/telemetry/clearcut-logger/event-metadata-key.js.map +1 -1
  122. package/dist/src/telemetry/constants.d.ts +0 -28
  123. package/dist/src/telemetry/constants.js +0 -29
  124. package/dist/src/telemetry/constants.js.map +1 -1
  125. package/dist/src/telemetry/index.d.ts +5 -3
  126. package/dist/src/telemetry/index.js +11 -4
  127. package/dist/src/telemetry/index.js.map +1 -1
  128. package/dist/src/telemetry/loggers.d.ts +8 -2
  129. package/dist/src/telemetry/loggers.js +165 -299
  130. package/dist/src/telemetry/loggers.js.map +1 -1
  131. package/dist/src/telemetry/loggers.test.js +195 -7
  132. package/dist/src/telemetry/loggers.test.js.map +1 -1
  133. package/dist/src/telemetry/memory-monitor.d.ts +149 -0
  134. package/dist/src/telemetry/memory-monitor.js +335 -0
  135. package/dist/src/telemetry/memory-monitor.js.map +1 -0
  136. package/dist/src/telemetry/memory-monitor.test.d.ts +6 -0
  137. package/dist/src/telemetry/memory-monitor.test.js +472 -0
  138. package/dist/src/telemetry/memory-monitor.test.js.map +1 -0
  139. package/dist/src/telemetry/metrics.d.ts +131 -4
  140. package/dist/src/telemetry/metrics.js +182 -6
  141. package/dist/src/telemetry/metrics.js.map +1 -1
  142. package/dist/src/telemetry/metrics.test.js +360 -1
  143. package/dist/src/telemetry/metrics.test.js.map +1 -1
  144. package/dist/src/telemetry/telemetryAttributes.d.ts +8 -0
  145. package/dist/src/telemetry/telemetryAttributes.js +18 -0
  146. package/dist/src/telemetry/telemetryAttributes.js.map +1 -0
  147. package/dist/src/telemetry/types.d.ts +150 -3
  148. package/dist/src/telemetry/types.js +664 -33
  149. package/dist/src/telemetry/types.js.map +1 -1
  150. package/dist/src/telemetry/uiTelemetry.d.ts +1 -1
  151. package/dist/src/telemetry/uiTelemetry.js +1 -1
  152. package/dist/src/telemetry/uiTelemetry.js.map +1 -1
  153. package/dist/src/telemetry/uiTelemetry.test.js +1 -1
  154. package/dist/src/telemetry/uiTelemetry.test.js.map +1 -1
  155. package/dist/src/tools/glob.js +2 -1
  156. package/dist/src/tools/glob.js.map +1 -1
  157. package/dist/src/tools/mcp-client.d.ts +3 -2
  158. package/dist/src/tools/mcp-client.js +29 -33
  159. package/dist/src/tools/mcp-client.js.map +1 -1
  160. package/dist/src/tools/mcp-client.test.js +168 -5
  161. package/dist/src/tools/mcp-client.test.js.map +1 -1
  162. package/dist/src/tools/memoryTool.d.ts +1 -1
  163. package/dist/src/tools/memoryTool.js +1 -2
  164. package/dist/src/tools/memoryTool.js.map +1 -1
  165. package/dist/src/tools/memoryTool.test.js +9 -8
  166. package/dist/src/tools/memoryTool.test.js.map +1 -1
  167. package/dist/src/tools/shell.js +55 -2
  168. package/dist/src/tools/shell.js.map +1 -1
  169. package/dist/src/tools/shell.test.js +2 -1
  170. package/dist/src/tools/shell.test.js.map +1 -1
  171. package/dist/src/tools/smart-edit.d.ts +1 -20
  172. package/dist/src/tools/smart-edit.js +57 -55
  173. package/dist/src/tools/smart-edit.js.map +1 -1
  174. package/dist/src/tools/smart-edit.test.js +70 -86
  175. package/dist/src/tools/smart-edit.test.js.map +1 -1
  176. package/dist/src/tools/tool-error.d.ts +21 -0
  177. package/dist/src/tools/tool-error.js +27 -0
  178. package/dist/src/tools/tool-error.js.map +1 -1
  179. package/dist/src/tools/tool-names.d.ts +9 -0
  180. package/dist/src/tools/tool-names.js +18 -0
  181. package/dist/src/tools/tool-names.js.map +1 -0
  182. package/dist/src/tools/web-fetch.d.ts +7 -0
  183. package/dist/src/tools/web-fetch.js +42 -10
  184. package/dist/src/tools/web-fetch.js.map +1 -1
  185. package/dist/src/tools/web-fetch.test.js +127 -8
  186. package/dist/src/tools/web-fetch.test.js.map +1 -1
  187. package/dist/src/tools/web-search.js +2 -1
  188. package/dist/src/tools/web-search.js.map +1 -1
  189. package/dist/src/tools/write-file.js +2 -1
  190. package/dist/src/tools/write-file.js.map +1 -1
  191. package/dist/src/tools/write-todos.d.ts +1 -1
  192. package/dist/src/tools/write-todos.js +4 -3
  193. package/dist/src/tools/write-todos.js.map +1 -1
  194. package/dist/src/utils/editCorrector.js +2 -2
  195. package/dist/src/utils/editCorrector.js.map +1 -1
  196. package/dist/src/utils/editor.js +1 -0
  197. package/dist/src/utils/editor.js.map +1 -1
  198. package/dist/src/utils/editor.test.js +1 -0
  199. package/dist/src/utils/editor.test.js.map +1 -1
  200. package/dist/src/utils/formatters.d.ts +1 -0
  201. package/dist/src/utils/formatters.js +2 -1
  202. package/dist/src/utils/formatters.js.map +1 -1
  203. package/dist/src/utils/formatters.test.d.ts +6 -0
  204. package/dist/src/utils/formatters.test.js +26 -0
  205. package/dist/src/utils/formatters.test.js.map +1 -0
  206. package/dist/src/utils/getFolderStructure.test.js +7 -6
  207. package/dist/src/utils/getFolderStructure.test.js.map +1 -1
  208. package/dist/src/utils/installationManager.test.js +2 -1
  209. package/dist/src/utils/installationManager.test.js.map +1 -1
  210. package/dist/src/utils/llm-edit-fixer.js +3 -3
  211. package/dist/src/utils/memoryDiscovery.d.ts +1 -0
  212. package/dist/src/utils/memoryDiscovery.js +2 -1
  213. package/dist/src/utils/memoryDiscovery.js.map +1 -1
  214. package/dist/src/utils/memoryDiscovery.test.js +99 -21
  215. package/dist/src/utils/memoryDiscovery.test.js.map +1 -1
  216. package/dist/src/utils/pathCorrector.d.ts +25 -0
  217. package/dist/src/utils/pathCorrector.js +33 -0
  218. package/dist/src/utils/pathCorrector.js.map +1 -0
  219. package/dist/src/utils/pathCorrector.test.d.ts +6 -0
  220. package/dist/src/utils/pathCorrector.test.js +83 -0
  221. package/dist/src/utils/pathCorrector.test.js.map +1 -0
  222. package/dist/src/utils/retry.d.ts +2 -1
  223. package/dist/src/utils/retry.js +22 -14
  224. package/dist/src/utils/retry.js.map +1 -1
  225. package/dist/src/utils/retry.test.js +83 -39
  226. package/dist/src/utils/retry.test.js.map +1 -1
  227. package/dist/src/utils/shell-utils.d.ts +1 -0
  228. package/dist/src/utils/shell-utils.js +1 -1
  229. package/dist/src/utils/shell-utils.js.map +1 -1
  230. package/dist/src/utils/tool-utils.js +2 -2
  231. package/dist/src/utils/tool-utils.js.map +1 -1
  232. package/dist/src/utils/tool-utils.test.js +8 -0
  233. package/dist/src/utils/tool-utils.test.js.map +1 -1
  234. package/dist/src/utils/userAccountManager.test.js +2 -1
  235. package/dist/src/utils/userAccountManager.test.js.map +1 -1
  236. package/dist/tsconfig.tsbuildinfo +1 -1
  237. package/package.json +1 -1
@@ -3,16 +3,21 @@
3
3
  * Copyright 2025 Google LLC
4
4
  * SPDX-License-Identifier: Apache-2.0
5
5
  */
6
- import { describe, it, expect, vi, beforeEach, afterEach, } from 'vitest';
6
+ import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
7
7
  import { AgentExecutor } from './executor.js';
8
- import { AgentTerminateMode } from './types.js';
9
8
  import { makeFakeConfig } from '../test-utils/config.js';
10
9
  import { ToolRegistry } from '../tools/tool-registry.js';
11
10
  import { LSTool } from '../tools/ls.js';
12
11
  import { ReadFileTool } from '../tools/read-file.js';
13
12
  import { GeminiChat, StreamEventType, } from '../core/geminiChat.js';
13
+ import {} from '@google/genai';
14
14
  import { MockTool } from '../test-utils/mock-tool.js';
15
15
  import { getDirectoryContextString } from '../utils/environmentContext.js';
16
+ import { z } from 'zod';
17
+ import { promptIdContext } from '../utils/promptIdContext.js';
18
+ import { logAgentStart, logAgentFinish } from '../telemetry/loggers.js';
19
+ import { AgentStartEvent, AgentFinishEvent } from '../telemetry/types.js';
20
+ import { AgentTerminateMode } from './types.js';
16
21
  const { mockSendMessageStream, mockExecuteToolCall } = vi.hoisted(() => ({
17
22
  mockSendMessageStream: vi.fn(),
18
23
  mockExecuteToolCall: vi.fn(),
@@ -30,13 +35,40 @@ vi.mock('../core/nonInteractiveToolExecutor.js', () => ({
30
35
  executeToolCall: mockExecuteToolCall,
31
36
  }));
32
37
  vi.mock('../utils/environmentContext.js');
33
- const MockedGeminiChat = GeminiChat;
34
- // A mock tool that is NOT on the NON_INTERACTIVE_TOOL_ALLOWLIST
35
- const MOCK_TOOL_NOT_ALLOWED = new MockTool({ name: 'write_file' });
38
+ vi.mock('../telemetry/loggers.js', () => ({
39
+ logAgentStart: vi.fn(),
40
+ logAgentFinish: vi.fn(),
41
+ }));
42
+ vi.mock('../utils/promptIdContext.js', async (importOriginal) => {
43
+ const actual = await importOriginal();
44
+ return {
45
+ ...actual,
46
+ promptIdContext: {
47
+ ...actual.promptIdContext,
48
+ getStore: vi.fn(),
49
+ run: vi.fn((_id, fn) => fn()),
50
+ },
51
+ };
52
+ });
53
+ const MockedGeminiChat = vi.mocked(GeminiChat);
54
+ const mockedGetDirectoryContextString = vi.mocked(getDirectoryContextString);
55
+ const mockedPromptIdContext = vi.mocked(promptIdContext);
56
+ const mockedLogAgentStart = vi.mocked(logAgentStart);
57
+ const mockedLogAgentFinish = vi.mocked(logAgentFinish);
58
+ // Constants for testing
59
+ const TASK_COMPLETE_TOOL_NAME = 'complete_task';
60
+ const MOCK_TOOL_NOT_ALLOWED = new MockTool({ name: 'write_file_interactive' });
61
+ /**
62
+ * Helper to create a mock API response chunk.
63
+ * Uses conditional spread to handle readonly functionCalls property safely.
64
+ */
36
65
  const createMockResponseChunk = (parts, functionCalls) => ({
37
66
  candidates: [{ index: 0, content: { role: 'model', parts } }],
38
- functionCalls,
67
+ ...(functionCalls && functionCalls.length > 0 ? { functionCalls } : {}),
39
68
  });
69
+ /**
70
+ * Helper to mock a single turn of model response in the stream.
71
+ */
40
72
  const mockModelResponse = (functionCalls, thought, text) => {
41
73
  const parts = [];
42
74
  if (thought) {
@@ -47,9 +79,7 @@ const mockModelResponse = (functionCalls, thought, text) => {
47
79
  }
48
80
  if (text)
49
81
  parts.push({ text });
50
- const responseChunk = createMockResponseChunk(parts,
51
- // Ensure functionCalls is undefined if the array is empty, matching API behavior
52
- functionCalls.length > 0 ? functionCalls : undefined);
82
+ const responseChunk = createMockResponseChunk(parts, functionCalls);
53
83
  mockSendMessageStream.mockImplementationOnce(async () => (async function* () {
54
84
  yield {
55
85
  type: StreamEventType.CHUNK,
@@ -57,30 +87,59 @@ const mockModelResponse = (functionCalls, thought, text) => {
57
87
  };
58
88
  })());
59
89
  };
90
+ /**
91
+ * Helper to extract the message parameters sent to sendMessageStream.
92
+ * Provides type safety for inspecting mock calls.
93
+ */
94
+ const getMockMessageParams = (callIndex) => {
95
+ const call = mockSendMessageStream.mock.calls[callIndex];
96
+ expect(call).toBeDefined();
97
+ // Arg 1 of sendMessageStream is the message parameters
98
+ return call[1];
99
+ };
60
100
  let mockConfig;
61
101
  let parentToolRegistry;
62
- const createTestDefinition = (tools = [LSTool.Name], runConfigOverrides = {}, outputConfigOverrides = {}) => ({
63
- name: 'TestAgent',
64
- description: 'An agent for testing.',
65
- inputConfig: {
66
- inputs: { goal: { type: 'string', required: true, description: 'goal' } },
67
- },
68
- modelConfig: { model: 'gemini-test-model', temp: 0, top_p: 1 },
69
- runConfig: { max_time_minutes: 5, max_turns: 5, ...runConfigOverrides },
70
- promptConfig: { systemPrompt: 'Achieve the goal: ${goal}.' },
71
- toolConfig: { tools },
72
- outputConfig: { description: 'The final result.', ...outputConfigOverrides },
73
- });
102
+ /**
103
+ * Type-safe helper to create agent definitions for tests.
104
+ */
105
+ const createTestDefinition = (tools = [LSTool.Name], runConfigOverrides = {}, outputConfigMode = 'default', schema = z.string()) => {
106
+ let outputConfig;
107
+ if (outputConfigMode === 'default') {
108
+ outputConfig = {
109
+ outputName: 'finalResult',
110
+ description: 'The final result.',
111
+ schema,
112
+ };
113
+ }
114
+ return {
115
+ name: 'TestAgent',
116
+ description: 'An agent for testing.',
117
+ inputConfig: {
118
+ inputs: { goal: { type: 'string', required: true, description: 'goal' } },
119
+ },
120
+ modelConfig: { model: 'gemini-test-model', temp: 0, top_p: 1 },
121
+ runConfig: { max_time_minutes: 5, max_turns: 5, ...runConfigOverrides },
122
+ promptConfig: { systemPrompt: 'Achieve the goal: ${goal}.' },
123
+ toolConfig: { tools },
124
+ outputConfig,
125
+ };
126
+ };
74
127
  describe('AgentExecutor', () => {
75
128
  let activities;
76
129
  let onActivity;
77
130
  let abortController;
78
131
  let signal;
79
132
  beforeEach(async () => {
80
- mockSendMessageStream.mockClear();
81
- mockExecuteToolCall.mockClear();
82
- vi.clearAllMocks();
83
- // Use fake timers for timeout and concurrency testing
133
+ vi.resetAllMocks();
134
+ mockSendMessageStream.mockReset();
135
+ mockExecuteToolCall.mockReset();
136
+ mockedLogAgentStart.mockReset();
137
+ mockedLogAgentFinish.mockReset();
138
+ mockedPromptIdContext.getStore.mockReset();
139
+ mockedPromptIdContext.run.mockImplementation((_id, fn) => fn());
140
+ MockedGeminiChat.mockImplementation(() => ({
141
+ sendMessageStream: mockSendMessageStream,
142
+ }));
84
143
  vi.useFakeTimers();
85
144
  mockConfig = makeFakeConfig();
86
145
  parentToolRegistry = new ToolRegistry(mockConfig);
@@ -88,7 +147,7 @@ describe('AgentExecutor', () => {
88
147
  parentToolRegistry.registerTool(new ReadFileTool(mockConfig));
89
148
  parentToolRegistry.registerTool(MOCK_TOOL_NOT_ALLOWED);
90
149
  vi.spyOn(mockConfig, 'getToolRegistry').mockResolvedValue(parentToolRegistry);
91
- vi.mocked(getDirectoryContextString).mockResolvedValue('Mocked Environment Context');
150
+ mockedGetDirectoryContextString.mockResolvedValue('Mocked Environment Context');
92
151
  activities = [];
93
152
  onActivity = (activity) => activities.push(activity);
94
153
  abortController = new AbortController();
@@ -105,313 +164,515 @@ describe('AgentExecutor', () => {
105
164
  });
106
165
  it('SECURITY: should throw if a tool is not on the non-interactive allowlist', async () => {
107
166
  const definition = createTestDefinition([MOCK_TOOL_NOT_ALLOWED.name]);
108
- await expect(AgentExecutor.create(definition, mockConfig, onActivity)).rejects.toThrow(`Tool "${MOCK_TOOL_NOT_ALLOWED.name}" is not on the allow-list for non-interactive execution`);
167
+ await expect(AgentExecutor.create(definition, mockConfig, onActivity)).rejects.toThrow(/not on the allow-list for non-interactive execution/);
109
168
  });
110
169
  it('should create an isolated ToolRegistry for the agent', async () => {
111
170
  const definition = createTestDefinition([LSTool.Name, ReadFileTool.Name]);
112
171
  const executor = await AgentExecutor.create(definition, mockConfig, onActivity);
113
- // @ts-expect-error - accessing private property for test validation
114
- const agentRegistry = executor.toolRegistry;
172
+ const agentRegistry = executor['toolRegistry'];
115
173
  expect(agentRegistry).not.toBe(parentToolRegistry);
116
174
  expect(agentRegistry.getAllToolNames()).toEqual(expect.arrayContaining([LSTool.Name, ReadFileTool.Name]));
117
175
  expect(agentRegistry.getAllToolNames()).toHaveLength(2);
118
176
  expect(agentRegistry.getTool(MOCK_TOOL_NOT_ALLOWED.name)).toBeUndefined();
119
177
  });
178
+ it('should use parentPromptId from context to create agentId', async () => {
179
+ const parentId = 'parent-id';
180
+ mockedPromptIdContext.getStore.mockReturnValue(parentId);
181
+ const definition = createTestDefinition();
182
+ const executor = await AgentExecutor.create(definition, mockConfig, onActivity);
183
+ expect(executor['agentId']).toMatch(new RegExp(`^${parentId}-${definition.name}-`));
184
+ });
120
185
  });
121
186
  describe('run (Execution Loop and Logic)', () => {
122
- it('should execute a successful work and extraction phase (Happy Path) and emit activities', async () => {
187
+ it('should log AgentFinish with error if run throws', async () => {
188
+ const definition = createTestDefinition();
189
+ // Make the definition invalid to cause an error during run
190
+ definition.inputConfig.inputs = {
191
+ goal: { type: 'string', required: true, description: 'goal' },
192
+ };
193
+ const executor = await AgentExecutor.create(definition, mockConfig, onActivity);
194
+ // Run without inputs to trigger validation error
195
+ await expect(executor.run({}, signal)).rejects.toThrow(/Missing required input parameters/);
196
+ expect(mockedLogAgentStart).toHaveBeenCalledTimes(1);
197
+ expect(mockedLogAgentFinish).toHaveBeenCalledTimes(1);
198
+ expect(mockedLogAgentFinish).toHaveBeenCalledWith(mockConfig, expect.objectContaining({
199
+ terminate_reason: AgentTerminateMode.ERROR,
200
+ }));
201
+ });
202
+ it('should execute successfully when model calls complete_task with output (Happy Path with Output)', async () => {
123
203
  const definition = createTestDefinition();
124
204
  const executor = await AgentExecutor.create(definition, mockConfig, onActivity);
125
205
  const inputs = { goal: 'Find files' };
126
206
  // Turn 1: Model calls ls
127
207
  mockModelResponse([{ name: LSTool.Name, args: { path: '.' }, id: 'call1' }], 'T1: Listing');
128
208
  mockExecuteToolCall.mockResolvedValueOnce({
129
- callId: 'call1',
130
- resultDisplay: 'file1.txt',
131
- responseParts: [
132
- {
133
- functionResponse: {
134
- name: LSTool.Name,
135
- response: { result: 'file1.txt' },
136
- id: 'call1',
209
+ status: 'success',
210
+ request: {
211
+ callId: 'call1',
212
+ name: LSTool.Name,
213
+ args: { path: '.' },
214
+ isClientInitiated: false,
215
+ prompt_id: 'test-prompt',
216
+ },
217
+ tool: {},
218
+ invocation: {},
219
+ response: {
220
+ callId: 'call1',
221
+ resultDisplay: 'file1.txt',
222
+ responseParts: [
223
+ {
224
+ functionResponse: {
225
+ name: LSTool.Name,
226
+ response: { result: 'file1.txt' },
227
+ id: 'call1',
228
+ },
137
229
  },
138
- },
139
- ],
140
- error: undefined,
230
+ ],
231
+ error: undefined,
232
+ errorType: undefined,
233
+ contentLength: undefined,
234
+ },
141
235
  });
142
- // Turn 2: Model stops
143
- mockModelResponse([], 'T2: Done');
144
- // Extraction Phase
145
- mockModelResponse([], undefined, 'Result: file1.txt.');
236
+ // Turn 2: Model calls complete_task with required output
237
+ mockModelResponse([
238
+ {
239
+ name: TASK_COMPLETE_TOOL_NAME,
240
+ args: { finalResult: 'Found file1.txt' },
241
+ id: 'call2',
242
+ },
243
+ ], 'T2: Done');
146
244
  const output = await executor.run(inputs, signal);
147
- expect(mockSendMessageStream).toHaveBeenCalledTimes(3);
148
- expect(mockExecuteToolCall).toHaveBeenCalledTimes(1);
149
- // Verify System Prompt Templating
245
+ expect(mockSendMessageStream).toHaveBeenCalledTimes(2);
150
246
  const chatConstructorArgs = MockedGeminiChat.mock.calls[0];
151
247
  const chatConfig = chatConstructorArgs[1];
152
- expect(chatConfig?.systemInstruction).toContain('Achieve the goal: Find files.');
153
- // Verify environment context is appended
154
- expect(chatConfig?.systemInstruction).toContain('# Environment Context\nMocked Environment Context');
155
- // Verify standard rules are appended
156
- expect(chatConfig?.systemInstruction).toContain('You are running in a non-interactive mode.');
157
- // Verify absolute path rule is appended
158
- expect(chatConfig?.systemInstruction).toContain('Always use absolute paths for file operations.');
159
- // Verify Extraction Phase Call (Specific arguments)
160
- expect(mockSendMessageStream).toHaveBeenCalledWith('gemini-test-model', expect.objectContaining({
161
- // Extraction message should be based on outputConfig.description
162
- message: expect.arrayContaining([
163
- {
164
- text: expect.stringContaining('Based on your work so far, provide: The final result.'),
165
- },
166
- ]),
167
- config: expect.objectContaining({ tools: undefined }), // No tools in extraction
168
- }), expect.stringContaining('#extraction'));
169
- expect(output.result).toBe('Result: file1.txt.');
248
+ expect(chatConfig?.systemInstruction).toContain(`MUST call the \`${TASK_COMPLETE_TOOL_NAME}\` tool`);
249
+ const turn1Params = getMockMessageParams(0);
250
+ const firstToolGroup = turn1Params.config?.tools?.[0];
251
+ expect(firstToolGroup).toBeDefined();
252
+ if (!firstToolGroup || !('functionDeclarations' in firstToolGroup)) {
253
+ throw new Error('Test expectation failed: Config does not contain functionDeclarations.');
254
+ }
255
+ const sentTools = firstToolGroup.functionDeclarations;
256
+ expect(sentTools).toBeDefined();
257
+ expect(sentTools).toEqual(expect.arrayContaining([
258
+ expect.objectContaining({ name: LSTool.Name }),
259
+ expect.objectContaining({ name: TASK_COMPLETE_TOOL_NAME }),
260
+ ]));
261
+ const completeToolDef = sentTools.find((t) => t.name === TASK_COMPLETE_TOOL_NAME);
262
+ expect(completeToolDef?.parameters?.required).toContain('finalResult');
263
+ expect(output.result).toBe('Found file1.txt');
170
264
  expect(output.terminate_reason).toBe(AgentTerminateMode.GOAL);
171
- // Verify Activity Stream (Observability)
265
+ // Telemetry checks
266
+ expect(mockedLogAgentStart).toHaveBeenCalledTimes(1);
267
+ expect(mockedLogAgentStart).toHaveBeenCalledWith(mockConfig, expect.any(AgentStartEvent));
268
+ expect(mockedLogAgentFinish).toHaveBeenCalledTimes(1);
269
+ expect(mockedLogAgentFinish).toHaveBeenCalledWith(mockConfig, expect.any(AgentFinishEvent));
270
+ const finishEvent = mockedLogAgentFinish.mock.calls[0][1];
271
+ expect(finishEvent.terminate_reason).toBe(AgentTerminateMode.GOAL);
272
+ // Context checks
273
+ expect(mockedPromptIdContext.run).toHaveBeenCalledTimes(2); // Two turns
274
+ const agentId = executor['agentId'];
275
+ expect(mockedPromptIdContext.run).toHaveBeenNthCalledWith(1, `${agentId}#0`, expect.any(Function));
276
+ expect(mockedPromptIdContext.run).toHaveBeenNthCalledWith(2, `${agentId}#1`, expect.any(Function));
172
277
  expect(activities).toEqual(expect.arrayContaining([
173
- // Thought subjects are extracted by the executor (parseThought)
174
278
  expect.objectContaining({
175
279
  type: 'THOUGHT_CHUNK',
176
280
  data: { text: 'T1: Listing' },
177
281
  }),
178
- expect.objectContaining({
179
- type: 'TOOL_CALL_START',
180
- data: { name: LSTool.Name, args: { path: '.' } },
181
- }),
182
282
  expect.objectContaining({
183
283
  type: 'TOOL_CALL_END',
184
284
  data: { name: LSTool.Name, output: 'file1.txt' },
185
285
  }),
186
286
  expect.objectContaining({
187
- type: 'THOUGHT_CHUNK',
188
- data: { text: 'T2: Done' },
287
+ type: 'TOOL_CALL_START',
288
+ data: {
289
+ name: TASK_COMPLETE_TOOL_NAME,
290
+ args: { finalResult: 'Found file1.txt' },
291
+ },
292
+ }),
293
+ expect.objectContaining({
294
+ type: 'TOOL_CALL_END',
295
+ data: {
296
+ name: TASK_COMPLETE_TOOL_NAME,
297
+ output: expect.stringContaining('Output submitted'),
298
+ },
189
299
  }),
190
300
  ]));
191
301
  });
192
- it('should execute parallel tool calls concurrently', async () => {
193
- const definition = createTestDefinition([LSTool.Name, ReadFileTool.Name]);
302
+ it('should execute successfully when model calls complete_task without output (Happy Path No Output)', async () => {
303
+ const definition = createTestDefinition([LSTool.Name], {}, 'none');
194
304
  const executor = await AgentExecutor.create(definition, mockConfig, onActivity);
195
- const call1 = {
196
- name: LSTool.Name,
197
- args: { path: '/dir1' },
198
- id: 'call1',
199
- };
200
- // Using LSTool twice for simplicity in mocking standardized responses.
201
- const call2 = {
202
- name: LSTool.Name,
203
- args: { path: '/dir2' },
204
- id: 'call2',
205
- };
206
- // Turn 1: Model calls two tools simultaneously
207
- mockModelResponse([call1, call2], 'T1: Listing both');
208
- // Use concurrency tracking to ensure parallelism
209
- let activeCalls = 0;
210
- let maxActiveCalls = 0;
211
- mockExecuteToolCall.mockImplementation(async (_ctx, reqInfo) => {
212
- activeCalls++;
213
- maxActiveCalls = Math.max(maxActiveCalls, activeCalls);
214
- // Simulate latency. We must advance the fake timers for this to resolve.
215
- await new Promise((resolve) => setTimeout(resolve, 100));
216
- activeCalls--;
217
- return {
218
- callId: reqInfo.callId,
219
- resultDisplay: `Result for ${reqInfo.name}`,
305
+ mockModelResponse([
306
+ { name: LSTool.Name, args: { path: '.' }, id: 'call1' },
307
+ ]);
308
+ mockExecuteToolCall.mockResolvedValueOnce({
309
+ status: 'success',
310
+ request: {
311
+ callId: 'call1',
312
+ name: LSTool.Name,
313
+ args: { path: '.' },
314
+ isClientInitiated: false,
315
+ prompt_id: 'test-prompt',
316
+ },
317
+ tool: {},
318
+ invocation: {},
319
+ response: {
320
+ callId: 'call1',
321
+ resultDisplay: 'ok',
220
322
  responseParts: [
221
323
  {
222
324
  functionResponse: {
223
- name: reqInfo.name,
325
+ name: LSTool.Name,
224
326
  response: {},
225
- id: reqInfo.callId,
327
+ id: 'call1',
226
328
  },
227
329
  },
228
330
  ],
229
331
  error: undefined,
230
- };
332
+ errorType: undefined,
333
+ contentLength: undefined,
334
+ },
231
335
  });
232
- // Turn 2: Model stops
233
- mockModelResponse([]);
234
- // Extraction
235
- mockModelResponse([], undefined, 'Done.');
236
- const runPromise = executor.run({ goal: 'Parallel test' }, signal);
237
- // Advance timers while the parallel calls (Promise.all + setTimeout) are running
238
- await vi.advanceTimersByTimeAsync(150);
239
- await runPromise;
240
- expect(mockExecuteToolCall).toHaveBeenCalledTimes(2);
241
- expect(maxActiveCalls).toBe(2);
242
- // Verify the input to the next model call (Turn 2) contains both responses
243
- // sendMessageStream calls: [0] Turn 1, [1] Turn 2, [2] Extraction
244
- const turn2Input = mockSendMessageStream.mock.calls[1][1];
245
- const turn2Parts = turn2Input.message;
246
- // Promise.all preserves the order of the input array.
247
- expect(turn2Parts.length).toBe(2);
248
- expect(turn2Parts[0]).toEqual(expect.objectContaining({
249
- functionResponse: expect.objectContaining({ id: 'call1' }),
250
- }));
251
- expect(turn2Parts[1]).toEqual(expect.objectContaining({
252
- functionResponse: expect.objectContaining({ id: 'call2' }),
253
- }));
336
+ mockModelResponse([{ name: TASK_COMPLETE_TOOL_NAME, args: {}, id: 'call2' }], 'Task finished.');
337
+ const output = await executor.run({ goal: 'Do work' }, signal);
338
+ const turn1Params = getMockMessageParams(0);
339
+ const firstToolGroup = turn1Params.config?.tools?.[0];
340
+ expect(firstToolGroup).toBeDefined();
341
+ if (!firstToolGroup || !('functionDeclarations' in firstToolGroup)) {
342
+ throw new Error('Test expectation failed: Config does not contain functionDeclarations.');
343
+ }
344
+ const sentTools = firstToolGroup.functionDeclarations;
345
+ expect(sentTools).toBeDefined();
346
+ const completeToolDef = sentTools.find((t) => t.name === TASK_COMPLETE_TOOL_NAME);
347
+ expect(completeToolDef?.parameters?.required).toEqual([]);
348
+ expect(completeToolDef?.description).toContain('signal that you have completed');
349
+ expect(output.result).toBe('Task completed successfully.');
350
+ expect(output.terminate_reason).toBe(AgentTerminateMode.GOAL);
254
351
  });
255
- it('should handle tool execution failure gracefully and report error', async () => {
256
- const definition = createTestDefinition([LSTool.Name]);
352
+ it('should error immediately if the model stops tools without calling complete_task (Protocol Violation)', async () => {
353
+ const definition = createTestDefinition();
257
354
  const executor = await AgentExecutor.create(definition, mockConfig, onActivity);
258
- // Turn 1: Model calls ls, but it fails
259
355
  mockModelResponse([
260
- { name: LSTool.Name, args: { path: '/invalid' }, id: 'call1' },
356
+ { name: LSTool.Name, args: { path: '.' }, id: 'call1' },
261
357
  ]);
262
- const errorMessage = 'Internal failure.';
263
358
  mockExecuteToolCall.mockResolvedValueOnce({
264
- callId: 'call1',
265
- resultDisplay: `Error: ${errorMessage}`,
266
- responseParts: undefined, // Failed tools might return undefined parts
267
- error: { message: errorMessage },
359
+ status: 'success',
360
+ request: {
361
+ callId: 'call1',
362
+ name: LSTool.Name,
363
+ args: { path: '.' },
364
+ isClientInitiated: false,
365
+ prompt_id: 'test-prompt',
366
+ },
367
+ tool: {},
368
+ invocation: {},
369
+ response: {
370
+ callId: 'call1',
371
+ resultDisplay: 'ok',
372
+ responseParts: [
373
+ {
374
+ functionResponse: {
375
+ name: LSTool.Name,
376
+ response: {},
377
+ id: 'call1',
378
+ },
379
+ },
380
+ ],
381
+ error: undefined,
382
+ errorType: undefined,
383
+ contentLength: undefined,
384
+ },
268
385
  });
269
- // Turn 2: Model stops
270
- mockModelResponse([]);
271
- mockModelResponse([], undefined, 'Failed.');
272
- await executor.run({ goal: 'Failure test' }, signal);
273
- // Verify that the error was reported in the activity stream
386
+ mockModelResponse([], 'I think I am done.');
387
+ const output = await executor.run({ goal: 'Strict test' }, signal);
388
+ expect(mockSendMessageStream).toHaveBeenCalledTimes(2);
389
+ const expectedError = `Agent stopped calling tools but did not call '${TASK_COMPLETE_TOOL_NAME}' to finalize the session.`;
390
+ expect(output.terminate_reason).toBe(AgentTerminateMode.ERROR);
391
+ expect(output.result).toBe(expectedError);
392
+ // Telemetry check for error
393
+ expect(mockedLogAgentFinish).toHaveBeenCalledWith(mockConfig, expect.objectContaining({
394
+ terminate_reason: AgentTerminateMode.ERROR,
395
+ }));
396
+ expect(activities).toContainEqual(expect.objectContaining({
397
+ type: 'ERROR',
398
+ data: expect.objectContaining({
399
+ context: 'protocol_violation',
400
+ error: expectedError,
401
+ }),
402
+ }));
403
+ });
404
+ it('should report an error if complete_task is called with missing required arguments', async () => {
405
+ const definition = createTestDefinition();
406
+ const executor = await AgentExecutor.create(definition, mockConfig, onActivity);
407
+ // Turn 1: Missing arg
408
+ mockModelResponse([
409
+ {
410
+ name: TASK_COMPLETE_TOOL_NAME,
411
+ args: { wrongArg: 'oops' },
412
+ id: 'call1',
413
+ },
414
+ ]);
415
+ // Turn 2: Corrected
416
+ mockModelResponse([
417
+ {
418
+ name: TASK_COMPLETE_TOOL_NAME,
419
+ args: { finalResult: 'Corrected result' },
420
+ id: 'call2',
421
+ },
422
+ ]);
423
+ const output = await executor.run({ goal: 'Error test' }, signal);
424
+ expect(mockSendMessageStream).toHaveBeenCalledTimes(2);
425
+ const expectedError = "Missing required argument 'finalResult' for completion.";
274
426
  expect(activities).toContainEqual(expect.objectContaining({
275
427
  type: 'ERROR',
276
428
  data: {
277
- error: errorMessage,
278
429
  context: 'tool_call',
279
- name: LSTool.Name,
430
+ name: TASK_COMPLETE_TOOL_NAME,
431
+ error: expectedError,
280
432
  },
281
433
  }));
282
- // Verify the input to the next model call (Turn 2) contains the fallback error message
283
- const turn2Input = mockSendMessageStream.mock.calls[1][1];
284
- const turn2Parts = turn2Input.message;
285
- expect(turn2Parts).toEqual([
434
+ const turn2Params = getMockMessageParams(1);
435
+ const turn2Parts = turn2Params.message;
436
+ expect(turn2Parts).toBeDefined();
437
+ expect(turn2Parts).toHaveLength(1);
438
+ expect(turn2Parts[0]).toEqual(expect.objectContaining({
439
+ functionResponse: expect.objectContaining({
440
+ name: TASK_COMPLETE_TOOL_NAME,
441
+ response: { error: expectedError },
442
+ id: 'call1',
443
+ }),
444
+ }));
445
+ expect(output.result).toBe('Corrected result');
446
+ expect(output.terminate_reason).toBe(AgentTerminateMode.GOAL);
447
+ });
448
+ it('should handle multiple calls to complete_task in the same turn (accept first, block rest)', async () => {
449
+ const definition = createTestDefinition([], {}, 'none');
450
+ const executor = await AgentExecutor.create(definition, mockConfig, onActivity);
451
+ // Turn 1: Duplicate calls
452
+ mockModelResponse([
453
+ { name: TASK_COMPLETE_TOOL_NAME, args: {}, id: 'call1' },
454
+ { name: TASK_COMPLETE_TOOL_NAME, args: {}, id: 'call2' },
455
+ ]);
456
+ const output = await executor.run({ goal: 'Dup test' }, signal);
457
+ expect(mockSendMessageStream).toHaveBeenCalledTimes(1);
458
+ expect(output.terminate_reason).toBe(AgentTerminateMode.GOAL);
459
+ const completions = activities.filter((a) => a.type === 'TOOL_CALL_END' &&
460
+ a.data['name'] === TASK_COMPLETE_TOOL_NAME);
461
+ const errors = activities.filter((a) => a.type === 'ERROR' && a.data['name'] === TASK_COMPLETE_TOOL_NAME);
462
+ expect(completions).toHaveLength(1);
463
+ expect(errors).toHaveLength(1);
464
+ expect(errors[0].data['error']).toContain('Task already marked complete in this turn');
465
+ });
466
+ it('should execute parallel tool calls and then complete', async () => {
467
+ const definition = createTestDefinition([LSTool.Name]);
468
+ const executor = await AgentExecutor.create(definition, mockConfig, onActivity);
469
+ const call1 = {
470
+ name: LSTool.Name,
471
+ args: { path: '/a' },
472
+ id: 'c1',
473
+ };
474
+ const call2 = {
475
+ name: LSTool.Name,
476
+ args: { path: '/b' },
477
+ id: 'c2',
478
+ };
479
+ // Turn 1: Parallel calls
480
+ mockModelResponse([call1, call2]);
481
+ // Concurrency mock
482
+ let callsStarted = 0;
483
+ let resolveCalls;
484
+ const bothStarted = new Promise((r) => {
485
+ resolveCalls = r;
486
+ });
487
+ mockExecuteToolCall.mockImplementation(async (_ctx, reqInfo) => {
488
+ callsStarted++;
489
+ if (callsStarted === 2)
490
+ resolveCalls();
491
+ await vi.advanceTimersByTimeAsync(100);
492
+ return {
493
+ status: 'success',
494
+ request: reqInfo,
495
+ tool: {},
496
+ invocation: {},
497
+ response: {
498
+ callId: reqInfo.callId,
499
+ resultDisplay: 'ok',
500
+ responseParts: [
501
+ {
502
+ functionResponse: {
503
+ name: reqInfo.name,
504
+ response: {},
505
+ id: reqInfo.callId,
506
+ },
507
+ },
508
+ ],
509
+ error: undefined,
510
+ errorType: undefined,
511
+ contentLength: undefined,
512
+ },
513
+ };
514
+ });
515
+ // Turn 2: Completion
516
+ mockModelResponse([
286
517
  {
287
- text: 'All tool calls failed. Please analyze the errors and try an alternative approach.',
518
+ name: TASK_COMPLETE_TOOL_NAME,
519
+ args: { finalResult: 'done' },
520
+ id: 'c3',
288
521
  },
289
522
  ]);
523
+ const runPromise = executor.run({ goal: 'Parallel' }, signal);
524
+ await vi.advanceTimersByTimeAsync(1);
525
+ await bothStarted;
526
+ await vi.advanceTimersByTimeAsync(150);
527
+ await vi.advanceTimersByTimeAsync(1);
528
+ const output = await runPromise;
529
+ expect(mockExecuteToolCall).toHaveBeenCalledTimes(2);
530
+ expect(output.terminate_reason).toBe(AgentTerminateMode.GOAL);
531
+ // Safe access to message parts
532
+ const turn2Params = getMockMessageParams(1);
533
+ const parts = turn2Params.message;
534
+ expect(parts).toBeDefined();
535
+ expect(parts).toHaveLength(2);
536
+ expect(parts).toEqual(expect.arrayContaining([
537
+ expect.objectContaining({
538
+ functionResponse: expect.objectContaining({ id: 'c1' }),
539
+ }),
540
+ expect.objectContaining({
541
+ functionResponse: expect.objectContaining({ id: 'c2' }),
542
+ }),
543
+ ]));
290
544
  });
291
- it('SECURITY: should block calls to tools not registered for the agent at runtime', async () => {
292
- // Agent definition only includes LSTool
545
+ it('SECURITY: should block unauthorized tools and provide explicit failure to model', async () => {
293
546
  const definition = createTestDefinition([LSTool.Name]);
294
547
  const executor = await AgentExecutor.create(definition, mockConfig, onActivity);
295
- // Turn 1: Model hallucinates a call to ReadFileTool
296
- // (ReadFileTool exists in the parent registry but not the agent's isolated registry)
548
+ // Turn 1: Model tries to use a tool not in its config
549
+ const badCallId = 'bad_call_1';
297
550
  mockModelResponse([
298
551
  {
299
552
  name: ReadFileTool.Name,
300
- args: { path: 'config.txt' },
301
- id: 'call_blocked',
553
+ args: { path: 'secret.txt' },
554
+ id: badCallId,
555
+ },
556
+ ]);
557
+ // Turn 2: Model gives up and completes
558
+ mockModelResponse([
559
+ {
560
+ name: TASK_COMPLETE_TOOL_NAME,
561
+ args: { finalResult: 'Could not read file.' },
562
+ id: 'c2',
302
563
  },
303
564
  ]);
304
- // Turn 2: Model stops
305
- mockModelResponse([]);
306
- // Extraction
307
- mockModelResponse([], undefined, 'Done.');
308
565
  const consoleWarnSpy = vi
309
566
  .spyOn(console, 'warn')
310
567
  .mockImplementation(() => { });
311
- await executor.run({ goal: 'Security test' }, signal);
312
- // Verify executeToolCall was NEVER called because the tool was unauthorized
568
+ await executor.run({ goal: 'Sec test' }, signal);
569
+ // Verify external executor was not called (Security held)
313
570
  expect(mockExecuteToolCall).not.toHaveBeenCalled();
314
- expect(consoleWarnSpy).toHaveBeenCalledWith(expect.stringContaining(`attempted to call unauthorized tool '${ReadFileTool.Name}'`));
571
+ // 2. Verify console warning
572
+ expect(consoleWarnSpy).toHaveBeenCalledWith(expect.stringContaining(`[AgentExecutor] Blocked call:`));
315
573
  consoleWarnSpy.mockRestore();
316
- // Verify the input to the next model call (Turn 2) indicates failure (as the only call was blocked)
317
- const turn2Input = mockSendMessageStream.mock.calls[1][1];
318
- const turn2Parts = turn2Input.message;
319
- expect(turn2Parts[0].text).toContain('All tool calls failed');
320
- });
321
- it('should use OutputConfig completion_criteria in the extraction message', async () => {
322
- const definition = createTestDefinition([LSTool.Name], {}, {
323
- description: 'A summary.',
324
- completion_criteria: ['Must include file names', 'Must be concise'],
325
- });
326
- const executor = await AgentExecutor.create(definition, mockConfig, onActivity);
327
- // Turn 1: Model stops immediately
328
- mockModelResponse([]);
329
- // Extraction Phase
330
- mockModelResponse([], undefined, 'Result: Done.');
331
- await executor.run({ goal: 'Extraction test' }, signal);
332
- // Verify the extraction call (the second call)
333
- const extractionCallArgs = mockSendMessageStream.mock.calls[1][1];
334
- const extractionMessageParts = extractionCallArgs.message;
335
- const extractionText = extractionMessageParts[0].text;
336
- expect(extractionText).toContain('Based on your work so far, provide: A summary.');
337
- expect(extractionText).toContain('Be sure you have addressed:');
338
- expect(extractionText).toContain('- Must include file names');
339
- expect(extractionText).toContain('- Must be concise');
574
+ // Verify specific error was sent back to model
575
+ const turn2Params = getMockMessageParams(1);
576
+ const parts = turn2Params.message;
577
+ expect(parts).toBeDefined();
578
+ expect(parts[0]).toEqual(expect.objectContaining({
579
+ functionResponse: expect.objectContaining({
580
+ id: badCallId,
581
+ name: ReadFileTool.Name,
582
+ response: {
583
+ error: expect.stringContaining('Unauthorized tool call'),
584
+ },
585
+ }),
586
+ }));
587
+ // Verify Activity Stream reported the error
588
+ expect(activities).toContainEqual(expect.objectContaining({
589
+ type: 'ERROR',
590
+ data: expect.objectContaining({
591
+ context: 'tool_call_unauthorized',
592
+ name: ReadFileTool.Name,
593
+ }),
594
+ }));
340
595
  });
341
596
  });
342
597
  describe('run (Termination Conditions)', () => {
343
- const mockKeepAliveResponse = () => {
344
- mockModelResponse([{ name: LSTool.Name, args: { path: '.' }, id: 'loop' }], 'Looping');
345
- mockExecuteToolCall.mockResolvedValue({
346
- callId: 'loop',
347
- resultDisplay: 'ok',
348
- responseParts: [
349
- { functionResponse: { name: LSTool.Name, response: {}, id: 'loop' } },
350
- ],
351
- error: undefined,
598
+ const mockWorkResponse = (id) => {
599
+ mockModelResponse([{ name: LSTool.Name, args: { path: '.' }, id }]);
600
+ mockExecuteToolCall.mockResolvedValueOnce({
601
+ status: 'success',
602
+ request: {
603
+ callId: id,
604
+ name: LSTool.Name,
605
+ args: { path: '.' },
606
+ isClientInitiated: false,
607
+ prompt_id: 'test-prompt',
608
+ },
609
+ tool: {},
610
+ invocation: {},
611
+ response: {
612
+ callId: id,
613
+ resultDisplay: 'ok',
614
+ responseParts: [
615
+ { functionResponse: { name: LSTool.Name, response: {}, id } },
616
+ ],
617
+ error: undefined,
618
+ errorType: undefined,
619
+ contentLength: undefined,
620
+ },
352
621
  });
353
622
  };
354
623
  it('should terminate when max_turns is reached', async () => {
355
- const MAX_TURNS = 2;
624
+ const MAX = 2;
356
625
  const definition = createTestDefinition([LSTool.Name], {
357
- max_turns: MAX_TURNS,
626
+ max_turns: MAX,
358
627
  });
359
- const executor = await AgentExecutor.create(definition, mockConfig, onActivity);
360
- // Turn 1
361
- mockKeepAliveResponse();
362
- // Turn 2
363
- mockKeepAliveResponse();
364
- const output = await executor.run({ goal: 'Termination test' }, signal);
628
+ const executor = await AgentExecutor.create(definition, mockConfig);
629
+ mockWorkResponse('t1');
630
+ mockWorkResponse('t2');
631
+ const output = await executor.run({ goal: 'Turns test' }, signal);
365
632
  expect(output.terminate_reason).toBe(AgentTerminateMode.MAX_TURNS);
366
- expect(mockSendMessageStream).toHaveBeenCalledTimes(MAX_TURNS);
367
- // Extraction phase should be skipped when termination is forced
368
- expect(mockSendMessageStream).not.toHaveBeenCalledWith(expect.any(String), expect.any(Object), expect.stringContaining('#extraction'));
633
+ expect(mockSendMessageStream).toHaveBeenCalledTimes(MAX);
369
634
  });
370
635
  it('should terminate if timeout is reached', async () => {
371
636
  const definition = createTestDefinition([LSTool.Name], {
372
- max_time_minutes: 5,
373
- max_turns: 100,
637
+ max_time_minutes: 1,
374
638
  });
375
- const executor = await AgentExecutor.create(definition, mockConfig, onActivity);
376
- // Turn 1 setup
377
- mockModelResponse([{ name: LSTool.Name, args: { path: '.' }, id: 'loop' }], 'Looping');
378
- // Mock a tool call that takes a long time, causing the overall timeout
379
- mockExecuteToolCall.mockImplementation(async () => {
380
- // Advance time past the 5-minute limit during the tool call execution
381
- await vi.advanceTimersByTimeAsync(5 * 60 * 1000 + 1);
639
+ const executor = await AgentExecutor.create(definition, mockConfig);
640
+ mockModelResponse([{ name: LSTool.Name, args: { path: '.' }, id: 't1' }]);
641
+ // Long running tool
642
+ mockExecuteToolCall.mockImplementationOnce(async (_ctx, reqInfo) => {
643
+ await vi.advanceTimersByTimeAsync(61 * 1000);
382
644
  return {
383
- callId: 'loop',
384
- resultDisplay: 'ok',
385
- responseParts: [
386
- {
387
- functionResponse: { name: LSTool.Name, response: {}, id: 'loop' },
388
- },
389
- ],
390
- error: undefined,
645
+ status: 'success',
646
+ request: reqInfo,
647
+ tool: {},
648
+ invocation: {},
649
+ response: {
650
+ callId: 't1',
651
+ resultDisplay: 'ok',
652
+ responseParts: [],
653
+ error: undefined,
654
+ errorType: undefined,
655
+ contentLength: undefined,
656
+ },
391
657
  };
392
658
  });
393
- const output = await executor.run({ goal: 'Termination test' }, signal);
659
+ const output = await executor.run({ goal: 'Timeout test' }, signal);
394
660
  expect(output.terminate_reason).toBe(AgentTerminateMode.TIMEOUT);
395
- // Should only have called the model once before the timeout check stopped it
396
661
  expect(mockSendMessageStream).toHaveBeenCalledTimes(1);
397
662
  });
398
- it('should terminate when AbortSignal is triggered mid-stream', async () => {
663
+ it('should terminate when AbortSignal is triggered', async () => {
399
664
  const definition = createTestDefinition();
400
- const executor = await AgentExecutor.create(definition, mockConfig, onActivity);
401
- // Mock the model response stream
402
- mockSendMessageStream.mockImplementation(async () => (async function* () {
403
- // Yield the first chunk
665
+ const executor = await AgentExecutor.create(definition, mockConfig);
666
+ mockSendMessageStream.mockImplementationOnce(async () => (async function* () {
404
667
  yield {
405
668
  type: StreamEventType.CHUNK,
406
669
  value: createMockResponseChunk([
407
- { text: '**Thinking** Step 1', thought: true },
670
+ { text: 'Thinking...', thought: true },
408
671
  ]),
409
672
  };
410
- // Simulate abort happening mid-stream
411
673
  abortController.abort();
412
- // The loop in callModel should break immediately due to signal check.
413
674
  })());
414
- const output = await executor.run({ goal: 'Termination test' }, signal);
675
+ const output = await executor.run({ goal: 'Abort test' }, signal);
415
676
  expect(output.terminate_reason).toBe(AgentTerminateMode.ABORTED);
416
677
  });
417
678
  });