@abacus-ai/cli 2.0.0-canary.1 → 2.0.0-canary.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (198) hide show
  1. package/README.md +25 -0
  2. package/dist/index.mjs +466 -438
  3. package/package.json +4 -1
  4. package/.oxlintrc.json +0 -8
  5. package/resources/abacus.ico +0 -0
  6. package/resources/entitlements.plist +0 -9
  7. package/src/__e2e__/README.md +0 -196
  8. package/src/__e2e__/agent-interactions.e2e.test.tsx +0 -61
  9. package/src/__e2e__/cli-commands.e2e.test.tsx +0 -77
  10. package/src/__e2e__/conversation-throttle.e2e.test.ts +0 -453
  11. package/src/__e2e__/conversation.e2e.test.tsx +0 -56
  12. package/src/__e2e__/diff-preview.e2e.test.tsx +0 -3399
  13. package/src/__e2e__/file-creation.e2e.test.tsx +0 -149
  14. package/src/__e2e__/helpers/test-helpers.ts +0 -449
  15. package/src/__e2e__/keyboard-navigation.e2e.test.tsx +0 -34
  16. package/src/__e2e__/llm-models.e2e.test.ts +0 -402
  17. package/src/__e2e__/mcp/mcp-callback-flow.e2e.test.tsx +0 -71
  18. package/src/__e2e__/mcp/mcp-full-app-ui.e2e.test.tsx +0 -167
  19. package/src/__e2e__/mcp/mcp-ui-rendering.e2e.test.tsx +0 -185
  20. package/src/__e2e__/repl.e2e.test.tsx +0 -78
  21. package/src/__e2e__/shell-compatibility.e2e.test.tsx +0 -76
  22. package/src/__e2e__/theme-mcp.e2e.test.tsx +0 -98
  23. package/src/__e2e__/tool-permissions.e2e.test.tsx +0 -66
  24. package/src/args.ts +0 -22
  25. package/src/components/__tests__/react-compiler.test.tsx +0 -78
  26. package/src/components/__tests__/status-indicator.test.tsx +0 -403
  27. package/src/components/composer/__tests__/bash-runner.test.tsx +0 -263
  28. package/src/components/composer/agent-mode-indicator.tsx +0 -63
  29. package/src/components/composer/bash-runner.tsx +0 -54
  30. package/src/components/composer/commands/default-commands.tsx +0 -615
  31. package/src/components/composer/commands/handler.tsx +0 -59
  32. package/src/components/composer/commands/picker.tsx +0 -273
  33. package/src/components/composer/commands/registry.ts +0 -233
  34. package/src/components/composer/commands/types.ts +0 -33
  35. package/src/components/composer/context.tsx +0 -88
  36. package/src/components/composer/file-mention-picker.tsx +0 -83
  37. package/src/components/composer/help.tsx +0 -44
  38. package/src/components/composer/index.tsx +0 -1007
  39. package/src/components/composer/mentions.ts +0 -57
  40. package/src/components/composer/message-queue.tsx +0 -70
  41. package/src/components/composer/mode-panel.tsx +0 -35
  42. package/src/components/composer/modes/__tests__/bash-handler.test.tsx +0 -755
  43. package/src/components/composer/modes/__tests__/bash-renderer.test.tsx +0 -1108
  44. package/src/components/composer/modes/bash-handler.tsx +0 -132
  45. package/src/components/composer/modes/bash-renderer.tsx +0 -175
  46. package/src/components/composer/modes/default-handlers.tsx +0 -33
  47. package/src/components/composer/modes/index.ts +0 -41
  48. package/src/components/composer/modes/types.ts +0 -21
  49. package/src/components/composer/persistent-shell.ts +0 -283
  50. package/src/components/composer/process.ts +0 -65
  51. package/src/components/composer/types.ts +0 -9
  52. package/src/components/composer/use-mention-search.ts +0 -68
  53. package/src/components/error-boundry.tsx +0 -60
  54. package/src/components/exit-message.tsx +0 -29
  55. package/src/components/expanded-view.tsx +0 -74
  56. package/src/components/file-completion.tsx +0 -127
  57. package/src/components/header.tsx +0 -47
  58. package/src/components/logo.tsx +0 -37
  59. package/src/components/segments.tsx +0 -356
  60. package/src/components/status-indicator.tsx +0 -306
  61. package/src/components/tool-group-summary.tsx +0 -263
  62. package/src/components/tool-permissions/ask-user-question-permission-ui.tsx +0 -319
  63. package/src/components/tool-permissions/diff-preview.tsx +0 -359
  64. package/src/components/tool-permissions/index.ts +0 -5
  65. package/src/components/tool-permissions/permission-options.tsx +0 -401
  66. package/src/components/tool-permissions/permission-preview-header.tsx +0 -57
  67. package/src/components/tool-permissions/tool-permission-ui.tsx +0 -420
  68. package/src/components/tools/agent/ask-user-question.tsx +0 -107
  69. package/src/components/tools/agent/enter-plan-mode.tsx +0 -55
  70. package/src/components/tools/agent/exit-plan-mode.tsx +0 -83
  71. package/src/components/tools/agent/handoff-to-main.tsx +0 -27
  72. package/src/components/tools/agent/subagent.tsx +0 -37
  73. package/src/components/tools/agent/todo-write.tsx +0 -104
  74. package/src/components/tools/browser/close-tab.tsx +0 -58
  75. package/src/components/tools/browser/computer.tsx +0 -70
  76. package/src/components/tools/browser/get-interactive-elements.tsx +0 -54
  77. package/src/components/tools/browser/get-tab-content.tsx +0 -51
  78. package/src/components/tools/browser/navigate-to.tsx +0 -59
  79. package/src/components/tools/browser/new-tab.tsx +0 -60
  80. package/src/components/tools/browser/perform-action.tsx +0 -63
  81. package/src/components/tools/browser/refresh-tab.tsx +0 -43
  82. package/src/components/tools/browser/switch-tab.tsx +0 -58
  83. package/src/components/tools/filesystem/delete-file.tsx +0 -104
  84. package/src/components/tools/filesystem/edit.tsx +0 -220
  85. package/src/components/tools/filesystem/list-dir.tsx +0 -78
  86. package/src/components/tools/filesystem/read-file.tsx +0 -180
  87. package/src/components/tools/filesystem/upload-image.tsx +0 -76
  88. package/src/components/tools/ide/ide-diagnostics.tsx +0 -62
  89. package/src/components/tools/index.ts +0 -91
  90. package/src/components/tools/mcp/mcp-tool.tsx +0 -158
  91. package/src/components/tools/search/fetch-url.tsx +0 -73
  92. package/src/components/tools/search/file-search.tsx +0 -78
  93. package/src/components/tools/search/grep.tsx +0 -90
  94. package/src/components/tools/search/semantic-search.tsx +0 -66
  95. package/src/components/tools/search/web-search.tsx +0 -71
  96. package/src/components/tools/shared/index.tsx +0 -48
  97. package/src/components/tools/shared/zod-coercion.ts +0 -35
  98. package/src/components/tools/terminal/bash-tool-output.tsx +0 -188
  99. package/src/components/tools/terminal/get-terminal-output.tsx +0 -91
  100. package/src/components/tools/terminal/run-in-terminal.tsx +0 -131
  101. package/src/components/tools/types.ts +0 -16
  102. package/src/components/tools.tsx +0 -68
  103. package/src/components/ui/__tests__/divider.test.tsx +0 -61
  104. package/src/components/ui/__tests__/gradient.test.tsx +0 -125
  105. package/src/components/ui/__tests__/input.test.tsx +0 -166
  106. package/src/components/ui/__tests__/select.test.tsx +0 -273
  107. package/src/components/ui/__tests__/shimmer.test.tsx +0 -99
  108. package/src/components/ui/blinking-indicator.tsx +0 -27
  109. package/src/components/ui/divider.tsx +0 -162
  110. package/src/components/ui/gradient.tsx +0 -56
  111. package/src/components/ui/input.tsx +0 -228
  112. package/src/components/ui/select.tsx +0 -151
  113. package/src/components/ui/shimmer.tsx +0 -76
  114. package/src/context/agent-mode.tsx +0 -95
  115. package/src/context/extension-file.tsx +0 -136
  116. package/src/context/network-activity.tsx +0 -45
  117. package/src/context/notification.tsx +0 -62
  118. package/src/context/shell-size.tsx +0 -49
  119. package/src/context/shell-title.tsx +0 -38
  120. package/src/entrypoints/print-mode.ts +0 -312
  121. package/src/entrypoints/repl.tsx +0 -389
  122. package/src/hooks/use-agent.ts +0 -15
  123. package/src/hooks/use-api-client.ts +0 -1
  124. package/src/hooks/use-available-height.ts +0 -8
  125. package/src/hooks/use-cleanup.ts +0 -29
  126. package/src/hooks/use-interrupt-manager.ts +0 -242
  127. package/src/hooks/use-models.ts +0 -22
  128. package/src/index.ts +0 -217
  129. package/src/lib/__tests__/ansi.test.ts +0 -255
  130. package/src/lib/__tests__/cli.test.ts +0 -122
  131. package/src/lib/__tests__/commands.test.ts +0 -325
  132. package/src/lib/__tests__/constants.test.ts +0 -15
  133. package/src/lib/__tests__/focusables.test.ts +0 -25
  134. package/src/lib/__tests__/fs.test.ts +0 -231
  135. package/src/lib/__tests__/markdown.test.tsx +0 -348
  136. package/src/lib/__tests__/mcpCommandHandler.test.ts +0 -173
  137. package/src/lib/__tests__/mcpManagement.test.ts +0 -38
  138. package/src/lib/__tests__/path-paste.test.ts +0 -144
  139. package/src/lib/__tests__/path.test.ts +0 -300
  140. package/src/lib/__tests__/queries.test.ts +0 -39
  141. package/src/lib/__tests__/standaloneMcpService.test.ts +0 -71
  142. package/src/lib/__tests__/text-buffer.test.ts +0 -328
  143. package/src/lib/__tests__/text-utils.test.ts +0 -32
  144. package/src/lib/__tests__/timing.test.ts +0 -78
  145. package/src/lib/__tests__/utils.test.ts +0 -238
  146. package/src/lib/__tests__/vim-buffer-actions.test.ts +0 -154
  147. package/src/lib/ansi.ts +0 -150
  148. package/src/lib/cli-push-server.ts +0 -112
  149. package/src/lib/cli.ts +0 -44
  150. package/src/lib/clipboard.ts +0 -226
  151. package/src/lib/command-utils.ts +0 -93
  152. package/src/lib/commands.ts +0 -270
  153. package/src/lib/constants.ts +0 -3
  154. package/src/lib/extension-connection.ts +0 -181
  155. package/src/lib/focusables.ts +0 -7
  156. package/src/lib/fs.ts +0 -533
  157. package/src/lib/markdown/code-block.tsx +0 -63
  158. package/src/lib/markdown/index.ts +0 -4
  159. package/src/lib/markdown/link.tsx +0 -19
  160. package/src/lib/markdown/markdown.tsx +0 -372
  161. package/src/lib/markdown/types.ts +0 -15
  162. package/src/lib/mcpCommandHandler.ts +0 -121
  163. package/src/lib/mcpManagement.ts +0 -44
  164. package/src/lib/path-paste.ts +0 -185
  165. package/src/lib/path.ts +0 -179
  166. package/src/lib/queries.ts +0 -15
  167. package/src/lib/standaloneMcpService.ts +0 -688
  168. package/src/lib/status-utils.ts +0 -237
  169. package/src/lib/test-utils.tsx +0 -72
  170. package/src/lib/text-buffer.ts +0 -2415
  171. package/src/lib/text-utils.ts +0 -272
  172. package/src/lib/timing.ts +0 -63
  173. package/src/lib/types.ts +0 -295
  174. package/src/lib/utils.ts +0 -182
  175. package/src/lib/vim-buffer-actions.ts +0 -732
  176. package/src/providers/agent.tsx +0 -1063
  177. package/src/providers/api-client.tsx +0 -43
  178. package/src/services/logger.ts +0 -85
  179. package/src/terminal/detection.ts +0 -187
  180. package/src/terminal/exit.ts +0 -279
  181. package/src/terminal/notification.ts +0 -83
  182. package/src/terminal/progress.ts +0 -201
  183. package/src/terminal/setup.ts +0 -797
  184. package/src/terminal/types.ts +0 -51
  185. package/src/theme/context.tsx +0 -57
  186. package/src/theme/index.ts +0 -4
  187. package/src/theme/themed.tsx +0 -35
  188. package/src/theme/themes.json +0 -546
  189. package/src/theme/types.ts +0 -110
  190. package/src/tools/types.ts +0 -59
  191. package/src/tools/utils/__tests__/zod-coercion.test.ts +0 -33
  192. package/src/tools/utils/tool-ui-components.tsx +0 -649
  193. package/src/tools/utils/zod-coercion.ts +0 -35
  194. package/tsconfig.json +0 -16
  195. package/tsconfig.node.json +0 -29
  196. package/tsconfig.test.json +0 -27
  197. package/tsdown.config.ts +0 -17
  198. package/vitest.config.ts +0 -76
@@ -1,402 +0,0 @@
1
- import * as fs from "fs";
2
- import * as path from "path";
3
- import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
4
-
5
- const sleep = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms));
6
- const RESULTS_FILE = path.join(process.cwd(), "llm-models-test-results.txt");
7
-
8
- const API_BASE_URL = (() => {
9
- const env = process.env.CODELLM_ENV || "prod";
10
- const urls: Record<string, string> = {
11
- prod: "https://apps.abacus.ai/api/v0",
12
- preprod: "https://preprod-apps.abacus.ai/api/v0",
13
- staging: "https://staging-apps.abacus.ai/api/v0",
14
- "staging-latest": "https://staging-latest-apps.abacus.ai/api/v0",
15
- };
16
- const url = urls[env] || urls.prod;
17
- console.log(`🌐 Using API environment: ${env} (${url})`);
18
- return url;
19
- })();
20
-
21
- async function createConversationWithRealAPI(
22
- apiKey: string,
23
- ): Promise<{ conversationId: string; response: any }> {
24
- const response = await fetch(`${API_BASE_URL}/_createCodellmDeploymentConversation`, {
25
- method: "POST",
26
- headers: {
27
- "Content-Type": "application/json",
28
- APIKEY: apiKey,
29
- },
30
- body: JSON.stringify({
31
- deploymentType: "CODE_LLM_AGENT",
32
- }),
33
- });
34
-
35
- const data = (await response.json()) as { result?: { deploymentConversationId: string } };
36
- console.log("📝 Real API conversation response:", JSON.stringify(data, null, 2));
37
- return {
38
- conversationId: data.result?.deploymentConversationId || "test-id",
39
- response: data,
40
- };
41
- }
42
-
43
- async function fetchAvailableLLMs(
44
- apiKey: string,
45
- ): Promise<{ llmName: string; displayName: string }[]> {
46
- const response = await fetch(`${API_BASE_URL}/_listCodeBots`, {
47
- method: "POST",
48
- headers: {
49
- "Content-Type": "application/json",
50
- APIKEY: apiKey,
51
- },
52
- body: JSON.stringify({
53
- supportsThoughtStreaming: true,
54
- }),
55
- });
56
-
57
- const data = (await response.json()) as { result?: { llmName: string; displayName: string }[] };
58
- const llms = (data.result || []).map((llm: any) => ({
59
- llmName: llm.llmName,
60
- displayName: llm.displayName || llm.llmName,
61
- }));
62
- console.log(`📋 Found ${llms.length} available LLMs`);
63
- return llms;
64
- }
65
-
66
- async function sendMessageWithRealAPI(
67
- apiKey: string,
68
- conversationId: string,
69
- message: string,
70
- llmName: string = "",
71
- ): Promise<{ fullResponse: string; hasToolUse: boolean; toolsUsed: string[]; hasError: boolean }> {
72
- const url = `${API_BASE_URL}/_codeLLMSendAgentMessageSSE`;
73
-
74
- // Use proper tool object format as expected by the API
75
- const supportedTools = [
76
- {
77
- name: "list_dir",
78
- arguments: ["relativeWorkspacePath", "explanation"],
79
- description: "List files in a directory",
80
- },
81
- {
82
- name: "read_file",
83
- arguments: [
84
- "targetFile",
85
- "startLineOneIndexed",
86
- "endLineOneIndexed",
87
- "shouldReadEntireFile",
88
- "explanation",
89
- ],
90
- description: "Read a file",
91
- },
92
- {
93
- name: "edit",
94
- arguments: [
95
- "targetFile",
96
- "codeEdit",
97
- "overwriteFile",
98
- "instructions",
99
- "startLine",
100
- "endLine",
101
- ],
102
- description: "Create or edit a file",
103
- },
104
- { name: "delete_file", arguments: ["targetFile", "explanation"], description: "Delete a file" },
105
- {
106
- name: "grep",
107
- arguments: [
108
- "pattern",
109
- "path",
110
- "glob",
111
- "output_mode",
112
- "-B",
113
- "-A",
114
- "-C",
115
- "-n",
116
- "-i",
117
- "type",
118
- "head_limit",
119
- "offset",
120
- "multiline",
121
- ],
122
- description: "A powerful search tool built on ripgrep",
123
- },
124
- { name: "file_search", arguments: ["explanation", "query"], description: "Search for a file" },
125
- {
126
- name: "semantic_search_server",
127
- arguments: ["command", "explanation"],
128
- description: "Semantic search",
129
- },
130
- {
131
- name: "run_in_terminal",
132
- arguments: ["command", "explanation", "isBackground", "id"],
133
- description: "Run a command in terminal",
134
- },
135
- { name: "get_terminal_output", arguments: ["id"], description: "Get terminal output" },
136
- { name: "fetch_url", arguments: ["url"], description: "Fetch content from a URL" },
137
- { name: "web_search", arguments: ["query", "explanation"], description: "Search the web" },
138
- {
139
- name: "upload_image",
140
- arguments: ["targetFile", "explanation"],
141
- description: "Upload an image",
142
- },
143
- { name: "todo_write", arguments: ["todos"], description: "Create task list" },
144
- { name: "enter_plan_mode", arguments: [], description: "Enter plan mode" },
145
- { name: "exit_plan_mode", arguments: ["planFilePath"], description: "Exit plan mode" },
146
- { name: "subagent", arguments: ["agent", "task"], description: "Switch to subagent" },
147
- ];
148
-
149
- const body = {
150
- llmName: llmName,
151
- supportedTools: supportedTools,
152
- mcpTools: [],
153
- message: message,
154
- deploymentConversationId: conversationId,
155
- folderPaths: JSON.stringify([]),
156
- rules: JSON.stringify([]),
157
- requestId: `test-${Date.now()}`,
158
- docInfos: [],
159
- userChanges: JSON.stringify({}),
160
- problemCount: 0,
161
- runningTerminals: JSON.stringify([]),
162
- supportsV2: true,
163
- supportsThoughtStreaming: true,
164
- supportsAbsolutePath: false,
165
- isPrintMode: false,
166
- supportsParallelToolUse: true,
167
- };
168
-
169
- const response = await fetch(url, {
170
- method: "POST",
171
- headers: {
172
- "Content-Type": "application/json",
173
- APIKEY: apiKey,
174
- "REAI-UI": "1",
175
- Accept: "text/event-stream",
176
- },
177
- body: JSON.stringify(body),
178
- });
179
-
180
- let fullResponse = "";
181
- let hasToolUse = false;
182
- let hasError = false;
183
- const toolsUsed: string[] = [];
184
-
185
- const toolPatterns = [
186
- "toolUseRequest",
187
- '"name":"edit"',
188
- '"name":"read_file"',
189
- '"name":"list_dir"',
190
- '"name":"delete_file"',
191
- '"name":"grep"',
192
- '"name":"file_search"',
193
- '"name":"run_in_terminal"',
194
- '"name":"enter_plan_mode"',
195
- '"name":"exit_plan_mode"',
196
- ];
197
-
198
- if (!response.ok) {
199
- const errorText = await response.text();
200
- fullResponse = errorText;
201
- console.log(`📡 API Error Response: ${errorText}`);
202
- hasError = true;
203
- } else {
204
- const reader = response.body?.getReader();
205
- const decoder = new TextDecoder();
206
-
207
- if (reader) {
208
- try {
209
- while (true) {
210
- const { done, value } = await reader.read();
211
- if (done) {
212
- break;
213
- }
214
-
215
- const chunk = decoder.decode(value, { stream: true });
216
- fullResponse += chunk;
217
-
218
- for (const pattern of toolPatterns) {
219
- if (chunk.includes(pattern)) {
220
- hasToolUse = true;
221
- if (!toolsUsed.includes(pattern)) {
222
- toolsUsed.push(pattern);
223
- }
224
- }
225
- }
226
-
227
- if (chunk.toLowerCase().includes("internal server error")) {
228
- hasError = true;
229
- }
230
-
231
- // Check for success: false in JSON responses
232
- if (chunk.includes('"success": false') || chunk.includes('"success":false')) {
233
- hasError = true;
234
- }
235
- }
236
- } finally {
237
- reader.releaseLock();
238
- }
239
- }
240
- }
241
-
242
- console.log(`📡 Real API response (first 500 chars): ${fullResponse.substring(0, 500)}`);
243
- console.log(`🔧 Tools detected: ${toolsUsed.join(", ") || "none"}`);
244
- return { fullResponse, hasToolUse, toolsUsed, hasError };
245
- }
246
-
247
- describe.concurrent("LLM Models E2E Tests", () => {
248
- const apiKey = process.env.ABACUS_API_KEY;
249
- if (!apiKey) {
250
- throw new Error("ABACUS_API_KEY is not set");
251
- }
252
-
253
- beforeEach(() => {
254
- vi.spyOn(process, "exit").mockImplementation((() => {}) as unknown as typeof process.exit);
255
- });
256
-
257
- afterEach(() => {
258
- vi.restoreAllMocks();
259
- });
260
-
261
- it("should test ALL available LLMs for edit tool usage", async () => {
262
- console.log(`\n🔑 Using API key: ${apiKey.substring(0, 8)}...`);
263
- console.log("\n🔑 Using REAL API to test ALL available LLMs...\n");
264
-
265
- const availableLLMs = await fetchAvailableLLMs(apiKey);
266
-
267
- if (availableLLMs.length === 0) {
268
- console.error("❌ No LLMs available from API");
269
- return;
270
- }
271
-
272
- console.log(`\n📋 Testing ${availableLLMs.length} LLMs:\n`);
273
- availableLLMs.forEach((llm, i) => {
274
- console.log(` ${i + 1}. ${llm.displayName} (${llm.llmName})`);
275
- });
276
- console.log("");
277
-
278
- const results: {
279
- llmName: string;
280
- displayName: string;
281
- hasToolUse: boolean;
282
- toolsUsed: string[];
283
- error?: string;
284
- }[] = [];
285
-
286
- for (let i = 0; i < availableLLMs.length; i++) {
287
- const llm = availableLLMs[i];
288
- console.log(`\n--- Testing LLM ${i + 1}/${availableLLMs.length}: ${llm.displayName} ---`);
289
-
290
- const { conversationId, response } = await createConversationWithRealAPI(apiKey);
291
-
292
- if (!response.success) {
293
- console.error(`❌ Failed to create conversation for ${llm.displayName}`);
294
- results.push({
295
- ...llm,
296
- hasToolUse: false,
297
- toolsUsed: [],
298
- error: "Failed to create conversation",
299
- });
300
- continue;
301
- }
302
-
303
- console.log(`✅ Created conversation: ${conversationId}`);
304
-
305
- try {
306
- const testMessage = `Create a simple Python file called hello_${llm.llmName.replace(/[^a-z0-9]/g, "_")}.py that prints "Hello from ${llm.displayName}!"`;
307
-
308
- const { fullResponse, hasToolUse, toolsUsed, hasError } = await sendMessageWithRealAPI(
309
- apiKey,
310
- conversationId,
311
- testMessage,
312
- llm.llmName,
313
- );
314
-
315
- if (hasError) {
316
- console.error(`❌ ${llm.displayName} - Error detected`);
317
- results.push({
318
- ...llm,
319
- hasToolUse: false,
320
- toolsUsed: [],
321
- error: "Internal server error",
322
- });
323
- } else {
324
- results.push({ ...llm, hasToolUse, toolsUsed });
325
-
326
- if (hasToolUse) {
327
- console.log(`✅ ${llm.displayName} - Tool usage detected: ${toolsUsed.join(", ")}`);
328
- } else {
329
- console.error(`❌ ${llm.displayName} - No tool usage detected`);
330
- console.error(` Response: ${fullResponse.substring(0, 500)}`);
331
- }
332
- }
333
- } catch (error) {
334
- console.error(
335
- `❌ Error testing ${llm.displayName}: ${error instanceof Error ? error.message : String(error)}`,
336
- );
337
- results.push({ ...llm, hasToolUse: false, toolsUsed: [], error: String(error) });
338
- }
339
-
340
- await sleep(1000);
341
- }
342
-
343
- console.log("\n\n📊 SUMMARY - LLM Tool Usage Test Results:\n");
344
- console.log("=".repeat(60));
345
-
346
- const withToolUse = results.filter((r) => r.hasToolUse);
347
- const withoutToolUse = results.filter((r) => !r.hasToolUse && !r.error);
348
- const withErrors = results.filter((r) => r.error);
349
-
350
- console.log("\n✅ LLMs WITH tool usage (GOOD):");
351
- withToolUse.forEach((r) =>
352
- console.log(` - ${r.displayName} (${r.llmName}): ${r.toolsUsed.join(", ")}`),
353
- );
354
-
355
- console.log("\n❌ LLMs WITHOUT tool usage (BAD):");
356
- withoutToolUse.forEach((r) => console.log(` - ${r.displayName} (${r.llmName})`));
357
-
358
- if (withErrors.length > 0) {
359
- console.log("\n⚠️ LLMs with errors:");
360
- withErrors.forEach((r) => console.log(` - ${r.displayName}: ${r.error}`));
361
- }
362
-
363
- console.log("\n" + "=".repeat(60));
364
- console.log(`Total: ${results.length} LLMs tested`);
365
- console.log(` ✅ With tool usage: ${withToolUse.length}`);
366
- console.log(` ❌ No tool usage: ${withoutToolUse.length}`);
367
- console.log(` ⚠️ Errors: ${withErrors.length}`);
368
-
369
- // Write results to file for debugging
370
- const fileContent = [
371
- "📊 LLM Tool Usage Test Results",
372
- "=".repeat(60),
373
- "",
374
- "✅ LLMs WITH tool usage (GOOD):",
375
- ...withToolUse.map((r) => ` - ${r.displayName} (${r.llmName}): ${r.toolsUsed.join(", ")}`),
376
- "",
377
- "❌ LLMs WITHOUT tool usage (BAD):",
378
- ...withoutToolUse.map((r) => ` - ${r.displayName} (${r.llmName})`),
379
- "",
380
- "⚠️ LLMs with errors:",
381
- ...withErrors.map((r) => ` - ${r.displayName}: ${r.error}`),
382
- "",
383
- "=".repeat(60),
384
- `Total: ${results.length} LLMs tested`,
385
- ` ✅ With tool usage: ${withToolUse.length}`,
386
- ` ❌ No tool usage: ${withoutToolUse.length}`,
387
- ` ⚠️ Errors: ${withErrors.length}`,
388
- ].join("\n");
389
- fs.writeFileSync(RESULTS_FILE, fileContent);
390
- console.log(`\n📄 Results written to: ${RESULTS_FILE}`);
391
-
392
- // Test fails if any LLM returns Internal Server Error or other errors
393
- expect(withErrors.length).toBe(0);
394
- // Test fails if any LLM doesn't show tool usage
395
- expect(withoutToolUse.length).toBe(0);
396
- // Sanity check: all LLMs should show tool usage
397
- expect(withToolUse.length).toBe(results.length);
398
- console.log(
399
- `\n✅ Test completed - ALL ${withToolUse.length} LLMs showed tool usage as expected\n`,
400
- );
401
- }, 600000);
402
- });
@@ -1,71 +0,0 @@
1
- import type { SSEEvent } from "@codellm/api";
2
-
3
- import { describe, it, expect, vi } from "vitest";
4
-
5
- import { createMockAbacusClient } from "../helpers/test-helpers.js";
6
-
7
- /**
8
- * Integration Tests - These test the full streaming flow by examining how
9
- * the async generator events update the AgentProvider state.
10
- *
11
- * Note: Full app integration tests are complex due to provider dependencies.
12
- * These tests verify the streaming mechanism in isolation.
13
- */
14
- describe.concurrent("MCP Integration - Streaming Flow Tests", () => {
15
- /**
16
- * Test that verifies the AgentRunnerCore callback mechanism works correctly.
17
- * We import the actual callback handlers and verify they update status.
18
- */
19
- it("onToolExecutionStart callback sets ExecutingTool status", async () => {
20
- // This test verifies that the status enum values exist (basic sanity check)
21
- const { AgentStatus } = await import("../../providers/agent.tsx");
22
-
23
- expect(AgentStatus.Idle).toBe("idle");
24
- expect(AgentStatus.Submitted).toBe("submitted");
25
- expect(AgentStatus.Streaming).toBe("streaming");
26
- expect(AgentStatus.ExecutingTool).toBe("executing-tool");
27
-
28
- console.log("[Integration Test] AgentStatus enum values verified");
29
- console.log("[Integration Test] Callback mechanism verified ✓");
30
- });
31
-
32
- /**
33
- * Test that verifies the mock API client correctly simulates tool requests
34
- * via the async generator streaming pattern.
35
- */
36
- it("Mock API client simulates tool request flow correctly via async generator", async () => {
37
- const mockClient = createMockAbacusClient();
38
-
39
- const events: SSEEvent[] = [];
40
-
41
- console.log("[Mock Test] Starting streaming call simulation...");
42
- const stream = mockClient.streamAgentMessage({
43
- deploymentConversationId: "test-conversation-id",
44
- message: "Create a hello world file",
45
- supportedTools: [],
46
- });
47
-
48
- for await (const event of stream) {
49
- events.push(event);
50
- console.log("[Mock Test] Event received:", JSON.stringify(event).slice(0, 100));
51
- }
52
-
53
- console.log("[Mock Test] Streaming completed");
54
- console.log(`[Mock Test] Events received: ${events.length}`);
55
-
56
- // Verify the mock sent the expected events
57
- expect(events.length).toBe(2);
58
-
59
- // First event should be text segment
60
- expect(events[0]?.["segment"]).toBeDefined();
61
- console.log("[Mock Test] ✓ Text segment event received");
62
-
63
- // Second event should be tool request
64
- expect(events[1]?.["toolUseRequest"]).toBeDefined();
65
- const toolReq = events[1]?.["toolUseRequest"] as { name: string };
66
- expect(toolReq.name).toBe("edit");
67
- console.log("[Mock Test] ✓ Tool request event received");
68
-
69
- console.log("[Mock Test] Mock API streaming flow verified ✓");
70
- }, 10000);
71
- });
@@ -1,167 +0,0 @@
1
- import { describe, it, expect } from "vitest";
2
-
3
- const sleep = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms));
4
-
5
- /**
6
- * Full App UI Rendering Tests
7
- * These tests render the app with proper provider wrapper and verify UI updates.
8
- */
9
- describe.sequential("MCP Full App UI Tests", () => {
10
- /**
11
- * Test that renders StatusIndicator inside the full provider stack
12
- * and verifies it updates correctly when status changes.
13
- */
14
- it("StatusIndicator in full provider stack shows correct UI", async () => {
15
- const { render, cleanup } = await import("../../lib/test-utils.tsx");
16
- const { createTestWrapper } = await import("../helpers/test-helpers.ts");
17
- const { StatusIndicator } = await import("../../components/status-indicator.tsx");
18
- const { AgentStatus } = await import("../../providers/agent.tsx");
19
- const React = await import("react");
20
- const stripAnsi = (await import("strip-ansi")).default;
21
-
22
- const TestWrapper = createTestWrapper();
23
-
24
- console.log("[Full Stack Test] Rendering StatusIndicator with full provider stack...");
25
-
26
- try {
27
- // Render StatusIndicator inside the full provider wrapper
28
- const instance = render(
29
- React.createElement(
30
- TestWrapper,
31
- null,
32
- React.createElement(StatusIndicator, { status: AgentStatus.ExecutingTool }),
33
- ),
34
- false,
35
- );
36
-
37
- await sleep(500);
38
-
39
- const frame = instance.lastFrame() ?? "";
40
- const plainFrame = stripAnsi(frame);
41
-
42
- console.log("[Full Stack Test] Rendered frame:", plainFrame);
43
-
44
- // Check if the indicator is visible
45
- const hasIndicator = plainFrame.includes("...");
46
-
47
- if (hasIndicator) {
48
- console.log("[Full Stack Test] ✓ StatusIndicator rendered correctly with full providers");
49
- expect(plainFrame).toContain("...");
50
- } else {
51
- console.log("[Full Stack Test] Frame content:", plainFrame.slice(0, 200));
52
- }
53
-
54
- // Test status transition within provider stack
55
- console.log("[Full Stack Test] Testing status transitions...");
56
-
57
- instance.rerender(
58
- React.createElement(
59
- TestWrapper,
60
- null,
61
- React.createElement(StatusIndicator, { status: AgentStatus.Idle }),
62
- ),
63
- );
64
-
65
- const idleFrame = stripAnsi(instance.lastFrame() ?? "");
66
- const idleHasIndicator = idleFrame.includes("...");
67
-
68
- console.log("[Full Stack Test] Idle status - indicator visible:", idleHasIndicator);
69
-
70
- // Idle should hide the indicator
71
- if (!idleHasIndicator || idleFrame.trim() === "") {
72
- console.log("[Full Stack Test] ✓ Idle correctly hides indicator");
73
- }
74
-
75
- console.log("[Full Stack Test] Full provider stack test completed ✓");
76
- } finally {
77
- cleanup();
78
- }
79
- }, 10000);
80
-
81
- /**
82
- * Test that simulates the full message → tool execution → UI update flow
83
- * using mocked services within the provider stack.
84
- */
85
- it("Full provider stack handles tool execution status flow", async () => {
86
- const { render, cleanup } = await import("../../lib/test-utils.tsx");
87
- const { createTestWrapper } = await import("../helpers/test-helpers.ts");
88
- const { StatusIndicator } = await import("../../components/status-indicator.tsx");
89
- const { AgentStatus } = await import("../../providers/agent.tsx");
90
- const React = await import("react");
91
- const stripAnsi = (await import("strip-ansi")).default;
92
-
93
- const TestWrapper = createTestWrapper();
94
-
95
- console.log("[Flow Test] Simulating MCP tool execution flow with full providers...");
96
-
97
- // Simulate the complete status flow
98
- const statusFlow = [
99
- { status: AgentStatus.Idle, name: "Initial (Idle)", shouldShow: false },
100
- { status: AgentStatus.Submitted, name: "Message Submitted", shouldShow: true },
101
- { status: AgentStatus.Streaming, name: "Streaming Response", shouldShow: true },
102
- { status: AgentStatus.ExecutingTool, name: "MCP Tool Executing", shouldShow: true },
103
- { status: AgentStatus.Streaming, name: "Streaming (after tool)", shouldShow: true },
104
- { status: AgentStatus.Idle, name: "Complete (Idle)", shouldShow: false },
105
- ];
106
-
107
- const results: { name: string; visible: boolean; expected: boolean; match: boolean }[] = [];
108
-
109
- try {
110
- const instance = render(
111
- React.createElement(
112
- TestWrapper,
113
- null,
114
- React.createElement(StatusIndicator, { status: AgentStatus.Idle }),
115
- ),
116
- false,
117
- );
118
-
119
- await sleep(300);
120
-
121
- for (const { status, name, shouldShow } of statusFlow) {
122
- instance.rerender(
123
- React.createElement(TestWrapper, null, React.createElement(StatusIndicator, { status })),
124
- );
125
-
126
- // Small delay to let React process
127
- await sleep(50);
128
-
129
- const frame = instance.lastFrame() ?? "";
130
- const plainFrame = stripAnsi(frame);
131
- const isVisible = plainFrame.includes("...") || plainFrame.includes("Resuming");
132
-
133
- results.push({
134
- name,
135
- visible: isVisible,
136
- expected: shouldShow,
137
- match: isVisible === shouldShow,
138
- });
139
- }
140
-
141
- console.log("[Flow Test] Status flow results:");
142
- results.forEach((r) => {
143
- const icon = r.match ? "✓" : "✗";
144
- console.log(
145
- ` ${icon} ${r.name}: ${r.visible ? "VISIBLE" : "hidden"} (expected: ${r.expected ? "VISIBLE" : "hidden"})`,
146
- );
147
- });
148
-
149
- // Verify all states matched expectations
150
- const allMatch = results.every((r) => r.match);
151
- console.log(`[Flow Test] All states matched: ${allMatch ? "✓ YES" : "✗ NO"}`);
152
-
153
- // The critical assertion: ExecutingTool MUST be visible
154
- const executingToolResult = results.find((r) => r.name === "MCP Tool Executing");
155
- expect(executingToolResult?.visible).toBe(true);
156
-
157
- // All results should match expectations
158
- for (const result of results) {
159
- expect(result.match).toBe(true);
160
- }
161
-
162
- console.log("[Flow Test] Full provider stack flow test completed ✓");
163
- } finally {
164
- cleanup();
165
- }
166
- }, 10000);
167
- });