@abacus-ai/cli 2.0.0-canary.1 → 2.0.0-canary.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (197) hide show
  1. package/dist/index.mjs +448 -422
  2. package/package.json +4 -1
  3. package/.oxlintrc.json +0 -8
  4. package/resources/abacus.ico +0 -0
  5. package/resources/entitlements.plist +0 -9
  6. package/src/__e2e__/README.md +0 -196
  7. package/src/__e2e__/agent-interactions.e2e.test.tsx +0 -61
  8. package/src/__e2e__/cli-commands.e2e.test.tsx +0 -77
  9. package/src/__e2e__/conversation-throttle.e2e.test.ts +0 -453
  10. package/src/__e2e__/conversation.e2e.test.tsx +0 -56
  11. package/src/__e2e__/diff-preview.e2e.test.tsx +0 -3399
  12. package/src/__e2e__/file-creation.e2e.test.tsx +0 -149
  13. package/src/__e2e__/helpers/test-helpers.ts +0 -449
  14. package/src/__e2e__/keyboard-navigation.e2e.test.tsx +0 -34
  15. package/src/__e2e__/llm-models.e2e.test.ts +0 -402
  16. package/src/__e2e__/mcp/mcp-callback-flow.e2e.test.tsx +0 -71
  17. package/src/__e2e__/mcp/mcp-full-app-ui.e2e.test.tsx +0 -167
  18. package/src/__e2e__/mcp/mcp-ui-rendering.e2e.test.tsx +0 -185
  19. package/src/__e2e__/repl.e2e.test.tsx +0 -78
  20. package/src/__e2e__/shell-compatibility.e2e.test.tsx +0 -76
  21. package/src/__e2e__/theme-mcp.e2e.test.tsx +0 -98
  22. package/src/__e2e__/tool-permissions.e2e.test.tsx +0 -66
  23. package/src/args.ts +0 -22
  24. package/src/components/__tests__/react-compiler.test.tsx +0 -78
  25. package/src/components/__tests__/status-indicator.test.tsx +0 -403
  26. package/src/components/composer/__tests__/bash-runner.test.tsx +0 -263
  27. package/src/components/composer/agent-mode-indicator.tsx +0 -63
  28. package/src/components/composer/bash-runner.tsx +0 -54
  29. package/src/components/composer/commands/default-commands.tsx +0 -615
  30. package/src/components/composer/commands/handler.tsx +0 -59
  31. package/src/components/composer/commands/picker.tsx +0 -273
  32. package/src/components/composer/commands/registry.ts +0 -233
  33. package/src/components/composer/commands/types.ts +0 -33
  34. package/src/components/composer/context.tsx +0 -88
  35. package/src/components/composer/file-mention-picker.tsx +0 -83
  36. package/src/components/composer/help.tsx +0 -44
  37. package/src/components/composer/index.tsx +0 -1007
  38. package/src/components/composer/mentions.ts +0 -57
  39. package/src/components/composer/message-queue.tsx +0 -70
  40. package/src/components/composer/mode-panel.tsx +0 -35
  41. package/src/components/composer/modes/__tests__/bash-handler.test.tsx +0 -755
  42. package/src/components/composer/modes/__tests__/bash-renderer.test.tsx +0 -1108
  43. package/src/components/composer/modes/bash-handler.tsx +0 -132
  44. package/src/components/composer/modes/bash-renderer.tsx +0 -175
  45. package/src/components/composer/modes/default-handlers.tsx +0 -33
  46. package/src/components/composer/modes/index.ts +0 -41
  47. package/src/components/composer/modes/types.ts +0 -21
  48. package/src/components/composer/persistent-shell.ts +0 -283
  49. package/src/components/composer/process.ts +0 -65
  50. package/src/components/composer/types.ts +0 -9
  51. package/src/components/composer/use-mention-search.ts +0 -68
  52. package/src/components/error-boundry.tsx +0 -60
  53. package/src/components/exit-message.tsx +0 -29
  54. package/src/components/expanded-view.tsx +0 -74
  55. package/src/components/file-completion.tsx +0 -127
  56. package/src/components/header.tsx +0 -47
  57. package/src/components/logo.tsx +0 -37
  58. package/src/components/segments.tsx +0 -356
  59. package/src/components/status-indicator.tsx +0 -306
  60. package/src/components/tool-group-summary.tsx +0 -263
  61. package/src/components/tool-permissions/ask-user-question-permission-ui.tsx +0 -319
  62. package/src/components/tool-permissions/diff-preview.tsx +0 -359
  63. package/src/components/tool-permissions/index.ts +0 -5
  64. package/src/components/tool-permissions/permission-options.tsx +0 -401
  65. package/src/components/tool-permissions/permission-preview-header.tsx +0 -57
  66. package/src/components/tool-permissions/tool-permission-ui.tsx +0 -420
  67. package/src/components/tools/agent/ask-user-question.tsx +0 -107
  68. package/src/components/tools/agent/enter-plan-mode.tsx +0 -55
  69. package/src/components/tools/agent/exit-plan-mode.tsx +0 -83
  70. package/src/components/tools/agent/handoff-to-main.tsx +0 -27
  71. package/src/components/tools/agent/subagent.tsx +0 -37
  72. package/src/components/tools/agent/todo-write.tsx +0 -104
  73. package/src/components/tools/browser/close-tab.tsx +0 -58
  74. package/src/components/tools/browser/computer.tsx +0 -70
  75. package/src/components/tools/browser/get-interactive-elements.tsx +0 -54
  76. package/src/components/tools/browser/get-tab-content.tsx +0 -51
  77. package/src/components/tools/browser/navigate-to.tsx +0 -59
  78. package/src/components/tools/browser/new-tab.tsx +0 -60
  79. package/src/components/tools/browser/perform-action.tsx +0 -63
  80. package/src/components/tools/browser/refresh-tab.tsx +0 -43
  81. package/src/components/tools/browser/switch-tab.tsx +0 -58
  82. package/src/components/tools/filesystem/delete-file.tsx +0 -104
  83. package/src/components/tools/filesystem/edit.tsx +0 -220
  84. package/src/components/tools/filesystem/list-dir.tsx +0 -78
  85. package/src/components/tools/filesystem/read-file.tsx +0 -180
  86. package/src/components/tools/filesystem/upload-image.tsx +0 -76
  87. package/src/components/tools/ide/ide-diagnostics.tsx +0 -62
  88. package/src/components/tools/index.ts +0 -91
  89. package/src/components/tools/mcp/mcp-tool.tsx +0 -158
  90. package/src/components/tools/search/fetch-url.tsx +0 -73
  91. package/src/components/tools/search/file-search.tsx +0 -78
  92. package/src/components/tools/search/grep.tsx +0 -90
  93. package/src/components/tools/search/semantic-search.tsx +0 -66
  94. package/src/components/tools/search/web-search.tsx +0 -71
  95. package/src/components/tools/shared/index.tsx +0 -48
  96. package/src/components/tools/shared/zod-coercion.ts +0 -35
  97. package/src/components/tools/terminal/bash-tool-output.tsx +0 -188
  98. package/src/components/tools/terminal/get-terminal-output.tsx +0 -91
  99. package/src/components/tools/terminal/run-in-terminal.tsx +0 -131
  100. package/src/components/tools/types.ts +0 -16
  101. package/src/components/tools.tsx +0 -68
  102. package/src/components/ui/__tests__/divider.test.tsx +0 -61
  103. package/src/components/ui/__tests__/gradient.test.tsx +0 -125
  104. package/src/components/ui/__tests__/input.test.tsx +0 -166
  105. package/src/components/ui/__tests__/select.test.tsx +0 -273
  106. package/src/components/ui/__tests__/shimmer.test.tsx +0 -99
  107. package/src/components/ui/blinking-indicator.tsx +0 -27
  108. package/src/components/ui/divider.tsx +0 -162
  109. package/src/components/ui/gradient.tsx +0 -56
  110. package/src/components/ui/input.tsx +0 -228
  111. package/src/components/ui/select.tsx +0 -151
  112. package/src/components/ui/shimmer.tsx +0 -76
  113. package/src/context/agent-mode.tsx +0 -95
  114. package/src/context/extension-file.tsx +0 -136
  115. package/src/context/network-activity.tsx +0 -45
  116. package/src/context/notification.tsx +0 -62
  117. package/src/context/shell-size.tsx +0 -49
  118. package/src/context/shell-title.tsx +0 -38
  119. package/src/entrypoints/print-mode.ts +0 -312
  120. package/src/entrypoints/repl.tsx +0 -389
  121. package/src/hooks/use-agent.ts +0 -15
  122. package/src/hooks/use-api-client.ts +0 -1
  123. package/src/hooks/use-available-height.ts +0 -8
  124. package/src/hooks/use-cleanup.ts +0 -29
  125. package/src/hooks/use-interrupt-manager.ts +0 -242
  126. package/src/hooks/use-models.ts +0 -22
  127. package/src/index.ts +0 -217
  128. package/src/lib/__tests__/ansi.test.ts +0 -255
  129. package/src/lib/__tests__/cli.test.ts +0 -122
  130. package/src/lib/__tests__/commands.test.ts +0 -325
  131. package/src/lib/__tests__/constants.test.ts +0 -15
  132. package/src/lib/__tests__/focusables.test.ts +0 -25
  133. package/src/lib/__tests__/fs.test.ts +0 -231
  134. package/src/lib/__tests__/markdown.test.tsx +0 -348
  135. package/src/lib/__tests__/mcpCommandHandler.test.ts +0 -173
  136. package/src/lib/__tests__/mcpManagement.test.ts +0 -38
  137. package/src/lib/__tests__/path-paste.test.ts +0 -144
  138. package/src/lib/__tests__/path.test.ts +0 -300
  139. package/src/lib/__tests__/queries.test.ts +0 -39
  140. package/src/lib/__tests__/standaloneMcpService.test.ts +0 -71
  141. package/src/lib/__tests__/text-buffer.test.ts +0 -328
  142. package/src/lib/__tests__/text-utils.test.ts +0 -32
  143. package/src/lib/__tests__/timing.test.ts +0 -78
  144. package/src/lib/__tests__/utils.test.ts +0 -238
  145. package/src/lib/__tests__/vim-buffer-actions.test.ts +0 -154
  146. package/src/lib/ansi.ts +0 -150
  147. package/src/lib/cli-push-server.ts +0 -112
  148. package/src/lib/cli.ts +0 -44
  149. package/src/lib/clipboard.ts +0 -226
  150. package/src/lib/command-utils.ts +0 -93
  151. package/src/lib/commands.ts +0 -270
  152. package/src/lib/constants.ts +0 -3
  153. package/src/lib/extension-connection.ts +0 -181
  154. package/src/lib/focusables.ts +0 -7
  155. package/src/lib/fs.ts +0 -533
  156. package/src/lib/markdown/code-block.tsx +0 -63
  157. package/src/lib/markdown/index.ts +0 -4
  158. package/src/lib/markdown/link.tsx +0 -19
  159. package/src/lib/markdown/markdown.tsx +0 -372
  160. package/src/lib/markdown/types.ts +0 -15
  161. package/src/lib/mcpCommandHandler.ts +0 -121
  162. package/src/lib/mcpManagement.ts +0 -44
  163. package/src/lib/path-paste.ts +0 -185
  164. package/src/lib/path.ts +0 -179
  165. package/src/lib/queries.ts +0 -15
  166. package/src/lib/standaloneMcpService.ts +0 -688
  167. package/src/lib/status-utils.ts +0 -237
  168. package/src/lib/test-utils.tsx +0 -72
  169. package/src/lib/text-buffer.ts +0 -2415
  170. package/src/lib/text-utils.ts +0 -272
  171. package/src/lib/timing.ts +0 -63
  172. package/src/lib/types.ts +0 -295
  173. package/src/lib/utils.ts +0 -182
  174. package/src/lib/vim-buffer-actions.ts +0 -732
  175. package/src/providers/agent.tsx +0 -1063
  176. package/src/providers/api-client.tsx +0 -43
  177. package/src/services/logger.ts +0 -85
  178. package/src/terminal/detection.ts +0 -187
  179. package/src/terminal/exit.ts +0 -279
  180. package/src/terminal/notification.ts +0 -83
  181. package/src/terminal/progress.ts +0 -201
  182. package/src/terminal/setup.ts +0 -797
  183. package/src/terminal/types.ts +0 -51
  184. package/src/theme/context.tsx +0 -57
  185. package/src/theme/index.ts +0 -4
  186. package/src/theme/themed.tsx +0 -35
  187. package/src/theme/themes.json +0 -546
  188. package/src/theme/types.ts +0 -110
  189. package/src/tools/types.ts +0 -59
  190. package/src/tools/utils/__tests__/zod-coercion.test.ts +0 -33
  191. package/src/tools/utils/tool-ui-components.tsx +0 -649
  192. package/src/tools/utils/zod-coercion.ts +0 -35
  193. package/tsconfig.json +0 -16
  194. package/tsconfig.node.json +0 -29
  195. package/tsconfig.test.json +0 -27
  196. package/tsdown.config.ts +0 -17
  197. package/vitest.config.ts +0 -76
@@ -1,402 +0,0 @@
1
- import * as fs from "fs";
2
- import * as path from "path";
3
- import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
4
-
5
- const sleep = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms));
6
- const RESULTS_FILE = path.join(process.cwd(), "llm-models-test-results.txt");
7
-
8
- const API_BASE_URL = (() => {
9
- const env = process.env.CODELLM_ENV || "prod";
10
- const urls: Record<string, string> = {
11
- prod: "https://apps.abacus.ai/api/v0",
12
- preprod: "https://preprod-apps.abacus.ai/api/v0",
13
- staging: "https://staging-apps.abacus.ai/api/v0",
14
- "staging-latest": "https://staging-latest-apps.abacus.ai/api/v0",
15
- };
16
- const url = urls[env] || urls.prod;
17
- console.log(`🌐 Using API environment: ${env} (${url})`);
18
- return url;
19
- })();
20
-
21
- async function createConversationWithRealAPI(
22
- apiKey: string,
23
- ): Promise<{ conversationId: string; response: any }> {
24
- const response = await fetch(`${API_BASE_URL}/_createCodellmDeploymentConversation`, {
25
- method: "POST",
26
- headers: {
27
- "Content-Type": "application/json",
28
- APIKEY: apiKey,
29
- },
30
- body: JSON.stringify({
31
- deploymentType: "CODE_LLM_AGENT",
32
- }),
33
- });
34
-
35
- const data = (await response.json()) as { result?: { deploymentConversationId: string } };
36
- console.log("📝 Real API conversation response:", JSON.stringify(data, null, 2));
37
- return {
38
- conversationId: data.result?.deploymentConversationId || "test-id",
39
- response: data,
40
- };
41
- }
42
-
43
- async function fetchAvailableLLMs(
44
- apiKey: string,
45
- ): Promise<{ llmName: string; displayName: string }[]> {
46
- const response = await fetch(`${API_BASE_URL}/_listCodeBots`, {
47
- method: "POST",
48
- headers: {
49
- "Content-Type": "application/json",
50
- APIKEY: apiKey,
51
- },
52
- body: JSON.stringify({
53
- supportsThoughtStreaming: true,
54
- }),
55
- });
56
-
57
- const data = (await response.json()) as { result?: { llmName: string; displayName: string }[] };
58
- const llms = (data.result || []).map((llm: any) => ({
59
- llmName: llm.llmName,
60
- displayName: llm.displayName || llm.llmName,
61
- }));
62
- console.log(`📋 Found ${llms.length} available LLMs`);
63
- return llms;
64
- }
65
-
66
- async function sendMessageWithRealAPI(
67
- apiKey: string,
68
- conversationId: string,
69
- message: string,
70
- llmName: string = "",
71
- ): Promise<{ fullResponse: string; hasToolUse: boolean; toolsUsed: string[]; hasError: boolean }> {
72
- const url = `${API_BASE_URL}/_codeLLMSendAgentMessageSSE`;
73
-
74
- // Use proper tool object format as expected by the API
75
- const supportedTools = [
76
- {
77
- name: "list_dir",
78
- arguments: ["relativeWorkspacePath", "explanation"],
79
- description: "List files in a directory",
80
- },
81
- {
82
- name: "read_file",
83
- arguments: [
84
- "targetFile",
85
- "startLineOneIndexed",
86
- "endLineOneIndexed",
87
- "shouldReadEntireFile",
88
- "explanation",
89
- ],
90
- description: "Read a file",
91
- },
92
- {
93
- name: "edit",
94
- arguments: [
95
- "targetFile",
96
- "codeEdit",
97
- "overwriteFile",
98
- "instructions",
99
- "startLine",
100
- "endLine",
101
- ],
102
- description: "Create or edit a file",
103
- },
104
- { name: "delete_file", arguments: ["targetFile", "explanation"], description: "Delete a file" },
105
- {
106
- name: "grep",
107
- arguments: [
108
- "pattern",
109
- "path",
110
- "glob",
111
- "output_mode",
112
- "-B",
113
- "-A",
114
- "-C",
115
- "-n",
116
- "-i",
117
- "type",
118
- "head_limit",
119
- "offset",
120
- "multiline",
121
- ],
122
- description: "A powerful search tool built on ripgrep",
123
- },
124
- { name: "file_search", arguments: ["explanation", "query"], description: "Search for a file" },
125
- {
126
- name: "semantic_search_server",
127
- arguments: ["command", "explanation"],
128
- description: "Semantic search",
129
- },
130
- {
131
- name: "run_in_terminal",
132
- arguments: ["command", "explanation", "isBackground", "id"],
133
- description: "Run a command in terminal",
134
- },
135
- { name: "get_terminal_output", arguments: ["id"], description: "Get terminal output" },
136
- { name: "fetch_url", arguments: ["url"], description: "Fetch content from a URL" },
137
- { name: "web_search", arguments: ["query", "explanation"], description: "Search the web" },
138
- {
139
- name: "upload_image",
140
- arguments: ["targetFile", "explanation"],
141
- description: "Upload an image",
142
- },
143
- { name: "todo_write", arguments: ["todos"], description: "Create task list" },
144
- { name: "enter_plan_mode", arguments: [], description: "Enter plan mode" },
145
- { name: "exit_plan_mode", arguments: ["planFilePath"], description: "Exit plan mode" },
146
- { name: "subagent", arguments: ["agent", "task"], description: "Switch to subagent" },
147
- ];
148
-
149
- const body = {
150
- llmName: llmName,
151
- supportedTools: supportedTools,
152
- mcpTools: [],
153
- message: message,
154
- deploymentConversationId: conversationId,
155
- folderPaths: JSON.stringify([]),
156
- rules: JSON.stringify([]),
157
- requestId: `test-${Date.now()}`,
158
- docInfos: [],
159
- userChanges: JSON.stringify({}),
160
- problemCount: 0,
161
- runningTerminals: JSON.stringify([]),
162
- supportsV2: true,
163
- supportsThoughtStreaming: true,
164
- supportsAbsolutePath: false,
165
- isPrintMode: false,
166
- supportsParallelToolUse: true,
167
- };
168
-
169
- const response = await fetch(url, {
170
- method: "POST",
171
- headers: {
172
- "Content-Type": "application/json",
173
- APIKEY: apiKey,
174
- "REAI-UI": "1",
175
- Accept: "text/event-stream",
176
- },
177
- body: JSON.stringify(body),
178
- });
179
-
180
- let fullResponse = "";
181
- let hasToolUse = false;
182
- let hasError = false;
183
- const toolsUsed: string[] = [];
184
-
185
- const toolPatterns = [
186
- "toolUseRequest",
187
- '"name":"edit"',
188
- '"name":"read_file"',
189
- '"name":"list_dir"',
190
- '"name":"delete_file"',
191
- '"name":"grep"',
192
- '"name":"file_search"',
193
- '"name":"run_in_terminal"',
194
- '"name":"enter_plan_mode"',
195
- '"name":"exit_plan_mode"',
196
- ];
197
-
198
- if (!response.ok) {
199
- const errorText = await response.text();
200
- fullResponse = errorText;
201
- console.log(`📡 API Error Response: ${errorText}`);
202
- hasError = true;
203
- } else {
204
- const reader = response.body?.getReader();
205
- const decoder = new TextDecoder();
206
-
207
- if (reader) {
208
- try {
209
- while (true) {
210
- const { done, value } = await reader.read();
211
- if (done) {
212
- break;
213
- }
214
-
215
- const chunk = decoder.decode(value, { stream: true });
216
- fullResponse += chunk;
217
-
218
- for (const pattern of toolPatterns) {
219
- if (chunk.includes(pattern)) {
220
- hasToolUse = true;
221
- if (!toolsUsed.includes(pattern)) {
222
- toolsUsed.push(pattern);
223
- }
224
- }
225
- }
226
-
227
- if (chunk.toLowerCase().includes("internal server error")) {
228
- hasError = true;
229
- }
230
-
231
- // Check for success: false in JSON responses
232
- if (chunk.includes('"success": false') || chunk.includes('"success":false')) {
233
- hasError = true;
234
- }
235
- }
236
- } finally {
237
- reader.releaseLock();
238
- }
239
- }
240
- }
241
-
242
- console.log(`📡 Real API response (first 500 chars): ${fullResponse.substring(0, 500)}`);
243
- console.log(`🔧 Tools detected: ${toolsUsed.join(", ") || "none"}`);
244
- return { fullResponse, hasToolUse, toolsUsed, hasError };
245
- }
246
-
247
- describe.concurrent("LLM Models E2E Tests", () => {
248
- const apiKey = process.env.ABACUS_API_KEY;
249
- if (!apiKey) {
250
- throw new Error("ABACUS_API_KEY is not set");
251
- }
252
-
253
- beforeEach(() => {
254
- vi.spyOn(process, "exit").mockImplementation((() => {}) as unknown as typeof process.exit);
255
- });
256
-
257
- afterEach(() => {
258
- vi.restoreAllMocks();
259
- });
260
-
261
- it("should test ALL available LLMs for edit tool usage", async () => {
262
- console.log(`\n🔑 Using API key: ${apiKey.substring(0, 8)}...`);
263
- console.log("\n🔑 Using REAL API to test ALL available LLMs...\n");
264
-
265
- const availableLLMs = await fetchAvailableLLMs(apiKey);
266
-
267
- if (availableLLMs.length === 0) {
268
- console.error("❌ No LLMs available from API");
269
- return;
270
- }
271
-
272
- console.log(`\n📋 Testing ${availableLLMs.length} LLMs:\n`);
273
- availableLLMs.forEach((llm, i) => {
274
- console.log(` ${i + 1}. ${llm.displayName} (${llm.llmName})`);
275
- });
276
- console.log("");
277
-
278
- const results: {
279
- llmName: string;
280
- displayName: string;
281
- hasToolUse: boolean;
282
- toolsUsed: string[];
283
- error?: string;
284
- }[] = [];
285
-
286
- for (let i = 0; i < availableLLMs.length; i++) {
287
- const llm = availableLLMs[i];
288
- console.log(`\n--- Testing LLM ${i + 1}/${availableLLMs.length}: ${llm.displayName} ---`);
289
-
290
- const { conversationId, response } = await createConversationWithRealAPI(apiKey);
291
-
292
- if (!response.success) {
293
- console.error(`❌ Failed to create conversation for ${llm.displayName}`);
294
- results.push({
295
- ...llm,
296
- hasToolUse: false,
297
- toolsUsed: [],
298
- error: "Failed to create conversation",
299
- });
300
- continue;
301
- }
302
-
303
- console.log(`✅ Created conversation: ${conversationId}`);
304
-
305
- try {
306
- const testMessage = `Create a simple Python file called hello_${llm.llmName.replace(/[^a-z0-9]/g, "_")}.py that prints "Hello from ${llm.displayName}!"`;
307
-
308
- const { fullResponse, hasToolUse, toolsUsed, hasError } = await sendMessageWithRealAPI(
309
- apiKey,
310
- conversationId,
311
- testMessage,
312
- llm.llmName,
313
- );
314
-
315
- if (hasError) {
316
- console.error(`❌ ${llm.displayName} - Error detected`);
317
- results.push({
318
- ...llm,
319
- hasToolUse: false,
320
- toolsUsed: [],
321
- error: "Internal server error",
322
- });
323
- } else {
324
- results.push({ ...llm, hasToolUse, toolsUsed });
325
-
326
- if (hasToolUse) {
327
- console.log(`✅ ${llm.displayName} - Tool usage detected: ${toolsUsed.join(", ")}`);
328
- } else {
329
- console.error(`❌ ${llm.displayName} - No tool usage detected`);
330
- console.error(` Response: ${fullResponse.substring(0, 500)}`);
331
- }
332
- }
333
- } catch (error) {
334
- console.error(
335
- `❌ Error testing ${llm.displayName}: ${error instanceof Error ? error.message : String(error)}`,
336
- );
337
- results.push({ ...llm, hasToolUse: false, toolsUsed: [], error: String(error) });
338
- }
339
-
340
- await sleep(1000);
341
- }
342
-
343
- console.log("\n\n📊 SUMMARY - LLM Tool Usage Test Results:\n");
344
- console.log("=".repeat(60));
345
-
346
- const withToolUse = results.filter((r) => r.hasToolUse);
347
- const withoutToolUse = results.filter((r) => !r.hasToolUse && !r.error);
348
- const withErrors = results.filter((r) => r.error);
349
-
350
- console.log("\n✅ LLMs WITH tool usage (GOOD):");
351
- withToolUse.forEach((r) =>
352
- console.log(` - ${r.displayName} (${r.llmName}): ${r.toolsUsed.join(", ")}`),
353
- );
354
-
355
- console.log("\n❌ LLMs WITHOUT tool usage (BAD):");
356
- withoutToolUse.forEach((r) => console.log(` - ${r.displayName} (${r.llmName})`));
357
-
358
- if (withErrors.length > 0) {
359
- console.log("\n⚠️ LLMs with errors:");
360
- withErrors.forEach((r) => console.log(` - ${r.displayName}: ${r.error}`));
361
- }
362
-
363
- console.log("\n" + "=".repeat(60));
364
- console.log(`Total: ${results.length} LLMs tested`);
365
- console.log(` ✅ With tool usage: ${withToolUse.length}`);
366
- console.log(` ❌ No tool usage: ${withoutToolUse.length}`);
367
- console.log(` ⚠️ Errors: ${withErrors.length}`);
368
-
369
- // Write results to file for debugging
370
- const fileContent = [
371
- "📊 LLM Tool Usage Test Results",
372
- "=".repeat(60),
373
- "",
374
- "✅ LLMs WITH tool usage (GOOD):",
375
- ...withToolUse.map((r) => ` - ${r.displayName} (${r.llmName}): ${r.toolsUsed.join(", ")}`),
376
- "",
377
- "❌ LLMs WITHOUT tool usage (BAD):",
378
- ...withoutToolUse.map((r) => ` - ${r.displayName} (${r.llmName})`),
379
- "",
380
- "⚠️ LLMs with errors:",
381
- ...withErrors.map((r) => ` - ${r.displayName}: ${r.error}`),
382
- "",
383
- "=".repeat(60),
384
- `Total: ${results.length} LLMs tested`,
385
- ` ✅ With tool usage: ${withToolUse.length}`,
386
- ` ❌ No tool usage: ${withoutToolUse.length}`,
387
- ` ⚠️ Errors: ${withErrors.length}`,
388
- ].join("\n");
389
- fs.writeFileSync(RESULTS_FILE, fileContent);
390
- console.log(`\n📄 Results written to: ${RESULTS_FILE}`);
391
-
392
- // Test fails if any LLM returns Internal Server Error or other errors
393
- expect(withErrors.length).toBe(0);
394
- // Test fails if any LLM doesn't show tool usage
395
- expect(withoutToolUse.length).toBe(0);
396
- // Sanity check: all LLMs should show tool usage
397
- expect(withToolUse.length).toBe(results.length);
398
- console.log(
399
- `\n✅ Test completed - ALL ${withToolUse.length} LLMs showed tool usage as expected\n`,
400
- );
401
- }, 600000);
402
- });
@@ -1,71 +0,0 @@
1
- import type { SSEEvent } from "@codellm/api";
2
-
3
- import { describe, it, expect, vi } from "vitest";
4
-
5
- import { createMockAbacusClient } from "../helpers/test-helpers.js";
6
-
7
- /**
8
- * Integration Tests - These test the full streaming flow by examining how
9
- * the async generator events update the AgentProvider state.
10
- *
11
- * Note: Full app integration tests are complex due to provider dependencies.
12
- * These tests verify the streaming mechanism in isolation.
13
- */
14
- describe.concurrent("MCP Integration - Streaming Flow Tests", () => {
15
- /**
16
- * Test that verifies the AgentRunnerCore callback mechanism works correctly.
17
- * We import the actual callback handlers and verify they update status.
18
- */
19
- it("onToolExecutionStart callback sets ExecutingTool status", async () => {
20
- // This test verifies that the status enum values exist (basic sanity check)
21
- const { AgentStatus } = await import("../../providers/agent.tsx");
22
-
23
- expect(AgentStatus.Idle).toBe("idle");
24
- expect(AgentStatus.Submitted).toBe("submitted");
25
- expect(AgentStatus.Streaming).toBe("streaming");
26
- expect(AgentStatus.ExecutingTool).toBe("executing-tool");
27
-
28
- console.log("[Integration Test] AgentStatus enum values verified");
29
- console.log("[Integration Test] Callback mechanism verified ✓");
30
- });
31
-
32
- /**
33
- * Test that verifies the mock API client correctly simulates tool requests
34
- * via the async generator streaming pattern.
35
- */
36
- it("Mock API client simulates tool request flow correctly via async generator", async () => {
37
- const mockClient = createMockAbacusClient();
38
-
39
- const events: SSEEvent[] = [];
40
-
41
- console.log("[Mock Test] Starting streaming call simulation...");
42
- const stream = mockClient.streamAgentMessage({
43
- deploymentConversationId: "test-conversation-id",
44
- message: "Create a hello world file",
45
- supportedTools: [],
46
- });
47
-
48
- for await (const event of stream) {
49
- events.push(event);
50
- console.log("[Mock Test] Event received:", JSON.stringify(event).slice(0, 100));
51
- }
52
-
53
- console.log("[Mock Test] Streaming completed");
54
- console.log(`[Mock Test] Events received: ${events.length}`);
55
-
56
- // Verify the mock sent the expected events
57
- expect(events.length).toBe(2);
58
-
59
- // First event should be text segment
60
- expect(events[0]?.["segment"]).toBeDefined();
61
- console.log("[Mock Test] ✓ Text segment event received");
62
-
63
- // Second event should be tool request
64
- expect(events[1]?.["toolUseRequest"]).toBeDefined();
65
- const toolReq = events[1]?.["toolUseRequest"] as { name: string };
66
- expect(toolReq.name).toBe("edit");
67
- console.log("[Mock Test] ✓ Tool request event received");
68
-
69
- console.log("[Mock Test] Mock API streaming flow verified ✓");
70
- }, 10000);
71
- });
@@ -1,167 +0,0 @@
1
- import { describe, it, expect } from "vitest";
2
-
3
- const sleep = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms));
4
-
5
- /**
6
- * Full App UI Rendering Tests
7
- * These tests render the app with proper provider wrapper and verify UI updates.
8
- */
9
- describe.sequential("MCP Full App UI Tests", () => {
10
- /**
11
- * Test that renders StatusIndicator inside the full provider stack
12
- * and verifies it updates correctly when status changes.
13
- */
14
- it("StatusIndicator in full provider stack shows correct UI", async () => {
15
- const { render, cleanup } = await import("../../lib/test-utils.tsx");
16
- const { createTestWrapper } = await import("../helpers/test-helpers.ts");
17
- const { StatusIndicator } = await import("../../components/status-indicator.tsx");
18
- const { AgentStatus } = await import("../../providers/agent.tsx");
19
- const React = await import("react");
20
- const stripAnsi = (await import("strip-ansi")).default;
21
-
22
- const TestWrapper = createTestWrapper();
23
-
24
- console.log("[Full Stack Test] Rendering StatusIndicator with full provider stack...");
25
-
26
- try {
27
- // Render StatusIndicator inside the full provider wrapper
28
- const instance = render(
29
- React.createElement(
30
- TestWrapper,
31
- null,
32
- React.createElement(StatusIndicator, { status: AgentStatus.ExecutingTool }),
33
- ),
34
- false,
35
- );
36
-
37
- await sleep(500);
38
-
39
- const frame = instance.lastFrame() ?? "";
40
- const plainFrame = stripAnsi(frame);
41
-
42
- console.log("[Full Stack Test] Rendered frame:", plainFrame);
43
-
44
- // Check if the indicator is visible
45
- const hasIndicator = plainFrame.includes("...");
46
-
47
- if (hasIndicator) {
48
- console.log("[Full Stack Test] ✓ StatusIndicator rendered correctly with full providers");
49
- expect(plainFrame).toContain("...");
50
- } else {
51
- console.log("[Full Stack Test] Frame content:", plainFrame.slice(0, 200));
52
- }
53
-
54
- // Test status transition within provider stack
55
- console.log("[Full Stack Test] Testing status transitions...");
56
-
57
- instance.rerender(
58
- React.createElement(
59
- TestWrapper,
60
- null,
61
- React.createElement(StatusIndicator, { status: AgentStatus.Idle }),
62
- ),
63
- );
64
-
65
- const idleFrame = stripAnsi(instance.lastFrame() ?? "");
66
- const idleHasIndicator = idleFrame.includes("...");
67
-
68
- console.log("[Full Stack Test] Idle status - indicator visible:", idleHasIndicator);
69
-
70
- // Idle should hide the indicator
71
- if (!idleHasIndicator || idleFrame.trim() === "") {
72
- console.log("[Full Stack Test] ✓ Idle correctly hides indicator");
73
- }
74
-
75
- console.log("[Full Stack Test] Full provider stack test completed ✓");
76
- } finally {
77
- cleanup();
78
- }
79
- }, 10000);
80
-
81
- /**
82
- * Test that simulates the full message → tool execution → UI update flow
83
- * using mocked services within the provider stack.
84
- */
85
- it("Full provider stack handles tool execution status flow", async () => {
86
- const { render, cleanup } = await import("../../lib/test-utils.tsx");
87
- const { createTestWrapper } = await import("../helpers/test-helpers.ts");
88
- const { StatusIndicator } = await import("../../components/status-indicator.tsx");
89
- const { AgentStatus } = await import("../../providers/agent.tsx");
90
- const React = await import("react");
91
- const stripAnsi = (await import("strip-ansi")).default;
92
-
93
- const TestWrapper = createTestWrapper();
94
-
95
- console.log("[Flow Test] Simulating MCP tool execution flow with full providers...");
96
-
97
- // Simulate the complete status flow
98
- const statusFlow = [
99
- { status: AgentStatus.Idle, name: "Initial (Idle)", shouldShow: false },
100
- { status: AgentStatus.Submitted, name: "Message Submitted", shouldShow: true },
101
- { status: AgentStatus.Streaming, name: "Streaming Response", shouldShow: true },
102
- { status: AgentStatus.ExecutingTool, name: "MCP Tool Executing", shouldShow: true },
103
- { status: AgentStatus.Streaming, name: "Streaming (after tool)", shouldShow: true },
104
- { status: AgentStatus.Idle, name: "Complete (Idle)", shouldShow: false },
105
- ];
106
-
107
- const results: { name: string; visible: boolean; expected: boolean; match: boolean }[] = [];
108
-
109
- try {
110
- const instance = render(
111
- React.createElement(
112
- TestWrapper,
113
- null,
114
- React.createElement(StatusIndicator, { status: AgentStatus.Idle }),
115
- ),
116
- false,
117
- );
118
-
119
- await sleep(300);
120
-
121
- for (const { status, name, shouldShow } of statusFlow) {
122
- instance.rerender(
123
- React.createElement(TestWrapper, null, React.createElement(StatusIndicator, { status })),
124
- );
125
-
126
- // Small delay to let React process
127
- await sleep(50);
128
-
129
- const frame = instance.lastFrame() ?? "";
130
- const plainFrame = stripAnsi(frame);
131
- const isVisible = plainFrame.includes("...") || plainFrame.includes("Resuming");
132
-
133
- results.push({
134
- name,
135
- visible: isVisible,
136
- expected: shouldShow,
137
- match: isVisible === shouldShow,
138
- });
139
- }
140
-
141
- console.log("[Flow Test] Status flow results:");
142
- results.forEach((r) => {
143
- const icon = r.match ? "✓" : "✗";
144
- console.log(
145
- ` ${icon} ${r.name}: ${r.visible ? "VISIBLE" : "hidden"} (expected: ${r.expected ? "VISIBLE" : "hidden"})`,
146
- );
147
- });
148
-
149
- // Verify all states matched expectations
150
- const allMatch = results.every((r) => r.match);
151
- console.log(`[Flow Test] All states matched: ${allMatch ? "✓ YES" : "✗ NO"}`);
152
-
153
- // The critical assertion: ExecutingTool MUST be visible
154
- const executingToolResult = results.find((r) => r.name === "MCP Tool Executing");
155
- expect(executingToolResult?.visible).toBe(true);
156
-
157
- // All results should match expectations
158
- for (const result of results) {
159
- expect(result.match).toBe(true);
160
- }
161
-
162
- console.log("[Flow Test] Full provider stack flow test completed ✓");
163
- } finally {
164
- cleanup();
165
- }
166
- }, 10000);
167
- });