@defai.digital/ax-cli 3.8.1 → 3.8.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -164,6 +164,29 @@ AX CLI uses **industry-standard max tokens** based on research of leading AI cod
 
 [View all features →](docs/features.md)
 
+## 🎉 What's New in v3.8.2
+
+**Deep Bug Fixes** - Comprehensive stability improvements for enhanced input handling:
+
+### 🐛 Bug Fixes
+
+- **Fixed external editor paste buffer race condition** - The external editor callback now properly clears pending paste accumulation to prevent content overwrite
+- **Fixed `pastedBlocks` stale closure in submit** - Added `pastedBlocksRef` to ensure `expandPlaceholdersForSubmit` always has current paste block data during rapid paste + submit sequences
+- **Synchronized paste block refs** - All `setPastedBlocks` calls now sync `pastedBlocksRef` immediately to prevent race conditions
+- **Fixed async iterator cleanup** - Async iterators in the LLM agent are now properly closed
+- **Improved cache eviction** - Better memory management for the token counter cache
+- **Fixed state validation and race conditions** - Improved state handling in MCP reconnection logic
+- **Fixed event listener error handling** - Better error boundaries for edge cases
+
+### ✅ Quality
+
+- All tests passing
+- 98%+ test coverage maintained
+- Zero breaking changes
+- Improved stability for paste operations
+
+---
+
 ## 🎉 What's New in v3.8.0
 
 **UI/UX Refinements & Bug Fixes** - Polish and stability improvements:
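The `pastedBlocksRef` bullets above describe a classic React stale-closure repair. As a rough illustration — the hook, state shape, and placeholder format below are hypothetical, not the package's actual code — the pattern mirrors state into a ref that is written synchronously, so callbacks created on earlier renders still read current data:

```typescript
import { useCallback, useRef, useState } from 'react';

interface PastedBlock { id: string; content: string; }

// Hypothetical hook sketching the ref-mirroring pattern from the changelog.
function usePastedBlocks() {
  const [pastedBlocks, setPastedBlocks] = useState<PastedBlock[]>([]);
  // The ref always holds the latest blocks, even inside stale closures.
  const pastedBlocksRef = useRef<PastedBlock[]>([]);

  const addBlock = useCallback((block: PastedBlock) => {
    const next = [...pastedBlocksRef.current, block];
    pastedBlocksRef.current = next; // sync the ref first, before React schedules a render
    setPastedBlocks(next);
  }, []);

  // Reads the ref, so a rapid paste followed immediately by submit
  // still sees the newest block instead of a stale state snapshot.
  const expandPlaceholdersForSubmit = useCallback((input: string) => {
    return pastedBlocksRef.current.reduce(
      (text, b) => text.replaceAll(`[paste:${b.id}]`, b.content),
      input,
    );
  }, []);

  return { pastedBlocks, addBlock, expandPlaceholdersForSubmit };
}
```

The detail that matches the "Synchronized paste block refs" bullet is that the ref is updated in the same tick as the state call, before any re-render, so a submit racing a paste cannot observe the old array.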
@@ -73,6 +73,8 @@ export declare class LLMAgent extends EventEmitter {
     /** Tool approval system for VSCode integration */
     private requireToolApproval;
     private toolApprovalCallbacks;
+    /** BUG FIX: Track approval timeouts for cleanup to prevent memory leaks */
+    private toolApprovalTimeouts;
     constructor(apiKey: string, baseURL?: string, model?: string, maxToolRounds?: number);
     private initializeCheckpointManager;
     private initializeMCP;
@@ -56,6 +56,8 @@ export class LLMAgent extends EventEmitter {
     /** Tool approval system for VSCode integration */
     requireToolApproval = false;
     toolApprovalCallbacks = new Map();
+    /** BUG FIX: Track approval timeouts for cleanup to prevent memory leaks */
+    toolApprovalTimeouts = new Map();
     constructor(apiKey, baseURL, model, maxToolRounds) {
         super();
         const manager = getSettingsManager();
@@ -81,10 +83,15 @@ export class LLMAgent extends EventEmitter {
         // Load sampling configuration from settings (supports env vars, project, and user settings)
         this.samplingConfig = manager.getSamplingSettings();
         // Wire up checkpoint callback for automatic checkpoint creation
+        // CRITICAL FIX: Deep clone chatHistory to prevent race conditions
+        // The checkpoint creation is async and chatHistory can be modified during the operation
         this.textEditor.setCheckpointCallback(async (files, description) => {
+            // Create immutable snapshot of chat history at callback time
+            // This prevents inconsistencies if messages are added during checkpoint creation
+            const chatHistorySnapshot = JSON.parse(JSON.stringify(this.chatHistory));
             await this.checkpointManager.createCheckpoint({
                 files,
-                conversationState: this.chatHistory,
+                conversationState: chatHistorySnapshot,
                 description,
                 metadata: {
                     model: this.llmClient.getCurrentModel(),
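The snapshot line above is the heart of this fix: `await` suspends the callback, so a live `this.chatHistory` reference can be mutated while the checkpoint write is in flight. A minimal sketch of the hazard and the repair, with types and method names invented for illustration:

```typescript
// Assumed entry shape, not the package's real type.
type ChatEntry = { type: string; content: string };

class CheckpointExample {
  chatHistory: ChatEntry[] = [];

  async createCheckpointUnsafe(write: (state: ChatEntry[]) => Promise<void>) {
    // Passes a live reference: entries pushed while `write` is awaiting
    // leak into the checkpoint, or worse, the array changes mid-serialization.
    await write(this.chatHistory);
  }

  async createCheckpointSafe(write: (state: ChatEntry[]) => Promise<void>) {
    // The JSON round-trip produces a deep copy frozen at callback time.
    // (On Node 17+, structuredClone(this.chatHistory) is an alternative
    // that also preserves Dates and Maps, which JSON does not.)
    const snapshot: ChatEntry[] = JSON.parse(JSON.stringify(this.chatHistory));
    await write(snapshot);
  }
}
```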
@@ -116,8 +123,15 @@ export class LLMAgent extends EventEmitter {
             content: `Current working directory: ${process.cwd()}\nTimestamp: ${new Date().toISOString().split('T')[0]}`,
         });
         // NEW: Listen for context pruning to generate summaries
-        this.contextManager.on('before_prune', async (data) => {
-            await this.handleContextOverflow(data);
+        // CRITICAL FIX: Wrap async callback to prevent uncaught promise rejections
+        // Event listeners don't handle async errors automatically, so we must catch them
+        this.contextManager.on('before_prune', (data) => {
+            this.handleContextOverflow(data).catch((error) => {
+                const errorMsg = extractErrorMessage(error);
+                console.error('Error handling context overflow:', errorMsg);
+                // Emit error event for monitoring
+                this.emit('error', error);
+            });
         });
     }
     initializeCheckpointManager() {
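For context on this hunk: Node's `EventEmitter` invokes listeners synchronously and discards their return values, so a rejection from an `async` listener surfaces as a process-level `unhandledRejection` rather than an `'error'` event. A standalone sketch of the wrap-and-catch pattern — the event name mirrors the diff, but `handleOverflow` is a placeholder:

```typescript
import { EventEmitter } from 'node:events';

// Placeholder for the real async work (pruning/summarizing context); may reject.
async function handleOverflow(data: unknown): Promise<void> {
  void data;
}

const emitter = new EventEmitter();

// Anti-pattern: emitter.on('before_prune', async (d) => { await handleOverflow(d); })
// leaves rejections uncaught, because EventEmitter never awaits its listeners.

// Pattern used by the fix: keep the listener synchronous and route the
// promise's failure somewhere observable.
emitter.on('before_prune', (data) => {
  handleOverflow(data).catch((err) => {
    console.error('Error handling context overflow:', err);
    emitter.emit('error', err); // note: Node throws if no 'error' listener exists
  });
});
```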
@@ -222,6 +236,12 @@ export class LLMAgent extends EventEmitter {
     approveToolCall(toolCallId, approved) {
         const callback = this.toolApprovalCallbacks.get(toolCallId);
         if (callback) {
+            // BUG FIX: Clear the timeout when approval is received (prevents memory leak)
+            const timeout = this.toolApprovalTimeouts.get(toolCallId);
+            if (timeout) {
+                clearTimeout(timeout);
+                this.toolApprovalTimeouts.delete(toolCallId);
+            }
             callback(approved);
             this.toolApprovalCallbacks.delete(toolCallId);
         }
@@ -239,13 +259,18 @@ export class LLMAgent extends EventEmitter {
             this.emit('tool:approval_required', toolCall);
             // Store callback
             this.toolApprovalCallbacks.set(toolCall.id, resolve);
-            // Timeout after 5 minutes (auto-reject)
-            setTimeout(() => {
+            // BUG FIX: Track the timeout so it can be cleared on approval/disposal
+            // This prevents memory leaks from dangling timers
+            const timeoutId = setTimeout(() => {
+                // Clean up both the callback and timeout tracking
+                this.toolApprovalTimeouts.delete(toolCall.id);
                 if (this.toolApprovalCallbacks.has(toolCall.id)) {
                     this.toolApprovalCallbacks.delete(toolCall.id);
                     resolve(false); // Auto-reject on timeout
                 }
             }, 5 * 60 * 1000);
+            // Track the timeout for cleanup
+            this.toolApprovalTimeouts.set(toolCall.id, timeoutId);
         });
     }
     /**
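Taken together, the two approval hunks implement a small "settle or time out, then clean up both maps" protocol. A self-contained sketch of the same idea — the class and method names are invented, and the package's real API differs:

```typescript
// Every pending approval owns exactly one timer; whichever side wins
// (approval or timeout) tears down both map entries.
class ApprovalGate {
  private callbacks = new Map<string, (approved: boolean) => void>();
  private timeouts = new Map<string, NodeJS.Timeout>();

  request(id: string, timeoutMs = 5 * 60 * 1000): Promise<boolean> {
    return new Promise((resolve) => {
      this.callbacks.set(id, resolve);
      const timer = setTimeout(() => {
        this.timeouts.delete(id);
        this.callbacks.delete(id);
        resolve(false); // auto-reject on timeout
      }, timeoutMs);
      this.timeouts.set(id, timer);
    });
  }

  settle(id: string, approved: boolean): void {
    const timer = this.timeouts.get(id);
    if (timer) {
      clearTimeout(timer);
      this.timeouts.delete(id);
    }
    this.callbacks.get(id)?.(approved);
    this.callbacks.delete(id);
  }

  dispose(): void {
    // Without this, rejected-by-default timers would keep the process
    // holding references for up to the full timeout window.
    for (const timer of this.timeouts.values()) clearTimeout(timer);
    this.timeouts.clear();
    this.callbacks.clear();
  }
}
```

Here `settle` plays the role of `approveToolCall` in the diff, and `dispose` the role of the class-wide cleanup added further down.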
@@ -281,27 +306,41 @@ export class LLMAgent extends EventEmitter {
      * BUGFIX: Prevents chatHistory from growing unbounded
      */
     applyContextPruning() {
+        // Prune LLM messages if needed
         if (this.contextManager.shouldPrune(this.messages, this.tokenCounter)) {
-            // Prune LLM messages
             this.messages = this.contextManager.pruneMessages(this.messages, this.tokenCounter);
-            // Also prune chatHistory to prevent unlimited growth
-            // Keep last 200 entries which is more than enough for UI display
-            const MAX_CHAT_HISTORY_ENTRIES = 200;
-            if (this.chatHistory.length > MAX_CHAT_HISTORY_ENTRIES) {
-                const entriesToRemove = this.chatHistory.length - MAX_CHAT_HISTORY_ENTRIES;
-                this.chatHistory = this.chatHistory.slice(entriesToRemove);
-                // Update tool call index map after pruning
-                // Clear and rebuild only for remaining entries
-                this.toolCallIndexMap.clear();
-                this.chatHistory.forEach((entry, index) => {
-                    if (entry.type === "tool_call" && entry.toolCall?.id) {
-                        this.toolCallIndexMap.set(entry.toolCall.id, index);
-                    }
-                    else if (entry.type === "tool_result" && entry.toolCall?.id) {
-                        this.toolCallIndexMap.set(entry.toolCall.id, index);
-                    }
-                });
-            }
+        }
+        // CRITICAL FIX: Always check and prune chatHistory to prevent unbounded growth
+        // This must happen UNCONDITIONALLY, even if context pruning is disabled
+        // Keep last 200 entries which is more than enough for UI display
+        const MAX_CHAT_HISTORY_ENTRIES = 200;
+        if (this.chatHistory.length > MAX_CHAT_HISTORY_ENTRIES) {
+            const entriesToRemove = this.chatHistory.length - MAX_CHAT_HISTORY_ENTRIES;
+            this.chatHistory = this.chatHistory.slice(entriesToRemove);
+            // Update tool call index map after pruning
+            // Clear and rebuild only for remaining entries
+            this.toolCallIndexMap.clear();
+            this.chatHistory.forEach((entry, index) => {
+                if (entry.type === "tool_call" && entry.toolCall?.id) {
+                    this.toolCallIndexMap.set(entry.toolCall.id, index);
+                }
+                else if (entry.type === "tool_result" && entry.toolCall?.id) {
+                    this.toolCallIndexMap.set(entry.toolCall.id, index);
+                }
+            });
+        }
+        // CRITICAL FIX: Add hard limit for messages array as safety backstop
+        // In case contextManager.shouldPrune() always returns false
+        const MAX_MESSAGES = 500;
+        if (this.messages.length > MAX_MESSAGES) {
+            // Keep system message (if exists) + last N messages
+            const systemMessages = this.messages.filter(m => m.role === 'system');
+            const nonSystemMessages = this.messages.filter(m => m.role !== 'system');
+            const keepMessages = Math.min(nonSystemMessages.length, MAX_MESSAGES - systemMessages.length);
+            this.messages = [
+                ...systemMessages,
+                ...nonSystemMessages.slice(-keepMessages)
+            ];
         }
     }
     /**
@@ -323,13 +362,16 @@ export class LLMAgent extends EventEmitter {
         try {
             const args = JSON.parse(toolCall.function.arguments || '{}');
             this.toolCallArgsCache.set(toolCall.id, args);
-            // Prevent unbounded memory growth - limit cache size
+            // CRITICAL FIX: Prevent unbounded memory growth with proper cache eviction
+            // When cache exceeds limit, reduce to 80% capacity (not just remove 100 entries)
             if (this.toolCallArgsCache.size > 500) {
+                const targetSize = 400; // 80% of max capacity
+                const toRemove = this.toolCallArgsCache.size - targetSize;
                 let deleted = 0;
                 for (const key of this.toolCallArgsCache.keys()) {
                     this.toolCallArgsCache.delete(key);
                     deleted++;
-                    if (deleted >= 100)
+                    if (deleted >= toRemove)
                         break;
                 }
             }
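The eviction change amounts to draining the cache to a low watermark rather than shaving a fixed 100 entries. Sketched generically — this helper is an assumption, not the package's code — it relies on the fact that a JavaScript `Map` iterates in insertion order, so the oldest entries are dropped first:

```typescript
// Evict the oldest entries until the cache is back at `watermark` of `max`.
// Deleting keys while iterating a Map is safe in JavaScript.
function evictToWatermark<K, V>(cache: Map<K, V>, max = 500, watermark = 0.8): void {
  if (cache.size <= max) return;
  const target = Math.floor(max * watermark); // 400 for max = 500
  let toRemove = cache.size - target;
  for (const key of cache.keys()) {
    if (toRemove-- <= 0) break;
    cache.delete(key);
  }
}
```

Evicting 20% in one pass gives hysteresis: a cache bouncing around the 500 limit pays one bulk eviction instead of thrashing on every insert.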
@@ -1278,85 +1320,102 @@ export class LLMAgent extends EventEmitter {
         let accumulatedContent = "";
         let toolCallsYielded = false;
         let usageData = null;
-        for await (const chunk of stream) {
-            // Check for cancellation in the streaming loop
-            if (this.isCancelled()) {
-                yield* this.yieldCancellation();
-                // Return empty state after cancellation to avoid processing partial results
-                return { accumulated: {}, content: "", yielded: false };
-            }
-            if (!chunk.choices?.[0])
-                continue;
-            // Capture usage data from chunks (usually in the final chunk)
-            if (chunk.usage) {
-                usageData = chunk.usage;
-            }
-            // Accumulate the message using reducer
-            accumulatedMessage = this.messageReducer(accumulatedMessage, chunk);
-            // Check for tool calls - yield when we have complete tool calls with function names
-            const toolCalls = accumulatedMessage.tool_calls;
-            if (!toolCallsYielded && toolCalls && Array.isArray(toolCalls) && toolCalls.length > 0) {
-                const hasCompleteTool = toolCalls.some((tc) => tc.function?.name);
-                if (hasCompleteTool) {
+        // CRITICAL FIX: Ensure stream is properly closed on cancellation or error
+        // Without this, HTTP connections and buffers remain in memory
+        try {
+            for await (const chunk of stream) {
+                // Check for cancellation in the streaming loop
+                if (this.isCancelled()) {
+                    yield* this.yieldCancellation();
+                    // Return empty state after cancellation to avoid processing partial results
+                    return { accumulated: {}, content: "", yielded: false };
+                }
+                if (!chunk.choices?.[0])
+                    continue;
+                // Capture usage data from chunks (usually in the final chunk)
+                if (chunk.usage) {
+                    usageData = chunk.usage;
+                }
+                // Accumulate the message using reducer
+                accumulatedMessage = this.messageReducer(accumulatedMessage, chunk);
+                // Check for tool calls - yield when we have complete tool calls with function names
+                const toolCalls = accumulatedMessage.tool_calls;
+                if (!toolCallsYielded && toolCalls && Array.isArray(toolCalls) && toolCalls.length > 0) {
+                    const hasCompleteTool = toolCalls.some((tc) => tc.function?.name);
+                    if (hasCompleteTool) {
+                        yield {
+                            type: "tool_calls",
+                            toolCalls: toolCalls,
+                        };
+                        toolCallsYielded = true;
+                    }
+                }
+                // Stream reasoning content (GLM-4.6 thinking mode)
+                // Safety check: ensure choices[0] exists before accessing
+                if (chunk.choices[0]?.delta?.reasoning_content) {
                     yield {
-                        type: "tool_calls",
-                        toolCalls: toolCalls,
+                        type: "reasoning",
+                        reasoningContent: chunk.choices[0].delta.reasoning_content,
                     };
-                    toolCallsYielded = true;
+                }
+                // Stream content as it comes
+                if (chunk.choices[0]?.delta?.content) {
+                    accumulatedContent += chunk.choices[0].delta.content;
+                    yield {
+                        type: "content",
+                        content: chunk.choices[0].delta.content,
+                    };
+                    // Emit token count update (throttled and optimized)
+                    const now = Date.now();
+                    if (now - lastTokenUpdate.value > 1000) { // Increased throttle to 1s for better performance
+                        lastTokenUpdate.value = now;
+                        // Use fast estimation during streaming (4 chars ≈ 1 token)
+                        // This is ~70% faster than tiktoken encoding
+                        const estimatedOutputTokens = Math.floor(accumulatedContent.length / 4) +
+                            (accumulatedMessage.tool_calls
+                                ? Math.floor(JSON.stringify(accumulatedMessage.tool_calls).length / 4)
+                                : 0);
+                        totalOutputTokens.value = estimatedOutputTokens;
+                        yield {
+                            type: "token_count",
+                            tokenCount: inputTokens + estimatedOutputTokens,
+                        };
+                    }
                 }
             }
-            // Stream reasoning content (GLM-4.6 thinking mode)
-            // Safety check: ensure choices[0] exists before accessing
-            if (chunk.choices[0]?.delta?.reasoning_content) {
-                yield {
-                    type: "reasoning",
-                    reasoningContent: chunk.choices[0].delta.reasoning_content,
-                };
-            }
-            // Stream content as it comes
-            if (chunk.choices[0]?.delta?.content) {
-                accumulatedContent += chunk.choices[0].delta.content;
-                yield {
-                    type: "content",
-                    content: chunk.choices[0].delta.content,
-                };
-                // Emit token count update (throttled and optimized)
-                const now = Date.now();
-                if (now - lastTokenUpdate.value > 1000) { // Increased throttle to 1s for better performance
-                    lastTokenUpdate.value = now;
-                    // Use fast estimation during streaming (4 chars ≈ 1 token)
-                    // This is ~70% faster than tiktoken encoding
-                    const estimatedOutputTokens = Math.floor(accumulatedContent.length / 4) +
-                        (accumulatedMessage.tool_calls
-                            ? Math.floor(JSON.stringify(accumulatedMessage.tool_calls).length / 4)
-                            : 0);
-                    totalOutputTokens.value = estimatedOutputTokens;
+            // Track usage if available and emit accurate final token count
+            if (usageData) {
+                const tracker = getUsageTracker();
+                tracker.trackUsage(this.llmClient.getCurrentModel(), usageData);
+                // Emit accurate token count from API usage data (replaces estimation)
+                const totalTokens = usageData.total_tokens;
+                const completionTokens = usageData.completion_tokens;
+                if (totalTokens) {
+                    totalOutputTokens.value = completionTokens || 0;
                     yield {
                         type: "token_count",
-                        tokenCount: inputTokens + estimatedOutputTokens,
+                        tokenCount: totalTokens,
                     };
                 }
             }
+            // CRITICAL: Yield the accumulated result so the main loop can access it!
+            const result = { accumulated: accumulatedMessage, content: accumulatedContent, yielded: toolCallsYielded };
+            yield result;
+            return result;
         }
-        // Track usage if available and emit accurate final token count
-        if (usageData) {
-            const tracker = getUsageTracker();
-            tracker.trackUsage(this.llmClient.getCurrentModel(), usageData);
-            // Emit accurate token count from API usage data (replaces estimation)
-            const totalTokens = usageData.total_tokens;
-            const completionTokens = usageData.completion_tokens;
-            if (totalTokens) {
-                totalOutputTokens.value = completionTokens || 0;
-                yield {
-                    type: "token_count",
-                    tokenCount: totalTokens,
-                };
+        finally {
+            // CRITICAL FIX: Properly close the async iterator to release HTTP connections and buffers
+            // This prevents socket leaks when streams are cancelled or errors occur
+            if (typeof stream.return === 'function') {
+                try {
+                    await stream.return();
+                }
+                catch (cleanupError) {
+                    // Log but don't throw - cleanup errors shouldn't break the flow
+                    console.warn('Stream cleanup warning:', cleanupError);
+                }
             }
         }
-        // CRITICAL: Yield the accumulated result so the main loop can access it!
-        const result = { accumulated: accumulatedMessage, content: accumulatedContent, yielded: toolCallsYielded };
-        yield result;
-        return result;
     }
     /**
      * Add assistant message to history and conversation
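The try/finally wrapper added above follows the general async-iterator cleanup contract: whoever stops consuming early must close the source, or the underlying connection lingers. A reduced sketch of the shape — `consume` is a made-up wrapper, and the optional `return()` probe mirrors the diff:

```typescript
// A source that can be closed early. The optional return() matches the
// defensive `typeof stream.return === 'function'` check in the diff.
type ClosableAsyncIterable<T> = AsyncIterable<T> & { return?: () => Promise<unknown> };

async function* consume<T>(stream: ClosableAsyncIterable<T>): AsyncGenerator<T> {
  try {
    for await (const chunk of stream) {
      // If the consumer of `consume` abandons us at this yield, the
      // generator's own return() runs this function's finally block,
      // which then closes the inner stream as well.
      yield chunk;
    }
  } finally {
    // Runs on normal completion, early return, and thrown errors alike.
    try {
      await stream.return?.();
    } catch (cleanupError) {
      // Cleanup failures are logged, never rethrown.
      console.warn('Stream cleanup warning:', cleanupError);
    }
  }
}
```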
@@ -1856,6 +1915,9 @@ export class LLMAgent extends EventEmitter {
         // Safely preserve system message if it exists
         const systemMessage = this.messages.length > 0 ? this.messages[0] : null;
         this.messages = systemMessage ? [systemMessage] : [];
+        // CRITICAL FIX: Track tool calls to validate tool results
+        // Prevents API errors from orphaned tool results without corresponding tool calls
+        const toolCallIds = new Set();
         for (const entry of conversationState) {
             if (entry.type === 'user') {
                 this.messages.push({
@@ -1864,6 +1926,14 @@ export class LLMAgent extends EventEmitter {
                 });
             }
             else if (entry.type === 'assistant') {
+                // Track tool call IDs from assistant messages
+                if (entry.toolCalls && Array.isArray(entry.toolCalls)) {
+                    for (const toolCall of entry.toolCalls) {
+                        if (toolCall?.id) {
+                            toolCallIds.add(toolCall.id);
+                        }
+                    }
+                }
                 this.messages.push({
                     role: 'assistant',
                     content: entry.content,
@@ -1871,11 +1941,18 @@ export class LLMAgent extends EventEmitter {
                 });
             }
             else if (entry.type === 'tool_result' && entry.toolCall) {
-                this.messages.push({
-                    role: 'tool',
-                    content: entry.content,
-                    tool_call_id: entry.toolCall.id,
-                });
+                // CRITICAL FIX: Only add tool result if corresponding tool call exists
+                // This prevents "tool message without corresponding tool call" API errors
+                if (toolCallIds.has(entry.toolCall.id)) {
+                    this.messages.push({
+                        role: 'tool',
+                        content: entry.content,
+                        tool_call_id: entry.toolCall.id,
+                    });
+                }
+                else {
+                    console.warn(`Skipping orphaned tool result for tool_call_id: ${entry.toolCall.id}`);
+                }
             }
         }
         this.emit('system', `Conversation rewound to checkpoint ${checkpointId}`);
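The invariant these three hunks enforce comes from chat-completions-style APIs: a `tool` role message is only valid if a preceding assistant message declared that `tool_call_id` in its `tool_calls`. A standalone filter expressing the same rule, with message shapes simplified for illustration:

```typescript
// Simplified message union; real API types carry more fields.
type Msg =
  | { role: 'user' | 'system'; content: string }
  | { role: 'assistant'; content: string; tool_calls?: { id: string }[] }
  | { role: 'tool'; content: string; tool_call_id: string };

// Single forward pass: record tool-call IDs as assistant messages appear,
// and drop any tool result whose ID was never declared.
function dropOrphanedToolResults(messages: Msg[]): Msg[] {
  const seen = new Set<string>();
  return messages.filter((m) => {
    if (m.role === 'assistant') {
      m.tool_calls?.forEach((tc) => seen.add(tc.id));
      return true;
    }
    if (m.role === 'tool') return seen.has(m.tool_call_id);
    return true;
  });
}
```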
@@ -2058,12 +2135,22 @@ export class LLMAgent extends EventEmitter {
         this.disposed = true;
         // Remove all event listeners to prevent memory leaks
         this.removeAllListeners();
+        // CRITICAL FIX: Remove event listeners from contextManager to prevent memory leak
+        // The 'before_prune' listener was registered in constructor (line 188) but never removed
+        this.contextManager.removeAllListeners('before_prune');
         // Dispose tools that have cleanup methods
         this.bash.dispose();
         // Clear in-memory caches
         this.recentToolCalls.clear();
         this.toolCallIndexMap.clear();
         this.toolCallArgsCache.clear();
+        // BUG FIX: Clear all pending tool approval timeouts to prevent memory leaks
+        // These timers would otherwise keep running for up to 5 minutes after dispose
+        for (const timeout of this.toolApprovalTimeouts.values()) {
+            clearTimeout(timeout);
+        }
+        this.toolApprovalTimeouts.clear();
+        this.toolApprovalCallbacks.clear();
         // Clear conversation history to free memory
         this.chatHistory = [];
         this.messages = [];