npm - @probelabs/probe - Versions diffs - 0.6.0-rc236 → 0.6.0-rc237 - Mend

@probelabs/probe 0.6.0-rc236 → 0.6.0-rc237

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

package/bin/binaries/probe-v0.6.0-rc237-aarch64-apple-darwin.tar.gz +0 -0
package/bin/binaries/probe-v0.6.0-rc237-aarch64-unknown-linux-musl.tar.gz +0 -0
package/bin/binaries/probe-v0.6.0-rc237-x86_64-apple-darwin.tar.gz +0 -0
package/bin/binaries/probe-v0.6.0-rc237-x86_64-pc-windows-msvc.zip +0 -0
package/bin/binaries/probe-v0.6.0-rc237-x86_64-unknown-linux-musl.tar.gz +0 -0
package/build/agent/ProbeAgent.js +91 -33
package/build/agent/dsl/environment.js +4 -1
package/build/agent/index.js +102 -25
package/build/tools/common.js +61 -0
package/cjs/agent/ProbeAgent.cjs +9691 -7277
package/cjs/index.cjs +9694 -7280
package/package.json +1 -1
package/src/agent/ProbeAgent.js +91 -33
package/src/agent/dsl/environment.js +4 -1
package/src/tools/common.js +61 -0
package/bin/binaries/probe-v0.6.0-rc236-aarch64-apple-darwin.tar.gz +0 -0
package/bin/binaries/probe-v0.6.0-rc236-aarch64-unknown-linux-musl.tar.gz +0 -0
package/bin/binaries/probe-v0.6.0-rc236-x86_64-apple-darwin.tar.gz +0 -0
package/bin/binaries/probe-v0.6.0-rc236-x86_64-pc-windows-msvc.zip +0 -0
package/bin/binaries/probe-v0.6.0-rc236-x86_64-unknown-linux-musl.tar.gz +0 -0

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@probelabs/probe",
-  "version": "0.6.0-rc236",
+  "version": "0.6.0-rc237",
   "description": "Node.js wrapper for the probe code search tool",
   "main": "src/index.js",
   "module": "src/index.js",

package/src/agent/ProbeAgent.js CHANGED Viewed

@@ -64,7 +64,7 @@ import {
   attemptCompletionSchema,
   parseXmlToolCallWithThinking
 } from './tools.js';
-import { createMessagePreview, detectUnrecognizedToolCall } from '../tools/common.js';
+import { createMessagePreview, detectUnrecognizedToolCall, detectStuckResponse, areBothStuckResponses } from '../tools/common.js';
 import {
   createWrappedTools,
   listFilesToolInstance,
@@ -3165,6 +3165,11 @@ Follow these instructions carefully:
       let sameResponseCount = 0;
       const MAX_REPEATED_IDENTICAL_RESPONSES = 3;
+      // Circuit breaker for consecutive no-tool responses (regardless of content)
+      // This catches cases where agent alternates between similar "stuck" messages
+      let consecutiveNoToolCount = 0;
+      const MAX_CONSECUTIVE_NO_TOOL = 5;
       // Tool iteration loop (only for non-CLI engines like Vercel/Anthropic/OpenAI)
       while (currentIteration < maxIterations && !completionAttempted) {
         currentIteration++;
@@ -3435,41 +3440,66 @@ Follow these instructions carefully:
           if (this.debug) console.log(`[DEBUG] Parsed tool call: ${toolName} with params:`, params);
+          // Reset consecutive no-tool counter since we got a valid tool call
+          consecutiveNoToolCount = 0;
           if (toolName === 'attempt_completion') {
             completionAttempted = true;
             // END CHECKPOINT: Block completion if there are incomplete tasks
+            // However, allow completion if the agent is stuck and genuinely cannot proceed
             if (this.enableTasks && this.taskManager && this.taskManager.hasIncompleteTasks()) {
-              const taskSummary = this.taskManager.getTaskSummary();
-              const blockedMessage = createTaskCompletionBlockedMessage(taskSummary);
-              const incompleteTasks = this.taskManager.getIncompleteTasks();
-              // Record telemetry for blocked completion
-              if (this.tracer && typeof this.tracer.recordTaskEvent === 'function') {
-                this.tracer.recordTaskEvent('completion_blocked', {
-                  'task.incomplete_count': incompleteTasks.length,
-                  'task.incomplete_ids': incompleteTasks.map(t => t.id).join(', '),
-                  'task.iteration': currentIteration
-                });
-              }
+              const completionResult = typeof params.result === 'string' ? params.result : '';
+              const isStuckCompletion = detectStuckResponse(completionResult);
+              const highIterationCount = currentIteration > maxIterations * 0.7; // >70% of max iterations
-              if (this.debug) {
-                console.log('[DEBUG] Task checkpoint: Blocking completion due to incomplete tasks');
-                console.log('[DEBUG] Incomplete tasks:', taskSummary);
-              }
+              // Allow stuck completions after many iterations to prevent infinite loops
+              if (isStuckCompletion && highIterationCount) {
+                if (this.debug) {
+                  console.log('[DEBUG] Task checkpoint: Allowing stuck completion (agent genuinely cannot proceed)');
+                  console.log('[DEBUG] Incomplete tasks will remain:', this.taskManager.getTaskSummary());
+                }
+                // Record telemetry for forced completion
+                if (this.tracer && typeof this.tracer.recordTaskEvent === 'function') {
+                  this.tracer.recordTaskEvent('forced_stuck_completion', {
+                    'task.incomplete_count': this.taskManager.getIncompleteTasks().length,
+                    'task.iteration': currentIteration,
+                    'task.max_iterations': maxIterations
+                  });
+                }
+                // Continue to process the completion instead of blocking
+              } else {
+                const taskSummary = this.taskManager.getTaskSummary();
+                const blockedMessage = createTaskCompletionBlockedMessage(taskSummary);
+                const incompleteTasks = this.taskManager.getIncompleteTasks();
+                // Record telemetry for blocked completion
+                if (this.tracer && typeof this.tracer.recordTaskEvent === 'function') {
+                  this.tracer.recordTaskEvent('completion_blocked', {
+                    'task.incomplete_count': incompleteTasks.length,
+                    'task.incomplete_ids': incompleteTasks.map(t => t.id).join(', '),
+                    'task.iteration': currentIteration
+                  });
+                }
-              // Add reminder message and continue the loop
-              currentMessages.push({
-                role: 'assistant',
-                content: assistantResponseContent
-              });
-              currentMessages.push({
-                role: 'user',
-                content: blockedMessage
-              });
+                if (this.debug) {
+                  console.log('[DEBUG] Task checkpoint: Blocking completion due to incomplete tasks');
+                  console.log('[DEBUG] Incomplete tasks:', taskSummary);
+                }
-              completionAttempted = false; // Reset to allow more iterations
-              continue; // Skip the break and continue the loop
+                // Add reminder message and continue the loop
+                currentMessages.push({
+                  role: 'assistant',
+                  content: assistantResponseContent
+                });
+                currentMessages.push({
+                  role: 'user',
+                  content: blockedMessage
+                });
+                completionAttempted = false; // Reset to allow more iterations
+                continue; // Skip the break and continue the loop
+              }
             }
             // Handle attempt_complete shorthand - use previous response
@@ -3898,10 +3928,19 @@ Follow these instructions carefully:
             break;
           }
-          // Check for repeated identical responses - if AI gives same response 3 times,
-          // accept it as the final answer instead of continuing the loop
-          if (lastNoToolResponse !== null && assistantResponseContent === lastNoToolResponse) {
+          // Increment consecutive no-tool counter (catches alternating stuck responses)
+          consecutiveNoToolCount++;
+          // Check for repeated identical responses OR semantically similar "stuck" responses
+          // This catches cases where AI alternates between slightly different "I cannot proceed" messages
+          const isIdentical = lastNoToolResponse !== null && assistantResponseContent === lastNoToolResponse;
+          const isSemanticallyStuck = lastNoToolResponse !== null && areBothStuckResponses(lastNoToolResponse, assistantResponseContent);
+          if (isIdentical || isSemanticallyStuck) {
             sameResponseCount++;
+            if (this.debug && isSemanticallyStuck && !isIdentical) {
+              console.log(`[DEBUG] Detected semantically similar stuck response (count: ${sameResponseCount})`);
+            }
             if (sameResponseCount >= MAX_REPEATED_IDENTICAL_RESPONSES) {
               // Clean up the response - remove thinking tags
               let cleanedResponse = assistantResponseContent;
@@ -3915,7 +3954,7 @@ Follow these instructions carefully:
               if (hasSubstantialContent) {
                 if (this.debug) {
-                  console.log(`[DEBUG] Same response repeated ${sameResponseCount} times - accepting as final answer (${cleanedResponse.length} chars)`);
+                  console.log(`[DEBUG] ${isIdentical ? 'Same' : 'Stuck'} response repeated ${sameResponseCount} times - accepting as final answer (${cleanedResponse.length} chars)`);
                 }
                 finalResult = cleanedResponse;
                 completionAttempted = true;
@@ -3923,11 +3962,30 @@ Follow these instructions carefully:
               }
             }
           } else {
-            // Different response, reset counter
+            // Different response (and not both stuck), reset counter
             lastNoToolResponse = assistantResponseContent;
             sameResponseCount = 1;
           }
+          // Circuit breaker: If we've had MAX_CONSECUTIVE_NO_TOOL iterations without any tool call,
+          // force completion to avoid infinite loops (e.g., agent alternating between "can't proceed" variations)
+          if (consecutiveNoToolCount >= MAX_CONSECUTIVE_NO_TOOL) {
+            let cleanedResponse = assistantResponseContent;
+            cleanedResponse = cleanedResponse.replace(/<thinking>[\s\S]*?<\/thinking>/gi, '').trim();
+            cleanedResponse = cleanedResponse.replace(/<thinking>[\s\S]*$/gi, '').trim();
+            if (cleanedResponse.length > 50) {
+              if (this.debug) {
+                console.log(`[DEBUG] Circuit breaker: ${consecutiveNoToolCount} consecutive no-tool responses - forcing completion`);
+              }
+              // Record this in telemetry
+              this._recordErrorTelemetry('consecutive_no_tool_circuit_breaker', `Forced completion after ${consecutiveNoToolCount} consecutive no-tool responses`, { responsePreview: cleanedResponse.substring(0, 500) }, currentIteration);
+              finalResult = cleanedResponse;
+              completionAttempted = true;
+              break;
+            }
+          }
           // Add assistant response and ask for tool usage
           currentMessages.push({ role: 'assistant', content: assistantResponseContent });

package/src/agent/dsl/environment.js CHANGED Viewed

@@ -189,10 +189,13 @@ export function generateSandboxGlobals(options) {
   }
   // Bridge MCP tools
+  // Note: mcpBridge is used as a gate (non-null check), but actual execution
+  // goes through tool.execute() since MCPXmlBridge stores tools in mcpTools
+  // and doesn't have a callTool() method (that's on MCPManager)
   if (mcpBridge) {
     for (const [name, tool] of Object.entries(mcpTools)) {
       const rawMcpFn = async (params = {}) => {
-        return mcpBridge.callTool(name, params);
+        return tool.execute(params);
       };
       globals[name] = traceToolCall(name, rawMcpFn, tracer, logFn);
     }

package/src/tools/common.js CHANGED Viewed

@@ -752,6 +752,67 @@ export function detectUnrecognizedToolCall(xmlString, validTools) {
 	return null;
 }
+/**
+ * Detect if a response indicates the agent is "stuck" and cannot proceed.
+ * This uses pattern matching to catch variations of "I cannot proceed"
+ * that would bypass exact string matching.
+ *
+ * Matching is case-insensitive. Apostrophes in contractions may be either
+ * the straight (U+0027) or the curly (U+2019) form.
+ *
+ * @param {string} response - The assistant response to check
+ * @returns {boolean} - True if the response matches at least one stuck pattern;
+ *   false for empty or non-string input
+ */
+export function detectStuckResponse(response) {
+	if (typeof response !== 'string' || response === '') {
+		return false;
+	}
+	// Patterns that indicate the agent is stuck.
+	// ['\u2019] matches both the straight and the curly apostrophe; the curly one is
+	// written as an escape so it survives editors/tools that normalize quotes.
+	const stuckPatterns = [
+		// Cannot proceed patterns
+		/\bi\s+cannot\s+proceed\b/i,
+		/\bi\s+can['\u2019]t\s+(?:proceed|continue|move\s+forward)\b/i,
+		/\bunable\s+to\s+(?:proceed|continue|complete)\b/i,
+		/\bblocked\b.*\b(?:proceed|continue)\b/i,
+		// Missing information patterns
+		/\bneed\s+(?:the|an?)\s+\w+(?:\s+\w+)?\s+to\s+(?:proceed|continue)\b/i,
+		/\brequire[sd]?\s+(?:the|an?)\s+\w+\b.*\bto\s+(?:proceed|continue)\b/i,
+		/\bmissing\s+(?:required|necessary|essential)\b/i,
+		// Deadlock/loop patterns
+		/\bdeadlock\b/i,
+		/\bwe\s+are\s+in\s+a\s+loop\b/i,
+		/\bstuck\s+in\s+a\s+loop\b/i,
+		// "I have ..." needs whitespace after "I", but the contraction "I've" has none,
+		// so the whitespace belongs inside the "have" alternative only.
+		/\bi(?:\s+have|['\u2019]ve)\s+(?:explained|stated|mentioned)\s+(?:this|the\s+situation|it)\s+(?:multiple|several)\s+times\b/i,
+		// Cannot find/get patterns
+		/\bi\s+(?:cannot|can['\u2019]t|could\s+not|couldn['\u2019]t)\s+(?:find|locate|get|retrieve|obtain)\s+(?:the|this|that|an?)\b/i,
+		/\bno\s+way\s+to\s+(?:find|get|obtain|retrieve)\b/i,
+		// Exhausted options patterns
+		/\bi(?:\s+have|['\u2019]ve)\s+exhausted\s+(?:all|my)\s+(?:available\s+)?(?:options|methods|approaches)\b/i,
+		/\bneither\s+of\s+these\s+methods\b/i,
+	];
+	// None of the patterns carries the g/y flag, so .test() is stateless here.
+	return stuckPatterns.some((pattern) => pattern.test(response));
+}
+/**
+ * Report whether two responses both signal a stuck state.
+ * The responses are not compared with each other; each one is classified
+ * independently by detectStuckResponse, and the results are AND-ed.
+ *
+ * @param {string} response1 - First response
+ * @param {string} response2 - Second response
+ * @returns {boolean} - True only when both responses indicate a stuck state
+ */
+export function areBothStuckResponses(response1, response2) {
+	// Guard clause: the second response is only inspected when the first one is stuck.
+	if (!detectStuckResponse(response1)) {
+		return false;
+	}
+	return detectStuckResponse(response2);
+}
 /**
  * Parse targets string into array of file specifications
  * Handles both space-separated and comma-separated targets for extract tool

package/bin/binaries/probe-v0.6.0-rc236-aarch64-apple-darwin.tar.gz DELETED Viewed

Binary file

package/bin/binaries/probe-v0.6.0-rc236-aarch64-unknown-linux-musl.tar.gz DELETED Viewed

Binary file

package/bin/binaries/probe-v0.6.0-rc236-x86_64-apple-darwin.tar.gz DELETED Viewed

Binary file

package/bin/binaries/probe-v0.6.0-rc236-x86_64-pc-windows-msvc.zip DELETED Viewed

Binary file

package/bin/binaries/probe-v0.6.0-rc236-x86_64-unknown-linux-musl.tar.gz DELETED Viewed

Binary file