@probelabs/probe 0.6.0-rc236 → 0.6.0-rc237

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@probelabs/probe",
3
- "version": "0.6.0-rc236",
3
+ "version": "0.6.0-rc237",
4
4
  "description": "Node.js wrapper for the probe code search tool",
5
5
  "main": "src/index.js",
6
6
  "module": "src/index.js",
@@ -64,7 +64,7 @@ import {
64
64
  attemptCompletionSchema,
65
65
  parseXmlToolCallWithThinking
66
66
  } from './tools.js';
67
- import { createMessagePreview, detectUnrecognizedToolCall } from '../tools/common.js';
67
+ import { createMessagePreview, detectUnrecognizedToolCall, detectStuckResponse, areBothStuckResponses } from '../tools/common.js';
68
68
  import {
69
69
  createWrappedTools,
70
70
  listFilesToolInstance,
@@ -3165,6 +3165,11 @@ Follow these instructions carefully:
3165
3165
  let sameResponseCount = 0;
3166
3166
  const MAX_REPEATED_IDENTICAL_RESPONSES = 3;
3167
3167
 
3168
+ // Circuit breaker for consecutive no-tool responses (regardless of content)
3169
+ // This catches cases where agent alternates between similar "stuck" messages
3170
+ let consecutiveNoToolCount = 0;
3171
+ const MAX_CONSECUTIVE_NO_TOOL = 5;
3172
+
3168
3173
  // Tool iteration loop (only for non-CLI engines like Vercel/Anthropic/OpenAI)
3169
3174
  while (currentIteration < maxIterations && !completionAttempted) {
3170
3175
  currentIteration++;
@@ -3435,41 +3440,66 @@ Follow these instructions carefully:
3435
3440
 
3436
3441
  if (this.debug) console.log(`[DEBUG] Parsed tool call: ${toolName} with params:`, params);
3437
3442
 
3443
+ // Reset consecutive no-tool counter since we got a valid tool call
3444
+ consecutiveNoToolCount = 0;
3445
+
3438
3446
  if (toolName === 'attempt_completion') {
3439
3447
  completionAttempted = true;
3440
3448
 
3441
3449
  // END CHECKPOINT: Block completion if there are incomplete tasks
3450
+ // However, allow completion if the agent is stuck and genuinely cannot proceed
3442
3451
  if (this.enableTasks && this.taskManager && this.taskManager.hasIncompleteTasks()) {
3443
- const taskSummary = this.taskManager.getTaskSummary();
3444
- const blockedMessage = createTaskCompletionBlockedMessage(taskSummary);
3445
- const incompleteTasks = this.taskManager.getIncompleteTasks();
3446
-
3447
- // Record telemetry for blocked completion
3448
- if (this.tracer && typeof this.tracer.recordTaskEvent === 'function') {
3449
- this.tracer.recordTaskEvent('completion_blocked', {
3450
- 'task.incomplete_count': incompleteTasks.length,
3451
- 'task.incomplete_ids': incompleteTasks.map(t => t.id).join(', '),
3452
- 'task.iteration': currentIteration
3453
- });
3454
- }
3452
+ const completionResult = typeof params.result === 'string' ? params.result : '';
3453
+ const isStuckCompletion = detectStuckResponse(completionResult);
3454
+ const highIterationCount = currentIteration > maxIterations * 0.7; // >70% of max iterations
3455
3455
 
3456
- if (this.debug) {
3457
- console.log('[DEBUG] Task checkpoint: Blocking completion due to incomplete tasks');
3458
- console.log('[DEBUG] Incomplete tasks:', taskSummary);
3459
- }
3456
+ // Allow stuck completions after many iterations to prevent infinite loops
3457
+ if (isStuckCompletion && highIterationCount) {
3458
+ if (this.debug) {
3459
+ console.log('[DEBUG] Task checkpoint: Allowing stuck completion (agent genuinely cannot proceed)');
3460
+ console.log('[DEBUG] Incomplete tasks will remain:', this.taskManager.getTaskSummary());
3461
+ }
3462
+ // Record telemetry for forced completion
3463
+ if (this.tracer && typeof this.tracer.recordTaskEvent === 'function') {
3464
+ this.tracer.recordTaskEvent('forced_stuck_completion', {
3465
+ 'task.incomplete_count': this.taskManager.getIncompleteTasks().length,
3466
+ 'task.iteration': currentIteration,
3467
+ 'task.max_iterations': maxIterations
3468
+ });
3469
+ }
3470
+ // Continue to process the completion instead of blocking
3471
+ } else {
3472
+ const taskSummary = this.taskManager.getTaskSummary();
3473
+ const blockedMessage = createTaskCompletionBlockedMessage(taskSummary);
3474
+ const incompleteTasks = this.taskManager.getIncompleteTasks();
3475
+
3476
+ // Record telemetry for blocked completion
3477
+ if (this.tracer && typeof this.tracer.recordTaskEvent === 'function') {
3478
+ this.tracer.recordTaskEvent('completion_blocked', {
3479
+ 'task.incomplete_count': incompleteTasks.length,
3480
+ 'task.incomplete_ids': incompleteTasks.map(t => t.id).join(', '),
3481
+ 'task.iteration': currentIteration
3482
+ });
3483
+ }
3460
3484
 
3461
- // Add reminder message and continue the loop
3462
- currentMessages.push({
3463
- role: 'assistant',
3464
- content: assistantResponseContent
3465
- });
3466
- currentMessages.push({
3467
- role: 'user',
3468
- content: blockedMessage
3469
- });
3485
+ if (this.debug) {
3486
+ console.log('[DEBUG] Task checkpoint: Blocking completion due to incomplete tasks');
3487
+ console.log('[DEBUG] Incomplete tasks:', taskSummary);
3488
+ }
3470
3489
 
3471
- completionAttempted = false; // Reset to allow more iterations
3472
- continue; // Skip the break and continue the loop
3490
+ // Add reminder message and continue the loop
3491
+ currentMessages.push({
3492
+ role: 'assistant',
3493
+ content: assistantResponseContent
3494
+ });
3495
+ currentMessages.push({
3496
+ role: 'user',
3497
+ content: blockedMessage
3498
+ });
3499
+
3500
+ completionAttempted = false; // Reset to allow more iterations
3501
+ continue; // Skip the break and continue the loop
3502
+ }
3473
3503
  }
3474
3504
 
3475
3505
  // Handle attempt_complete shorthand - use previous response
@@ -3898,10 +3928,19 @@ Follow these instructions carefully:
3898
3928
  break;
3899
3929
  }
3900
3930
 
3901
- // Check for repeated identical responses - if AI gives same response 3 times,
3902
- // accept it as the final answer instead of continuing the loop
3903
- if (lastNoToolResponse !== null && assistantResponseContent === lastNoToolResponse) {
3931
+ // Increment consecutive no-tool counter (catches alternating stuck responses)
3932
+ consecutiveNoToolCount++;
3933
+
3934
+ // Check for repeated identical responses OR semantically similar "stuck" responses
3935
+ // This catches cases where AI alternates between slightly different "I cannot proceed" messages
3936
+ const isIdentical = lastNoToolResponse !== null && assistantResponseContent === lastNoToolResponse;
3937
+ const isSemanticallyStuck = lastNoToolResponse !== null && areBothStuckResponses(lastNoToolResponse, assistantResponseContent);
3938
+
3939
+ if (isIdentical || isSemanticallyStuck) {
3904
3940
  sameResponseCount++;
3941
+ if (this.debug && isSemanticallyStuck && !isIdentical) {
3942
+ console.log(`[DEBUG] Detected semantically similar stuck response (count: ${sameResponseCount})`);
3943
+ }
3905
3944
  if (sameResponseCount >= MAX_REPEATED_IDENTICAL_RESPONSES) {
3906
3945
  // Clean up the response - remove thinking tags
3907
3946
  let cleanedResponse = assistantResponseContent;
@@ -3915,7 +3954,7 @@ Follow these instructions carefully:
3915
3954
 
3916
3955
  if (hasSubstantialContent) {
3917
3956
  if (this.debug) {
3918
- console.log(`[DEBUG] Same response repeated ${sameResponseCount} times - accepting as final answer (${cleanedResponse.length} chars)`);
3957
+ console.log(`[DEBUG] ${isIdentical ? 'Same' : 'Stuck'} response repeated ${sameResponseCount} times - accepting as final answer (${cleanedResponse.length} chars)`);
3919
3958
  }
3920
3959
  finalResult = cleanedResponse;
3921
3960
  completionAttempted = true;
@@ -3923,11 +3962,30 @@ Follow these instructions carefully:
3923
3962
  }
3924
3963
  }
3925
3964
  } else {
3926
- // Different response, reset counter
3965
+ // Different response (and not both stuck), reset counter
3927
3966
  lastNoToolResponse = assistantResponseContent;
3928
3967
  sameResponseCount = 1;
3929
3968
  }
3930
3969
 
3970
+ // Circuit breaker: If we've had MAX_CONSECUTIVE_NO_TOOL iterations without any tool call,
3971
+ // force completion to avoid infinite loops (e.g., agent alternating between "can't proceed" variations)
3972
+ if (consecutiveNoToolCount >= MAX_CONSECUTIVE_NO_TOOL) {
3973
+ let cleanedResponse = assistantResponseContent;
3974
+ cleanedResponse = cleanedResponse.replace(/<thinking>[\s\S]*?<\/thinking>/gi, '').trim();
3975
+ cleanedResponse = cleanedResponse.replace(/<thinking>[\s\S]*$/gi, '').trim();
3976
+
3977
+ if (cleanedResponse.length > 50) {
3978
+ if (this.debug) {
3979
+ console.log(`[DEBUG] Circuit breaker: ${consecutiveNoToolCount} consecutive no-tool responses - forcing completion`);
3980
+ }
3981
+ // Record this in telemetry
3982
+ this._recordErrorTelemetry('consecutive_no_tool_circuit_breaker', `Forced completion after ${consecutiveNoToolCount} consecutive no-tool responses`, { responsePreview: cleanedResponse.substring(0, 500) }, currentIteration);
3983
+ finalResult = cleanedResponse;
3984
+ completionAttempted = true;
3985
+ break;
3986
+ }
3987
+ }
3988
+
3931
3989
  // Add assistant response and ask for tool usage
3932
3990
  currentMessages.push({ role: 'assistant', content: assistantResponseContent });
3933
3991
 
@@ -189,10 +189,13 @@ export function generateSandboxGlobals(options) {
189
189
  }
190
190
 
191
191
  // Bridge MCP tools
192
+ // Note: mcpBridge is used as a gate (non-null check), but actual execution
193
+ // goes through tool.execute() since MCPXmlBridge stores tools in mcpTools
194
+ // and doesn't have a callTool() method (that's on MCPManager)
192
195
  if (mcpBridge) {
193
196
  for (const [name, tool] of Object.entries(mcpTools)) {
194
197
  const rawMcpFn = async (params = {}) => {
195
- return mcpBridge.callTool(name, params);
198
+ return tool.execute(params);
196
199
  };
197
200
  globals[name] = traceToolCall(name, rawMcpFn, tracer, logFn);
198
201
  }
@@ -752,6 +752,67 @@ export function detectUnrecognizedToolCall(xmlString, validTools) {
752
752
  return null;
753
753
  }
754
754
 
// Patterns that indicate the agent is stuck. All are case-insensitive and carry
// no `g`/`y` flag, so sharing them across calls is safe (no `lastIndex` state).
// Apostrophes are matched with ['\u2019] so both the straight (') and the
// typographic (’) form are accepted; the escape is used instead of a literal
// curly quote so the class survives tools that normalize quote characters.
const STUCK_RESPONSE_PATTERNS = Object.freeze([
  // Cannot proceed patterns
  /\bi\s+cannot\s+proceed\b/i,
  /\bi\s+can['\u2019]t\s+(?:proceed|continue|move\s+forward)\b/i,
  /\bunable\s+to\s+(?:proceed|continue|complete)\b/i,
  /\bblocked\b.*\b(?:proceed|continue)\b/i,
  // Missing information patterns
  /\bneed\s+(?:the|an?)\s+\w+(?:\s+\w+)?\s+to\s+(?:proceed|continue)\b/i,
  /\brequire[sd]?\s+(?:the|an?)\s+\w+\b.*\bto\s+(?:proceed|continue)\b/i,
  /\bmissing\s+(?:required|necessary|essential)\b/i,
  // Deadlock/loop patterns
  /\bdeadlock\b/i,
  /\bwe\s+are\s+in\s+a\s+loop\b/i,
  /\bstuck\s+in\s+a\s+loop\b/i,
  // "I have ..." or the contraction "I've ..." (no space before the apostrophe;
  // a stray space is still tolerated via \s*).
  /\bi(?:\s+have|\s*['\u2019]ve)\s+(?:explained|stated|mentioned)\s+(?:this|the\s+situation|it)\s+(?:multiple|several)\s+times\b/i,
  // Cannot find/get patterns
  /\bi\s+(?:cannot|can['\u2019]t|could\s+not|couldn['\u2019]t)\s+(?:find|locate|get|retrieve|obtain)\s+(?:the|this|that|an?)\b/i,
  /\bno\s+way\s+to\s+(?:find|get|obtain|retrieve)\b/i,
  // Exhausted options patterns
  /\bi(?:\s+have|\s*['\u2019]ve)\s+exhausted\s+(?:all|my)\s+(?:available\s+)?(?:options|methods|approaches)\b/i,
  /\bneither\s+of\s+these\s+methods\b/i,
]);

/**
 * Detect if a response indicates the agent is "stuck" and cannot proceed.
 * This uses regular-expression pattern matching to catch variations of
 * "I cannot proceed" that would bypass exact string matching.
 *
 * Matching is case-insensitive and accepts both straight and curly
 * apostrophes in contractions such as "can't" or "I've".
 *
 * @param {string} response - The assistant response to check
 * @returns {boolean} - True if the response matches at least one stuck
 *   pattern; false for empty or non-string input
 */
export function detectStuckResponse(response) {
  // Empty strings, null/undefined and non-string values can never be "stuck".
  if (!response || typeof response !== 'string') {
    return false;
  }

  return STUCK_RESPONSE_PATTERNS.some((pattern) => pattern.test(response));
}
803
+
/**
 * Report whether two responses both signal a stuck state.
 *
 * This is a coarse grouping check: the two texts are not compared with each
 * other; each one is classified independently by detectStuckResponse.
 *
 * @param {string} response1 - First response
 * @param {string} response2 - Second response
 * @returns {boolean} - True only when detectStuckResponse accepts both inputs
 */
export function areBothStuckResponses(response1, response2) {
  // Short-circuit: the second response is only inspected when the first is stuck.
  if (!detectStuckResponse(response1)) {
    return false;
  }
  return detectStuckResponse(response2);
}
815
+
755
816
  /**
756
817
  * Parse targets string into array of file specifications
757
818
  * Handles both space-separated and comma-separated targets for extract tool