@probelabs/probe 0.6.0-rc235 → 0.6.0-rc237

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@probelabs/probe",
3
- "version": "0.6.0-rc235",
3
+ "version": "0.6.0-rc237",
4
4
  "description": "Node.js wrapper for the probe code search tool",
5
5
  "main": "src/index.js",
6
6
  "module": "src/index.js",
@@ -64,7 +64,7 @@ import {
64
64
  attemptCompletionSchema,
65
65
  parseXmlToolCallWithThinking
66
66
  } from './tools.js';
67
- import { createMessagePreview, detectUnrecognizedToolCall } from '../tools/common.js';
67
+ import { createMessagePreview, detectUnrecognizedToolCall, detectStuckResponse, areBothStuckResponses } from '../tools/common.js';
68
68
  import {
69
69
  createWrappedTools,
70
70
  listFilesToolInstance,
@@ -839,7 +839,12 @@ export class ProbeAgent {
839
839
  delegationManager: this.delegationManager, // Per-instance delegation limits
840
840
  outputBuffer: this._outputBuffer,
841
841
  concurrencyLimiter: this.concurrencyLimiter, // Global AI concurrency limiter
842
- isToolAllowed
842
+ isToolAllowed,
843
+ // Lazy MCP getters — MCP is initialized after tools are created, so we use
844
+ // getter functions that resolve at call-time to get the current MCP state
845
+ getMcpBridge: () => this.mcpBridge,
846
+ getMcpTools: () => this.mcpBridge?.mcpTools || {},
847
+ isMcpToolAllowed: (toolName) => this._isMcpToolAllowed(toolName),
843
848
  };
844
849
 
845
850
  // Create base tools
@@ -3160,6 +3165,11 @@ Follow these instructions carefully:
3160
3165
  let sameResponseCount = 0;
3161
3166
  const MAX_REPEATED_IDENTICAL_RESPONSES = 3;
3162
3167
 
3168
+ // Circuit breaker for consecutive no-tool responses (regardless of content)
3169
+ // This catches cases where agent alternates between similar "stuck" messages
3170
+ let consecutiveNoToolCount = 0;
3171
+ const MAX_CONSECUTIVE_NO_TOOL = 5;
3172
+
3163
3173
  // Tool iteration loop (only for non-CLI engines like Vercel/Anthropic/OpenAI)
3164
3174
  while (currentIteration < maxIterations && !completionAttempted) {
3165
3175
  currentIteration++;
@@ -3430,41 +3440,66 @@ Follow these instructions carefully:
3430
3440
 
3431
3441
  if (this.debug) console.log(`[DEBUG] Parsed tool call: ${toolName} with params:`, params);
3432
3442
 
3443
+ // Reset consecutive no-tool counter since we got a valid tool call
3444
+ consecutiveNoToolCount = 0;
3445
+
3433
3446
  if (toolName === 'attempt_completion') {
3434
3447
  completionAttempted = true;
3435
3448
 
3436
3449
  // END CHECKPOINT: Block completion if there are incomplete tasks
3450
+ // However, allow completion if the agent is stuck and genuinely cannot proceed
3437
3451
  if (this.enableTasks && this.taskManager && this.taskManager.hasIncompleteTasks()) {
3438
- const taskSummary = this.taskManager.getTaskSummary();
3439
- const blockedMessage = createTaskCompletionBlockedMessage(taskSummary);
3440
- const incompleteTasks = this.taskManager.getIncompleteTasks();
3441
-
3442
- // Record telemetry for blocked completion
3443
- if (this.tracer && typeof this.tracer.recordTaskEvent === 'function') {
3444
- this.tracer.recordTaskEvent('completion_blocked', {
3445
- 'task.incomplete_count': incompleteTasks.length,
3446
- 'task.incomplete_ids': incompleteTasks.map(t => t.id).join(', '),
3447
- 'task.iteration': currentIteration
3448
- });
3449
- }
3452
+ const completionResult = typeof params.result === 'string' ? params.result : '';
3453
+ const isStuckCompletion = detectStuckResponse(completionResult);
3454
+ const highIterationCount = currentIteration > maxIterations * 0.7; // >70% of max iterations
3450
3455
 
3451
- if (this.debug) {
3452
- console.log('[DEBUG] Task checkpoint: Blocking completion due to incomplete tasks');
3453
- console.log('[DEBUG] Incomplete tasks:', taskSummary);
3454
- }
3456
+ // Allow stuck completions after many iterations to prevent infinite loops
3457
+ if (isStuckCompletion && highIterationCount) {
3458
+ if (this.debug) {
3459
+ console.log('[DEBUG] Task checkpoint: Allowing stuck completion (agent genuinely cannot proceed)');
3460
+ console.log('[DEBUG] Incomplete tasks will remain:', this.taskManager.getTaskSummary());
3461
+ }
3462
+ // Record telemetry for forced completion
3463
+ if (this.tracer && typeof this.tracer.recordTaskEvent === 'function') {
3464
+ this.tracer.recordTaskEvent('forced_stuck_completion', {
3465
+ 'task.incomplete_count': this.taskManager.getIncompleteTasks().length,
3466
+ 'task.iteration': currentIteration,
3467
+ 'task.max_iterations': maxIterations
3468
+ });
3469
+ }
3470
+ // Continue to process the completion instead of blocking
3471
+ } else {
3472
+ const taskSummary = this.taskManager.getTaskSummary();
3473
+ const blockedMessage = createTaskCompletionBlockedMessage(taskSummary);
3474
+ const incompleteTasks = this.taskManager.getIncompleteTasks();
3475
+
3476
+ // Record telemetry for blocked completion
3477
+ if (this.tracer && typeof this.tracer.recordTaskEvent === 'function') {
3478
+ this.tracer.recordTaskEvent('completion_blocked', {
3479
+ 'task.incomplete_count': incompleteTasks.length,
3480
+ 'task.incomplete_ids': incompleteTasks.map(t => t.id).join(', '),
3481
+ 'task.iteration': currentIteration
3482
+ });
3483
+ }
3455
3484
 
3456
- // Add reminder message and continue the loop
3457
- currentMessages.push({
3458
- role: 'assistant',
3459
- content: assistantResponseContent
3460
- });
3461
- currentMessages.push({
3462
- role: 'user',
3463
- content: blockedMessage
3464
- });
3485
+ if (this.debug) {
3486
+ console.log('[DEBUG] Task checkpoint: Blocking completion due to incomplete tasks');
3487
+ console.log('[DEBUG] Incomplete tasks:', taskSummary);
3488
+ }
3465
3489
 
3466
- completionAttempted = false; // Reset to allow more iterations
3467
- continue; // Skip the break and continue the loop
3490
+ // Add reminder message and continue the loop
3491
+ currentMessages.push({
3492
+ role: 'assistant',
3493
+ content: assistantResponseContent
3494
+ });
3495
+ currentMessages.push({
3496
+ role: 'user',
3497
+ content: blockedMessage
3498
+ });
3499
+
3500
+ completionAttempted = false; // Reset to allow more iterations
3501
+ continue; // Skip the break and continue the loop
3502
+ }
3468
3503
  }
3469
3504
 
3470
3505
  // Handle attempt_complete shorthand - use previous response
@@ -3893,10 +3928,19 @@ Follow these instructions carefully:
3893
3928
  break;
3894
3929
  }
3895
3930
 
3896
- // Check for repeated identical responses - if AI gives same response 3 times,
3897
- // accept it as the final answer instead of continuing the loop
3898
- if (lastNoToolResponse !== null && assistantResponseContent === lastNoToolResponse) {
3931
+ // Increment consecutive no-tool counter (catches alternating stuck responses)
3932
+ consecutiveNoToolCount++;
3933
+
3934
+ // Check for repeated identical responses OR semantically similar "stuck" responses
3935
+ // This catches cases where AI alternates between slightly different "I cannot proceed" messages
3936
+ const isIdentical = lastNoToolResponse !== null && assistantResponseContent === lastNoToolResponse;
3937
+ const isSemanticallyStuck = lastNoToolResponse !== null && areBothStuckResponses(lastNoToolResponse, assistantResponseContent);
3938
+
3939
+ if (isIdentical || isSemanticallyStuck) {
3899
3940
  sameResponseCount++;
3941
+ if (this.debug && isSemanticallyStuck && !isIdentical) {
3942
+ console.log(`[DEBUG] Detected semantically similar stuck response (count: ${sameResponseCount})`);
3943
+ }
3900
3944
  if (sameResponseCount >= MAX_REPEATED_IDENTICAL_RESPONSES) {
3901
3945
  // Clean up the response - remove thinking tags
3902
3946
  let cleanedResponse = assistantResponseContent;
@@ -3910,7 +3954,7 @@ Follow these instructions carefully:
3910
3954
 
3911
3955
  if (hasSubstantialContent) {
3912
3956
  if (this.debug) {
3913
- console.log(`[DEBUG] Same response repeated ${sameResponseCount} times - accepting as final answer (${cleanedResponse.length} chars)`);
3957
+ console.log(`[DEBUG] ${isIdentical ? 'Same' : 'Stuck'} response repeated ${sameResponseCount} times - accepting as final answer (${cleanedResponse.length} chars)`);
3914
3958
  }
3915
3959
  finalResult = cleanedResponse;
3916
3960
  completionAttempted = true;
@@ -3918,11 +3962,30 @@ Follow these instructions carefully:
3918
3962
  }
3919
3963
  }
3920
3964
  } else {
3921
- // Different response, reset counter
3965
+ // Different response (and not both stuck), reset counter
3922
3966
  lastNoToolResponse = assistantResponseContent;
3923
3967
  sameResponseCount = 1;
3924
3968
  }
3925
3969
 
3970
+ // Circuit breaker: If we've had MAX_CONSECUTIVE_NO_TOOL iterations without any tool call,
3971
+ // force completion to avoid infinite loops (e.g., agent alternating between "can't proceed" variations)
3972
+ if (consecutiveNoToolCount >= MAX_CONSECUTIVE_NO_TOOL) {
3973
+ let cleanedResponse = assistantResponseContent;
3974
+ cleanedResponse = cleanedResponse.replace(/<thinking>[\s\S]*?<\/thinking>/gi, '').trim();
3975
+ cleanedResponse = cleanedResponse.replace(/<thinking>[\s\S]*$/gi, '').trim();
3976
+
3977
+ if (cleanedResponse.length > 50) {
3978
+ if (this.debug) {
3979
+ console.log(`[DEBUG] Circuit breaker: ${consecutiveNoToolCount} consecutive no-tool responses - forcing completion`);
3980
+ }
3981
+ // Record this in telemetry
3982
+ this._recordErrorTelemetry('consecutive_no_tool_circuit_breaker', `Forced completion after ${consecutiveNoToolCount} consecutive no-tool responses`, { responsePreview: cleanedResponse.substring(0, 500) }, currentIteration);
3983
+ finalResult = cleanedResponse;
3984
+ completionAttempted = true;
3985
+ break;
3986
+ }
3987
+ }
3988
+
3926
3989
  // Add assistant response and ask for tool usage
3927
3990
  currentMessages.push({ role: 'assistant', content: assistantResponseContent });
3928
3991
 
@@ -189,10 +189,13 @@ export function generateSandboxGlobals(options) {
189
189
  }
190
190
 
191
191
  // Bridge MCP tools
192
+ // Note: mcpBridge is used as a gate (non-null check), but actual execution
193
+ // goes through tool.execute() since MCPXmlBridge stores tools in mcpTools
194
+ // and doesn't have a callTool() method (that's on MCPManager)
192
195
  if (mcpBridge) {
193
196
  for (const [name, tool] of Object.entries(mcpTools)) {
194
197
  const rawMcpFn = async (params = {}) => {
195
- return mcpBridge.callTool(name, params);
198
+ return tool.execute(params);
196
199
  };
197
200
  globals[name] = traceToolCall(name, rawMcpFn, tracer, logFn);
198
201
  }
@@ -752,6 +752,67 @@ export function detectUnrecognizedToolCall(xmlString, validTools) {
752
752
  return null;
753
753
  }
754
754
 
755
/**
 * Patterns whose match marks an assistant response as "stuck".
 *
 * All patterns are case-insensitive and non-global, so `.test()` keeps no
 * state between calls. Apostrophes are matched with ['\u2019] so that both
 * the straight (') and the typographic (U+2019) form are accepted.
 */
const STUCK_RESPONSE_PATTERNS = Object.freeze([
  // Cannot proceed patterns
  /\bi\s+cannot\s+proceed\b/i,
  /\bi\s+can['\u2019]t\s+(?:proceed|continue|move\s+forward)\b/i,
  /\bunable\s+to\s+(?:proceed|continue|complete)\b/i,
  /\bblocked\b.*\b(?:proceed|continue)\b/i,
  // Missing information patterns
  /\bneed\s+(?:the|an?)\s+\w+(?:\s+\w+)?\s+to\s+(?:proceed|continue)\b/i,
  /\brequire[sd]?\s+(?:the|an?)\s+\w+\b.*\bto\s+(?:proceed|continue)\b/i,
  /\bmissing\s+(?:required|necessary|essential)\b/i,
  // Deadlock/loop patterns
  /\bdeadlock\b/i,
  /\bwe\s+are\s+in\s+a\s+loop\b/i,
  /\bstuck\s+in\s+a\s+loop\b/i,
  // "I have ..." needs whitespace before "have", but the contraction "I've"
  // has none before the apostrophe, so the two forms are separate branches.
  /\bi(?:\s+have|\s*['\u2019]ve)\s+(?:explained|stated|mentioned)\s+(?:this|the\s+situation|it)\s+(?:multiple|several)\s+times\b/i,
  // Cannot find/get patterns
  /\bi\s+(?:cannot|can['\u2019]t|could\s+not|couldn['\u2019]t)\s+(?:find|locate|get|retrieve|obtain)\s+(?:the|this|that|an?)\b/i,
  /\bno\s+way\s+to\s+(?:find|get|obtain|retrieve)\b/i,
  // Exhausted options patterns
  /\bi(?:\s+have|\s*['\u2019]ve)\s+exhausted\s+(?:all|my)\s+(?:available\s+)?(?:options|methods|approaches)\b/i,
  /\bneither\s+of\s+these\s+methods\b/i,
]);

/**
 * Detect if a response indicates the agent is "stuck" and cannot proceed.
 * This uses pattern matching to catch variations of "I cannot proceed"
 * that would bypass exact string matching.
 *
 * @param {string} response - The assistant response to check
 * @returns {boolean} - True if the response matches any stuck pattern;
 *   false for empty or non-string input
 */
export function detectStuckResponse(response) {
  if (typeof response !== 'string' || response.length === 0) {
    return false;
  }

  return STUCK_RESPONSE_PATTERNS.some((pattern) => pattern.test(response));
}
803
+
804
/**
 * Report whether two responses both look like "stuck" messages.
 * The comparison is not textual: each response is classified on its own
 * with detectStuckResponse, and the pair counts as similar only when both
 * classifications are true.
 *
 * @param {string} response1 - First response
 * @param {string} response2 - Second response
 * @returns {boolean} - True if both responses indicate a stuck state
 */
export function areBothStuckResponses(response1, response2) {
  // every() stops at the first non-stuck response, so the second response
  // is only inspected when the first one is stuck.
  return [response1, response2].every((candidate) => detectStuckResponse(candidate));
}
815
+
755
816
  /**
756
817
  * Parse targets string into array of file specifications
757
818
  * Handles both space-separated and comma-separated targets for extract tool
@@ -175,28 +175,77 @@ export function createExecutePlanTool(options) {
175
175
  // Output buffer for direct-to-user content (bypasses LLM context window)
176
176
  const outputBuffer = options.outputBuffer || null;
177
177
 
178
- if (options.toolImplementations) {
179
- // Direct DSL options used by tests and manual scripts
180
- runtimeOptions = { ...options, tracer, sessionStore, outputBuffer };
181
- llmCallFn = options.llmCall;
182
- } else {
183
- // Agent configOptions — build everything from the agent's config
184
- llmCallFn = buildLLMCall(options);
185
- runtimeOptions = {
186
- toolImplementations: buildToolImplementations(options),
187
- llmCall: llmCallFn,
188
- mcpBridge: options.mcpBridge || null,
189
- mcpTools: options.mcpTools || {},
190
- mapConcurrency: options.mapConcurrency || 5,
191
- timeoutMs: options.timeoutMs || 300000,
192
- maxLoopIterations: options.maxLoopIterations || 5000,
193
- tracer,
194
- sessionStore,
195
- outputBuffer,
196
- };
178
+ // Lazy MCP getters — when using agent configOptions, MCP may be initialized after
179
+ // this tool is created. We use getters to resolve MCP state at execution time.
180
+ const getMcpBridge = options.getMcpBridge || (() => options.mcpBridge || null);
181
+ const getMcpTools = options.getMcpTools || (() => options.mcpTools || {});
182
+ const isMcpToolAllowed = options.isMcpToolAllowed || (() => true);
183
+
184
+ // Track which MCP bridge the current runtime was built with
185
+ let cachedMcpBridge = null;
186
+ let runtime = null;
187
+
188
/**
 * Construct a fresh DSL runtime from the current MCP state.
 *
 * Reads the MCP bridge and tool map through the getters, keeps only the
 * tools accepted by isMcpToolAllowed, assembles runtimeOptions for either
 * the direct-DSL or the agent-config case, and stores the result in
 * `runtime`. The bridge used is remembered in `cachedMcpBridge`.
 *
 * @returns {object} The newly created runtime (also stored in `runtime`)
 */
function buildRuntime() {
  const bridgeNow = getMcpBridge();
  const toolsNow = getMcpTools();

  // Keep only the MCP tools that pass the allow-list check
  const permittedTools = {};
  Object.entries(toolsNow).forEach(([toolName, toolDef]) => {
    if (!isMcpToolAllowed(toolName)) {
      return;
    }
    permittedTools[toolName] = toolDef;
  });

  if (!options.toolImplementations) {
    // Agent configOptions — everything is derived from the agent's config.
    // An llmCallFn from an earlier build is reused rather than rebuilt.
    if (!llmCallFn) {
      llmCallFn = buildLLMCall(options);
    }
    runtimeOptions = {
      toolImplementations: buildToolImplementations(options),
      llmCall: llmCallFn,
      mcpBridge: bridgeNow,
      mcpTools: permittedTools,
      mapConcurrency: options.mapConcurrency || 5,
      timeoutMs: options.timeoutMs || 300000,
      maxLoopIterations: options.maxLoopIterations || 5000,
      tracer,
      sessionStore,
      outputBuffer,
    };
  } else {
    // Direct DSL options — used by tests and manual scripts
    runtimeOptions = {
      ...options,
      tracer,
      sessionStore,
      outputBuffer,
      mcpBridge: bridgeNow,
      mcpTools: permittedTools,
    };
    llmCallFn = options.llmCall;
  }

  // Record the bridge before creating the runtime so getRuntime() can
  // compare against it on the next call.
  cachedMcpBridge = bridgeNow;
  runtime = createDSLRuntime(runtimeOptions);
  return runtime;
}
236
+
237
/**
 * Return the DSL runtime, building it on first use and rebuilding it
 * whenever the bridge returned by getMcpBridge() is not the one the
 * current runtime was built with.
 *
 * @returns {object} The current runtime
 */
function getRuntime() {
  const bridgeNow = getMcpBridge();
  const isCurrent = Boolean(runtime) && cachedMcpBridge === bridgeNow;

  // Covers both "never built" and "bridge changed" (null -> bridge, or a different bridge)
  if (!isCurrent) {
    buildRuntime();
  }
  return runtime;
}
198
248
 
199
- const runtime = createDSLRuntime(runtimeOptions);
200
249
  const maxRetries = options.maxRetries ?? 2;
201
250
 
202
251
  return tool({
@@ -272,7 +321,7 @@ RULES REMINDER:
272
321
  }
273
322
  }
274
323
 
275
- const result = await runtime.execute(currentCode, description);
324
+ const result = await getRuntime().execute(currentCode, description);
276
325
 
277
326
  if (result.status === 'success') {
278
327
  finalOutput = formatSuccess(result, description, attempt, outputBuffer);
@@ -283,7 +283,15 @@ export const searchTool = (options = {}) => {
283
283
  extractOptions.format = 'xml';
284
284
  }
285
285
 
286
- return await extract(extractOptions);
286
+ const extractResult = await extract(extractOptions);
287
+
288
+ // Strip workspace root prefix from extract output so paths are relative
289
+ if (resolutionBase && typeof extractResult === 'string') {
290
+ const wsPrefix = resolutionBase.endsWith('/') ? resolutionBase : resolutionBase + '/';
291
+ return extractResult.split(wsPrefix).join('');
292
+ }
293
+
294
+ return extractResult;
287
295
  } catch (error) {
288
296
  console.error('Delegated search failed, falling back to raw search:', error);
289
297
  try {