@probelabs/probe 0.6.0-rc235 → 0.6.0-rc237
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/binaries/probe-v0.6.0-rc237-aarch64-apple-darwin.tar.gz +0 -0
- package/bin/binaries/probe-v0.6.0-rc237-aarch64-unknown-linux-musl.tar.gz +0 -0
- package/bin/binaries/probe-v0.6.0-rc237-x86_64-apple-darwin.tar.gz +0 -0
- package/bin/binaries/probe-v0.6.0-rc237-x86_64-pc-windows-msvc.zip +0 -0
- package/bin/binaries/probe-v0.6.0-rc237-x86_64-unknown-linux-musl.tar.gz +0 -0
- package/build/agent/ProbeAgent.js +97 -34
- package/build/agent/dsl/environment.js +4 -1
- package/build/agent/index.js +164 -46
- package/build/tools/common.js +61 -0
- package/build/tools/executePlan.js +70 -21
- package/build/tools/vercel.js +9 -1
- package/cjs/agent/ProbeAgent.cjs +9753 -7298
- package/cjs/index.cjs +9756 -7301
- package/package.json +1 -1
- package/src/agent/ProbeAgent.js +97 -34
- package/src/agent/dsl/environment.js +4 -1
- package/src/tools/common.js +61 -0
- package/src/tools/executePlan.js +70 -21
- package/src/tools/vercel.js +9 -1
- package/bin/binaries/probe-v0.6.0-rc235-aarch64-apple-darwin.tar.gz +0 -0
- package/bin/binaries/probe-v0.6.0-rc235-aarch64-unknown-linux-musl.tar.gz +0 -0
- package/bin/binaries/probe-v0.6.0-rc235-x86_64-apple-darwin.tar.gz +0 -0
- package/bin/binaries/probe-v0.6.0-rc235-x86_64-pc-windows-msvc.zip +0 -0
- package/bin/binaries/probe-v0.6.0-rc235-x86_64-unknown-linux-musl.tar.gz +0 -0
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -64,7 +64,7 @@ import {
|
|
|
64
64
|
attemptCompletionSchema,
|
|
65
65
|
parseXmlToolCallWithThinking
|
|
66
66
|
} from './tools.js';
|
|
67
|
-
import { createMessagePreview, detectUnrecognizedToolCall } from '../tools/common.js';
|
|
67
|
+
import { createMessagePreview, detectUnrecognizedToolCall, detectStuckResponse, areBothStuckResponses } from '../tools/common.js';
|
|
68
68
|
import {
|
|
69
69
|
createWrappedTools,
|
|
70
70
|
listFilesToolInstance,
|
|
@@ -839,7 +839,12 @@ export class ProbeAgent {
|
|
|
839
839
|
delegationManager: this.delegationManager, // Per-instance delegation limits
|
|
840
840
|
outputBuffer: this._outputBuffer,
|
|
841
841
|
concurrencyLimiter: this.concurrencyLimiter, // Global AI concurrency limiter
|
|
842
|
-
isToolAllowed
|
|
842
|
+
isToolAllowed,
|
|
843
|
+
// Lazy MCP getters — MCP is initialized after tools are created, so we use
|
|
844
|
+
// getter functions that resolve at call-time to get the current MCP state
|
|
845
|
+
getMcpBridge: () => this.mcpBridge,
|
|
846
|
+
getMcpTools: () => this.mcpBridge?.mcpTools || {},
|
|
847
|
+
isMcpToolAllowed: (toolName) => this._isMcpToolAllowed(toolName),
|
|
843
848
|
};
|
|
844
849
|
|
|
845
850
|
// Create base tools
|
|
@@ -3160,6 +3165,11 @@ Follow these instructions carefully:
|
|
|
3160
3165
|
let sameResponseCount = 0;
|
|
3161
3166
|
const MAX_REPEATED_IDENTICAL_RESPONSES = 3;
|
|
3162
3167
|
|
|
3168
|
+
// Circuit breaker for consecutive no-tool responses (regardless of content)
|
|
3169
|
+
// This catches cases where agent alternates between similar "stuck" messages
|
|
3170
|
+
let consecutiveNoToolCount = 0;
|
|
3171
|
+
const MAX_CONSECUTIVE_NO_TOOL = 5;
|
|
3172
|
+
|
|
3163
3173
|
// Tool iteration loop (only for non-CLI engines like Vercel/Anthropic/OpenAI)
|
|
3164
3174
|
while (currentIteration < maxIterations && !completionAttempted) {
|
|
3165
3175
|
currentIteration++;
|
|
@@ -3430,41 +3440,66 @@ Follow these instructions carefully:
|
|
|
3430
3440
|
|
|
3431
3441
|
if (this.debug) console.log(`[DEBUG] Parsed tool call: ${toolName} with params:`, params);
|
|
3432
3442
|
|
|
3443
|
+
// Reset consecutive no-tool counter since we got a valid tool call
|
|
3444
|
+
consecutiveNoToolCount = 0;
|
|
3445
|
+
|
|
3433
3446
|
if (toolName === 'attempt_completion') {
|
|
3434
3447
|
completionAttempted = true;
|
|
3435
3448
|
|
|
3436
3449
|
// END CHECKPOINT: Block completion if there are incomplete tasks
|
|
3450
|
+
// However, allow completion if the agent is stuck and genuinely cannot proceed
|
|
3437
3451
|
if (this.enableTasks && this.taskManager && this.taskManager.hasIncompleteTasks()) {
|
|
3438
|
-
const
|
|
3439
|
-
const
|
|
3440
|
-
const
|
|
3441
|
-
|
|
3442
|
-
// Record telemetry for blocked completion
|
|
3443
|
-
if (this.tracer && typeof this.tracer.recordTaskEvent === 'function') {
|
|
3444
|
-
this.tracer.recordTaskEvent('completion_blocked', {
|
|
3445
|
-
'task.incomplete_count': incompleteTasks.length,
|
|
3446
|
-
'task.incomplete_ids': incompleteTasks.map(t => t.id).join(', '),
|
|
3447
|
-
'task.iteration': currentIteration
|
|
3448
|
-
});
|
|
3449
|
-
}
|
|
3452
|
+
const completionResult = typeof params.result === 'string' ? params.result : '';
|
|
3453
|
+
const isStuckCompletion = detectStuckResponse(completionResult);
|
|
3454
|
+
const highIterationCount = currentIteration > maxIterations * 0.7; // >70% of max iterations
|
|
3450
3455
|
|
|
3451
|
-
|
|
3452
|
-
|
|
3453
|
-
|
|
3454
|
-
|
|
3456
|
+
// Allow stuck completions after many iterations to prevent infinite loops
|
|
3457
|
+
if (isStuckCompletion && highIterationCount) {
|
|
3458
|
+
if (this.debug) {
|
|
3459
|
+
console.log('[DEBUG] Task checkpoint: Allowing stuck completion (agent genuinely cannot proceed)');
|
|
3460
|
+
console.log('[DEBUG] Incomplete tasks will remain:', this.taskManager.getTaskSummary());
|
|
3461
|
+
}
|
|
3462
|
+
// Record telemetry for forced completion
|
|
3463
|
+
if (this.tracer && typeof this.tracer.recordTaskEvent === 'function') {
|
|
3464
|
+
this.tracer.recordTaskEvent('forced_stuck_completion', {
|
|
3465
|
+
'task.incomplete_count': this.taskManager.getIncompleteTasks().length,
|
|
3466
|
+
'task.iteration': currentIteration,
|
|
3467
|
+
'task.max_iterations': maxIterations
|
|
3468
|
+
});
|
|
3469
|
+
}
|
|
3470
|
+
// Continue to process the completion instead of blocking
|
|
3471
|
+
} else {
|
|
3472
|
+
const taskSummary = this.taskManager.getTaskSummary();
|
|
3473
|
+
const blockedMessage = createTaskCompletionBlockedMessage(taskSummary);
|
|
3474
|
+
const incompleteTasks = this.taskManager.getIncompleteTasks();
|
|
3475
|
+
|
|
3476
|
+
// Record telemetry for blocked completion
|
|
3477
|
+
if (this.tracer && typeof this.tracer.recordTaskEvent === 'function') {
|
|
3478
|
+
this.tracer.recordTaskEvent('completion_blocked', {
|
|
3479
|
+
'task.incomplete_count': incompleteTasks.length,
|
|
3480
|
+
'task.incomplete_ids': incompleteTasks.map(t => t.id).join(', '),
|
|
3481
|
+
'task.iteration': currentIteration
|
|
3482
|
+
});
|
|
3483
|
+
}
|
|
3455
3484
|
|
|
3456
|
-
|
|
3457
|
-
|
|
3458
|
-
|
|
3459
|
-
|
|
3460
|
-
});
|
|
3461
|
-
currentMessages.push({
|
|
3462
|
-
role: 'user',
|
|
3463
|
-
content: blockedMessage
|
|
3464
|
-
});
|
|
3485
|
+
if (this.debug) {
|
|
3486
|
+
console.log('[DEBUG] Task checkpoint: Blocking completion due to incomplete tasks');
|
|
3487
|
+
console.log('[DEBUG] Incomplete tasks:', taskSummary);
|
|
3488
|
+
}
|
|
3465
3489
|
|
|
3466
|
-
|
|
3467
|
-
|
|
3490
|
+
// Add reminder message and continue the loop
|
|
3491
|
+
currentMessages.push({
|
|
3492
|
+
role: 'assistant',
|
|
3493
|
+
content: assistantResponseContent
|
|
3494
|
+
});
|
|
3495
|
+
currentMessages.push({
|
|
3496
|
+
role: 'user',
|
|
3497
|
+
content: blockedMessage
|
|
3498
|
+
});
|
|
3499
|
+
|
|
3500
|
+
completionAttempted = false; // Reset to allow more iterations
|
|
3501
|
+
continue; // Skip the break and continue the loop
|
|
3502
|
+
}
|
|
3468
3503
|
}
|
|
3469
3504
|
|
|
3470
3505
|
// Handle attempt_complete shorthand - use previous response
|
|
@@ -3893,10 +3928,19 @@ Follow these instructions carefully:
|
|
|
3893
3928
|
break;
|
|
3894
3929
|
}
|
|
3895
3930
|
|
|
3896
|
-
//
|
|
3897
|
-
|
|
3898
|
-
|
|
3931
|
+
// Increment consecutive no-tool counter (catches alternating stuck responses)
|
|
3932
|
+
consecutiveNoToolCount++;
|
|
3933
|
+
|
|
3934
|
+
// Check for repeated identical responses OR semantically similar "stuck" responses
|
|
3935
|
+
// This catches cases where AI alternates between slightly different "I cannot proceed" messages
|
|
3936
|
+
const isIdentical = lastNoToolResponse !== null && assistantResponseContent === lastNoToolResponse;
|
|
3937
|
+
const isSemanticallyStuck = lastNoToolResponse !== null && areBothStuckResponses(lastNoToolResponse, assistantResponseContent);
|
|
3938
|
+
|
|
3939
|
+
if (isIdentical || isSemanticallyStuck) {
|
|
3899
3940
|
sameResponseCount++;
|
|
3941
|
+
if (this.debug && isSemanticallyStuck && !isIdentical) {
|
|
3942
|
+
console.log(`[DEBUG] Detected semantically similar stuck response (count: ${sameResponseCount})`);
|
|
3943
|
+
}
|
|
3900
3944
|
if (sameResponseCount >= MAX_REPEATED_IDENTICAL_RESPONSES) {
|
|
3901
3945
|
// Clean up the response - remove thinking tags
|
|
3902
3946
|
let cleanedResponse = assistantResponseContent;
|
|
@@ -3910,7 +3954,7 @@ Follow these instructions carefully:
|
|
|
3910
3954
|
|
|
3911
3955
|
if (hasSubstantialContent) {
|
|
3912
3956
|
if (this.debug) {
|
|
3913
|
-
console.log(`[DEBUG] Same response repeated ${sameResponseCount} times - accepting as final answer (${cleanedResponse.length} chars)`);
|
|
3957
|
+
console.log(`[DEBUG] ${isIdentical ? 'Same' : 'Stuck'} response repeated ${sameResponseCount} times - accepting as final answer (${cleanedResponse.length} chars)`);
|
|
3914
3958
|
}
|
|
3915
3959
|
finalResult = cleanedResponse;
|
|
3916
3960
|
completionAttempted = true;
|
|
@@ -3918,11 +3962,30 @@ Follow these instructions carefully:
|
|
|
3918
3962
|
}
|
|
3919
3963
|
}
|
|
3920
3964
|
} else {
|
|
3921
|
-
// Different response, reset counter
|
|
3965
|
+
// Different response (and not both stuck), reset counter
|
|
3922
3966
|
lastNoToolResponse = assistantResponseContent;
|
|
3923
3967
|
sameResponseCount = 1;
|
|
3924
3968
|
}
|
|
3925
3969
|
|
|
3970
|
+
// Circuit breaker: If we've had MAX_CONSECUTIVE_NO_TOOL iterations without any tool call,
|
|
3971
|
+
// force completion to avoid infinite loops (e.g., agent alternating between "can't proceed" variations)
|
|
3972
|
+
if (consecutiveNoToolCount >= MAX_CONSECUTIVE_NO_TOOL) {
|
|
3973
|
+
let cleanedResponse = assistantResponseContent;
|
|
3974
|
+
cleanedResponse = cleanedResponse.replace(/<thinking>[\s\S]*?<\/thinking>/gi, '').trim();
|
|
3975
|
+
cleanedResponse = cleanedResponse.replace(/<thinking>[\s\S]*$/gi, '').trim();
|
|
3976
|
+
|
|
3977
|
+
if (cleanedResponse.length > 50) {
|
|
3978
|
+
if (this.debug) {
|
|
3979
|
+
console.log(`[DEBUG] Circuit breaker: ${consecutiveNoToolCount} consecutive no-tool responses - forcing completion`);
|
|
3980
|
+
}
|
|
3981
|
+
// Record this in telemetry
|
|
3982
|
+
this._recordErrorTelemetry('consecutive_no_tool_circuit_breaker', `Forced completion after ${consecutiveNoToolCount} consecutive no-tool responses`, { responsePreview: cleanedResponse.substring(0, 500) }, currentIteration);
|
|
3983
|
+
finalResult = cleanedResponse;
|
|
3984
|
+
completionAttempted = true;
|
|
3985
|
+
break;
|
|
3986
|
+
}
|
|
3987
|
+
}
|
|
3988
|
+
|
|
3926
3989
|
// Add assistant response and ask for tool usage
|
|
3927
3990
|
currentMessages.push({ role: 'assistant', content: assistantResponseContent });
|
|
3928
3991
|
|
|
@@ -189,10 +189,13 @@ export function generateSandboxGlobals(options) {
|
|
|
189
189
|
}
|
|
190
190
|
|
|
191
191
|
// Bridge MCP tools
|
|
192
|
+
// Note: mcpBridge is used as a gate (non-null check), but actual execution
|
|
193
|
+
// goes through tool.execute() since MCPXmlBridge stores tools in mcpTools
|
|
194
|
+
// and doesn't have a callTool() method (that's on MCPManager)
|
|
192
195
|
if (mcpBridge) {
|
|
193
196
|
for (const [name, tool] of Object.entries(mcpTools)) {
|
|
194
197
|
const rawMcpFn = async (params = {}) => {
|
|
195
|
-
return
|
|
198
|
+
return tool.execute(params);
|
|
196
199
|
};
|
|
197
200
|
globals[name] = traceToolCall(name, rawMcpFn, tracer, logFn);
|
|
198
201
|
}
|
package/build/agent/index.js
CHANGED
|
@@ -9832,6 +9832,43 @@ function detectUnrecognizedToolCall(xmlString, validTools) {
|
|
|
9832
9832
|
}
|
|
9833
9833
|
return null;
|
|
9834
9834
|
}
|
|
9835
|
+
/**
 * Detect whether an assistant response says the agent is "stuck" and
 * cannot make progress (e.g. "I cannot proceed", "stuck in a loop").
 *
 * Matching is case-insensitive and accepts both the straight (') and the
 * typographic (\u2019) apostrophe in contractions such as "can't" / "I've".
 *
 * @param {string} response - The assistant response to check
 * @returns {boolean} True if any stuck pattern matches; false for empty or
 *   non-string input
 */
function detectStuckResponse(response) {
  if (!response || typeof response !== "string") {
    return false;
  }
  // Kept local (not hoisted) so no new top-level name is added to the bundle scope.
  // None of the patterns use the `g`/`y` flags, so `.test()` is stateless.
  const stuckPatterns = [
    // Cannot proceed patterns
    /\bi\s+cannot\s+proceed\b/i,
    /\bi\s+can['\u2019]t\s+(?:proceed|continue|move\s+forward)\b/i,
    /\bunable\s+to\s+(?:proceed|continue|complete)\b/i,
    /\bblocked\b.*\b(?:proceed|continue)\b/i,
    // Missing information patterns
    /\bneed\s+(?:the|an?)\s+\w+(?:\s+\w+)?\s+to\s+(?:proceed|continue)\b/i,
    /\brequire[sd]?\s+(?:the|an?)\s+\w+\b.*\bto\s+(?:proceed|continue)\b/i,
    /\bmissing\s+(?:required|necessary|essential)\b/i,
    // Deadlock/loop patterns
    /\bdeadlock\b/i,
    /\bwe\s+are\s+in\s+a\s+loop\b/i,
    /\bstuck\s+in\s+a\s+loop\b/i,
    // "I've" has no whitespace before the apostrophe, so the space is optional there
    /\bi(?:\s+have|\s*['\u2019]ve)\s+(?:explained|stated|mentioned)\s+(?:this|the\s+situation|it)\s+(?:multiple|several)\s+times\b/i,
    // Cannot find/get patterns
    /\bi\s+(?:cannot|can['\u2019]t|could\s+not|couldn['\u2019]t)\s+(?:find|locate|get|retrieve|obtain)\s+(?:the|this|that|an?)\b/i,
    /\bno\s+way\s+to\s+(?:find|get|obtain|retrieve)\b/i,
    // Exhausted options patterns
    /\bi(?:\s+have|\s*['\u2019]ve)\s+exhausted\s+(?:all|my)\s+(?:available\s+)?(?:options|methods|approaches)\b/i,
    /\bneither\s+of\s+these\s+methods\b/i
  ];
  return stuckPatterns.some((pattern) => pattern.test(response));
}
|
|
9869
|
+
/**
 * Report whether two responses both indicate a stuck state.
 *
 * @param {string} response1 - First response
 * @param {string} response2 - Second response
 * @returns {boolean} True only when detectStuckResponse accepts both
 */
function areBothStuckResponses(response1, response2) {
  // `every` stops at the first non-stuck response, like the `&&` form.
  return [response1, response2].every((candidate) => detectStuckResponse(candidate));
}
|
|
9835
9872
|
function parseTargets(targets) {
|
|
9836
9873
|
if (!targets || typeof targets !== "string") {
|
|
9837
9874
|
return [];
|
|
@@ -10508,7 +10545,12 @@ var init_vercel = __esm({
|
|
|
10508
10545
|
if (outline) {
|
|
10509
10546
|
extractOptions.format = "xml";
|
|
10510
10547
|
}
|
|
10511
|
-
|
|
10548
|
+
const extractResult = await extract(extractOptions);
|
|
10549
|
+
if (resolutionBase && typeof extractResult === "string") {
|
|
10550
|
+
const wsPrefix = resolutionBase.endsWith("/") ? resolutionBase : resolutionBase + "/";
|
|
10551
|
+
return extractResult.split(wsPrefix).join("");
|
|
10552
|
+
}
|
|
10553
|
+
return extractResult;
|
|
10512
10554
|
} catch (error) {
|
|
10513
10555
|
console.error("Delegated search failed, falling back to raw search:", error);
|
|
10514
10556
|
try {
|
|
@@ -21772,7 +21814,7 @@ function generateSandboxGlobals(options) {
|
|
|
21772
21814
|
if (mcpBridge) {
|
|
21773
21815
|
for (const [name, tool5] of Object.entries(mcpTools)) {
|
|
21774
21816
|
const rawMcpFn = async (params = {}) => {
|
|
21775
|
-
return
|
|
21817
|
+
return tool5.execute(params);
|
|
21776
21818
|
};
|
|
21777
21819
|
globals[name] = traceToolCall(name, rawMcpFn, tracer, logFn);
|
|
21778
21820
|
}
|
|
@@ -28898,25 +28940,56 @@ function createExecutePlanTool(options) {
|
|
|
28898
28940
|
const tracer = options.tracer || null;
|
|
28899
28941
|
const sessionStore = options.sessionStore || {};
|
|
28900
28942
|
const outputBuffer = options.outputBuffer || null;
|
|
28901
|
-
|
|
28902
|
-
|
|
28903
|
-
|
|
28904
|
-
|
|
28905
|
-
|
|
28906
|
-
|
|
28907
|
-
|
|
28908
|
-
|
|
28909
|
-
|
|
28910
|
-
|
|
28911
|
-
|
|
28912
|
-
|
|
28913
|
-
|
|
28914
|
-
|
|
28915
|
-
|
|
28916
|
-
|
|
28917
|
-
|
|
28943
|
+
const getMcpBridge = options.getMcpBridge || (() => options.mcpBridge || null);
|
|
28944
|
+
const getMcpTools = options.getMcpTools || (() => options.mcpTools || {});
|
|
28945
|
+
const isMcpToolAllowed = options.isMcpToolAllowed || (() => true);
|
|
28946
|
+
let cachedMcpBridge = null;
|
|
28947
|
+
let runtime = null;
|
|
28948
|
+
function buildRuntime() {
|
|
28949
|
+
const currentMcpBridge = getMcpBridge();
|
|
28950
|
+
const currentMcpTools = getMcpTools();
|
|
28951
|
+
const filteredMcpTools = {};
|
|
28952
|
+
for (const [name, tool5] of Object.entries(currentMcpTools)) {
|
|
28953
|
+
if (isMcpToolAllowed(name)) {
|
|
28954
|
+
filteredMcpTools[name] = tool5;
|
|
28955
|
+
}
|
|
28956
|
+
}
|
|
28957
|
+
if (options.toolImplementations) {
|
|
28958
|
+
runtimeOptions = {
|
|
28959
|
+
...options,
|
|
28960
|
+
tracer,
|
|
28961
|
+
sessionStore,
|
|
28962
|
+
outputBuffer,
|
|
28963
|
+
mcpBridge: currentMcpBridge,
|
|
28964
|
+
mcpTools: filteredMcpTools
|
|
28965
|
+
};
|
|
28966
|
+
llmCallFn = options.llmCall;
|
|
28967
|
+
} else {
|
|
28968
|
+
llmCallFn = llmCallFn || buildLLMCall(options);
|
|
28969
|
+
runtimeOptions = {
|
|
28970
|
+
toolImplementations: buildToolImplementations(options),
|
|
28971
|
+
llmCall: llmCallFn,
|
|
28972
|
+
mcpBridge: currentMcpBridge,
|
|
28973
|
+
mcpTools: filteredMcpTools,
|
|
28974
|
+
mapConcurrency: options.mapConcurrency || 5,
|
|
28975
|
+
timeoutMs: options.timeoutMs || 3e5,
|
|
28976
|
+
maxLoopIterations: options.maxLoopIterations || 5e3,
|
|
28977
|
+
tracer,
|
|
28978
|
+
sessionStore,
|
|
28979
|
+
outputBuffer
|
|
28980
|
+
};
|
|
28981
|
+
}
|
|
28982
|
+
cachedMcpBridge = currentMcpBridge;
|
|
28983
|
+
runtime = createDSLRuntime(runtimeOptions);
|
|
28984
|
+
return runtime;
|
|
28985
|
+
}
|
|
28986
|
+
function getRuntime() {
|
|
28987
|
+
const currentMcpBridge = getMcpBridge();
|
|
28988
|
+
if (!runtime || cachedMcpBridge !== currentMcpBridge) {
|
|
28989
|
+
buildRuntime();
|
|
28990
|
+
}
|
|
28991
|
+
return runtime;
|
|
28918
28992
|
}
|
|
28919
|
-
const runtime = createDSLRuntime(runtimeOptions);
|
|
28920
28993
|
const maxRetries = options.maxRetries ?? 2;
|
|
28921
28994
|
return tool4({
|
|
28922
28995
|
description: "Execute a JavaScript DSL program to orchestrate tool calls. Use for batch processing, paginated APIs, multi-step workflows where intermediate data is large. Write simple synchronous-looking code \u2014 do NOT use async/await.",
|
|
@@ -28982,7 +29055,7 @@ Original error: ${lastError}`;
|
|
|
28982
29055
|
return finalOutput;
|
|
28983
29056
|
}
|
|
28984
29057
|
}
|
|
28985
|
-
const result = await
|
|
29058
|
+
const result = await getRuntime().execute(currentCode, description);
|
|
28986
29059
|
if (result.status === "success") {
|
|
28987
29060
|
finalOutput = formatSuccess(result, description, attempt, outputBuffer);
|
|
28988
29061
|
planSpan?.setAttributes?.({
|
|
@@ -81422,7 +81495,12 @@ var init_ProbeAgent = __esm({
|
|
|
81422
81495
|
outputBuffer: this._outputBuffer,
|
|
81423
81496
|
concurrencyLimiter: this.concurrencyLimiter,
|
|
81424
81497
|
// Global AI concurrency limiter
|
|
81425
|
-
isToolAllowed
|
|
81498
|
+
isToolAllowed,
|
|
81499
|
+
// Lazy MCP getters — MCP is initialized after tools are created, so we use
|
|
81500
|
+
// getter functions that resolve at call-time to get the current MCP state
|
|
81501
|
+
getMcpBridge: () => this.mcpBridge,
|
|
81502
|
+
getMcpTools: () => this.mcpBridge?.mcpTools || {},
|
|
81503
|
+
isMcpToolAllowed: (toolName) => this._isMcpToolAllowed(toolName)
|
|
81426
81504
|
};
|
|
81427
81505
|
const baseTools = createTools(configOptions);
|
|
81428
81506
|
const wrappedTools = createWrappedTools(baseTools);
|
|
@@ -83310,6 +83388,8 @@ You are working with a workspace. Available paths: ${workspaceDesc}
|
|
|
83310
83388
|
let lastNoToolResponse = null;
|
|
83311
83389
|
let sameResponseCount = 0;
|
|
83312
83390
|
const MAX_REPEATED_IDENTICAL_RESPONSES = 3;
|
|
83391
|
+
let consecutiveNoToolCount = 0;
|
|
83392
|
+
const MAX_CONSECUTIVE_NO_TOOL = 5;
|
|
83313
83393
|
while (currentIteration < maxIterations && !completionAttempted) {
|
|
83314
83394
|
currentIteration++;
|
|
83315
83395
|
if (this.cancelled) throw new Error("Request was cancelled by the user");
|
|
@@ -83489,33 +83569,51 @@ You are working with a workspace. Available paths: ${workspaceDesc}
|
|
|
83489
83569
|
const { toolName, params } = parsedTool;
|
|
83490
83570
|
this._recordToolDecisionTelemetry(toolName, params, assistantResponseContent.length, currentIteration);
|
|
83491
83571
|
if (this.debug) console.log(`[DEBUG] Parsed tool call: ${toolName} with params:`, params);
|
|
83572
|
+
consecutiveNoToolCount = 0;
|
|
83492
83573
|
if (toolName === "attempt_completion") {
|
|
83493
83574
|
completionAttempted = true;
|
|
83494
83575
|
if (this.enableTasks && this.taskManager && this.taskManager.hasIncompleteTasks()) {
|
|
83495
|
-
const
|
|
83496
|
-
const
|
|
83497
|
-
const
|
|
83498
|
-
if (
|
|
83499
|
-
this.
|
|
83500
|
-
|
|
83501
|
-
|
|
83502
|
-
|
|
83576
|
+
const completionResult = typeof params.result === "string" ? params.result : "";
|
|
83577
|
+
const isStuckCompletion = detectStuckResponse(completionResult);
|
|
83578
|
+
const highIterationCount = currentIteration > maxIterations * 0.7;
|
|
83579
|
+
if (isStuckCompletion && highIterationCount) {
|
|
83580
|
+
if (this.debug) {
|
|
83581
|
+
console.log("[DEBUG] Task checkpoint: Allowing stuck completion (agent genuinely cannot proceed)");
|
|
83582
|
+
console.log("[DEBUG] Incomplete tasks will remain:", this.taskManager.getTaskSummary());
|
|
83583
|
+
}
|
|
83584
|
+
if (this.tracer && typeof this.tracer.recordTaskEvent === "function") {
|
|
83585
|
+
this.tracer.recordTaskEvent("forced_stuck_completion", {
|
|
83586
|
+
"task.incomplete_count": this.taskManager.getIncompleteTasks().length,
|
|
83587
|
+
"task.iteration": currentIteration,
|
|
83588
|
+
"task.max_iterations": maxIterations
|
|
83589
|
+
});
|
|
83590
|
+
}
|
|
83591
|
+
} else {
|
|
83592
|
+
const taskSummary = this.taskManager.getTaskSummary();
|
|
83593
|
+
const blockedMessage = createTaskCompletionBlockedMessage(taskSummary);
|
|
83594
|
+
const incompleteTasks = this.taskManager.getIncompleteTasks();
|
|
83595
|
+
if (this.tracer && typeof this.tracer.recordTaskEvent === "function") {
|
|
83596
|
+
this.tracer.recordTaskEvent("completion_blocked", {
|
|
83597
|
+
"task.incomplete_count": incompleteTasks.length,
|
|
83598
|
+
"task.incomplete_ids": incompleteTasks.map((t) => t.id).join(", "),
|
|
83599
|
+
"task.iteration": currentIteration
|
|
83600
|
+
});
|
|
83601
|
+
}
|
|
83602
|
+
if (this.debug) {
|
|
83603
|
+
console.log("[DEBUG] Task checkpoint: Blocking completion due to incomplete tasks");
|
|
83604
|
+
console.log("[DEBUG] Incomplete tasks:", taskSummary);
|
|
83605
|
+
}
|
|
83606
|
+
currentMessages.push({
|
|
83607
|
+
role: "assistant",
|
|
83608
|
+
content: assistantResponseContent
|
|
83503
83609
|
});
|
|
83610
|
+
currentMessages.push({
|
|
83611
|
+
role: "user",
|
|
83612
|
+
content: blockedMessage
|
|
83613
|
+
});
|
|
83614
|
+
completionAttempted = false;
|
|
83615
|
+
continue;
|
|
83504
83616
|
}
|
|
83505
|
-
if (this.debug) {
|
|
83506
|
-
console.log("[DEBUG] Task checkpoint: Blocking completion due to incomplete tasks");
|
|
83507
|
-
console.log("[DEBUG] Incomplete tasks:", taskSummary);
|
|
83508
|
-
}
|
|
83509
|
-
currentMessages.push({
|
|
83510
|
-
role: "assistant",
|
|
83511
|
-
content: assistantResponseContent
|
|
83512
|
-
});
|
|
83513
|
-
currentMessages.push({
|
|
83514
|
-
role: "user",
|
|
83515
|
-
content: blockedMessage
|
|
83516
|
-
});
|
|
83517
|
-
completionAttempted = false;
|
|
83518
|
-
continue;
|
|
83519
83617
|
}
|
|
83520
83618
|
if (params.result === "__PREVIOUS_RESPONSE__") {
|
|
83521
83619
|
const lastAssistantMessage = [...currentMessages].reverse().find(
|
|
@@ -83864,8 +83962,14 @@ ${errorXml}
|
|
|
83864
83962
|
}
|
|
83865
83963
|
break;
|
|
83866
83964
|
}
|
|
83867
|
-
|
|
83965
|
+
consecutiveNoToolCount++;
|
|
83966
|
+
const isIdentical = lastNoToolResponse !== null && assistantResponseContent === lastNoToolResponse;
|
|
83967
|
+
const isSemanticallyStuck = lastNoToolResponse !== null && areBothStuckResponses(lastNoToolResponse, assistantResponseContent);
|
|
83968
|
+
if (isIdentical || isSemanticallyStuck) {
|
|
83868
83969
|
sameResponseCount++;
|
|
83970
|
+
if (this.debug && isSemanticallyStuck && !isIdentical) {
|
|
83971
|
+
console.log(`[DEBUG] Detected semantically similar stuck response (count: ${sameResponseCount})`);
|
|
83972
|
+
}
|
|
83869
83973
|
if (sameResponseCount >= MAX_REPEATED_IDENTICAL_RESPONSES) {
|
|
83870
83974
|
let cleanedResponse = assistantResponseContent;
|
|
83871
83975
|
cleanedResponse = cleanedResponse.replace(/<thinking>[\s\S]*?<\/thinking>/gi, "").trim();
|
|
@@ -83873,7 +83977,7 @@ ${errorXml}
|
|
|
83873
83977
|
const hasSubstantialContent = cleanedResponse.length > 50 && !cleanedResponse.includes("<api_call>") && !cleanedResponse.includes("<tool_name>") && !cleanedResponse.includes("<function>");
|
|
83874
83978
|
if (hasSubstantialContent) {
|
|
83875
83979
|
if (this.debug) {
|
|
83876
|
-
console.log(`[DEBUG] Same response repeated ${sameResponseCount} times - accepting as final answer (${cleanedResponse.length} chars)`);
|
|
83980
|
+
console.log(`[DEBUG] ${isIdentical ? "Same" : "Stuck"} response repeated ${sameResponseCount} times - accepting as final answer (${cleanedResponse.length} chars)`);
|
|
83877
83981
|
}
|
|
83878
83982
|
finalResult = cleanedResponse;
|
|
83879
83983
|
completionAttempted = true;
|
|
@@ -83884,6 +83988,20 @@ ${errorXml}
|
|
|
83884
83988
|
lastNoToolResponse = assistantResponseContent;
|
|
83885
83989
|
sameResponseCount = 1;
|
|
83886
83990
|
}
|
|
83991
|
+
if (consecutiveNoToolCount >= MAX_CONSECUTIVE_NO_TOOL) {
|
|
83992
|
+
let cleanedResponse = assistantResponseContent;
|
|
83993
|
+
cleanedResponse = cleanedResponse.replace(/<thinking>[\s\S]*?<\/thinking>/gi, "").trim();
|
|
83994
|
+
cleanedResponse = cleanedResponse.replace(/<thinking>[\s\S]*$/gi, "").trim();
|
|
83995
|
+
if (cleanedResponse.length > 50) {
|
|
83996
|
+
if (this.debug) {
|
|
83997
|
+
console.log(`[DEBUG] Circuit breaker: ${consecutiveNoToolCount} consecutive no-tool responses - forcing completion`);
|
|
83998
|
+
}
|
|
83999
|
+
this._recordErrorTelemetry("consecutive_no_tool_circuit_breaker", `Forced completion after ${consecutiveNoToolCount} consecutive no-tool responses`, { responsePreview: cleanedResponse.substring(0, 500) }, currentIteration);
|
|
84000
|
+
finalResult = cleanedResponse;
|
|
84001
|
+
completionAttempted = true;
|
|
84002
|
+
break;
|
|
84003
|
+
}
|
|
84004
|
+
}
|
|
83887
84005
|
currentMessages.push({ role: "assistant", content: assistantResponseContent });
|
|
83888
84006
|
const unrecognizedTool = detectUnrecognizedToolCall(assistantResponseContent, validTools);
|
|
83889
84007
|
let reminderContent;
|
package/build/tools/common.js
CHANGED
|
@@ -752,6 +752,67 @@ export function detectUnrecognizedToolCall(xmlString, validTools) {
|
|
|
752
752
|
return null;
|
|
753
753
|
}
|
|
754
754
|
|
|
755
|
+
/**
 * Patterns whose match marks a response as "stuck". All are case-insensitive
 * and none use the `g`/`y` flags, so they can be shared across calls without
 * `lastIndex` state leaking between tests.
 *
 * `['\u2019]` matches both the straight (') and the typographic (’) apostrophe.
 */
const STUCK_RESPONSE_PATTERNS = [
  // Cannot proceed patterns
  /\bi\s+cannot\s+proceed\b/i,
  /\bi\s+can['\u2019]t\s+(?:proceed|continue|move\s+forward)\b/i,
  /\bunable\s+to\s+(?:proceed|continue|complete)\b/i,
  /\bblocked\b.*\b(?:proceed|continue)\b/i,
  // Missing information patterns
  /\bneed\s+(?:the|an?)\s+\w+(?:\s+\w+)?\s+to\s+(?:proceed|continue)\b/i,
  /\brequire[sd]?\s+(?:the|an?)\s+\w+\b.*\bto\s+(?:proceed|continue)\b/i,
  /\bmissing\s+(?:required|necessary|essential)\b/i,
  // Deadlock/loop patterns
  /\bdeadlock\b/i,
  /\bwe\s+are\s+in\s+a\s+loop\b/i,
  /\bstuck\s+in\s+a\s+loop\b/i,
  // "I've" has no whitespace before the apostrophe, so the space is optional there
  /\bi(?:\s+have|\s*['\u2019]ve)\s+(?:explained|stated|mentioned)\s+(?:this|the\s+situation|it)\s+(?:multiple|several)\s+times\b/i,
  // Cannot find/get patterns
  /\bi\s+(?:cannot|can['\u2019]t|could\s+not|couldn['\u2019]t)\s+(?:find|locate|get|retrieve|obtain)\s+(?:the|this|that|an?)\b/i,
  /\bno\s+way\s+to\s+(?:find|get|obtain|retrieve)\b/i,
  // Exhausted options patterns
  /\bi(?:\s+have|\s*['\u2019]ve)\s+exhausted\s+(?:all|my)\s+(?:available\s+)?(?:options|methods|approaches)\b/i,
  /\bneither\s+of\s+these\s+methods\b/i,
];

/**
 * Detect if a response indicates the agent is "stuck" and cannot proceed.
 * Uses regular-expression pattern matching to catch variations of
 * "I cannot proceed" that would bypass exact string comparison.
 *
 * @param {string} response - The assistant response to check
 * @returns {boolean} - True if the response matches any stuck pattern;
 *   false for empty or non-string input
 */
export function detectStuckResponse(response) {
  if (!response || typeof response !== 'string') {
    return false;
  }

  return STUCK_RESPONSE_PATTERNS.some((pattern) => pattern.test(response));
}
|
|
803
|
+
|
|
804
|
+
/**
 * Lightweight similarity check: two responses count as semantically alike
 * when each of them, on its own, is classified as a stuck response.
 *
 * @param {string} response1 - First response
 * @param {string} response2 - Second response
 * @returns {boolean} - True only if detectStuckResponse accepts both responses
 */
export function areBothStuckResponses(response1, response2) {
  // `every` stops at the first non-stuck response, like the `&&` form.
  return [response1, response2].every((candidate) => detectStuckResponse(candidate));
}
|
|
815
|
+
|
|
755
816
|
/**
|
|
756
817
|
* Parse targets string into array of file specifications
|
|
757
818
|
* Handles both space-separated and comma-separated targets for extract tool
|