@probelabs/probe 0.6.0-rc236 → 0.6.0-rc237
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/binaries/probe-v0.6.0-rc237-aarch64-apple-darwin.tar.gz +0 -0
- package/bin/binaries/probe-v0.6.0-rc237-aarch64-unknown-linux-musl.tar.gz +0 -0
- package/bin/binaries/probe-v0.6.0-rc237-x86_64-apple-darwin.tar.gz +0 -0
- package/bin/binaries/probe-v0.6.0-rc237-x86_64-pc-windows-msvc.zip +0 -0
- package/bin/binaries/probe-v0.6.0-rc237-x86_64-unknown-linux-musl.tar.gz +0 -0
- package/build/agent/ProbeAgent.js +91 -33
- package/build/agent/dsl/environment.js +4 -1
- package/build/agent/index.js +102 -25
- package/build/tools/common.js +61 -0
- package/cjs/agent/ProbeAgent.cjs +9691 -7277
- package/cjs/index.cjs +9694 -7280
- package/package.json +1 -1
- package/src/agent/ProbeAgent.js +91 -33
- package/src/agent/dsl/environment.js +4 -1
- package/src/tools/common.js +61 -0
- package/bin/binaries/probe-v0.6.0-rc236-aarch64-apple-darwin.tar.gz +0 -0
- package/bin/binaries/probe-v0.6.0-rc236-aarch64-unknown-linux-musl.tar.gz +0 -0
- package/bin/binaries/probe-v0.6.0-rc236-x86_64-apple-darwin.tar.gz +0 -0
- package/bin/binaries/probe-v0.6.0-rc236-x86_64-pc-windows-msvc.zip +0 -0
- package/bin/binaries/probe-v0.6.0-rc236-x86_64-unknown-linux-musl.tar.gz +0 -0
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -64,7 +64,7 @@ import {
|
|
|
64
64
|
attemptCompletionSchema,
|
|
65
65
|
parseXmlToolCallWithThinking
|
|
66
66
|
} from './tools.js';
|
|
67
|
-
import { createMessagePreview, detectUnrecognizedToolCall } from '../tools/common.js';
|
|
67
|
+
import { createMessagePreview, detectUnrecognizedToolCall, detectStuckResponse, areBothStuckResponses } from '../tools/common.js';
|
|
68
68
|
import {
|
|
69
69
|
createWrappedTools,
|
|
70
70
|
listFilesToolInstance,
|
|
@@ -3165,6 +3165,11 @@ Follow these instructions carefully:
|
|
|
3165
3165
|
let sameResponseCount = 0;
|
|
3166
3166
|
const MAX_REPEATED_IDENTICAL_RESPONSES = 3;
|
|
3167
3167
|
|
|
3168
|
+
// Circuit breaker for consecutive no-tool responses (regardless of content)
|
|
3169
|
+
// This catches cases where agent alternates between similar "stuck" messages
|
|
3170
|
+
let consecutiveNoToolCount = 0;
|
|
3171
|
+
const MAX_CONSECUTIVE_NO_TOOL = 5;
|
|
3172
|
+
|
|
3168
3173
|
// Tool iteration loop (only for non-CLI engines like Vercel/Anthropic/OpenAI)
|
|
3169
3174
|
while (currentIteration < maxIterations && !completionAttempted) {
|
|
3170
3175
|
currentIteration++;
|
|
@@ -3435,41 +3440,66 @@ Follow these instructions carefully:
|
|
|
3435
3440
|
|
|
3436
3441
|
if (this.debug) console.log(`[DEBUG] Parsed tool call: ${toolName} with params:`, params);
|
|
3437
3442
|
|
|
3443
|
+
// Reset consecutive no-tool counter since we got a valid tool call
|
|
3444
|
+
consecutiveNoToolCount = 0;
|
|
3445
|
+
|
|
3438
3446
|
if (toolName === 'attempt_completion') {
|
|
3439
3447
|
completionAttempted = true;
|
|
3440
3448
|
|
|
3441
3449
|
// END CHECKPOINT: Block completion if there are incomplete tasks
|
|
3450
|
+
// However, allow completion if the agent is stuck and genuinely cannot proceed
|
|
3442
3451
|
if (this.enableTasks && this.taskManager && this.taskManager.hasIncompleteTasks()) {
|
|
3443
|
-
const
|
|
3444
|
-
const
|
|
3445
|
-
const
|
|
3446
|
-
|
|
3447
|
-
// Record telemetry for blocked completion
|
|
3448
|
-
if (this.tracer && typeof this.tracer.recordTaskEvent === 'function') {
|
|
3449
|
-
this.tracer.recordTaskEvent('completion_blocked', {
|
|
3450
|
-
'task.incomplete_count': incompleteTasks.length,
|
|
3451
|
-
'task.incomplete_ids': incompleteTasks.map(t => t.id).join(', '),
|
|
3452
|
-
'task.iteration': currentIteration
|
|
3453
|
-
});
|
|
3454
|
-
}
|
|
3452
|
+
const completionResult = typeof params.result === 'string' ? params.result : '';
|
|
3453
|
+
const isStuckCompletion = detectStuckResponse(completionResult);
|
|
3454
|
+
const highIterationCount = currentIteration > maxIterations * 0.7; // >70% of max iterations
|
|
3455
3455
|
|
|
3456
|
-
|
|
3457
|
-
|
|
3458
|
-
|
|
3459
|
-
|
|
3456
|
+
// Allow stuck completions after many iterations to prevent infinite loops
|
|
3457
|
+
if (isStuckCompletion && highIterationCount) {
|
|
3458
|
+
if (this.debug) {
|
|
3459
|
+
console.log('[DEBUG] Task checkpoint: Allowing stuck completion (agent genuinely cannot proceed)');
|
|
3460
|
+
console.log('[DEBUG] Incomplete tasks will remain:', this.taskManager.getTaskSummary());
|
|
3461
|
+
}
|
|
3462
|
+
// Record telemetry for forced completion
|
|
3463
|
+
if (this.tracer && typeof this.tracer.recordTaskEvent === 'function') {
|
|
3464
|
+
this.tracer.recordTaskEvent('forced_stuck_completion', {
|
|
3465
|
+
'task.incomplete_count': this.taskManager.getIncompleteTasks().length,
|
|
3466
|
+
'task.iteration': currentIteration,
|
|
3467
|
+
'task.max_iterations': maxIterations
|
|
3468
|
+
});
|
|
3469
|
+
}
|
|
3470
|
+
// Continue to process the completion instead of blocking
|
|
3471
|
+
} else {
|
|
3472
|
+
const taskSummary = this.taskManager.getTaskSummary();
|
|
3473
|
+
const blockedMessage = createTaskCompletionBlockedMessage(taskSummary);
|
|
3474
|
+
const incompleteTasks = this.taskManager.getIncompleteTasks();
|
|
3475
|
+
|
|
3476
|
+
// Record telemetry for blocked completion
|
|
3477
|
+
if (this.tracer && typeof this.tracer.recordTaskEvent === 'function') {
|
|
3478
|
+
this.tracer.recordTaskEvent('completion_blocked', {
|
|
3479
|
+
'task.incomplete_count': incompleteTasks.length,
|
|
3480
|
+
'task.incomplete_ids': incompleteTasks.map(t => t.id).join(', '),
|
|
3481
|
+
'task.iteration': currentIteration
|
|
3482
|
+
});
|
|
3483
|
+
}
|
|
3460
3484
|
|
|
3461
|
-
|
|
3462
|
-
|
|
3463
|
-
|
|
3464
|
-
|
|
3465
|
-
});
|
|
3466
|
-
currentMessages.push({
|
|
3467
|
-
role: 'user',
|
|
3468
|
-
content: blockedMessage
|
|
3469
|
-
});
|
|
3485
|
+
if (this.debug) {
|
|
3486
|
+
console.log('[DEBUG] Task checkpoint: Blocking completion due to incomplete tasks');
|
|
3487
|
+
console.log('[DEBUG] Incomplete tasks:', taskSummary);
|
|
3488
|
+
}
|
|
3470
3489
|
|
|
3471
|
-
|
|
3472
|
-
|
|
3490
|
+
// Add reminder message and continue the loop
|
|
3491
|
+
currentMessages.push({
|
|
3492
|
+
role: 'assistant',
|
|
3493
|
+
content: assistantResponseContent
|
|
3494
|
+
});
|
|
3495
|
+
currentMessages.push({
|
|
3496
|
+
role: 'user',
|
|
3497
|
+
content: blockedMessage
|
|
3498
|
+
});
|
|
3499
|
+
|
|
3500
|
+
completionAttempted = false; // Reset to allow more iterations
|
|
3501
|
+
continue; // Skip the break and continue the loop
|
|
3502
|
+
}
|
|
3473
3503
|
}
|
|
3474
3504
|
|
|
3475
3505
|
// Handle attempt_complete shorthand - use previous response
|
|
@@ -3898,10 +3928,19 @@ Follow these instructions carefully:
|
|
|
3898
3928
|
break;
|
|
3899
3929
|
}
|
|
3900
3930
|
|
|
3901
|
-
//
|
|
3902
|
-
|
|
3903
|
-
|
|
3931
|
+
// Increment consecutive no-tool counter (catches alternating stuck responses)
|
|
3932
|
+
consecutiveNoToolCount++;
|
|
3933
|
+
|
|
3934
|
+
// Check for repeated identical responses OR semantically similar "stuck" responses
|
|
3935
|
+
// This catches cases where AI alternates between slightly different "I cannot proceed" messages
|
|
3936
|
+
const isIdentical = lastNoToolResponse !== null && assistantResponseContent === lastNoToolResponse;
|
|
3937
|
+
const isSemanticallyStuck = lastNoToolResponse !== null && areBothStuckResponses(lastNoToolResponse, assistantResponseContent);
|
|
3938
|
+
|
|
3939
|
+
if (isIdentical || isSemanticallyStuck) {
|
|
3904
3940
|
sameResponseCount++;
|
|
3941
|
+
if (this.debug && isSemanticallyStuck && !isIdentical) {
|
|
3942
|
+
console.log(`[DEBUG] Detected semantically similar stuck response (count: ${sameResponseCount})`);
|
|
3943
|
+
}
|
|
3905
3944
|
if (sameResponseCount >= MAX_REPEATED_IDENTICAL_RESPONSES) {
|
|
3906
3945
|
// Clean up the response - remove thinking tags
|
|
3907
3946
|
let cleanedResponse = assistantResponseContent;
|
|
@@ -3915,7 +3954,7 @@ Follow these instructions carefully:
|
|
|
3915
3954
|
|
|
3916
3955
|
if (hasSubstantialContent) {
|
|
3917
3956
|
if (this.debug) {
|
|
3918
|
-
console.log(`[DEBUG] Same response repeated ${sameResponseCount} times - accepting as final answer (${cleanedResponse.length} chars)`);
|
|
3957
|
+
console.log(`[DEBUG] ${isIdentical ? 'Same' : 'Stuck'} response repeated ${sameResponseCount} times - accepting as final answer (${cleanedResponse.length} chars)`);
|
|
3919
3958
|
}
|
|
3920
3959
|
finalResult = cleanedResponse;
|
|
3921
3960
|
completionAttempted = true;
|
|
@@ -3923,11 +3962,30 @@ Follow these instructions carefully:
|
|
|
3923
3962
|
}
|
|
3924
3963
|
}
|
|
3925
3964
|
} else {
|
|
3926
|
-
// Different response, reset counter
|
|
3965
|
+
// Different response (and not both stuck), reset counter
|
|
3927
3966
|
lastNoToolResponse = assistantResponseContent;
|
|
3928
3967
|
sameResponseCount = 1;
|
|
3929
3968
|
}
|
|
3930
3969
|
|
|
3970
|
+
// Circuit breaker: If we've had MAX_CONSECUTIVE_NO_TOOL iterations without any tool call,
|
|
3971
|
+
// force completion to avoid infinite loops (e.g., agent alternating between "can't proceed" variations)
|
|
3972
|
+
if (consecutiveNoToolCount >= MAX_CONSECUTIVE_NO_TOOL) {
|
|
3973
|
+
let cleanedResponse = assistantResponseContent;
|
|
3974
|
+
cleanedResponse = cleanedResponse.replace(/<thinking>[\s\S]*?<\/thinking>/gi, '').trim();
|
|
3975
|
+
cleanedResponse = cleanedResponse.replace(/<thinking>[\s\S]*$/gi, '').trim();
|
|
3976
|
+
|
|
3977
|
+
if (cleanedResponse.length > 50) {
|
|
3978
|
+
if (this.debug) {
|
|
3979
|
+
console.log(`[DEBUG] Circuit breaker: ${consecutiveNoToolCount} consecutive no-tool responses - forcing completion`);
|
|
3980
|
+
}
|
|
3981
|
+
// Record this in telemetry
|
|
3982
|
+
this._recordErrorTelemetry('consecutive_no_tool_circuit_breaker', `Forced completion after ${consecutiveNoToolCount} consecutive no-tool responses`, { responsePreview: cleanedResponse.substring(0, 500) }, currentIteration);
|
|
3983
|
+
finalResult = cleanedResponse;
|
|
3984
|
+
completionAttempted = true;
|
|
3985
|
+
break;
|
|
3986
|
+
}
|
|
3987
|
+
}
|
|
3988
|
+
|
|
3931
3989
|
// Add assistant response and ask for tool usage
|
|
3932
3990
|
currentMessages.push({ role: 'assistant', content: assistantResponseContent });
|
|
3933
3991
|
|
|
@@ -189,10 +189,13 @@ export function generateSandboxGlobals(options) {
|
|
|
189
189
|
}
|
|
190
190
|
|
|
191
191
|
// Bridge MCP tools
|
|
192
|
+
// Note: mcpBridge is used as a gate (non-null check), but actual execution
|
|
193
|
+
// goes through tool.execute() since MCPXmlBridge stores tools in mcpTools
|
|
194
|
+
// and doesn't have a callTool() method (that's on MCPManager)
|
|
192
195
|
if (mcpBridge) {
|
|
193
196
|
for (const [name, tool] of Object.entries(mcpTools)) {
|
|
194
197
|
const rawMcpFn = async (params = {}) => {
|
|
195
|
-
return
|
|
198
|
+
return tool.execute(params);
|
|
196
199
|
};
|
|
197
200
|
globals[name] = traceToolCall(name, rawMcpFn, tracer, logFn);
|
|
198
201
|
}
|
package/build/agent/index.js
CHANGED
|
@@ -9832,6 +9832,43 @@ function detectUnrecognizedToolCall(xmlString, validTools) {
|
|
|
9832
9832
|
}
|
|
9833
9833
|
return null;
|
|
9834
9834
|
}
|
|
9835
|
+
function detectStuckResponse(response) {
|
|
9836
|
+
if (!response || typeof response !== "string") {
|
|
9837
|
+
return false;
|
|
9838
|
+
}
|
|
9839
|
+
const lowerResponse = response.toLowerCase();
|
|
9840
|
+
const stuckPatterns = [
|
|
9841
|
+
// Cannot proceed patterns
|
|
9842
|
+
/\bi\s+cannot\s+proceed\b/i,
|
|
9843
|
+
/\bi\s+can['']t\s+(?:proceed|continue|move\s+forward)\b/i,
|
|
9844
|
+
/\bunable\s+to\s+(?:proceed|continue|complete)\b/i,
|
|
9845
|
+
/\bblocked\b.*\b(?:proceed|continue)\b/i,
|
|
9846
|
+
// Missing information patterns
|
|
9847
|
+
/\bneed\s+(?:the|an?)\s+\w+(?:\s+\w+)?\s+to\s+(?:proceed|continue)\b/i,
|
|
9848
|
+
/\brequire[sd]?\s+(?:the|an?)\s+\w+\b.*\bto\s+(?:proceed|continue)\b/i,
|
|
9849
|
+
/\bmissing\s+(?:required|necessary|essential)\b/i,
|
|
9850
|
+
// Deadlock/loop patterns
|
|
9851
|
+
/\bdeadlock\b/i,
|
|
9852
|
+
/\bwe\s+are\s+in\s+a\s+loop\b/i,
|
|
9853
|
+
/\bstuck\s+in\s+a\s+loop\b/i,
|
|
9854
|
+
/\bi\s+(?:have|['']ve)\s+(?:explained|stated|mentioned)\s+(?:this|the\s+situation|it)\s+(?:multiple|several)\s+times\b/i,
|
|
9855
|
+
// Cannot find/get patterns
|
|
9856
|
+
/\bi\s+(?:cannot|can['']t|could\s+not|couldn['']t)\s+(?:find|locate|get|retrieve|obtain)\s+(?:the|this|that|an?)\b/i,
|
|
9857
|
+
/\bno\s+way\s+to\s+(?:find|get|obtain|retrieve)\b/i,
|
|
9858
|
+
// Exhausted options patterns
|
|
9859
|
+
/\bi\s+(?:have|['']ve)\s+exhausted\s+(?:all|my)\s+(?:available\s+)?(?:options|methods|approaches)\b/i,
|
|
9860
|
+
/\bneither\s+of\s+these\s+methods\b/i
|
|
9861
|
+
];
|
|
9862
|
+
for (const pattern of stuckPatterns) {
|
|
9863
|
+
if (pattern.test(response)) {
|
|
9864
|
+
return true;
|
|
9865
|
+
}
|
|
9866
|
+
}
|
|
9867
|
+
return false;
|
|
9868
|
+
}
|
|
9869
|
+
function areBothStuckResponses(response1, response2) {
|
|
9870
|
+
return detectStuckResponse(response1) && detectStuckResponse(response2);
|
|
9871
|
+
}
|
|
9835
9872
|
function parseTargets(targets) {
|
|
9836
9873
|
if (!targets || typeof targets !== "string") {
|
|
9837
9874
|
return [];
|
|
@@ -21777,7 +21814,7 @@ function generateSandboxGlobals(options) {
|
|
|
21777
21814
|
if (mcpBridge) {
|
|
21778
21815
|
for (const [name, tool5] of Object.entries(mcpTools)) {
|
|
21779
21816
|
const rawMcpFn = async (params = {}) => {
|
|
21780
|
-
return
|
|
21817
|
+
return tool5.execute(params);
|
|
21781
21818
|
};
|
|
21782
21819
|
globals[name] = traceToolCall(name, rawMcpFn, tracer, logFn);
|
|
21783
21820
|
}
|
|
@@ -83351,6 +83388,8 @@ You are working with a workspace. Available paths: ${workspaceDesc}
|
|
|
83351
83388
|
let lastNoToolResponse = null;
|
|
83352
83389
|
let sameResponseCount = 0;
|
|
83353
83390
|
const MAX_REPEATED_IDENTICAL_RESPONSES = 3;
|
|
83391
|
+
let consecutiveNoToolCount = 0;
|
|
83392
|
+
const MAX_CONSECUTIVE_NO_TOOL = 5;
|
|
83354
83393
|
while (currentIteration < maxIterations && !completionAttempted) {
|
|
83355
83394
|
currentIteration++;
|
|
83356
83395
|
if (this.cancelled) throw new Error("Request was cancelled by the user");
|
|
@@ -83530,33 +83569,51 @@ You are working with a workspace. Available paths: ${workspaceDesc}
|
|
|
83530
83569
|
const { toolName, params } = parsedTool;
|
|
83531
83570
|
this._recordToolDecisionTelemetry(toolName, params, assistantResponseContent.length, currentIteration);
|
|
83532
83571
|
if (this.debug) console.log(`[DEBUG] Parsed tool call: ${toolName} with params:`, params);
|
|
83572
|
+
consecutiveNoToolCount = 0;
|
|
83533
83573
|
if (toolName === "attempt_completion") {
|
|
83534
83574
|
completionAttempted = true;
|
|
83535
83575
|
if (this.enableTasks && this.taskManager && this.taskManager.hasIncompleteTasks()) {
|
|
83536
|
-
const
|
|
83537
|
-
const
|
|
83538
|
-
const
|
|
83539
|
-
if (
|
|
83540
|
-
this.
|
|
83541
|
-
|
|
83542
|
-
|
|
83543
|
-
|
|
83576
|
+
const completionResult = typeof params.result === "string" ? params.result : "";
|
|
83577
|
+
const isStuckCompletion = detectStuckResponse(completionResult);
|
|
83578
|
+
const highIterationCount = currentIteration > maxIterations * 0.7;
|
|
83579
|
+
if (isStuckCompletion && highIterationCount) {
|
|
83580
|
+
if (this.debug) {
|
|
83581
|
+
console.log("[DEBUG] Task checkpoint: Allowing stuck completion (agent genuinely cannot proceed)");
|
|
83582
|
+
console.log("[DEBUG] Incomplete tasks will remain:", this.taskManager.getTaskSummary());
|
|
83583
|
+
}
|
|
83584
|
+
if (this.tracer && typeof this.tracer.recordTaskEvent === "function") {
|
|
83585
|
+
this.tracer.recordTaskEvent("forced_stuck_completion", {
|
|
83586
|
+
"task.incomplete_count": this.taskManager.getIncompleteTasks().length,
|
|
83587
|
+
"task.iteration": currentIteration,
|
|
83588
|
+
"task.max_iterations": maxIterations
|
|
83589
|
+
});
|
|
83590
|
+
}
|
|
83591
|
+
} else {
|
|
83592
|
+
const taskSummary = this.taskManager.getTaskSummary();
|
|
83593
|
+
const blockedMessage = createTaskCompletionBlockedMessage(taskSummary);
|
|
83594
|
+
const incompleteTasks = this.taskManager.getIncompleteTasks();
|
|
83595
|
+
if (this.tracer && typeof this.tracer.recordTaskEvent === "function") {
|
|
83596
|
+
this.tracer.recordTaskEvent("completion_blocked", {
|
|
83597
|
+
"task.incomplete_count": incompleteTasks.length,
|
|
83598
|
+
"task.incomplete_ids": incompleteTasks.map((t) => t.id).join(", "),
|
|
83599
|
+
"task.iteration": currentIteration
|
|
83600
|
+
});
|
|
83601
|
+
}
|
|
83602
|
+
if (this.debug) {
|
|
83603
|
+
console.log("[DEBUG] Task checkpoint: Blocking completion due to incomplete tasks");
|
|
83604
|
+
console.log("[DEBUG] Incomplete tasks:", taskSummary);
|
|
83605
|
+
}
|
|
83606
|
+
currentMessages.push({
|
|
83607
|
+
role: "assistant",
|
|
83608
|
+
content: assistantResponseContent
|
|
83544
83609
|
});
|
|
83610
|
+
currentMessages.push({
|
|
83611
|
+
role: "user",
|
|
83612
|
+
content: blockedMessage
|
|
83613
|
+
});
|
|
83614
|
+
completionAttempted = false;
|
|
83615
|
+
continue;
|
|
83545
83616
|
}
|
|
83546
|
-
if (this.debug) {
|
|
83547
|
-
console.log("[DEBUG] Task checkpoint: Blocking completion due to incomplete tasks");
|
|
83548
|
-
console.log("[DEBUG] Incomplete tasks:", taskSummary);
|
|
83549
|
-
}
|
|
83550
|
-
currentMessages.push({
|
|
83551
|
-
role: "assistant",
|
|
83552
|
-
content: assistantResponseContent
|
|
83553
|
-
});
|
|
83554
|
-
currentMessages.push({
|
|
83555
|
-
role: "user",
|
|
83556
|
-
content: blockedMessage
|
|
83557
|
-
});
|
|
83558
|
-
completionAttempted = false;
|
|
83559
|
-
continue;
|
|
83560
83617
|
}
|
|
83561
83618
|
if (params.result === "__PREVIOUS_RESPONSE__") {
|
|
83562
83619
|
const lastAssistantMessage = [...currentMessages].reverse().find(
|
|
@@ -83905,8 +83962,14 @@ ${errorXml}
|
|
|
83905
83962
|
}
|
|
83906
83963
|
break;
|
|
83907
83964
|
}
|
|
83908
|
-
|
|
83965
|
+
consecutiveNoToolCount++;
|
|
83966
|
+
const isIdentical = lastNoToolResponse !== null && assistantResponseContent === lastNoToolResponse;
|
|
83967
|
+
const isSemanticallyStuck = lastNoToolResponse !== null && areBothStuckResponses(lastNoToolResponse, assistantResponseContent);
|
|
83968
|
+
if (isIdentical || isSemanticallyStuck) {
|
|
83909
83969
|
sameResponseCount++;
|
|
83970
|
+
if (this.debug && isSemanticallyStuck && !isIdentical) {
|
|
83971
|
+
console.log(`[DEBUG] Detected semantically similar stuck response (count: ${sameResponseCount})`);
|
|
83972
|
+
}
|
|
83910
83973
|
if (sameResponseCount >= MAX_REPEATED_IDENTICAL_RESPONSES) {
|
|
83911
83974
|
let cleanedResponse = assistantResponseContent;
|
|
83912
83975
|
cleanedResponse = cleanedResponse.replace(/<thinking>[\s\S]*?<\/thinking>/gi, "").trim();
|
|
@@ -83914,7 +83977,7 @@ ${errorXml}
|
|
|
83914
83977
|
const hasSubstantialContent = cleanedResponse.length > 50 && !cleanedResponse.includes("<api_call>") && !cleanedResponse.includes("<tool_name>") && !cleanedResponse.includes("<function>");
|
|
83915
83978
|
if (hasSubstantialContent) {
|
|
83916
83979
|
if (this.debug) {
|
|
83917
|
-
console.log(`[DEBUG] Same response repeated ${sameResponseCount} times - accepting as final answer (${cleanedResponse.length} chars)`);
|
|
83980
|
+
console.log(`[DEBUG] ${isIdentical ? "Same" : "Stuck"} response repeated ${sameResponseCount} times - accepting as final answer (${cleanedResponse.length} chars)`);
|
|
83918
83981
|
}
|
|
83919
83982
|
finalResult = cleanedResponse;
|
|
83920
83983
|
completionAttempted = true;
|
|
@@ -83925,6 +83988,20 @@ ${errorXml}
|
|
|
83925
83988
|
lastNoToolResponse = assistantResponseContent;
|
|
83926
83989
|
sameResponseCount = 1;
|
|
83927
83990
|
}
|
|
83991
|
+
if (consecutiveNoToolCount >= MAX_CONSECUTIVE_NO_TOOL) {
|
|
83992
|
+
let cleanedResponse = assistantResponseContent;
|
|
83993
|
+
cleanedResponse = cleanedResponse.replace(/<thinking>[\s\S]*?<\/thinking>/gi, "").trim();
|
|
83994
|
+
cleanedResponse = cleanedResponse.replace(/<thinking>[\s\S]*$/gi, "").trim();
|
|
83995
|
+
if (cleanedResponse.length > 50) {
|
|
83996
|
+
if (this.debug) {
|
|
83997
|
+
console.log(`[DEBUG] Circuit breaker: ${consecutiveNoToolCount} consecutive no-tool responses - forcing completion`);
|
|
83998
|
+
}
|
|
83999
|
+
this._recordErrorTelemetry("consecutive_no_tool_circuit_breaker", `Forced completion after ${consecutiveNoToolCount} consecutive no-tool responses`, { responsePreview: cleanedResponse.substring(0, 500) }, currentIteration);
|
|
84000
|
+
finalResult = cleanedResponse;
|
|
84001
|
+
completionAttempted = true;
|
|
84002
|
+
break;
|
|
84003
|
+
}
|
|
84004
|
+
}
|
|
83928
84005
|
currentMessages.push({ role: "assistant", content: assistantResponseContent });
|
|
83929
84006
|
const unrecognizedTool = detectUnrecognizedToolCall(assistantResponseContent, validTools);
|
|
83930
84007
|
let reminderContent;
|
package/build/tools/common.js
CHANGED
|
@@ -752,6 +752,67 @@ export function detectUnrecognizedToolCall(xmlString, validTools) {
|
|
|
752
752
|
return null;
|
|
753
753
|
}
|
|
754
754
|
|
|
755
|
+
/**
|
|
756
|
+
* Detect if a response indicates the agent is "stuck" and cannot proceed.
|
|
757
|
+
* This uses semantic pattern matching to catch variations of "I cannot proceed"
|
|
758
|
+
* that would bypass exact string matching.
|
|
759
|
+
*
|
|
760
|
+
* @param {string} response - The assistant response to check
|
|
761
|
+
* @returns {boolean} - True if the response indicates a stuck state
|
|
762
|
+
*/
|
|
763
|
+
export function detectStuckResponse(response) {
|
|
764
|
+
if (!response || typeof response !== 'string') {
|
|
765
|
+
return false;
|
|
766
|
+
}
|
|
767
|
+
|
|
768
|
+
const lowerResponse = response.toLowerCase();
|
|
769
|
+
|
|
770
|
+
// Patterns that indicate the agent is stuck
|
|
771
|
+
// Note: Use [''] to match both straight and curly apostrophes
|
|
772
|
+
const stuckPatterns = [
|
|
773
|
+
// Cannot proceed patterns
|
|
774
|
+
/\bi\s+cannot\s+proceed\b/i,
|
|
775
|
+
/\bi\s+can['']t\s+(?:proceed|continue|move\s+forward)\b/i,
|
|
776
|
+
/\bunable\s+to\s+(?:proceed|continue|complete)\b/i,
|
|
777
|
+
/\bblocked\b.*\b(?:proceed|continue)\b/i,
|
|
778
|
+
// Missing information patterns
|
|
779
|
+
/\bneed\s+(?:the|an?)\s+\w+(?:\s+\w+)?\s+to\s+(?:proceed|continue)\b/i,
|
|
780
|
+
/\brequire[sd]?\s+(?:the|an?)\s+\w+\b.*\bto\s+(?:proceed|continue)\b/i,
|
|
781
|
+
/\bmissing\s+(?:required|necessary|essential)\b/i,
|
|
782
|
+
// Deadlock/loop patterns
|
|
783
|
+
/\bdeadlock\b/i,
|
|
784
|
+
/\bwe\s+are\s+in\s+a\s+loop\b/i,
|
|
785
|
+
/\bstuck\s+in\s+a\s+loop\b/i,
|
|
786
|
+
/\bi\s+(?:have|['']ve)\s+(?:explained|stated|mentioned)\s+(?:this|the\s+situation|it)\s+(?:multiple|several)\s+times\b/i,
|
|
787
|
+
// Cannot find/get patterns
|
|
788
|
+
/\bi\s+(?:cannot|can['']t|could\s+not|couldn['']t)\s+(?:find|locate|get|retrieve|obtain)\s+(?:the|this|that|an?)\b/i,
|
|
789
|
+
/\bno\s+way\s+to\s+(?:find|get|obtain|retrieve)\b/i,
|
|
790
|
+
// Exhausted options patterns
|
|
791
|
+
/\bi\s+(?:have|['']ve)\s+exhausted\s+(?:all|my)\s+(?:available\s+)?(?:options|methods|approaches)\b/i,
|
|
792
|
+
/\bneither\s+of\s+these\s+methods\b/i,
|
|
793
|
+
];
|
|
794
|
+
|
|
795
|
+
for (const pattern of stuckPatterns) {
|
|
796
|
+
if (pattern.test(response)) {
|
|
797
|
+
return true;
|
|
798
|
+
}
|
|
799
|
+
}
|
|
800
|
+
|
|
801
|
+
return false;
|
|
802
|
+
}
|
|
803
|
+
|
|
804
|
+
/**
|
|
805
|
+
* Check if two responses are semantically similar (both indicate being stuck)
|
|
806
|
+
* This is a lightweight check that groups stuck responses together
|
|
807
|
+
*
|
|
808
|
+
* @param {string} response1 - First response
|
|
809
|
+
* @param {string} response2 - Second response
|
|
810
|
+
* @returns {boolean} - True if both responses indicate a stuck state
|
|
811
|
+
*/
|
|
812
|
+
export function areBothStuckResponses(response1, response2) {
|
|
813
|
+
return detectStuckResponse(response1) && detectStuckResponse(response2);
|
|
814
|
+
}
|
|
815
|
+
|
|
755
816
|
/**
|
|
756
817
|
* Parse targets string into array of file specifications
|
|
757
818
|
* Handles both space-separated and comma-separated targets for extract tool
|