@probelabs/probe 0.6.0-rc236 → 0.6.0-rc238
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/binaries/probe-v0.6.0-rc238-aarch64-apple-darwin.tar.gz +0 -0
- package/bin/binaries/probe-v0.6.0-rc238-aarch64-unknown-linux-musl.tar.gz +0 -0
- package/bin/binaries/probe-v0.6.0-rc238-x86_64-apple-darwin.tar.gz +0 -0
- package/bin/binaries/probe-v0.6.0-rc238-x86_64-pc-windows-msvc.zip +0 -0
- package/bin/binaries/probe-v0.6.0-rc238-x86_64-unknown-linux-musl.tar.gz +0 -0
- package/build/agent/ProbeAgent.js +91 -33
- package/build/agent/dsl/environment.js +29 -1
- package/build/agent/index.js +119 -25
- package/build/tools/common.js +61 -0
- package/cjs/agent/ProbeAgent.cjs +9708 -7277
- package/cjs/index.cjs +9711 -7280
- package/package.json +1 -1
- package/src/agent/ProbeAgent.js +91 -33
- package/src/agent/dsl/environment.js +29 -1
- package/src/tools/common.js +61 -0
- package/bin/binaries/probe-v0.6.0-rc236-aarch64-apple-darwin.tar.gz +0 -0
- package/bin/binaries/probe-v0.6.0-rc236-aarch64-unknown-linux-musl.tar.gz +0 -0
- package/bin/binaries/probe-v0.6.0-rc236-x86_64-apple-darwin.tar.gz +0 -0
- package/bin/binaries/probe-v0.6.0-rc236-x86_64-pc-windows-msvc.zip +0 -0
- package/bin/binaries/probe-v0.6.0-rc236-x86_64-unknown-linux-musl.tar.gz +0 -0
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -64,7 +64,7 @@ import {
|
|
|
64
64
|
attemptCompletionSchema,
|
|
65
65
|
parseXmlToolCallWithThinking
|
|
66
66
|
} from './tools.js';
|
|
67
|
-
import { createMessagePreview, detectUnrecognizedToolCall } from '../tools/common.js';
|
|
67
|
+
import { createMessagePreview, detectUnrecognizedToolCall, detectStuckResponse, areBothStuckResponses } from '../tools/common.js';
|
|
68
68
|
import {
|
|
69
69
|
createWrappedTools,
|
|
70
70
|
listFilesToolInstance,
|
|
@@ -3165,6 +3165,11 @@ Follow these instructions carefully:
|
|
|
3165
3165
|
let sameResponseCount = 0;
|
|
3166
3166
|
const MAX_REPEATED_IDENTICAL_RESPONSES = 3;
|
|
3167
3167
|
|
|
3168
|
+
// Circuit breaker for consecutive no-tool responses (regardless of content)
|
|
3169
|
+
// This catches cases where agent alternates between similar "stuck" messages
|
|
3170
|
+
let consecutiveNoToolCount = 0;
|
|
3171
|
+
const MAX_CONSECUTIVE_NO_TOOL = 5;
|
|
3172
|
+
|
|
3168
3173
|
// Tool iteration loop (only for non-CLI engines like Vercel/Anthropic/OpenAI)
|
|
3169
3174
|
while (currentIteration < maxIterations && !completionAttempted) {
|
|
3170
3175
|
currentIteration++;
|
|
@@ -3435,41 +3440,66 @@ Follow these instructions carefully:
|
|
|
3435
3440
|
|
|
3436
3441
|
if (this.debug) console.log(`[DEBUG] Parsed tool call: ${toolName} with params:`, params);
|
|
3437
3442
|
|
|
3443
|
+
// Reset consecutive no-tool counter since we got a valid tool call
|
|
3444
|
+
consecutiveNoToolCount = 0;
|
|
3445
|
+
|
|
3438
3446
|
if (toolName === 'attempt_completion') {
|
|
3439
3447
|
completionAttempted = true;
|
|
3440
3448
|
|
|
3441
3449
|
// END CHECKPOINT: Block completion if there are incomplete tasks
|
|
3450
|
+
// However, allow completion if the agent is stuck and genuinely cannot proceed
|
|
3442
3451
|
if (this.enableTasks && this.taskManager && this.taskManager.hasIncompleteTasks()) {
|
|
3443
|
-
const
|
|
3444
|
-
const
|
|
3445
|
-
const
|
|
3446
|
-
|
|
3447
|
-
// Record telemetry for blocked completion
|
|
3448
|
-
if (this.tracer && typeof this.tracer.recordTaskEvent === 'function') {
|
|
3449
|
-
this.tracer.recordTaskEvent('completion_blocked', {
|
|
3450
|
-
'task.incomplete_count': incompleteTasks.length,
|
|
3451
|
-
'task.incomplete_ids': incompleteTasks.map(t => t.id).join(', '),
|
|
3452
|
-
'task.iteration': currentIteration
|
|
3453
|
-
});
|
|
3454
|
-
}
|
|
3452
|
+
const completionResult = typeof params.result === 'string' ? params.result : '';
|
|
3453
|
+
const isStuckCompletion = detectStuckResponse(completionResult);
|
|
3454
|
+
const highIterationCount = currentIteration > maxIterations * 0.7; // >70% of max iterations
|
|
3455
3455
|
|
|
3456
|
-
|
|
3457
|
-
|
|
3458
|
-
|
|
3459
|
-
|
|
3456
|
+
// Allow stuck completions after many iterations to prevent infinite loops
|
|
3457
|
+
if (isStuckCompletion && highIterationCount) {
|
|
3458
|
+
if (this.debug) {
|
|
3459
|
+
console.log('[DEBUG] Task checkpoint: Allowing stuck completion (agent genuinely cannot proceed)');
|
|
3460
|
+
console.log('[DEBUG] Incomplete tasks will remain:', this.taskManager.getTaskSummary());
|
|
3461
|
+
}
|
|
3462
|
+
// Record telemetry for forced completion
|
|
3463
|
+
if (this.tracer && typeof this.tracer.recordTaskEvent === 'function') {
|
|
3464
|
+
this.tracer.recordTaskEvent('forced_stuck_completion', {
|
|
3465
|
+
'task.incomplete_count': this.taskManager.getIncompleteTasks().length,
|
|
3466
|
+
'task.iteration': currentIteration,
|
|
3467
|
+
'task.max_iterations': maxIterations
|
|
3468
|
+
});
|
|
3469
|
+
}
|
|
3470
|
+
// Continue to process the completion instead of blocking
|
|
3471
|
+
} else {
|
|
3472
|
+
const taskSummary = this.taskManager.getTaskSummary();
|
|
3473
|
+
const blockedMessage = createTaskCompletionBlockedMessage(taskSummary);
|
|
3474
|
+
const incompleteTasks = this.taskManager.getIncompleteTasks();
|
|
3475
|
+
|
|
3476
|
+
// Record telemetry for blocked completion
|
|
3477
|
+
if (this.tracer && typeof this.tracer.recordTaskEvent === 'function') {
|
|
3478
|
+
this.tracer.recordTaskEvent('completion_blocked', {
|
|
3479
|
+
'task.incomplete_count': incompleteTasks.length,
|
|
3480
|
+
'task.incomplete_ids': incompleteTasks.map(t => t.id).join(', '),
|
|
3481
|
+
'task.iteration': currentIteration
|
|
3482
|
+
});
|
|
3483
|
+
}
|
|
3460
3484
|
|
|
3461
|
-
|
|
3462
|
-
|
|
3463
|
-
|
|
3464
|
-
|
|
3465
|
-
});
|
|
3466
|
-
currentMessages.push({
|
|
3467
|
-
role: 'user',
|
|
3468
|
-
content: blockedMessage
|
|
3469
|
-
});
|
|
3485
|
+
if (this.debug) {
|
|
3486
|
+
console.log('[DEBUG] Task checkpoint: Blocking completion due to incomplete tasks');
|
|
3487
|
+
console.log('[DEBUG] Incomplete tasks:', taskSummary);
|
|
3488
|
+
}
|
|
3470
3489
|
|
|
3471
|
-
|
|
3472
|
-
|
|
3490
|
+
// Add reminder message and continue the loop
|
|
3491
|
+
currentMessages.push({
|
|
3492
|
+
role: 'assistant',
|
|
3493
|
+
content: assistantResponseContent
|
|
3494
|
+
});
|
|
3495
|
+
currentMessages.push({
|
|
3496
|
+
role: 'user',
|
|
3497
|
+
content: blockedMessage
|
|
3498
|
+
});
|
|
3499
|
+
|
|
3500
|
+
completionAttempted = false; // Reset to allow more iterations
|
|
3501
|
+
continue; // Skip the break and continue the loop
|
|
3502
|
+
}
|
|
3473
3503
|
}
|
|
3474
3504
|
|
|
3475
3505
|
// Handle attempt_complete shorthand - use previous response
|
|
@@ -3898,10 +3928,19 @@ Follow these instructions carefully:
|
|
|
3898
3928
|
break;
|
|
3899
3929
|
}
|
|
3900
3930
|
|
|
3901
|
-
//
|
|
3902
|
-
|
|
3903
|
-
|
|
3931
|
+
// Increment consecutive no-tool counter (catches alternating stuck responses)
|
|
3932
|
+
consecutiveNoToolCount++;
|
|
3933
|
+
|
|
3934
|
+
// Check for repeated identical responses OR semantically similar "stuck" responses
|
|
3935
|
+
// This catches cases where AI alternates between slightly different "I cannot proceed" messages
|
|
3936
|
+
const isIdentical = lastNoToolResponse !== null && assistantResponseContent === lastNoToolResponse;
|
|
3937
|
+
const isSemanticallyStuck = lastNoToolResponse !== null && areBothStuckResponses(lastNoToolResponse, assistantResponseContent);
|
|
3938
|
+
|
|
3939
|
+
if (isIdentical || isSemanticallyStuck) {
|
|
3904
3940
|
sameResponseCount++;
|
|
3941
|
+
if (this.debug && isSemanticallyStuck && !isIdentical) {
|
|
3942
|
+
console.log(`[DEBUG] Detected semantically similar stuck response (count: ${sameResponseCount})`);
|
|
3943
|
+
}
|
|
3905
3944
|
if (sameResponseCount >= MAX_REPEATED_IDENTICAL_RESPONSES) {
|
|
3906
3945
|
// Clean up the response - remove thinking tags
|
|
3907
3946
|
let cleanedResponse = assistantResponseContent;
|
|
@@ -3915,7 +3954,7 @@ Follow these instructions carefully:
|
|
|
3915
3954
|
|
|
3916
3955
|
if (hasSubstantialContent) {
|
|
3917
3956
|
if (this.debug) {
|
|
3918
|
-
console.log(`[DEBUG] Same response repeated ${sameResponseCount} times - accepting as final answer (${cleanedResponse.length} chars)`);
|
|
3957
|
+
console.log(`[DEBUG] ${isIdentical ? 'Same' : 'Stuck'} response repeated ${sameResponseCount} times - accepting as final answer (${cleanedResponse.length} chars)`);
|
|
3919
3958
|
}
|
|
3920
3959
|
finalResult = cleanedResponse;
|
|
3921
3960
|
completionAttempted = true;
|
|
@@ -3923,11 +3962,30 @@ Follow these instructions carefully:
|
|
|
3923
3962
|
}
|
|
3924
3963
|
}
|
|
3925
3964
|
} else {
|
|
3926
|
-
// Different response, reset counter
|
|
3965
|
+
// Different response (and not both stuck), reset counter
|
|
3927
3966
|
lastNoToolResponse = assistantResponseContent;
|
|
3928
3967
|
sameResponseCount = 1;
|
|
3929
3968
|
}
|
|
3930
3969
|
|
|
3970
|
+
// Circuit breaker: If we've had MAX_CONSECUTIVE_NO_TOOL iterations without any tool call,
|
|
3971
|
+
// force completion to avoid infinite loops (e.g., agent alternating between "can't proceed" variations)
|
|
3972
|
+
if (consecutiveNoToolCount >= MAX_CONSECUTIVE_NO_TOOL) {
|
|
3973
|
+
let cleanedResponse = assistantResponseContent;
|
|
3974
|
+
cleanedResponse = cleanedResponse.replace(/<thinking>[\s\S]*?<\/thinking>/gi, '').trim();
|
|
3975
|
+
cleanedResponse = cleanedResponse.replace(/<thinking>[\s\S]*$/gi, '').trim();
|
|
3976
|
+
|
|
3977
|
+
if (cleanedResponse.length > 50) {
|
|
3978
|
+
if (this.debug) {
|
|
3979
|
+
console.log(`[DEBUG] Circuit breaker: ${consecutiveNoToolCount} consecutive no-tool responses - forcing completion`);
|
|
3980
|
+
}
|
|
3981
|
+
// Record this in telemetry
|
|
3982
|
+
this._recordErrorTelemetry('consecutive_no_tool_circuit_breaker', `Forced completion after ${consecutiveNoToolCount} consecutive no-tool responses`, { responsePreview: cleanedResponse.substring(0, 500) }, currentIteration);
|
|
3983
|
+
finalResult = cleanedResponse;
|
|
3984
|
+
completionAttempted = true;
|
|
3985
|
+
break;
|
|
3986
|
+
}
|
|
3987
|
+
}
|
|
3988
|
+
|
|
3931
3989
|
// Add assistant response and ask for tool usage
|
|
3932
3990
|
currentMessages.push({ role: 'assistant', content: assistantResponseContent });
|
|
3933
3991
|
|
|
@@ -117,6 +117,23 @@ function traceToolCall(toolName, fn, tracer, logFn) {
|
|
|
117
117
|
};
|
|
118
118
|
}
|
|
119
119
|
|
|
120
|
+
/**
 * Try to parse a string as JSON if it looks like a JSON object or array.
 *
 * The value is only handed to JSON.parse when its first non-whitespace
 * character is '{' or '['. Anything else (including JSON scalars such as
 * '"text"' or '42') is returned untouched.
 *
 * @param {string} text - The text to try parsing. Non-string values are returned as-is.
 * @returns {any} Parsed JSON value, or the original input if it is not a
 *   string, does not look like an object/array, or fails to parse.
 */
function tryParseJSONValue(text) {
  if (typeof text !== 'string') return text;

  // Parse the same trimmed text that is inspected below. String trimming
  // strips every Unicode whitespace character (BOM, NBSP, ...) whereas
  // JSON.parse only tolerates space, tab, CR and LF, so parsing the raw
  // string would reject payloads that the first-character check accepted.
  const trimmed = text.trim();
  if (trimmed.startsWith('{') || trimmed.startsWith('[')) {
    try {
      return JSON.parse(trimmed);
    } catch (_) {
      // Looked like JSON but is not valid JSON: deliberately fall through
      // and hand back the original string.
    }
  }
  return text;
}
|
|
136
|
+
|
|
120
137
|
/**
|
|
121
138
|
* Generate sandbox globals that bridge DSL function calls to real tool implementations.
|
|
122
139
|
*
|
|
@@ -189,10 +206,21 @@ export function generateSandboxGlobals(options) {
|
|
|
189
206
|
}
|
|
190
207
|
|
|
191
208
|
// Bridge MCP tools
|
|
209
|
+
// Note: mcpBridge is used as a gate (non-null check), but actual execution
|
|
210
|
+
// goes through tool.execute() since MCPXmlBridge stores tools in mcpTools
|
|
211
|
+
// and doesn't have a callTool() method (that's on MCPManager)
|
|
192
212
|
if (mcpBridge) {
|
|
193
213
|
for (const [name, tool] of Object.entries(mcpTools)) {
|
|
194
214
|
const rawMcpFn = async (params = {}) => {
|
|
195
|
-
|
|
215
|
+
const result = await tool.execute(params);
|
|
216
|
+
// Extract text from MCP response envelope: { content: [{ type: 'text', text: '...' }] }
|
|
217
|
+
const text = result?.content?.[0]?.text;
|
|
218
|
+
if (text === undefined) {
|
|
219
|
+
// No envelope — if raw result is a JSON-like string, try parsing it
|
|
220
|
+
if (typeof result === 'string') return tryParseJSONValue(result);
|
|
221
|
+
return result;
|
|
222
|
+
}
|
|
223
|
+
return tryParseJSONValue(text);
|
|
196
224
|
};
|
|
197
225
|
globals[name] = traceToolCall(name, rawMcpFn, tracer, logFn);
|
|
198
226
|
}
|
package/build/agent/index.js
CHANGED
|
@@ -9832,6 +9832,43 @@ function detectUnrecognizedToolCall(xmlString, validTools) {
|
|
|
9832
9832
|
}
|
|
9833
9833
|
return null;
|
|
9834
9834
|
}
|
|
9835
|
+
/**
 * Detect whether a response indicates the agent is "stuck" and cannot proceed.
 *
 * The response is tested against a fixed list of case-insensitive patterns;
 * the first match wins.
 *
 * @param {string} response - The assistant response to check
 * @returns {boolean} True if any stuck pattern matches; false for empty or
 *   non-string input.
 */
function detectStuckResponse(response) {
  if (!response || typeof response !== "string") {
    return false;
  }
  // Every pattern carries the /i flag, so no lower-cased copy is needed.
  // ['\u2019] matches both the straight (') and the curly (’) apostrophe.
  const stuckPatterns = [
    // Cannot proceed patterns
    /\bi\s+cannot\s+proceed\b/i,
    /\bi\s+can['\u2019]t\s+(?:proceed|continue|move\s+forward)\b/i,
    /\bunable\s+to\s+(?:proceed|continue|complete)\b/i,
    /\bblocked\b.*\b(?:proceed|continue)\b/i,
    // Missing information patterns
    /\bneed\s+(?:the|an?)\s+\w+(?:\s+\w+)?\s+to\s+(?:proceed|continue)\b/i,
    /\brequire[sd]?\s+(?:the|an?)\s+\w+\b.*\bto\s+(?:proceed|continue)\b/i,
    /\bmissing\s+(?:required|necessary|essential)\b/i,
    // Deadlock/loop patterns
    /\bdeadlock\b/i,
    /\bwe\s+are\s+in\s+a\s+loop\b/i,
    /\bstuck\s+in\s+a\s+loop\b/i,
    // "I have" needs whitespace after "I"; the contraction "I've" does not.
    /\bi(?:\s+have|\s*['\u2019]ve)\s+(?:explained|stated|mentioned)\s+(?:this|the\s+situation|it)\s+(?:multiple|several)\s+times\b/i,
    // Cannot find/get patterns
    /\bi\s+(?:cannot|can['\u2019]t|could\s+not|couldn['\u2019]t)\s+(?:find|locate|get|retrieve|obtain)\s+(?:the|this|that|an?)\b/i,
    /\bno\s+way\s+to\s+(?:find|get|obtain|retrieve)\b/i,
    // Exhausted options patterns
    /\bi(?:\s+have|\s*['\u2019]ve)\s+exhausted\s+(?:all|my)\s+(?:available\s+)?(?:options|methods|approaches)\b/i,
    /\bneither\s+of\s+these\s+methods\b/i
  ];
  return stuckPatterns.some((pattern) => pattern.test(response));
}
|
|
9869
|
+
/**
 * Report whether two responses both indicate a stuck state.
 *
 * @param {string} response1 - First response
 * @param {string} response2 - Second response
 * @returns {boolean} True only when detectStuckResponse accepts both inputs
 */
function areBothStuckResponses(response1, response2) {
  // Short-circuit: the second response is only examined when the first is stuck.
  if (!detectStuckResponse(response1)) {
    return false;
  }
  return detectStuckResponse(response2);
}
|
|
9835
9872
|
function parseTargets(targets) {
|
|
9836
9873
|
if (!targets || typeof targets !== "string") {
|
|
9837
9874
|
return [];
|
|
@@ -21726,6 +21763,17 @@ function traceToolCall(toolName, fn, tracer, logFn) {
|
|
|
21726
21763
|
}
|
|
21727
21764
|
};
|
|
21728
21765
|
}
|
|
21766
|
+
/**
 * Try to parse a string as JSON if it looks like a JSON object or array.
 *
 * @param {string} text - The text to try parsing. Non-string values are returned as-is.
 * @returns {any} Parsed JSON value, or the original input if it is not a
 *   string, does not start with "{" or "[", or fails to parse.
 */
function tryParseJSONValue(text) {
  if (typeof text !== "string") return text;
  // Parse the trimmed text that the first-character check looks at: trimming
  // removes all Unicode whitespace (BOM, NBSP, ...) while JSON.parse accepts
  // only space, tab, CR and LF around the value.
  const trimmed = text.trim();
  if (trimmed.startsWith("{") || trimmed.startsWith("[")) {
    try {
      return JSON.parse(trimmed);
    } catch (_) {
      // Not valid JSON: fall through and return the original string.
    }
  }
  return text;
}
|
|
21729
21777
|
function generateSandboxGlobals(options) {
|
|
21730
21778
|
const {
|
|
21731
21779
|
toolImplementations = {},
|
|
@@ -21777,7 +21825,13 @@ function generateSandboxGlobals(options) {
|
|
|
21777
21825
|
if (mcpBridge) {
|
|
21778
21826
|
for (const [name, tool5] of Object.entries(mcpTools)) {
|
|
21779
21827
|
const rawMcpFn = async (params = {}) => {
|
|
21780
|
-
|
|
21828
|
+
const result = await tool5.execute(params);
|
|
21829
|
+
const text = result?.content?.[0]?.text;
|
|
21830
|
+
if (text === void 0) {
|
|
21831
|
+
if (typeof result === "string") return tryParseJSONValue(result);
|
|
21832
|
+
return result;
|
|
21833
|
+
}
|
|
21834
|
+
return tryParseJSONValue(text);
|
|
21781
21835
|
};
|
|
21782
21836
|
globals[name] = traceToolCall(name, rawMcpFn, tracer, logFn);
|
|
21783
21837
|
}
|
|
@@ -83351,6 +83405,8 @@ You are working with a workspace. Available paths: ${workspaceDesc}
|
|
|
83351
83405
|
let lastNoToolResponse = null;
|
|
83352
83406
|
let sameResponseCount = 0;
|
|
83353
83407
|
const MAX_REPEATED_IDENTICAL_RESPONSES = 3;
|
|
83408
|
+
let consecutiveNoToolCount = 0;
|
|
83409
|
+
const MAX_CONSECUTIVE_NO_TOOL = 5;
|
|
83354
83410
|
while (currentIteration < maxIterations && !completionAttempted) {
|
|
83355
83411
|
currentIteration++;
|
|
83356
83412
|
if (this.cancelled) throw new Error("Request was cancelled by the user");
|
|
@@ -83530,33 +83586,51 @@ You are working with a workspace. Available paths: ${workspaceDesc}
|
|
|
83530
83586
|
const { toolName, params } = parsedTool;
|
|
83531
83587
|
this._recordToolDecisionTelemetry(toolName, params, assistantResponseContent.length, currentIteration);
|
|
83532
83588
|
if (this.debug) console.log(`[DEBUG] Parsed tool call: ${toolName} with params:`, params);
|
|
83589
|
+
consecutiveNoToolCount = 0;
|
|
83533
83590
|
if (toolName === "attempt_completion") {
|
|
83534
83591
|
completionAttempted = true;
|
|
83535
83592
|
if (this.enableTasks && this.taskManager && this.taskManager.hasIncompleteTasks()) {
|
|
83536
|
-
const
|
|
83537
|
-
const
|
|
83538
|
-
const
|
|
83539
|
-
if (
|
|
83540
|
-
this.
|
|
83541
|
-
|
|
83542
|
-
|
|
83543
|
-
|
|
83593
|
+
const completionResult = typeof params.result === "string" ? params.result : "";
|
|
83594
|
+
const isStuckCompletion = detectStuckResponse(completionResult);
|
|
83595
|
+
const highIterationCount = currentIteration > maxIterations * 0.7;
|
|
83596
|
+
if (isStuckCompletion && highIterationCount) {
|
|
83597
|
+
if (this.debug) {
|
|
83598
|
+
console.log("[DEBUG] Task checkpoint: Allowing stuck completion (agent genuinely cannot proceed)");
|
|
83599
|
+
console.log("[DEBUG] Incomplete tasks will remain:", this.taskManager.getTaskSummary());
|
|
83600
|
+
}
|
|
83601
|
+
if (this.tracer && typeof this.tracer.recordTaskEvent === "function") {
|
|
83602
|
+
this.tracer.recordTaskEvent("forced_stuck_completion", {
|
|
83603
|
+
"task.incomplete_count": this.taskManager.getIncompleteTasks().length,
|
|
83604
|
+
"task.iteration": currentIteration,
|
|
83605
|
+
"task.max_iterations": maxIterations
|
|
83606
|
+
});
|
|
83607
|
+
}
|
|
83608
|
+
} else {
|
|
83609
|
+
const taskSummary = this.taskManager.getTaskSummary();
|
|
83610
|
+
const blockedMessage = createTaskCompletionBlockedMessage(taskSummary);
|
|
83611
|
+
const incompleteTasks = this.taskManager.getIncompleteTasks();
|
|
83612
|
+
if (this.tracer && typeof this.tracer.recordTaskEvent === "function") {
|
|
83613
|
+
this.tracer.recordTaskEvent("completion_blocked", {
|
|
83614
|
+
"task.incomplete_count": incompleteTasks.length,
|
|
83615
|
+
"task.incomplete_ids": incompleteTasks.map((t) => t.id).join(", "),
|
|
83616
|
+
"task.iteration": currentIteration
|
|
83617
|
+
});
|
|
83618
|
+
}
|
|
83619
|
+
if (this.debug) {
|
|
83620
|
+
console.log("[DEBUG] Task checkpoint: Blocking completion due to incomplete tasks");
|
|
83621
|
+
console.log("[DEBUG] Incomplete tasks:", taskSummary);
|
|
83622
|
+
}
|
|
83623
|
+
currentMessages.push({
|
|
83624
|
+
role: "assistant",
|
|
83625
|
+
content: assistantResponseContent
|
|
83544
83626
|
});
|
|
83627
|
+
currentMessages.push({
|
|
83628
|
+
role: "user",
|
|
83629
|
+
content: blockedMessage
|
|
83630
|
+
});
|
|
83631
|
+
completionAttempted = false;
|
|
83632
|
+
continue;
|
|
83545
83633
|
}
|
|
83546
|
-
if (this.debug) {
|
|
83547
|
-
console.log("[DEBUG] Task checkpoint: Blocking completion due to incomplete tasks");
|
|
83548
|
-
console.log("[DEBUG] Incomplete tasks:", taskSummary);
|
|
83549
|
-
}
|
|
83550
|
-
currentMessages.push({
|
|
83551
|
-
role: "assistant",
|
|
83552
|
-
content: assistantResponseContent
|
|
83553
|
-
});
|
|
83554
|
-
currentMessages.push({
|
|
83555
|
-
role: "user",
|
|
83556
|
-
content: blockedMessage
|
|
83557
|
-
});
|
|
83558
|
-
completionAttempted = false;
|
|
83559
|
-
continue;
|
|
83560
83634
|
}
|
|
83561
83635
|
if (params.result === "__PREVIOUS_RESPONSE__") {
|
|
83562
83636
|
const lastAssistantMessage = [...currentMessages].reverse().find(
|
|
@@ -83905,8 +83979,14 @@ ${errorXml}
|
|
|
83905
83979
|
}
|
|
83906
83980
|
break;
|
|
83907
83981
|
}
|
|
83908
|
-
|
|
83982
|
+
consecutiveNoToolCount++;
|
|
83983
|
+
const isIdentical = lastNoToolResponse !== null && assistantResponseContent === lastNoToolResponse;
|
|
83984
|
+
const isSemanticallyStuck = lastNoToolResponse !== null && areBothStuckResponses(lastNoToolResponse, assistantResponseContent);
|
|
83985
|
+
if (isIdentical || isSemanticallyStuck) {
|
|
83909
83986
|
sameResponseCount++;
|
|
83987
|
+
if (this.debug && isSemanticallyStuck && !isIdentical) {
|
|
83988
|
+
console.log(`[DEBUG] Detected semantically similar stuck response (count: ${sameResponseCount})`);
|
|
83989
|
+
}
|
|
83910
83990
|
if (sameResponseCount >= MAX_REPEATED_IDENTICAL_RESPONSES) {
|
|
83911
83991
|
let cleanedResponse = assistantResponseContent;
|
|
83912
83992
|
cleanedResponse = cleanedResponse.replace(/<thinking>[\s\S]*?<\/thinking>/gi, "").trim();
|
|
@@ -83914,7 +83994,7 @@ ${errorXml}
|
|
|
83914
83994
|
const hasSubstantialContent = cleanedResponse.length > 50 && !cleanedResponse.includes("<api_call>") && !cleanedResponse.includes("<tool_name>") && !cleanedResponse.includes("<function>");
|
|
83915
83995
|
if (hasSubstantialContent) {
|
|
83916
83996
|
if (this.debug) {
|
|
83917
|
-
console.log(`[DEBUG] Same response repeated ${sameResponseCount} times - accepting as final answer (${cleanedResponse.length} chars)`);
|
|
83997
|
+
console.log(`[DEBUG] ${isIdentical ? "Same" : "Stuck"} response repeated ${sameResponseCount} times - accepting as final answer (${cleanedResponse.length} chars)`);
|
|
83918
83998
|
}
|
|
83919
83999
|
finalResult = cleanedResponse;
|
|
83920
84000
|
completionAttempted = true;
|
|
@@ -83925,6 +84005,20 @@ ${errorXml}
|
|
|
83925
84005
|
lastNoToolResponse = assistantResponseContent;
|
|
83926
84006
|
sameResponseCount = 1;
|
|
83927
84007
|
}
|
|
84008
|
+
if (consecutiveNoToolCount >= MAX_CONSECUTIVE_NO_TOOL) {
|
|
84009
|
+
let cleanedResponse = assistantResponseContent;
|
|
84010
|
+
cleanedResponse = cleanedResponse.replace(/<thinking>[\s\S]*?<\/thinking>/gi, "").trim();
|
|
84011
|
+
cleanedResponse = cleanedResponse.replace(/<thinking>[\s\S]*$/gi, "").trim();
|
|
84012
|
+
if (cleanedResponse.length > 50) {
|
|
84013
|
+
if (this.debug) {
|
|
84014
|
+
console.log(`[DEBUG] Circuit breaker: ${consecutiveNoToolCount} consecutive no-tool responses - forcing completion`);
|
|
84015
|
+
}
|
|
84016
|
+
this._recordErrorTelemetry("consecutive_no_tool_circuit_breaker", `Forced completion after ${consecutiveNoToolCount} consecutive no-tool responses`, { responsePreview: cleanedResponse.substring(0, 500) }, currentIteration);
|
|
84017
|
+
finalResult = cleanedResponse;
|
|
84018
|
+
completionAttempted = true;
|
|
84019
|
+
break;
|
|
84020
|
+
}
|
|
84021
|
+
}
|
|
83928
84022
|
currentMessages.push({ role: "assistant", content: assistantResponseContent });
|
|
83929
84023
|
const unrecognizedTool = detectUnrecognizedToolCall(assistantResponseContent, validTools);
|
|
83930
84024
|
let reminderContent;
|
package/build/tools/common.js
CHANGED
|
@@ -752,6 +752,67 @@ export function detectUnrecognizedToolCall(xmlString, validTools) {
|
|
|
752
752
|
return null;
|
|
753
753
|
}
|
|
754
754
|
|
|
755
|
+
/**
 * Detect if a response indicates the agent is "stuck" and cannot proceed.
 * This uses pattern matching to catch variations of "I cannot proceed"
 * that would bypass exact string matching.
 *
 * @param {string} response - The assistant response to check
 * @returns {boolean} - True if the response matches any stuck pattern;
 *   false for empty or non-string input
 */
export function detectStuckResponse(response) {
  if (!response || typeof response !== 'string') {
    return false;
  }

  // Patterns that indicate the agent is stuck.
  // Every pattern carries the /i flag, so no lower-cased copy of the
  // response is needed.
  // Note: ['\u2019] matches both the straight (') and the curly (’) apostrophe.
  const stuckPatterns = [
    // Cannot proceed patterns
    /\bi\s+cannot\s+proceed\b/i,
    /\bi\s+can['\u2019]t\s+(?:proceed|continue|move\s+forward)\b/i,
    /\bunable\s+to\s+(?:proceed|continue|complete)\b/i,
    /\bblocked\b.*\b(?:proceed|continue)\b/i,
    // Missing information patterns
    /\bneed\s+(?:the|an?)\s+\w+(?:\s+\w+)?\s+to\s+(?:proceed|continue)\b/i,
    /\brequire[sd]?\s+(?:the|an?)\s+\w+\b.*\bto\s+(?:proceed|continue)\b/i,
    /\bmissing\s+(?:required|necessary|essential)\b/i,
    // Deadlock/loop patterns
    /\bdeadlock\b/i,
    /\bwe\s+are\s+in\s+a\s+loop\b/i,
    /\bstuck\s+in\s+a\s+loop\b/i,
    // "I have" needs whitespace after "I"; the contraction "I've" does not,
    // so the whitespace belongs inside each alternative.
    /\bi(?:\s+have|\s*['\u2019]ve)\s+(?:explained|stated|mentioned)\s+(?:this|the\s+situation|it)\s+(?:multiple|several)\s+times\b/i,
    // Cannot find/get patterns
    /\bi\s+(?:cannot|can['\u2019]t|could\s+not|couldn['\u2019]t)\s+(?:find|locate|get|retrieve|obtain)\s+(?:the|this|that|an?)\b/i,
    /\bno\s+way\s+to\s+(?:find|get|obtain|retrieve)\b/i,
    // Exhausted options patterns
    /\bi(?:\s+have|\s*['\u2019]ve)\s+exhausted\s+(?:all|my)\s+(?:available\s+)?(?:options|methods|approaches)\b/i,
    /\bneither\s+of\s+these\s+methods\b/i,
  ];

  return stuckPatterns.some((pattern) => pattern.test(response));
}
|
|
803
|
+
|
|
804
|
+
/**
 * Check whether two responses both indicate being stuck.
 * Lightweight grouping check: each response is classified independently
 * by detectStuckResponse; their texts are not compared with each other.
 *
 * @param {string} response1 - First response
 * @param {string} response2 - Second response
 * @returns {boolean} - True if both responses indicate a stuck state
 */
export function areBothStuckResponses(response1, response2) {
  // Short-circuit: the second response is only examined when the first is stuck.
  if (!detectStuckResponse(response1)) {
    return false;
  }
  return detectStuckResponse(response2);
}
|
|
815
|
+
|
|
755
816
|
/**
|
|
756
817
|
* Parse targets string into array of file specifications
|
|
757
818
|
* Handles both space-separated and comma-separated targets for extract tool
|