neoagent 2.5.2-beta.4 → 2.5.2-beta.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/server/public/.last_build_id +1 -1
- package/server/public/flutter_bootstrap.js +1 -1
- package/server/public/main.dart.js +4 -4
- package/server/services/ai/engine.js +92 -351
- package/server/services/ai/tools.js +4 -1
- package/server/services/runtime/backends/local-vm.js +7 -7
package/package.json
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
|
|
1
|
+
686c0241f8a0ccd4581a0d28d54b568e
|
|
@@ -37,6 +37,6 @@ _flutter.buildConfig = {"engineRevision":"77e2e94772b6eb43759e34ed1ad7da4674e19c
|
|
|
37
37
|
|
|
38
38
|
_flutter.loader.load({
|
|
39
39
|
serviceWorkerSettings: {
|
|
40
|
-
serviceWorkerVersion: "
|
|
40
|
+
serviceWorkerVersion: "3861825221" /* Flutter's service worker is deprecated and will be removed in a future Flutter release. */
|
|
41
41
|
}
|
|
42
42
|
});
|
|
@@ -134794,7 +134794,7 @@ r===$&&A.b()
|
|
|
134794
134794
|
p.push(A.jP(q,A.j9(!1,new A.a_(B.uG,A.d8(new A.cA(B.jt,new A.a7N(r,q),q),q,q),q),!1,B.H,!0),q,q,0,0,0,q))}r=!1
|
|
134795
134795
|
if(!s.ay)if(!s.ch){r=s.e
|
|
134796
134796
|
r===$&&A.b()
|
|
134797
|
-
r=B.b.u("
|
|
134797
|
+
r=B.b.u("mqfhwocj-bf9ea71").length!==0&&r.b}if(r){r=s.d
|
|
134798
134798
|
r===$&&A.b()
|
|
134799
134799
|
r=r.aP&&!r.ai?84:0
|
|
134800
134800
|
s=s.e
|
|
@@ -140506,7 +140506,7 @@ $S:0}
|
|
|
140506
140506
|
A.a_6.prototype={}
|
|
140507
140507
|
A.SQ.prototype={
|
|
140508
140508
|
nb(a){var s=this
|
|
140509
|
-
if(B.b.u("
|
|
140509
|
+
if(B.b.u("mqfhwocj-bf9ea71").length===0||s.a!=null)return
|
|
140510
140510
|
s.AU()
|
|
140511
140511
|
s.a=A.on(B.RH,new A.bc8(s))},
|
|
140512
140512
|
AU(){var s=0,r=A.l(t.H),q,p=2,o=[],n=this,m,l,k,j,i,h,g,f
|
|
@@ -140524,7 +140524,7 @@ if(!t.f.b(k)){s=1
|
|
|
140524
140524
|
break}i=J.a3(k,"buildId")
|
|
140525
140525
|
h=i==null?null:B.b.u(J.p(i))
|
|
140526
140526
|
j=h==null?"":h
|
|
140527
|
-
if(J.bi(j)===0||J.d(j,"
|
|
140527
|
+
if(J.bi(j)===0||J.d(j,"mqfhwocj-bf9ea71")){s=1
|
|
140528
140528
|
break}n.b=!0
|
|
140529
140529
|
n.F()
|
|
140530
140530
|
p=2
|
|
@@ -140541,7 +140541,7 @@ case 2:return A.i(o.at(-1),r)}})
|
|
|
140541
140541
|
return A.k($async$AU,r)},
|
|
140542
140542
|
vE(){var s=0,r=A.l(t.H),q,p=2,o=[],n=this,m,l,k,j,i,h,g,f,e,d,c,b,a,a0,a1
|
|
140543
140543
|
var $async$vE=A.h(function(a2,a3){if(a2===1){o.push(a3)
|
|
140544
|
-
s=p}for(;;)switch(s){case 0:if(B.b.u("
|
|
140544
|
+
s=p}for(;;)switch(s){case 0:if(B.b.u("mqfhwocj-bf9ea71").length===0||n.c){s=1
|
|
140545
140545
|
break}n.c=!0
|
|
140546
140546
|
n.F()
|
|
140547
140547
|
p=4
|
|
@@ -137,6 +137,37 @@ function formatElapsedDuration(durationMs) {
|
|
|
137
137
|
return `${minutes}m ${seconds}s`;
|
|
138
138
|
}
|
|
139
139
|
|
|
140
|
+
/**
 * Collapse a raw tool error message into a short, stable bucket key so that
 * repeated failures of the same kind can be counted together.
 *
 * Known failure families map to fixed keys ('outside_workspace', 'eisdir',
 * 'enoent', 'bad_cwd', 'not_found'). Anything else falls back to the first
 * 60 characters of the normalized message.
 *
 * @param {*} errorMsg - Error text; falsy values are treated as ''.
 * @returns {string} Bucket key ('' for falsy input).
 */
function normalizeErrorKey(errorMsg) {
  // Lower-case once (so the patterns need no `i` flag) and collapse whitespace
  // runs to single spaces. This keeps the fallback key identical for messages
  // that differ only in spacing or line breaks, and lets the `.*` pattern
  // below match messages that were split across lines.
  const msg = String(errorMsg || '')
    .toLowerCase()
    .replace(/\s+/g, ' ')
    .trim();

  // The first matching pattern wins; the generic "not found" check is last.
  if (/outside.*(workspace|per-user)/.test(msg)) return 'outside_workspace';
  if (/eisdir|illegal operation on a directory/.test(msg)) return 'eisdir';
  if (/enoent|no such file/.test(msg)) return 'enoent';
  if (/can.?t cd to|no such directory/.test(msg)) return 'bad_cwd';
  if (/not found/.test(msg)) return 'not_found';

  // Unknown error: use a bounded prefix of the message as its own bucket.
  return msg.slice(0, 60);
}
|
|
149
|
+
|
|
150
|
+
/**
 * Count one occurrence of a tool error against the run's error-pattern tally.
 *
 * The message is reduced to a bucket key with normalizeErrorKey and the
 * counter for that key is incremented in `runMeta.errorPatterns`, a Map that
 * is created on first use.
 *
 * @param {*} errorMsg - Error text; nothing is recorded when it is falsy.
 * @param {object|null|undefined} runMeta - Mutable run metadata. When it is
 *   missing there is nowhere to store the tally, so the call is a no-op
 *   instead of throwing from inside the tool-failure handling path.
 * @returns {void}
 */
function trackErrorPattern(errorMsg, runMeta) {
  if (!errorMsg || !runMeta) return;

  const key = normalizeErrorKey(errorMsg);
  if (!runMeta.errorPatterns) runMeta.errorPatterns = new Map();

  const seen = runMeta.errorPatterns.get(key) || 0;
  runMeta.errorPatterns.set(key, seen + 1);
}
|
|
156
|
+
|
|
157
|
+
/**
 * Build a corrective hint for an error bucket that keeps recurring.
 *
 * @param {string} key - Bucket key, as produced by normalizeErrorKey.
 * @param {number} count - How many times this bucket has been seen.
 * @returns {string|null} `REPEATED ERROR (N×): <hint>` once the count has
 *   reached 3 and the key has a known hint; otherwise null.
 */
function buildErrorPatternGuidance(key, count) {
  if (count < 3) return null;

  const guides = {
    outside_workspace: 'read_file cannot access /tmp paths. Use execute_command with `cat <path>` instead.',
    eisdir: 'That path is a directory, not a file. Use list_directory or execute_command with `ls` to inspect it.',
    enoent: 'That path does not exist. Use execute_command with `find . -name "..."` to locate the correct path first.',
    bad_cwd: 'The VM home directory is not ~/. Use absolute paths starting from /tmp or discover the workspace root first.',
    not_found: 'This path or resource was not found. Try listing the parent directory or checking with a broader search first.',
  };

  // Keys for unknown errors are arbitrary message prefixes, so only own
  // entries count. A plain `guides[key]` lookup would also find inherited
  // members such as `constructor` or `__proto__` and format them as a hint.
  if (!Object.prototype.hasOwnProperty.call(guides, key)) return null;

  return `REPEATED ERROR (${count}×): ${guides[key]}`;
}
|
|
170
|
+
|
|
140
171
|
function resolveModelCallTimeoutMs(options = {}) {
|
|
141
172
|
const requested = Number(options?.modelCallTimeoutMs);
|
|
142
173
|
if (Number.isFinite(requested) && requested > 0) {
|
|
@@ -1468,117 +1499,6 @@ class AgentEngine {
|
|
|
1468
1499
|
};
|
|
1469
1500
|
}
|
|
1470
1501
|
|
|
1471
|
-
async decideLoopState({
|
|
1472
|
-
provider,
|
|
1473
|
-
providerName,
|
|
1474
|
-
model,
|
|
1475
|
-
messages,
|
|
1476
|
-
tools,
|
|
1477
|
-
analysis,
|
|
1478
|
-
plan,
|
|
1479
|
-
toolExecutions,
|
|
1480
|
-
lastReply,
|
|
1481
|
-
triggerSource,
|
|
1482
|
-
messagingSent,
|
|
1483
|
-
iteration,
|
|
1484
|
-
maxIterations,
|
|
1485
|
-
options,
|
|
1486
|
-
fallbackStatus,
|
|
1487
|
-
}) {
|
|
1488
|
-
const runMeta = options?.runId ? this.getRunMeta(options.runId) : null;
|
|
1489
|
-
const goalContext = resolveRunGoalContext(runMeta, analysis, plan);
|
|
1490
|
-
|
|
1491
|
-
const response = await this.requestStructuredJson({
|
|
1492
|
-
provider,
|
|
1493
|
-
providerName,
|
|
1494
|
-
model,
|
|
1495
|
-
messages,
|
|
1496
|
-
prompt: buildCompletionDecisionPrompt({
|
|
1497
|
-
triggerSource,
|
|
1498
|
-
messagingSent,
|
|
1499
|
-
goalContext,
|
|
1500
|
-
parallelWork: analysis?.parallel_work === true,
|
|
1501
|
-
tools,
|
|
1502
|
-
toolExecutions,
|
|
1503
|
-
lastReply,
|
|
1504
|
-
iteration,
|
|
1505
|
-
maxIterations,
|
|
1506
|
-
}),
|
|
1507
|
-
maxTokens: 320,
|
|
1508
|
-
normalize: (raw) => normalizeCompletionDecision(raw, fallbackStatus),
|
|
1509
|
-
fallback: { status: fallbackStatus },
|
|
1510
|
-
reasoningEffort: this.getReasoningEffort(providerName, options),
|
|
1511
|
-
telemetry: options,
|
|
1512
|
-
phase: 'loop_decision',
|
|
1513
|
-
});
|
|
1514
|
-
|
|
1515
|
-
return {
|
|
1516
|
-
decision: response.value,
|
|
1517
|
-
usage: response.usage,
|
|
1518
|
-
};
|
|
1519
|
-
}
|
|
1520
|
-
|
|
1521
|
-
async evaluateTaskCompleteSignal({
|
|
1522
|
-
provider,
|
|
1523
|
-
providerName,
|
|
1524
|
-
model,
|
|
1525
|
-
messages,
|
|
1526
|
-
tools,
|
|
1527
|
-
analysis,
|
|
1528
|
-
plan,
|
|
1529
|
-
toolExecutions,
|
|
1530
|
-
finalMessage,
|
|
1531
|
-
confidence,
|
|
1532
|
-
triggerSource,
|
|
1533
|
-
messagingSent,
|
|
1534
|
-
iteration,
|
|
1535
|
-
maxIterations,
|
|
1536
|
-
options,
|
|
1537
|
-
}) {
|
|
1538
|
-
const runMeta = options?.runId ? this.getRunMeta(options.runId) : null;
|
|
1539
|
-
const requiredConfidence = resolveRunGoalContext(runMeta, analysis, plan)
|
|
1540
|
-
.effectiveCompletionConfidence;
|
|
1541
|
-
const confidenceDecision = shouldAcceptTaskComplete({
|
|
1542
|
-
confidence,
|
|
1543
|
-
requiredConfidence,
|
|
1544
|
-
iteration,
|
|
1545
|
-
maxIterations,
|
|
1546
|
-
});
|
|
1547
|
-
if (!confidenceDecision.accept) {
|
|
1548
|
-
return {
|
|
1549
|
-
decision: {
|
|
1550
|
-
status: 'continue',
|
|
1551
|
-
reason: confidenceDecision.reason,
|
|
1552
|
-
},
|
|
1553
|
-
requiredConfidence,
|
|
1554
|
-
usage: 0,
|
|
1555
|
-
};
|
|
1556
|
-
}
|
|
1557
|
-
|
|
1558
|
-
const loopState = await this.decideLoopState({
|
|
1559
|
-
provider,
|
|
1560
|
-
providerName,
|
|
1561
|
-
model,
|
|
1562
|
-
messages,
|
|
1563
|
-
tools,
|
|
1564
|
-
analysis,
|
|
1565
|
-
plan,
|
|
1566
|
-
toolExecutions,
|
|
1567
|
-
lastReply: finalMessage,
|
|
1568
|
-
triggerSource,
|
|
1569
|
-
messagingSent,
|
|
1570
|
-
iteration,
|
|
1571
|
-
maxIterations,
|
|
1572
|
-
options,
|
|
1573
|
-
fallbackStatus: 'continue',
|
|
1574
|
-
});
|
|
1575
|
-
return {
|
|
1576
|
-
decision: loopState.decision,
|
|
1577
|
-
requiredConfidence,
|
|
1578
|
-
usage: loopState.usage || 0,
|
|
1579
|
-
};
|
|
1580
|
-
}
|
|
1581
|
-
|
|
1582
1502
|
async verifyFinalResponse({
|
|
1583
1503
|
provider,
|
|
1584
1504
|
providerName,
|
|
@@ -1732,73 +1652,6 @@ class AgentEngine {
|
|
|
1732
1652
|
return nextState;
|
|
1733
1653
|
}
|
|
1734
1654
|
|
|
1735
|
-
async recoverBlankMessagingReply({
|
|
1736
|
-
userId,
|
|
1737
|
-
runId,
|
|
1738
|
-
messages,
|
|
1739
|
-
provider,
|
|
1740
|
-
model,
|
|
1741
|
-
providerName,
|
|
1742
|
-
options,
|
|
1743
|
-
stepIndex,
|
|
1744
|
-
failedStepCount,
|
|
1745
|
-
toolExecutions = [],
|
|
1746
|
-
tools = []
|
|
1747
|
-
}) {
|
|
1748
|
-
const attempts = 3;
|
|
1749
|
-
let recoveredContent = '';
|
|
1750
|
-
let totalTokens = 0;
|
|
1751
|
-
|
|
1752
|
-
for (let attempt = 1; attempt <= attempts; attempt++) {
|
|
1753
|
-
console.warn(
|
|
1754
|
-
`[Run ${shortenRunId(runId)}] blank_reply_recovery attempt=${attempt} model=${model}`
|
|
1755
|
-
);
|
|
1756
|
-
try {
|
|
1757
|
-
const response = await withModelCallTimeout(
|
|
1758
|
-
provider.chat(
|
|
1759
|
-
sanitizeConversationMessages([
|
|
1760
|
-
...messages,
|
|
1761
|
-
{
|
|
1762
|
-
role: 'system',
|
|
1763
|
-
content: buildBlankMessagingReplyPrompt(attempt, options?.source || null)
|
|
1764
|
-
}
|
|
1765
|
-
]),
|
|
1766
|
-
[],
|
|
1767
|
-
{
|
|
1768
|
-
model,
|
|
1769
|
-
reasoningEffort: this.getReasoningEffort(providerName, options)
|
|
1770
|
-
}
|
|
1771
|
-
),
|
|
1772
|
-
options,
|
|
1773
|
-
`Blank messaging reply recovery ${attempt}`,
|
|
1774
|
-
);
|
|
1775
|
-
totalTokens += response.usage?.totalTokens || 0;
|
|
1776
|
-
recoveredContent = sanitizeModelOutput(response.content || '', { model });
|
|
1777
|
-
if (normalizeOutgoingMessage(recoveredContent)) {
|
|
1778
|
-
console.info(
|
|
1779
|
-
`[Run ${shortenRunId(runId)}] blank_reply_recovery succeeded attempt=${attempt}`
|
|
1780
|
-
);
|
|
1781
|
-
return { content: recoveredContent, tokens: totalTokens, recovered: true };
|
|
1782
|
-
}
|
|
1783
|
-
} catch (recoverErr) {
|
|
1784
|
-
console.warn(
|
|
1785
|
-
`[Run ${shortenRunId(runId)}] blank_reply_recovery attempt=${attempt} failed: ${summarizeForLog(recoverErr?.message || recoverErr, 180)}`
|
|
1786
|
-
);
|
|
1787
|
-
}
|
|
1788
|
-
}
|
|
1789
|
-
|
|
1790
|
-
const error = new Error(
|
|
1791
|
-
buildDeterministicMessagingFallback({
|
|
1792
|
-
failedStepCount,
|
|
1793
|
-
stepIndex,
|
|
1794
|
-
toolExecutions,
|
|
1795
|
-
})
|
|
1796
|
-
);
|
|
1797
|
-
error.code = 'BLANK_MESSAGING_REPLY';
|
|
1798
|
-
error.recoveryTokens = totalTokens;
|
|
1799
|
-
throw error;
|
|
1800
|
-
}
|
|
1801
|
-
|
|
1802
1655
|
getAvailableTools(app, options = {}) {
|
|
1803
1656
|
const { getAvailableTools } = require('./tools');
|
|
1804
1657
|
return getAvailableTools(app, options);
|
|
@@ -2216,14 +2069,16 @@ class AgentEngine {
|
|
|
2216
2069
|
runStartedAtMs,
|
|
2217
2070
|
));
|
|
2218
2071
|
const currentTool = String(runMeta?.progressLedger?.currentTool || '').trim();
|
|
2072
|
+
const runTitle = String(runMeta?.title || '').trim().slice(0, 60);
|
|
2073
|
+
const titlePrefix = runTitle ? `[${runTitle}] ` : '';
|
|
2219
2074
|
if (currentTool) {
|
|
2220
2075
|
return stalled
|
|
2221
|
-
?
|
|
2222
|
-
:
|
|
2076
|
+
? `${titlePrefix}Still working on ${currentTool}. Run active ${runElapsed}; no verified progress for ${unverifiedElapsed}.`
|
|
2077
|
+
: `${titlePrefix}Still working on ${currentTool}. Run active ${runElapsed}; current step ${stepElapsed} so far.`;
|
|
2223
2078
|
}
|
|
2224
2079
|
return stalled
|
|
2225
|
-
?
|
|
2226
|
-
:
|
|
2080
|
+
? `${titlePrefix}Still working on this. Run active ${runElapsed}; no verified progress for ${unverifiedElapsed}.`
|
|
2081
|
+
: `${titlePrefix}Still working on this. Run active ${runElapsed}.`;
|
|
2227
2082
|
}
|
|
2228
2083
|
|
|
2229
2084
|
async sendRuntimeMessagingHeartbeat(runId, options = {}) {
|
|
@@ -2281,8 +2136,8 @@ class AgentEngine {
|
|
|
2281
2136
|
}, { agentId: runMeta.agentId });
|
|
2282
2137
|
this.enqueueSystemSteering(
|
|
2283
2138
|
runId,
|
|
2284
|
-
'A runtime
|
|
2285
|
-
{ reason: '
|
|
2139
|
+
'A runtime progress update was just sent on your behalf because you were blocked in a tool. On your NEXT free turn: use send_interim_update to write 1-2 sentences in your own words describing what you are doing and why. Keep it short and concrete. Then continue toward the final answer.',
|
|
2140
|
+
{ reason: 'heartbeat_ai_followup' },
|
|
2286
2141
|
);
|
|
2287
2142
|
return { sent: true, content };
|
|
2288
2143
|
}
|
|
@@ -2424,9 +2279,10 @@ class AgentEngine {
|
|
|
2424
2279
|
return { sent: false, skipped: true };
|
|
2425
2280
|
}
|
|
2426
2281
|
|
|
2282
|
+
const elapsed = formatElapsedDuration(now - startedAtMs);
|
|
2427
2283
|
const nudge = stalled
|
|
2428
|
-
?
|
|
2429
|
-
:
|
|
2284
|
+
? `You have been running for ${elapsed} and appear stalled. Use send_interim_update RIGHT NOW to write 1-2 sentences explaining the blocker in your own words, then either resolve it or call task_complete with what you have. Do not leave the user without an answer.`
|
|
2285
|
+
: `You have been running for ${elapsed} without sending an update to the user. Use send_interim_update RIGHT NOW to write 1-2 sentences explaining what you are currently doing. Keep it short and concrete. Then continue working toward the final answer.`;
|
|
2430
2286
|
const queued = this.enqueueSystemSteering(runId, nudge, {
|
|
2431
2287
|
reason: stalled ? 'stalled_progress_check' : 'progress_check',
|
|
2432
2288
|
});
|
|
@@ -3165,37 +3021,6 @@ class AgentEngine {
|
|
|
3165
3021
|
db.prepare('INSERT INTO conversation_messages (conversation_id, role, content, tokens) VALUES (?, ?, ?, ?)')
|
|
3166
3022
|
.run(conversationId, 'assistant', lastContent, analysisUsage);
|
|
3167
3023
|
}
|
|
3168
|
-
const directAnswerDecision = await runWithModelFallback(
|
|
3169
|
-
'direct answer completion decision',
|
|
3170
|
-
() => this.decideLoopState({
|
|
3171
|
-
provider,
|
|
3172
|
-
providerName,
|
|
3173
|
-
model,
|
|
3174
|
-
messages,
|
|
3175
|
-
tools,
|
|
3176
|
-
analysis,
|
|
3177
|
-
plan,
|
|
3178
|
-
toolExecutions,
|
|
3179
|
-
lastReply: lastContent,
|
|
3180
|
-
triggerSource,
|
|
3181
|
-
messagingSent: false,
|
|
3182
|
-
iteration,
|
|
3183
|
-
maxIterations,
|
|
3184
|
-
options: { ...options, runId, userId, agentId },
|
|
3185
|
-
fallbackStatus: 'continue',
|
|
3186
|
-
}),
|
|
3187
|
-
);
|
|
3188
|
-
totalTokens += directAnswerDecision.usage || 0;
|
|
3189
|
-
if (directAnswerDecision.decision.status === 'continue') {
|
|
3190
|
-
messages.push({
|
|
3191
|
-
role: 'system',
|
|
3192
|
-
content: directAnswerDecision.decision.reason
|
|
3193
|
-
? `Continue working: ${directAnswerDecision.decision.reason}.`
|
|
3194
|
-
: 'The initial draft is not a finished answer. Continue working autonomously.',
|
|
3195
|
-
});
|
|
3196
|
-
lastContent = '';
|
|
3197
|
-
directAnswerEligible = false;
|
|
3198
|
-
}
|
|
3199
3024
|
}
|
|
3200
3025
|
|
|
3201
3026
|
// BUG FIX: consecutiveToolFailures was previously declared INSIDE the
|
|
@@ -3395,6 +3220,9 @@ class AgentEngine {
|
|
|
3395
3220
|
currentTool: null,
|
|
3396
3221
|
currentStepStartedAt: null,
|
|
3397
3222
|
});
|
|
3223
|
+
// Check for queued steering first — if something was injected while the
|
|
3224
|
+
// model was responding (e.g. a heartbeat nudge), give the model a chance
|
|
3225
|
+
// to act on it before we treat this as a final answer.
|
|
3398
3226
|
const systemSteeringAfterResponse = this.applyQueuedSystemSteering(runId, messages);
|
|
3399
3227
|
messages = systemSteeringAfterResponse.messages;
|
|
3400
3228
|
if (systemSteeringAfterResponse.appliedCount > 0) {
|
|
@@ -3412,65 +3240,17 @@ class AgentEngine {
|
|
|
3412
3240
|
lastContent = '';
|
|
3413
3241
|
continue;
|
|
3414
3242
|
}
|
|
3415
|
-
const messagingSent = this.activeRuns.get(runId)?.messagingSent || false;
|
|
3416
3243
|
if (this.shouldFastCompleteVoiceReply({
|
|
3417
3244
|
options,
|
|
3418
3245
|
toolExecutions,
|
|
3419
3246
|
failedStepCount,
|
|
3420
|
-
messagingSent,
|
|
3247
|
+
messagingSent: this.activeRuns.get(runId)?.messagingSent || false,
|
|
3421
3248
|
lastReply: lastContent,
|
|
3422
3249
|
})) {
|
|
3423
3250
|
break;
|
|
3424
3251
|
}
|
|
3425
|
-
|
|
3426
|
-
|
|
3427
|
-
&& this.activeRuns.get(runId)?.noResponse !== true
|
|
3428
|
-
&& options.deliveryState?.noResponse !== true
|
|
3429
|
-
);
|
|
3430
|
-
const visibleInterimActivity = hasVisibleInterimActivity(this.activeRuns.get(runId));
|
|
3431
|
-
const fallbackStatus = (
|
|
3432
|
-
proactiveRunNeedsDecision
|
|
3433
|
-
|| toolExecutions.length > 0
|
|
3434
|
-
|| failedStepCount > 0
|
|
3435
|
-
|| messagingSent
|
|
3436
|
-
|| visibleInterimActivity
|
|
3437
|
-
) ? 'continue' : 'complete';
|
|
3438
|
-
const loopState = await runWithModelFallback('loop decision', () => this.decideLoopState({
|
|
3439
|
-
provider,
|
|
3440
|
-
providerName,
|
|
3441
|
-
model,
|
|
3442
|
-
messages,
|
|
3443
|
-
tools,
|
|
3444
|
-
analysis,
|
|
3445
|
-
plan,
|
|
3446
|
-
toolExecutions,
|
|
3447
|
-
lastReply: lastContent,
|
|
3448
|
-
triggerSource,
|
|
3449
|
-
messagingSent,
|
|
3450
|
-
iteration,
|
|
3451
|
-
maxIterations,
|
|
3452
|
-
options: { ...options, runId, userId, agentId },
|
|
3453
|
-
fallbackStatus,
|
|
3454
|
-
}));
|
|
3455
|
-
totalTokens += loopState.usage || 0;
|
|
3456
|
-
if (loopState.decision.status === 'continue') {
|
|
3457
|
-
if (iteration >= maxIterations) {
|
|
3458
|
-
throw new Error(
|
|
3459
|
-
`Completion judge found unfinished work at the iteration limit after ${maxIterations} iterations.`,
|
|
3460
|
-
);
|
|
3461
|
-
}
|
|
3462
|
-
messages.push({
|
|
3463
|
-
role: 'system',
|
|
3464
|
-
content: [
|
|
3465
|
-
loopState.decision.reason ? `Continue working: ${loopState.decision.reason}.` : 'Continue working autonomously.',
|
|
3466
|
-
messagingSent
|
|
3467
|
-
? 'You already sent a user-facing message in this run. Keep working silently unless you have a materially new finished result or a real external blocker.'
|
|
3468
|
-
: 'Use send_interim_update sparingly if a short real update or question would help. Otherwise keep working until you have the result or a real blocker.',
|
|
3469
|
-
].join(' ')
|
|
3470
|
-
});
|
|
3471
|
-
lastContent = '';
|
|
3472
|
-
continue;
|
|
3473
|
-
}
|
|
3252
|
+
// AI returned text with no tool calls → trust it as the final answer.
|
|
3253
|
+
directAnswerEligible = true;
|
|
3474
3254
|
break;
|
|
3475
3255
|
}
|
|
3476
3256
|
|
|
@@ -3564,82 +3344,20 @@ class AgentEngine {
|
|
|
3564
3344
|
}
|
|
3565
3345
|
|
|
3566
3346
|
// ── task_complete: AI explicitly signals the task is fully done ──
|
|
3567
|
-
//
|
|
3568
|
-
// regular tool execution, it is a loop-exit signal.
|
|
3347
|
+
// Trust the model — no separate judge LLM call needed.
|
|
3569
3348
|
if (toolName === 'task_complete') {
|
|
3570
3349
|
const finalMessage = String(toolArgs.message || '').trim();
|
|
3571
|
-
const confidence = normalizeCompletionConfidence(toolArgs.confidence || 'medium');
|
|
3572
|
-
const messagingSent = this.getRunMeta(runId)?.messagingSent === true;
|
|
3573
|
-
const completionResult = await runWithModelFallback(
|
|
3574
|
-
'task completion decision',
|
|
3575
|
-
() => this.evaluateTaskCompleteSignal({
|
|
3576
|
-
provider,
|
|
3577
|
-
providerName,
|
|
3578
|
-
model,
|
|
3579
|
-
messages,
|
|
3580
|
-
tools,
|
|
3581
|
-
analysis,
|
|
3582
|
-
plan,
|
|
3583
|
-
toolExecutions,
|
|
3584
|
-
finalMessage,
|
|
3585
|
-
confidence,
|
|
3586
|
-
triggerSource,
|
|
3587
|
-
messagingSent,
|
|
3588
|
-
iteration,
|
|
3589
|
-
maxIterations,
|
|
3590
|
-
options: { ...options, runId, userId, agentId },
|
|
3591
|
-
}),
|
|
3592
|
-
);
|
|
3593
|
-
totalTokens += completionResult.usage || 0;
|
|
3594
|
-
const completionDecision = completionResult.decision || {
|
|
3595
|
-
status: 'continue',
|
|
3596
|
-
reason: 'The completion signal could not be verified.',
|
|
3597
|
-
};
|
|
3598
|
-
const accepted = completionDecision.status !== 'continue';
|
|
3599
3350
|
this.recordRunEvent(userId, runId, 'task_complete_signaled', {
|
|
3600
|
-
|
|
3601
|
-
requiredConfidence: completionResult.requiredConfidence,
|
|
3602
|
-
accepted,
|
|
3603
|
-
judgeStatus: completionDecision.status,
|
|
3604
|
-
judgeReason: completionDecision.reason || '',
|
|
3351
|
+
accepted: true,
|
|
3605
3352
|
iteration,
|
|
3606
3353
|
messageLength: finalMessage.length,
|
|
3607
3354
|
}, { agentId });
|
|
3608
3355
|
console.info(
|
|
3609
|
-
`[Run ${shortenRunId(runId)}] task_complete
|
|
3356
|
+
`[Run ${shortenRunId(runId)}] task_complete accepted at iteration=${iteration}`
|
|
3610
3357
|
);
|
|
3611
|
-
|
|
3612
|
-
if (iteration >= maxIterations) {
|
|
3613
|
-
throw new Error(
|
|
3614
|
-
`Completion judge rejected task_complete at the iteration limit after ${maxIterations} iterations.`,
|
|
3615
|
-
);
|
|
3616
|
-
}
|
|
3617
|
-
messages.push({
|
|
3618
|
-
role: 'tool',
|
|
3619
|
-
name: toolName,
|
|
3620
|
-
tool_call_id: toolCall.id,
|
|
3621
|
-
content: JSON.stringify({
|
|
3622
|
-
status: 'continue',
|
|
3623
|
-
reason: completionDecision.reason,
|
|
3624
|
-
required_confidence: completionResult.requiredConfidence,
|
|
3625
|
-
}),
|
|
3626
|
-
});
|
|
3627
|
-
messages.push({
|
|
3628
|
-
role: 'system',
|
|
3629
|
-
content: `${completionDecision.reason} Do not ask the user to decide the next step unless external input is truly required.`
|
|
3630
|
-
});
|
|
3631
|
-
lastContent = '';
|
|
3632
|
-
continue;
|
|
3633
|
-
}
|
|
3634
|
-
if (completionDecision.reason) {
|
|
3635
|
-
messages.push({
|
|
3636
|
-
role: 'system',
|
|
3637
|
-
content: completionDecision.reason,
|
|
3638
|
-
});
|
|
3639
|
-
}
|
|
3640
|
-
lastContent = finalMessage; // empty string is valid; downstream handles it
|
|
3358
|
+
lastContent = finalMessage;
|
|
3641
3359
|
directAnswerEligible = true;
|
|
3642
|
-
break;
|
|
3360
|
+
break;
|
|
3643
3361
|
}
|
|
3644
3362
|
|
|
3645
3363
|
const repetitionGuard = this.getRunMeta(runId)?.repetitionGuard;
|
|
@@ -3849,6 +3567,11 @@ class AgentEngine {
|
|
|
3849
3567
|
|
|
3850
3568
|
if (toolErrorMessage) {
|
|
3851
3569
|
consecutiveToolFailures += 1;
|
|
3570
|
+
const currentRunMeta = this.getRunMeta(runId);
|
|
3571
|
+
trackErrorPattern(toolErrorMessage, currentRunMeta);
|
|
3572
|
+
const errorKey = normalizeErrorKey(toolErrorMessage);
|
|
3573
|
+
const errorCount = currentRunMeta?.errorPatterns?.get(errorKey) || 0;
|
|
3574
|
+
const patternGuide = buildErrorPatternGuidance(errorKey, errorCount);
|
|
3852
3575
|
const alternativeTools = summarizeAvailableTools(tools, { exclude: toolName });
|
|
3853
3576
|
messages.push({
|
|
3854
3577
|
role: 'system',
|
|
@@ -3857,6 +3580,7 @@ class AgentEngine {
|
|
|
3857
3580
|
'This tool failure is not, by itself, a user-facing blocker.',
|
|
3858
3581
|
'Continue autonomously: retry with corrected arguments, try an alternative tool/path, or verify the outcome using other available tools.',
|
|
3859
3582
|
alternativeTools ? `Other available tools in this run: ${alternativeTools}.` : '',
|
|
3583
|
+
patternGuide || '',
|
|
3860
3584
|
'Only stop and tell the user you are blocked if the remaining issue truly requires an external dependency or user action outside this run.'
|
|
3861
3585
|
].filter(Boolean).join(' ')
|
|
3862
3586
|
});
|
|
@@ -3965,26 +3689,43 @@ class AgentEngine {
|
|
|
3965
3689
|
const lastToolWasMessaging = runMeta?.lastToolName === 'send_message' || runMeta?.lastToolName === 'make_call';
|
|
3966
3690
|
|
|
3967
3691
|
if (triggerSource === 'messaging' && !normalizeOutgoingMessage(lastContent, options?.source || null) && !messagingSent) {
|
|
3968
|
-
|
|
3969
|
-
|
|
3970
|
-
|
|
3971
|
-
|
|
3972
|
-
|
|
3973
|
-
|
|
3974
|
-
|
|
3975
|
-
|
|
3976
|
-
|
|
3977
|
-
|
|
3978
|
-
|
|
3979
|
-
|
|
3980
|
-
|
|
3981
|
-
|
|
3982
|
-
|
|
3692
|
+
// Simplified blank reply recovery: one model call with direct instruction,
|
|
3693
|
+
// then fall back to a deterministic message. No multi-attempt LLM loop.
|
|
3694
|
+
console.warn(`[Run ${shortenRunId(runId)}] blank_reply_recovery model=${model}`);
|
|
3695
|
+
let recoveredTokens = 0;
|
|
3696
|
+
try {
|
|
3697
|
+
const recoveryResponse = await withModelCallTimeout(
|
|
3698
|
+
provider.chat(
|
|
3699
|
+
sanitizeConversationMessages([
|
|
3700
|
+
...messages,
|
|
3701
|
+
{
|
|
3702
|
+
role: 'system',
|
|
3703
|
+
content: buildBlankMessagingReplyPrompt(1, options?.source || null)
|
|
3704
|
+
}
|
|
3705
|
+
]),
|
|
3706
|
+
[],
|
|
3707
|
+
{
|
|
3708
|
+
model,
|
|
3709
|
+
reasoningEffort: this.getReasoningEffort(providerName, options)
|
|
3710
|
+
}
|
|
3711
|
+
),
|
|
3712
|
+
options,
|
|
3713
|
+
'Blank messaging reply recovery',
|
|
3714
|
+
);
|
|
3715
|
+
recoveredTokens = recoveryResponse.usage?.totalTokens || 0;
|
|
3716
|
+
lastContent = sanitizeModelOutput(recoveryResponse.content || '', { model });
|
|
3717
|
+
} catch (recoverErr) {
|
|
3718
|
+
console.warn(`[Run ${shortenRunId(runId)}] blank_reply_recovery failed: ${summarizeForLog(recoverErr?.message || recoverErr, 180)}`);
|
|
3719
|
+
}
|
|
3720
|
+
totalTokens += recoveredTokens;
|
|
3721
|
+
if (!normalizeOutgoingMessage(lastContent, options?.source || null)) {
|
|
3722
|
+
lastContent = buildDeterministicMessagingFallback({ failedStepCount, stepIndex, toolExecutions });
|
|
3723
|
+
}
|
|
3983
3724
|
if (normalizeOutgoingMessage(lastContent, options?.source || null)) {
|
|
3984
3725
|
messages.push({ role: 'assistant', content: lastContent });
|
|
3985
3726
|
if (conversationId) {
|
|
3986
3727
|
db.prepare('INSERT INTO conversation_messages (conversation_id, role, content, tokens) VALUES (?, ?, ?, ?)')
|
|
3987
|
-
.run(conversationId, 'assistant', lastContent,
|
|
3728
|
+
.run(conversationId, 'assistant', lastContent, recoveredTokens);
|
|
3988
3729
|
}
|
|
3989
3730
|
}
|
|
3990
3731
|
}
|
|
@@ -1649,6 +1649,7 @@ async function executeTool(toolName, args, context, engine) {
|
|
|
1649
1649
|
case 'browser_extract': {
|
|
1650
1650
|
const { provider, backend } = await bc();
|
|
1651
1651
|
if (!provider) return { error: 'Browser controller not available' };
|
|
1652
|
+
if (!args.selector) return { error: 'browser_extract requires a "selector" argument' };
|
|
1652
1653
|
return { ...await provider.extract(args.selector, args.attribute, args.all), backend };
|
|
1653
1654
|
}
|
|
1654
1655
|
|
|
@@ -1661,7 +1662,9 @@ async function executeTool(toolName, args, context, engine) {
|
|
|
1661
1662
|
case 'browser_evaluate': {
|
|
1662
1663
|
const { provider, backend } = await bc();
|
|
1663
1664
|
if (!provider) return { error: 'Browser controller not available' };
|
|
1664
|
-
|
|
1665
|
+
const script = args.script ?? args.javascript;
|
|
1666
|
+
if (!script) return { error: 'browser_evaluate requires a "script" argument' };
|
|
1667
|
+
return { ...await provider.evaluate(script), backend };
|
|
1665
1668
|
}
|
|
1666
1669
|
|
|
1667
1670
|
case 'android_start_emulator': {
|
|
@@ -250,8 +250,8 @@ class VmBrowserProvider {
|
|
|
250
250
|
async typeText(text, options = {}) { return this.#materialize(await this.client.request('POST', '/browser/type-text', { text, ...options })); }
|
|
251
251
|
async pressKey(key, screenshot = true) { return this.#materialize(await this.client.request('POST', '/browser/press-key', { key, screenshot })); }
|
|
252
252
|
async scroll(deltaX, deltaY, screenshot = true) { return this.#materialize(await this.client.request('POST', '/browser/scroll', { deltaX, deltaY, screenshot })); }
|
|
253
|
-
extract(selector, attribute, all = false) { return this.client.request('POST', '/browser/extract', { selector, attribute, all }); }
|
|
254
|
-
evaluate(script) { return this.client.request('POST', '/browser/execute', { code: script }); }
|
|
253
|
+
async extract(selector, attribute, all = false) { return this.client.request('POST', '/browser/extract', { selector, attribute, all }); }
|
|
254
|
+
async evaluate(script) { return this.client.request('POST', '/browser/execute', { code: script }); }
|
|
255
255
|
async screenshot(options = {}) { return this.#materialize(await this.client.request('POST', '/browser/screenshot', options)); }
|
|
256
256
|
async screenshotJpeg(quality = 80, options = {}) {
|
|
257
257
|
const result = await this.client.request('POST', '/browser/screenshot-jpeg', { ...options, quality });
|
|
@@ -259,11 +259,11 @@ class VmBrowserProvider {
|
|
|
259
259
|
if (!content) throw new Error('VM browser screenshot-jpeg returned no data.');
|
|
260
260
|
return Buffer.from(content, 'base64');
|
|
261
261
|
}
|
|
262
|
-
launch(options = {}) { return this.client.request('POST', '/browser/launch', options); }
|
|
263
|
-
closeBrowser() { return this.client.request('POST', '/browser/close'); }
|
|
264
|
-
fill(selector, value) { return this.type(selector, value); }
|
|
265
|
-
extractContent(options = {}) { return this.client.request('POST', '/browser/extract', options); }
|
|
266
|
-
executeJS(code) { return this.evaluate(code); }
|
|
262
|
+
async launch(options = {}) { return this.client.request('POST', '/browser/launch', options); }
|
|
263
|
+
async closeBrowser() { return this.client.request('POST', '/browser/close'); }
|
|
264
|
+
async fill(selector, value) { return this.type(selector, value); }
|
|
265
|
+
async extractContent(options = {}) { return this.client.request('POST', '/browser/extract', options); }
|
|
266
|
+
async executeJS(code) { return this.evaluate(code); }
|
|
267
267
|
async getPageInfo() {
|
|
268
268
|
const status = await this.client.request('GET', '/browser/status');
|
|
269
269
|
this.headless = status?.headless !== false;
|