neoagent 2.5.2-beta.4 → 2.5.2-beta.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/server/public/.last_build_id +1 -1
- package/server/public/flutter_bootstrap.js +1 -1
- package/server/public/main.dart.js +4 -4
- package/server/services/ai/engine.js +130 -354
- package/server/services/ai/tools.js +4 -1
- package/server/services/runtime/backends/local-vm.js +7 -7
package/package.json
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
|
|
1
|
+
1050e01e6b4a9c529922c7db724d0188
|
|
@@ -37,6 +37,6 @@ _flutter.buildConfig = {"engineRevision":"77e2e94772b6eb43759e34ed1ad7da4674e19c
|
|
|
37
37
|
|
|
38
38
|
_flutter.loader.load({
|
|
39
39
|
serviceWorkerSettings: {
|
|
40
|
-
serviceWorkerVersion: "
|
|
40
|
+
serviceWorkerVersion: "1247404091" /* Flutter's service worker is deprecated and will be removed in a future Flutter release. */
|
|
41
41
|
}
|
|
42
42
|
});
|
|
@@ -134794,7 +134794,7 @@ r===$&&A.b()
|
|
|
134794
134794
|
p.push(A.jP(q,A.j9(!1,new A.a_(B.uG,A.d8(new A.cA(B.jt,new A.a7N(r,q),q),q,q),q),!1,B.H,!0),q,q,0,0,0,q))}r=!1
|
|
134795
134795
|
if(!s.ay)if(!s.ch){r=s.e
|
|
134796
134796
|
r===$&&A.b()
|
|
134797
|
-
r=B.b.u("
|
|
134797
|
+
r=B.b.u("mqfmnj9i-24c1816").length!==0&&r.b}if(r){r=s.d
|
|
134798
134798
|
r===$&&A.b()
|
|
134799
134799
|
r=r.aP&&!r.ai?84:0
|
|
134800
134800
|
s=s.e
|
|
@@ -140506,7 +140506,7 @@ $S:0}
|
|
|
140506
140506
|
A.a_6.prototype={}
|
|
140507
140507
|
A.SQ.prototype={
|
|
140508
140508
|
nb(a){var s=this
|
|
140509
|
-
if(B.b.u("
|
|
140509
|
+
if(B.b.u("mqfmnj9i-24c1816").length===0||s.a!=null)return
|
|
140510
140510
|
s.AU()
|
|
140511
140511
|
s.a=A.on(B.RH,new A.bc8(s))},
|
|
140512
140512
|
AU(){var s=0,r=A.l(t.H),q,p=2,o=[],n=this,m,l,k,j,i,h,g,f
|
|
@@ -140524,7 +140524,7 @@ if(!t.f.b(k)){s=1
|
|
|
140524
140524
|
break}i=J.a3(k,"buildId")
|
|
140525
140525
|
h=i==null?null:B.b.u(J.p(i))
|
|
140526
140526
|
j=h==null?"":h
|
|
140527
|
-
if(J.bi(j)===0||J.d(j,"
|
|
140527
|
+
if(J.bi(j)===0||J.d(j,"mqfmnj9i-24c1816")){s=1
|
|
140528
140528
|
break}n.b=!0
|
|
140529
140529
|
n.F()
|
|
140530
140530
|
p=2
|
|
@@ -140541,7 +140541,7 @@ case 2:return A.i(o.at(-1),r)}})
|
|
|
140541
140541
|
return A.k($async$AU,r)},
|
|
140542
140542
|
vE(){var s=0,r=A.l(t.H),q,p=2,o=[],n=this,m,l,k,j,i,h,g,f,e,d,c,b,a,a0,a1
|
|
140543
140543
|
var $async$vE=A.h(function(a2,a3){if(a2===1){o.push(a3)
|
|
140544
|
-
s=p}for(;;)switch(s){case 0:if(B.b.u("
|
|
140544
|
+
s=p}for(;;)switch(s){case 0:if(B.b.u("mqfmnj9i-24c1816").length===0||n.c){s=1
|
|
140545
140545
|
break}n.c=!0
|
|
140546
140546
|
n.F()
|
|
140547
140547
|
p=4
|
|
@@ -137,6 +137,37 @@ function formatElapsedDuration(durationMs) {
|
|
|
137
137
|
return `${minutes}m ${seconds}s`;
|
|
138
138
|
}
|
|
139
139
|
|
|
140
|
+
/**
 * Collapse a raw tool error message into a short bucket key so that repeated
 * failures of the same kind can be counted together.
 *
 * Known failure shapes map to fixed keys (`outside_workspace`, `eisdir`,
 * `enoent`, `bad_cwd`, `not_found`). Anything else falls back to the first
 * 60 characters of the normalized message.
 *
 * @param {*} errorMsg - Error text (or any value; it is stringified). Falsy
 *   values are treated as an empty message.
 * @returns {string} The bucket key; `''` for an empty/falsy message.
 */
function normalizeErrorKey(errorMsg) {
  // Lower-case once so the patterns need no `i` flag, and fold newlines and
  // whitespace runs into single spaces: `.` does not cross line breaks, so
  // without this a multi-line message like "outside\n... workspace" would
  // miss its bucket and land in the free-text fallback instead.
  const msg = String(errorMsg || '')
    .toLowerCase()
    .replace(/\s+/g, ' ')
    .trim();

  // Order matters: the first matching rule wins (e.g. "no such file or
  // directory" is classified as `enoent` before the generic `not_found`).
  const rules = [
    [/outside.*(workspace|per-user)/, 'outside_workspace'],
    [/eisdir|illegal operation on a directory/, 'eisdir'],
    [/enoent|no such file/, 'enoent'],
    [/can.?t cd to|no such directory/, 'bad_cwd'],
    [/not found/, 'not_found'],
  ];
  for (const [pattern, key] of rules) {
    if (pattern.test(msg)) return key;
  }

  // Unknown shape: use a bounded prefix of the message itself as the key.
  return msg.slice(0, 60);
}
|
|
149
|
+
|
|
150
|
+
/**
 * Record one occurrence of a tool error on the run's metadata, bucketed by
 * `normalizeErrorKey`, so repeated failures of the same kind can be detected.
 *
 * The counter lives in `runMeta.errorPatterns` (a `Map` of key -> count),
 * which is created lazily on first use.
 *
 * @param {*} errorMsg - Error text; nothing is recorded when it is falsy.
 * @param {object|null|undefined} runMeta - Mutable per-run metadata object.
 *   Nothing is recorded when it is missing.
 * @returns {void}
 */
function trackErrorPattern(errorMsg, runMeta) {
  // Tolerate a missing run record instead of throwing a TypeError in the
  // middle of tool-failure handling; callers that look the metadata up by
  // run id may not get an object back.
  if (!errorMsg || !runMeta) return;

  const key = normalizeErrorKey(errorMsg);
  if (!runMeta.errorPatterns) runMeta.errorPatterns = new Map();

  const seen = runMeta.errorPatterns.get(key) || 0;
  runMeta.errorPatterns.set(key, seen + 1);
}
|
|
156
|
+
|
|
157
|
+
/**
 * Build a corrective hint for an error bucket that keeps recurring.
 *
 * @param {string} key - Bucket key as produced by the error normalizer.
 * @param {number} count - How many times this bucket has been seen.
 * @returns {string|null} A `REPEATED ERROR (N×): ...` hint once the bucket
 *   has been seen at least 3 times and a guide exists for it; otherwise null.
 */
function buildErrorPatternGuidance(key, count) {
  if (count < 3) return null;

  // A Map (rather than an object literal) so that keys derived from free-form
  // error text such as "constructor" or "__proto__" cannot resolve to
  // inherited Object.prototype members and leak into the hint.
  const guides = new Map([
    ['outside_workspace', 'read_file cannot access /tmp paths. Use execute_command with `cat <path>` instead.'],
    ['eisdir', 'That path is a directory, not a file. Use list_directory or execute_command with `ls` to inspect it.'],
    ['enoent', 'That path does not exist. Use execute_command with `find . -name "..."` to locate the correct path first.'],
    ['bad_cwd', 'The VM home directory is not ~/. Use absolute paths starting from /tmp or discover the workspace root first.'],
    ['not_found', 'This path or resource was not found. Try listing the parent directory or checking with a broader search first.'],
  ]);

  const guide = guides.get(key);
  if (!guide) return null;
  return `REPEATED ERROR (${count}×): ${guide}`;
}
|
|
170
|
+
|
|
140
171
|
function resolveModelCallTimeoutMs(options = {}) {
|
|
141
172
|
const requested = Number(options?.modelCallTimeoutMs);
|
|
142
173
|
if (Number.isFinite(requested) && requested > 0) {
|
|
@@ -1468,117 +1499,6 @@ class AgentEngine {
|
|
|
1468
1499
|
};
|
|
1469
1500
|
}
|
|
1470
1501
|
|
|
1471
|
-
async decideLoopState({
|
|
1472
|
-
provider,
|
|
1473
|
-
providerName,
|
|
1474
|
-
model,
|
|
1475
|
-
messages,
|
|
1476
|
-
tools,
|
|
1477
|
-
analysis,
|
|
1478
|
-
plan,
|
|
1479
|
-
toolExecutions,
|
|
1480
|
-
lastReply,
|
|
1481
|
-
triggerSource,
|
|
1482
|
-
messagingSent,
|
|
1483
|
-
iteration,
|
|
1484
|
-
maxIterations,
|
|
1485
|
-
options,
|
|
1486
|
-
fallbackStatus,
|
|
1487
|
-
}) {
|
|
1488
|
-
const runMeta = options?.runId ? this.getRunMeta(options.runId) : null;
|
|
1489
|
-
const goalContext = resolveRunGoalContext(runMeta, analysis, plan);
|
|
1490
|
-
|
|
1491
|
-
const response = await this.requestStructuredJson({
|
|
1492
|
-
provider,
|
|
1493
|
-
providerName,
|
|
1494
|
-
model,
|
|
1495
|
-
messages,
|
|
1496
|
-
prompt: buildCompletionDecisionPrompt({
|
|
1497
|
-
triggerSource,
|
|
1498
|
-
messagingSent,
|
|
1499
|
-
goalContext,
|
|
1500
|
-
parallelWork: analysis?.parallel_work === true,
|
|
1501
|
-
tools,
|
|
1502
|
-
toolExecutions,
|
|
1503
|
-
lastReply,
|
|
1504
|
-
iteration,
|
|
1505
|
-
maxIterations,
|
|
1506
|
-
}),
|
|
1507
|
-
maxTokens: 320,
|
|
1508
|
-
normalize: (raw) => normalizeCompletionDecision(raw, fallbackStatus),
|
|
1509
|
-
fallback: { status: fallbackStatus },
|
|
1510
|
-
reasoningEffort: this.getReasoningEffort(providerName, options),
|
|
1511
|
-
telemetry: options,
|
|
1512
|
-
phase: 'loop_decision',
|
|
1513
|
-
});
|
|
1514
|
-
|
|
1515
|
-
return {
|
|
1516
|
-
decision: response.value,
|
|
1517
|
-
usage: response.usage,
|
|
1518
|
-
};
|
|
1519
|
-
}
|
|
1520
|
-
|
|
1521
|
-
async evaluateTaskCompleteSignal({
|
|
1522
|
-
provider,
|
|
1523
|
-
providerName,
|
|
1524
|
-
model,
|
|
1525
|
-
messages,
|
|
1526
|
-
tools,
|
|
1527
|
-
analysis,
|
|
1528
|
-
plan,
|
|
1529
|
-
toolExecutions,
|
|
1530
|
-
finalMessage,
|
|
1531
|
-
confidence,
|
|
1532
|
-
triggerSource,
|
|
1533
|
-
messagingSent,
|
|
1534
|
-
iteration,
|
|
1535
|
-
maxIterations,
|
|
1536
|
-
options,
|
|
1537
|
-
}) {
|
|
1538
|
-
const runMeta = options?.runId ? this.getRunMeta(options.runId) : null;
|
|
1539
|
-
const requiredConfidence = resolveRunGoalContext(runMeta, analysis, plan)
|
|
1540
|
-
.effectiveCompletionConfidence;
|
|
1541
|
-
const confidenceDecision = shouldAcceptTaskComplete({
|
|
1542
|
-
confidence,
|
|
1543
|
-
requiredConfidence,
|
|
1544
|
-
iteration,
|
|
1545
|
-
maxIterations,
|
|
1546
|
-
});
|
|
1547
|
-
if (!confidenceDecision.accept) {
|
|
1548
|
-
return {
|
|
1549
|
-
decision: {
|
|
1550
|
-
status: 'continue',
|
|
1551
|
-
reason: confidenceDecision.reason,
|
|
1552
|
-
},
|
|
1553
|
-
requiredConfidence,
|
|
1554
|
-
usage: 0,
|
|
1555
|
-
};
|
|
1556
|
-
}
|
|
1557
|
-
|
|
1558
|
-
const loopState = await this.decideLoopState({
|
|
1559
|
-
provider,
|
|
1560
|
-
providerName,
|
|
1561
|
-
model,
|
|
1562
|
-
messages,
|
|
1563
|
-
tools,
|
|
1564
|
-
analysis,
|
|
1565
|
-
plan,
|
|
1566
|
-
toolExecutions,
|
|
1567
|
-
lastReply: finalMessage,
|
|
1568
|
-
triggerSource,
|
|
1569
|
-
messagingSent,
|
|
1570
|
-
iteration,
|
|
1571
|
-
maxIterations,
|
|
1572
|
-
options,
|
|
1573
|
-
fallbackStatus: 'continue',
|
|
1574
|
-
});
|
|
1575
|
-
return {
|
|
1576
|
-
decision: loopState.decision,
|
|
1577
|
-
requiredConfidence,
|
|
1578
|
-
usage: loopState.usage || 0,
|
|
1579
|
-
};
|
|
1580
|
-
}
|
|
1581
|
-
|
|
1582
1502
|
async verifyFinalResponse({
|
|
1583
1503
|
provider,
|
|
1584
1504
|
providerName,
|
|
@@ -1732,73 +1652,6 @@ class AgentEngine {
|
|
|
1732
1652
|
return nextState;
|
|
1733
1653
|
}
|
|
1734
1654
|
|
|
1735
|
-
async recoverBlankMessagingReply({
|
|
1736
|
-
userId,
|
|
1737
|
-
runId,
|
|
1738
|
-
messages,
|
|
1739
|
-
provider,
|
|
1740
|
-
model,
|
|
1741
|
-
providerName,
|
|
1742
|
-
options,
|
|
1743
|
-
stepIndex,
|
|
1744
|
-
failedStepCount,
|
|
1745
|
-
toolExecutions = [],
|
|
1746
|
-
tools = []
|
|
1747
|
-
}) {
|
|
1748
|
-
const attempts = 3;
|
|
1749
|
-
let recoveredContent = '';
|
|
1750
|
-
let totalTokens = 0;
|
|
1751
|
-
|
|
1752
|
-
for (let attempt = 1; attempt <= attempts; attempt++) {
|
|
1753
|
-
console.warn(
|
|
1754
|
-
`[Run ${shortenRunId(runId)}] blank_reply_recovery attempt=${attempt} model=${model}`
|
|
1755
|
-
);
|
|
1756
|
-
try {
|
|
1757
|
-
const response = await withModelCallTimeout(
|
|
1758
|
-
provider.chat(
|
|
1759
|
-
sanitizeConversationMessages([
|
|
1760
|
-
...messages,
|
|
1761
|
-
{
|
|
1762
|
-
role: 'system',
|
|
1763
|
-
content: buildBlankMessagingReplyPrompt(attempt, options?.source || null)
|
|
1764
|
-
}
|
|
1765
|
-
]),
|
|
1766
|
-
[],
|
|
1767
|
-
{
|
|
1768
|
-
model,
|
|
1769
|
-
reasoningEffort: this.getReasoningEffort(providerName, options)
|
|
1770
|
-
}
|
|
1771
|
-
),
|
|
1772
|
-
options,
|
|
1773
|
-
`Blank messaging reply recovery ${attempt}`,
|
|
1774
|
-
);
|
|
1775
|
-
totalTokens += response.usage?.totalTokens || 0;
|
|
1776
|
-
recoveredContent = sanitizeModelOutput(response.content || '', { model });
|
|
1777
|
-
if (normalizeOutgoingMessage(recoveredContent)) {
|
|
1778
|
-
console.info(
|
|
1779
|
-
`[Run ${shortenRunId(runId)}] blank_reply_recovery succeeded attempt=${attempt}`
|
|
1780
|
-
);
|
|
1781
|
-
return { content: recoveredContent, tokens: totalTokens, recovered: true };
|
|
1782
|
-
}
|
|
1783
|
-
} catch (recoverErr) {
|
|
1784
|
-
console.warn(
|
|
1785
|
-
`[Run ${shortenRunId(runId)}] blank_reply_recovery attempt=${attempt} failed: ${summarizeForLog(recoverErr?.message || recoverErr, 180)}`
|
|
1786
|
-
);
|
|
1787
|
-
}
|
|
1788
|
-
}
|
|
1789
|
-
|
|
1790
|
-
const error = new Error(
|
|
1791
|
-
buildDeterministicMessagingFallback({
|
|
1792
|
-
failedStepCount,
|
|
1793
|
-
stepIndex,
|
|
1794
|
-
toolExecutions,
|
|
1795
|
-
})
|
|
1796
|
-
);
|
|
1797
|
-
error.code = 'BLANK_MESSAGING_REPLY';
|
|
1798
|
-
error.recoveryTokens = totalTokens;
|
|
1799
|
-
throw error;
|
|
1800
|
-
}
|
|
1801
|
-
|
|
1802
1655
|
getAvailableTools(app, options = {}) {
|
|
1803
1656
|
const { getAvailableTools } = require('./tools');
|
|
1804
1657
|
return getAvailableTools(app, options);
|
|
@@ -2216,14 +2069,16 @@ class AgentEngine {
|
|
|
2216
2069
|
runStartedAtMs,
|
|
2217
2070
|
));
|
|
2218
2071
|
const currentTool = String(runMeta?.progressLedger?.currentTool || '').trim();
|
|
2072
|
+
const runTitle = String(runMeta?.title || '').trim().slice(0, 60);
|
|
2073
|
+
const titlePrefix = runTitle ? `[${runTitle}] ` : '';
|
|
2219
2074
|
if (currentTool) {
|
|
2220
2075
|
return stalled
|
|
2221
|
-
?
|
|
2222
|
-
:
|
|
2076
|
+
? `${titlePrefix}Still working on ${currentTool}. Run active ${runElapsed}; no verified progress for ${unverifiedElapsed}.`
|
|
2077
|
+
: `${titlePrefix}Still working on ${currentTool}. Run active ${runElapsed}; current step ${stepElapsed} so far.`;
|
|
2223
2078
|
}
|
|
2224
2079
|
return stalled
|
|
2225
|
-
?
|
|
2226
|
-
:
|
|
2080
|
+
? `${titlePrefix}Still working on this. Run active ${runElapsed}; no verified progress for ${unverifiedElapsed}.`
|
|
2081
|
+
: `${titlePrefix}Still working on this. Run active ${runElapsed}.`;
|
|
2227
2082
|
}
|
|
2228
2083
|
|
|
2229
2084
|
async sendRuntimeMessagingHeartbeat(runId, options = {}) {
|
|
@@ -2281,8 +2136,8 @@ class AgentEngine {
|
|
|
2281
2136
|
}, { agentId: runMeta.agentId });
|
|
2282
2137
|
this.enqueueSystemSteering(
|
|
2283
2138
|
runId,
|
|
2284
|
-
'A runtime
|
|
2285
|
-
{ reason: '
|
|
2139
|
+
'A runtime progress update was just sent on your behalf because you were blocked in a tool. On your NEXT free turn: use send_interim_update to write 1-2 sentences in your own words describing what you are doing and why. Keep it short and concrete. Then continue toward the final answer.',
|
|
2140
|
+
{ reason: 'heartbeat_ai_followup' },
|
|
2286
2141
|
);
|
|
2287
2142
|
return { sent: true, content };
|
|
2288
2143
|
}
|
|
@@ -2373,7 +2228,7 @@ class AgentEngine {
|
|
|
2373
2228
|
if (!runMeta || runMeta.aborted || runMeta.triggerSource !== 'messaging') {
|
|
2374
2229
|
return { sent: false, skipped: true };
|
|
2375
2230
|
}
|
|
2376
|
-
if (runMeta.
|
|
2231
|
+
if (runMeta.terminalInterim) {
|
|
2377
2232
|
return { sent: false, skipped: true };
|
|
2378
2233
|
}
|
|
2379
2234
|
|
|
@@ -2424,9 +2279,10 @@ class AgentEngine {
|
|
|
2424
2279
|
return { sent: false, skipped: true };
|
|
2425
2280
|
}
|
|
2426
2281
|
|
|
2282
|
+
const elapsed = formatElapsedDuration(now - startedAtMs);
|
|
2427
2283
|
const nudge = stalled
|
|
2428
|
-
?
|
|
2429
|
-
:
|
|
2284
|
+
? `You have been running for ${elapsed} and appear stalled. Use send_interim_update RIGHT NOW to write 1-2 sentences explaining the blocker in your own words, then either resolve it or call task_complete with what you have. Do not leave the user without an answer.`
|
|
2285
|
+
: `You have been running for ${elapsed} without sending an update to the user. Use send_interim_update RIGHT NOW to write 1-2 sentences explaining what you are currently doing. Keep it short and concrete. Then continue working toward the final answer.`;
|
|
2430
2286
|
const queued = this.enqueueSystemSteering(runId, nudge, {
|
|
2431
2287
|
reason: stalled ? 'stalled_progress_check' : 'progress_check',
|
|
2432
2288
|
});
|
|
@@ -2740,6 +2596,7 @@ class AgentEngine {
|
|
|
2740
2596
|
this.activeRuns.set(runId, {
|
|
2741
2597
|
userId,
|
|
2742
2598
|
agentId,
|
|
2599
|
+
title: runTitle,
|
|
2743
2600
|
status: 'running',
|
|
2744
2601
|
aborted: false,
|
|
2745
2602
|
messagingSent: false,
|
|
@@ -3165,37 +3022,6 @@ class AgentEngine {
|
|
|
3165
3022
|
db.prepare('INSERT INTO conversation_messages (conversation_id, role, content, tokens) VALUES (?, ?, ?, ?)')
|
|
3166
3023
|
.run(conversationId, 'assistant', lastContent, analysisUsage);
|
|
3167
3024
|
}
|
|
3168
|
-
const directAnswerDecision = await runWithModelFallback(
|
|
3169
|
-
'direct answer completion decision',
|
|
3170
|
-
() => this.decideLoopState({
|
|
3171
|
-
provider,
|
|
3172
|
-
providerName,
|
|
3173
|
-
model,
|
|
3174
|
-
messages,
|
|
3175
|
-
tools,
|
|
3176
|
-
analysis,
|
|
3177
|
-
plan,
|
|
3178
|
-
toolExecutions,
|
|
3179
|
-
lastReply: lastContent,
|
|
3180
|
-
triggerSource,
|
|
3181
|
-
messagingSent: false,
|
|
3182
|
-
iteration,
|
|
3183
|
-
maxIterations,
|
|
3184
|
-
options: { ...options, runId, userId, agentId },
|
|
3185
|
-
fallbackStatus: 'continue',
|
|
3186
|
-
}),
|
|
3187
|
-
);
|
|
3188
|
-
totalTokens += directAnswerDecision.usage || 0;
|
|
3189
|
-
if (directAnswerDecision.decision.status === 'continue') {
|
|
3190
|
-
messages.push({
|
|
3191
|
-
role: 'system',
|
|
3192
|
-
content: directAnswerDecision.decision.reason
|
|
3193
|
-
? `Continue working: ${directAnswerDecision.decision.reason}.`
|
|
3194
|
-
: 'The initial draft is not a finished answer. Continue working autonomously.',
|
|
3195
|
-
});
|
|
3196
|
-
lastContent = '';
|
|
3197
|
-
directAnswerEligible = false;
|
|
3198
|
-
}
|
|
3199
3025
|
}
|
|
3200
3026
|
|
|
3201
3027
|
// BUG FIX: consecutiveToolFailures was previously declared INSIDE the
|
|
@@ -3395,6 +3221,9 @@ class AgentEngine {
|
|
|
3395
3221
|
currentTool: null,
|
|
3396
3222
|
currentStepStartedAt: null,
|
|
3397
3223
|
});
|
|
3224
|
+
// Check for queued steering first — if something was injected while the
|
|
3225
|
+
// model was responding (e.g. a heartbeat nudge), give the model a chance
|
|
3226
|
+
// to act on it before we treat this as a final answer.
|
|
3398
3227
|
const systemSteeringAfterResponse = this.applyQueuedSystemSteering(runId, messages);
|
|
3399
3228
|
messages = systemSteeringAfterResponse.messages;
|
|
3400
3229
|
if (systemSteeringAfterResponse.appliedCount > 0) {
|
|
@@ -3412,65 +3241,17 @@ class AgentEngine {
|
|
|
3412
3241
|
lastContent = '';
|
|
3413
3242
|
continue;
|
|
3414
3243
|
}
|
|
3415
|
-
const messagingSent = this.activeRuns.get(runId)?.messagingSent || false;
|
|
3416
3244
|
if (this.shouldFastCompleteVoiceReply({
|
|
3417
3245
|
options,
|
|
3418
3246
|
toolExecutions,
|
|
3419
3247
|
failedStepCount,
|
|
3420
|
-
messagingSent,
|
|
3248
|
+
messagingSent: this.activeRuns.get(runId)?.messagingSent || false,
|
|
3421
3249
|
lastReply: lastContent,
|
|
3422
3250
|
})) {
|
|
3423
3251
|
break;
|
|
3424
3252
|
}
|
|
3425
|
-
|
|
3426
|
-
|
|
3427
|
-
&& this.activeRuns.get(runId)?.noResponse !== true
|
|
3428
|
-
&& options.deliveryState?.noResponse !== true
|
|
3429
|
-
);
|
|
3430
|
-
const visibleInterimActivity = hasVisibleInterimActivity(this.activeRuns.get(runId));
|
|
3431
|
-
const fallbackStatus = (
|
|
3432
|
-
proactiveRunNeedsDecision
|
|
3433
|
-
|| toolExecutions.length > 0
|
|
3434
|
-
|| failedStepCount > 0
|
|
3435
|
-
|| messagingSent
|
|
3436
|
-
|| visibleInterimActivity
|
|
3437
|
-
) ? 'continue' : 'complete';
|
|
3438
|
-
const loopState = await runWithModelFallback('loop decision', () => this.decideLoopState({
|
|
3439
|
-
provider,
|
|
3440
|
-
providerName,
|
|
3441
|
-
model,
|
|
3442
|
-
messages,
|
|
3443
|
-
tools,
|
|
3444
|
-
analysis,
|
|
3445
|
-
plan,
|
|
3446
|
-
toolExecutions,
|
|
3447
|
-
lastReply: lastContent,
|
|
3448
|
-
triggerSource,
|
|
3449
|
-
messagingSent,
|
|
3450
|
-
iteration,
|
|
3451
|
-
maxIterations,
|
|
3452
|
-
options: { ...options, runId, userId, agentId },
|
|
3453
|
-
fallbackStatus,
|
|
3454
|
-
}));
|
|
3455
|
-
totalTokens += loopState.usage || 0;
|
|
3456
|
-
if (loopState.decision.status === 'continue') {
|
|
3457
|
-
if (iteration >= maxIterations) {
|
|
3458
|
-
throw new Error(
|
|
3459
|
-
`Completion judge found unfinished work at the iteration limit after ${maxIterations} iterations.`,
|
|
3460
|
-
);
|
|
3461
|
-
}
|
|
3462
|
-
messages.push({
|
|
3463
|
-
role: 'system',
|
|
3464
|
-
content: [
|
|
3465
|
-
loopState.decision.reason ? `Continue working: ${loopState.decision.reason}.` : 'Continue working autonomously.',
|
|
3466
|
-
messagingSent
|
|
3467
|
-
? 'You already sent a user-facing message in this run. Keep working silently unless you have a materially new finished result or a real external blocker.'
|
|
3468
|
-
: 'Use send_interim_update sparingly if a short real update or question would help. Otherwise keep working until you have the result or a real blocker.',
|
|
3469
|
-
].join(' ')
|
|
3470
|
-
});
|
|
3471
|
-
lastContent = '';
|
|
3472
|
-
continue;
|
|
3473
|
-
}
|
|
3253
|
+
// AI returned text with no tool calls → trust it as the final answer.
|
|
3254
|
+
directAnswerEligible = true;
|
|
3474
3255
|
break;
|
|
3475
3256
|
}
|
|
3476
3257
|
|
|
@@ -3564,82 +3345,20 @@ class AgentEngine {
|
|
|
3564
3345
|
}
|
|
3565
3346
|
|
|
3566
3347
|
// ── task_complete: AI explicitly signals the task is fully done ──
|
|
3567
|
-
//
|
|
3568
|
-
// regular tool execution, it is a loop-exit signal.
|
|
3348
|
+
// Trust the model — no separate judge LLM call needed.
|
|
3569
3349
|
if (toolName === 'task_complete') {
|
|
3570
3350
|
const finalMessage = String(toolArgs.message || '').trim();
|
|
3571
|
-
const confidence = normalizeCompletionConfidence(toolArgs.confidence || 'medium');
|
|
3572
|
-
const messagingSent = this.getRunMeta(runId)?.messagingSent === true;
|
|
3573
|
-
const completionResult = await runWithModelFallback(
|
|
3574
|
-
'task completion decision',
|
|
3575
|
-
() => this.evaluateTaskCompleteSignal({
|
|
3576
|
-
provider,
|
|
3577
|
-
providerName,
|
|
3578
|
-
model,
|
|
3579
|
-
messages,
|
|
3580
|
-
tools,
|
|
3581
|
-
analysis,
|
|
3582
|
-
plan,
|
|
3583
|
-
toolExecutions,
|
|
3584
|
-
finalMessage,
|
|
3585
|
-
confidence,
|
|
3586
|
-
triggerSource,
|
|
3587
|
-
messagingSent,
|
|
3588
|
-
iteration,
|
|
3589
|
-
maxIterations,
|
|
3590
|
-
options: { ...options, runId, userId, agentId },
|
|
3591
|
-
}),
|
|
3592
|
-
);
|
|
3593
|
-
totalTokens += completionResult.usage || 0;
|
|
3594
|
-
const completionDecision = completionResult.decision || {
|
|
3595
|
-
status: 'continue',
|
|
3596
|
-
reason: 'The completion signal could not be verified.',
|
|
3597
|
-
};
|
|
3598
|
-
const accepted = completionDecision.status !== 'continue';
|
|
3599
3351
|
this.recordRunEvent(userId, runId, 'task_complete_signaled', {
|
|
3600
|
-
|
|
3601
|
-
requiredConfidence: completionResult.requiredConfidence,
|
|
3602
|
-
accepted,
|
|
3603
|
-
judgeStatus: completionDecision.status,
|
|
3604
|
-
judgeReason: completionDecision.reason || '',
|
|
3352
|
+
accepted: true,
|
|
3605
3353
|
iteration,
|
|
3606
3354
|
messageLength: finalMessage.length,
|
|
3607
3355
|
}, { agentId });
|
|
3608
3356
|
console.info(
|
|
3609
|
-
`[Run ${shortenRunId(runId)}] task_complete
|
|
3357
|
+
`[Run ${shortenRunId(runId)}] task_complete accepted at iteration=${iteration}`
|
|
3610
3358
|
);
|
|
3611
|
-
|
|
3612
|
-
if (iteration >= maxIterations) {
|
|
3613
|
-
throw new Error(
|
|
3614
|
-
`Completion judge rejected task_complete at the iteration limit after ${maxIterations} iterations.`,
|
|
3615
|
-
);
|
|
3616
|
-
}
|
|
3617
|
-
messages.push({
|
|
3618
|
-
role: 'tool',
|
|
3619
|
-
name: toolName,
|
|
3620
|
-
tool_call_id: toolCall.id,
|
|
3621
|
-
content: JSON.stringify({
|
|
3622
|
-
status: 'continue',
|
|
3623
|
-
reason: completionDecision.reason,
|
|
3624
|
-
required_confidence: completionResult.requiredConfidence,
|
|
3625
|
-
}),
|
|
3626
|
-
});
|
|
3627
|
-
messages.push({
|
|
3628
|
-
role: 'system',
|
|
3629
|
-
content: `${completionDecision.reason} Do not ask the user to decide the next step unless external input is truly required.`
|
|
3630
|
-
});
|
|
3631
|
-
lastContent = '';
|
|
3632
|
-
continue;
|
|
3633
|
-
}
|
|
3634
|
-
if (completionDecision.reason) {
|
|
3635
|
-
messages.push({
|
|
3636
|
-
role: 'system',
|
|
3637
|
-
content: completionDecision.reason,
|
|
3638
|
-
});
|
|
3639
|
-
}
|
|
3640
|
-
lastContent = finalMessage; // empty string is valid; downstream handles it
|
|
3359
|
+
lastContent = finalMessage;
|
|
3641
3360
|
directAnswerEligible = true;
|
|
3642
|
-
break;
|
|
3361
|
+
break;
|
|
3643
3362
|
}
|
|
3644
3363
|
|
|
3645
3364
|
const repetitionGuard = this.getRunMeta(runId)?.repetitionGuard;
|
|
@@ -3849,6 +3568,11 @@ class AgentEngine {
|
|
|
3849
3568
|
|
|
3850
3569
|
if (toolErrorMessage) {
|
|
3851
3570
|
consecutiveToolFailures += 1;
|
|
3571
|
+
const currentRunMeta = this.getRunMeta(runId);
|
|
3572
|
+
trackErrorPattern(toolErrorMessage, currentRunMeta);
|
|
3573
|
+
const errorKey = normalizeErrorKey(toolErrorMessage);
|
|
3574
|
+
const errorCount = currentRunMeta?.errorPatterns?.get(errorKey) || 0;
|
|
3575
|
+
const patternGuide = buildErrorPatternGuidance(errorKey, errorCount);
|
|
3852
3576
|
const alternativeTools = summarizeAvailableTools(tools, { exclude: toolName });
|
|
3853
3577
|
messages.push({
|
|
3854
3578
|
role: 'system',
|
|
@@ -3857,6 +3581,7 @@ class AgentEngine {
|
|
|
3857
3581
|
'This tool failure is not, by itself, a user-facing blocker.',
|
|
3858
3582
|
'Continue autonomously: retry with corrected arguments, try an alternative tool/path, or verify the outcome using other available tools.',
|
|
3859
3583
|
alternativeTools ? `Other available tools in this run: ${alternativeTools}.` : '',
|
|
3584
|
+
patternGuide || '',
|
|
3860
3585
|
'Only stop and tell the user you are blocked if the remaining issue truly requires an external dependency or user action outside this run.'
|
|
3861
3586
|
].filter(Boolean).join(' ')
|
|
3862
3587
|
});
|
|
@@ -3965,26 +3690,43 @@ class AgentEngine {
|
|
|
3965
3690
|
const lastToolWasMessaging = runMeta?.lastToolName === 'send_message' || runMeta?.lastToolName === 'make_call';
|
|
3966
3691
|
|
|
3967
3692
|
if (triggerSource === 'messaging' && !normalizeOutgoingMessage(lastContent, options?.source || null) && !messagingSent) {
|
|
3968
|
-
|
|
3969
|
-
|
|
3970
|
-
|
|
3971
|
-
|
|
3972
|
-
|
|
3973
|
-
|
|
3974
|
-
|
|
3975
|
-
|
|
3976
|
-
|
|
3977
|
-
|
|
3978
|
-
|
|
3979
|
-
|
|
3980
|
-
|
|
3981
|
-
|
|
3982
|
-
|
|
3693
|
+
// Simplified blank reply recovery: one model call with direct instruction,
|
|
3694
|
+
// then fall back to a deterministic message. No multi-attempt LLM loop.
|
|
3695
|
+
console.warn(`[Run ${shortenRunId(runId)}] blank_reply_recovery model=${model}`);
|
|
3696
|
+
let recoveredTokens = 0;
|
|
3697
|
+
try {
|
|
3698
|
+
const recoveryResponse = await withModelCallTimeout(
|
|
3699
|
+
provider.chat(
|
|
3700
|
+
sanitizeConversationMessages([
|
|
3701
|
+
...messages,
|
|
3702
|
+
{
|
|
3703
|
+
role: 'system',
|
|
3704
|
+
content: buildBlankMessagingReplyPrompt(1, options?.source || null)
|
|
3705
|
+
}
|
|
3706
|
+
]),
|
|
3707
|
+
[],
|
|
3708
|
+
{
|
|
3709
|
+
model,
|
|
3710
|
+
reasoningEffort: this.getReasoningEffort(providerName, options)
|
|
3711
|
+
}
|
|
3712
|
+
),
|
|
3713
|
+
options,
|
|
3714
|
+
'Blank messaging reply recovery',
|
|
3715
|
+
);
|
|
3716
|
+
recoveredTokens = recoveryResponse.usage?.totalTokens || 0;
|
|
3717
|
+
lastContent = sanitizeModelOutput(recoveryResponse.content || '', { model });
|
|
3718
|
+
} catch (recoverErr) {
|
|
3719
|
+
console.warn(`[Run ${shortenRunId(runId)}] blank_reply_recovery failed: ${summarizeForLog(recoverErr?.message || recoverErr, 180)}`);
|
|
3720
|
+
}
|
|
3721
|
+
totalTokens += recoveredTokens;
|
|
3722
|
+
if (!normalizeOutgoingMessage(lastContent, options?.source || null)) {
|
|
3723
|
+
lastContent = buildDeterministicMessagingFallback({ failedStepCount, stepIndex, toolExecutions });
|
|
3724
|
+
}
|
|
3983
3725
|
if (normalizeOutgoingMessage(lastContent, options?.source || null)) {
|
|
3984
3726
|
messages.push({ role: 'assistant', content: lastContent });
|
|
3985
3727
|
if (conversationId) {
|
|
3986
3728
|
db.prepare('INSERT INTO conversation_messages (conversation_id, role, content, tokens) VALUES (?, ?, ?, ?)')
|
|
3987
|
-
.run(conversationId, 'assistant', lastContent,
|
|
3729
|
+
.run(conversationId, 'assistant', lastContent, recoveredTokens);
|
|
3988
3730
|
}
|
|
3989
3731
|
}
|
|
3990
3732
|
}
|
|
@@ -4007,9 +3749,43 @@ class AgentEngine {
|
|
|
4007
3749
|
);
|
|
4008
3750
|
}
|
|
4009
3751
|
if (iteration >= maxIterations) {
|
|
4010
|
-
|
|
3752
|
+
// Grace call: budget exhausted but no content yet.
|
|
3753
|
+
// Strip tools and ask the model to summarise what it accomplished.
|
|
3754
|
+
// Mirrors the Hermes handle_max_iterations() pattern.
|
|
3755
|
+
console.warn(`[Run ${shortenRunId(runId)}] iteration_limit runId=${shortenRunId(runId)} — making grace call`);
|
|
3756
|
+
try {
|
|
3757
|
+
const graceMessages = sanitizeConversationMessages([
|
|
3758
|
+
...messages,
|
|
3759
|
+
{
|
|
3760
|
+
role: 'user',
|
|
3761
|
+
content: 'You have reached the maximum number of tool-calling iterations allowed. Please provide a final response summarising what you found and accomplished so far, without calling any more tools.',
|
|
3762
|
+
},
|
|
3763
|
+
]);
|
|
3764
|
+
const graceResponse = await withModelCallTimeout(
|
|
3765
|
+
provider.chat(graceMessages, [], {
|
|
3766
|
+
model,
|
|
3767
|
+
reasoningEffort: this.getReasoningEffort(providerName, options),
|
|
3768
|
+
}),
|
|
3769
|
+
options,
|
|
3770
|
+
`Grace call after ${maxIterations} iterations`,
|
|
3771
|
+
);
|
|
3772
|
+
totalTokens += graceResponse.usage?.totalTokens || 0;
|
|
3773
|
+
lastContent = sanitizeModelOutput(graceResponse.content || '', { model });
|
|
3774
|
+
if (lastContent) {
|
|
3775
|
+
messages.push({ role: 'assistant', content: lastContent });
|
|
3776
|
+
if (conversationId) {
|
|
3777
|
+
db.prepare('INSERT INTO conversation_messages (conversation_id, role, content, tokens) VALUES (?, ?, ?, ?)')
|
|
3778
|
+
.run(conversationId, 'assistant', lastContent, graceResponse.usage?.totalTokens || 0);
|
|
3779
|
+
}
|
|
3780
|
+
}
|
|
3781
|
+
} catch (graceErr) {
|
|
3782
|
+
console.warn(`[Run ${shortenRunId(runId)}] grace call failed: ${graceErr?.message}`);
|
|
3783
|
+
}
|
|
3784
|
+
if (!normalizeOutgoingMessage(lastContent, options?.source || null)) {
|
|
3785
|
+
throw new Error(`Iteration limit reached before explicit completion after ${maxIterations} iterations.`);
|
|
3786
|
+
}
|
|
4011
3787
|
}
|
|
4012
|
-
if (stepIndex > 0 && !lastToolWasMessaging) {
|
|
3788
|
+
if (stepIndex > 0 && !lastToolWasMessaging && iteration < maxIterations) {
|
|
4013
3789
|
throw new Error('Run ended without an explicit completion or blocker reply.');
|
|
4014
3790
|
}
|
|
4015
3791
|
}
|
|
@@ -1649,6 +1649,7 @@ async function executeTool(toolName, args, context, engine) {
|
|
|
1649
1649
|
case 'browser_extract': {
|
|
1650
1650
|
const { provider, backend } = await bc();
|
|
1651
1651
|
if (!provider) return { error: 'Browser controller not available' };
|
|
1652
|
+
if (!args.selector) return { error: 'browser_extract requires a "selector" argument' };
|
|
1652
1653
|
return { ...await provider.extract(args.selector, args.attribute, args.all), backend };
|
|
1653
1654
|
}
|
|
1654
1655
|
|
|
@@ -1661,7 +1662,9 @@ async function executeTool(toolName, args, context, engine) {
|
|
|
1661
1662
|
case 'browser_evaluate': {
|
|
1662
1663
|
const { provider, backend } = await bc();
|
|
1663
1664
|
if (!provider) return { error: 'Browser controller not available' };
|
|
1664
|
-
|
|
1665
|
+
const script = args.script ?? args.javascript;
|
|
1666
|
+
if (!script) return { error: 'browser_evaluate requires a "script" argument' };
|
|
1667
|
+
return { ...await provider.evaluate(script), backend };
|
|
1665
1668
|
}
|
|
1666
1669
|
|
|
1667
1670
|
case 'android_start_emulator': {
|
|
@@ -250,8 +250,8 @@ class VmBrowserProvider {
|
|
|
250
250
|
async typeText(text, options = {}) { return this.#materialize(await this.client.request('POST', '/browser/type-text', { text, ...options })); }
|
|
251
251
|
async pressKey(key, screenshot = true) { return this.#materialize(await this.client.request('POST', '/browser/press-key', { key, screenshot })); }
|
|
252
252
|
async scroll(deltaX, deltaY, screenshot = true) { return this.#materialize(await this.client.request('POST', '/browser/scroll', { deltaX, deltaY, screenshot })); }
|
|
253
|
-
extract(selector, attribute, all = false) { return this.client.request('POST', '/browser/extract', { selector, attribute, all }); }
|
|
254
|
-
evaluate(script) { return this.client.request('POST', '/browser/execute', { code: script }); }
|
|
253
|
+
async extract(selector, attribute, all = false) { return this.client.request('POST', '/browser/extract', { selector, attribute, all }); }
|
|
254
|
+
async evaluate(script) { return this.client.request('POST', '/browser/execute', { code: script }); }
|
|
255
255
|
async screenshot(options = {}) { return this.#materialize(await this.client.request('POST', '/browser/screenshot', options)); }
|
|
256
256
|
async screenshotJpeg(quality = 80, options = {}) {
|
|
257
257
|
const result = await this.client.request('POST', '/browser/screenshot-jpeg', { ...options, quality });
|
|
@@ -259,11 +259,11 @@ class VmBrowserProvider {
|
|
|
259
259
|
if (!content) throw new Error('VM browser screenshot-jpeg returned no data.');
|
|
260
260
|
return Buffer.from(content, 'base64');
|
|
261
261
|
}
|
|
262
|
-
launch(options = {}) { return this.client.request('POST', '/browser/launch', options); }
|
|
263
|
-
closeBrowser() { return this.client.request('POST', '/browser/close'); }
|
|
264
|
-
fill(selector, value) { return this.type(selector, value); }
|
|
265
|
-
extractContent(options = {}) { return this.client.request('POST', '/browser/extract', options); }
|
|
266
|
-
executeJS(code) { return this.evaluate(code); }
|
|
262
|
+
async launch(options = {}) { return this.client.request('POST', '/browser/launch', options); }
|
|
263
|
+
async closeBrowser() { return this.client.request('POST', '/browser/close'); }
|
|
264
|
+
async fill(selector, value) { return this.type(selector, value); }
|
|
265
|
+
async extractContent(options = {}) { return this.client.request('POST', '/browser/extract', options); }
|
|
266
|
+
async executeJS(code) { return this.evaluate(code); }
|
|
267
267
|
async getPageInfo() {
|
|
268
268
|
const status = await this.client.request('GET', '/browser/status');
|
|
269
269
|
this.headless = status?.headless !== false;
|