neoagent 2.5.2-beta.3 → 2.5.2-beta.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "neoagent",
3
- "version": "2.5.2-beta.3",
3
+ "version": "2.5.2-beta.4",
4
4
  "description": "Proactive personal AI agent with no limits",
5
5
  "license": "AGPL-3.0-only",
6
6
  "main": "server/index.js",
@@ -1 +1 @@
1
- abe2552b23bc51626fa18b7baf5d91d0
1
+ 7ca2b8a1d9b5130d74aacec76db363e6
@@ -37,6 +37,6 @@ _flutter.buildConfig = {"engineRevision":"77e2e94772b6eb43759e34ed1ad7da4674e19c
37
37
 
38
38
  _flutter.loader.load({
39
39
  serviceWorkerSettings: {
40
- serviceWorkerVersion: "946389838" /* Flutter's service worker is deprecated and will be removed in a future Flutter release. */
40
+ serviceWorkerVersion: "299879781" /* Flutter's service worker is deprecated and will be removed in a future Flutter release. */
41
41
  }
42
42
  });
@@ -134794,7 +134794,7 @@ r===$&&A.b()
134794
134794
  p.push(A.jP(q,A.j9(!1,new A.a_(B.uG,A.d8(new A.cA(B.jt,new A.a7N(r,q),q),q,q),q),!1,B.H,!0),q,q,0,0,0,q))}r=!1
134795
134795
  if(!s.ay)if(!s.ch){r=s.e
134796
134796
  r===$&&A.b()
134797
- r=B.b.u("mqex3krf-3096c64").length!==0&&r.b}if(r){r=s.d
134797
+ r=B.b.u("mqf2hpvd-a15fc11").length!==0&&r.b}if(r){r=s.d
134798
134798
  r===$&&A.b()
134799
134799
  r=r.aP&&!r.ai?84:0
134800
134800
  s=s.e
@@ -140506,7 +140506,7 @@ $S:0}
140506
140506
  A.a_6.prototype={}
140507
140507
  A.SQ.prototype={
140508
140508
  nb(a){var s=this
140509
- if(B.b.u("mqex3krf-3096c64").length===0||s.a!=null)return
140509
+ if(B.b.u("mqf2hpvd-a15fc11").length===0||s.a!=null)return
140510
140510
  s.AU()
140511
140511
  s.a=A.on(B.RH,new A.bc8(s))},
140512
140512
  AU(){var s=0,r=A.l(t.H),q,p=2,o=[],n=this,m,l,k,j,i,h,g,f
@@ -140524,7 +140524,7 @@ if(!t.f.b(k)){s=1
140524
140524
  break}i=J.a3(k,"buildId")
140525
140525
  h=i==null?null:B.b.u(J.p(i))
140526
140526
  j=h==null?"":h
140527
- if(J.bi(j)===0||J.d(j,"mqex3krf-3096c64")){s=1
140527
+ if(J.bi(j)===0||J.d(j,"mqf2hpvd-a15fc11")){s=1
140528
140528
  break}n.b=!0
140529
140529
  n.F()
140530
140530
  p=2
@@ -140541,7 +140541,7 @@ case 2:return A.i(o.at(-1),r)}})
140541
140541
  return A.k($async$AU,r)},
140542
140542
  vE(){var s=0,r=A.l(t.H),q,p=2,o=[],n=this,m,l,k,j,i,h,g,f,e,d,c,b,a,a0,a1
140543
140543
  var $async$vE=A.h(function(a2,a3){if(a2===1){o.push(a3)
140544
- s=p}for(;;)switch(s){case 0:if(B.b.u("mqex3krf-3096c64").length===0||n.c){s=1
140544
+ s=p}for(;;)switch(s){case 0:if(B.b.u("mqf2hpvd-a15fc11").length===0||n.c){s=1
140545
140545
  break}n.c=!0
140546
140546
  n.F()
140547
140547
  p=4
@@ -123,6 +123,7 @@ async function buildArtifactFromCandidate(candidate, fallbackKind = 'artifact')
123
123
  artifact.size = (await fs.promises.stat(artifact.path)).size;
124
124
  } catch (error) {
125
125
  console.warn('[deliverables] Failed to stat artifact candidate:', artifact.path, error?.message || error);
126
+ return null;
126
127
  }
127
128
  }
128
129
  return artifact.path || artifact.uri ? artifact : null;
@@ -117,6 +117,7 @@ const MESSAGING_PROGRESS_REPEAT_MS = 90 * 1000;
117
117
  const MESSAGING_PROGRESS_STALL_MS = 240 * 1000;
118
118
  const MESSAGING_PROGRESS_TICK_MS = 15 * 1000;
119
119
  const GOAL_CONTRACT_SUCCESS_CRITERIA_LIMIT = 12;
120
+ const MODEL_CALL_TIMEOUT_MS = 5 * 60 * 1000;
120
121
 
121
122
  function isoNow() {
122
123
  return new Date().toISOString();
@@ -136,6 +137,31 @@ function formatElapsedDuration(durationMs) {
136
137
  return `${minutes}m ${seconds}s`;
137
138
  }
138
139
 
140
/**
 * Resolve the timeout, in milliseconds, to apply to a single model call.
 *
 * `options.modelCallTimeoutMs` is coerced with `Number`; when the result is a
 * finite, positive value it overrides the module default
 * `MODEL_CALL_TIMEOUT_MS`. Anything else (missing, zero, negative, NaN,
 * Infinity) falls back to the default.
 *
 * @param {object} [options] - Call options; only `modelCallTimeoutMs` is read.
 * @returns {number} Timeout in milliseconds. Overrides are clamped to
 *   [10, 2147483647].
 */
function resolveModelCallTimeoutMs(options = {}) {
  // Floor for caller-supplied overrides (same floor as before this change).
  const MIN_TIMEOUT_MS = 10;
  // The result is handed to setTimeout by withModelCallTimeout. Node.js
  // coerces delays above 2^31 - 1 ms to 1 ms, which would turn a "very long"
  // timeout into an almost immediate one, so cap overrides at that limit.
  const MAX_TIMER_DELAY_MS = 2147483647;

  const requested = Number(options?.modelCallTimeoutMs);
  if (!Number.isFinite(requested) || requested <= 0) {
    return MODEL_CALL_TIMEOUT_MS;
  }
  return Math.min(MAX_TIMER_DELAY_MS, Math.max(MIN_TIMEOUT_MS, requested));
}
147
+
148
/**
 * Await `promise`, but reject if it has not settled within the model-call
 * timeout resolved from `options` (see `resolveModelCallTimeoutMs`).
 *
 * The underlying operation is not cancelled when the deadline wins; only the
 * wait is abandoned. The timer is always cleared once either side settles.
 *
 * @param {Promise|*} promise - The in-flight call (non-promises are wrapped
 *   with `Promise.resolve`).
 * @param {object} [options] - Options forwarded to `resolveModelCallTimeoutMs`.
 * @param {string} [label] - Prefix used in the timeout error message.
 * @returns {Promise<*>} The settled value of `promise`.
 * @throws {Error} With `code === 'MODEL_CALL_TIMEOUT'` when the deadline
 *   elapses first; otherwise whatever `promise` rejects with.
 */
async function withModelCallTimeout(promise, options = {}, label = 'Model call') {
  const limitMs = resolveModelCallTimeoutMs(options);
  let handle = null;

  const deadline = new Promise((_resolve, reject) => {
    handle = setTimeout(() => {
      const timeoutError = Object.assign(
        new Error(`${label} timed out after ${formatElapsedDuration(limitMs)}.`),
        { code: 'MODEL_CALL_TIMEOUT' },
      );
      reject(timeoutError);
    }, limitMs);
  });

  try {
    const settled = await Promise.race([Promise.resolve(promise), deadline]);
    return settled;
  } finally {
    // Release the timer whether the call won, lost, or threw.
    if (handle) clearTimeout(handle);
  }
}
164
+
139
165
  function cloneInterimHistory(history = []) {
140
166
  if (!Array.isArray(history)) return [];
141
167
  return history.map((item) => ({
@@ -187,6 +213,23 @@ function hasVisibleInterimActivity(runMeta) {
187
213
  );
188
214
  }
189
215
 
216
/**
 * Assert that a messaging send result represents a confirmed delivery.
 *
 * A result counts as delivered only when `success === true` and it was not
 * `suppressed`. Otherwise an error is thrown whose message carries the first
 * non-empty reason found in `result.error`, `result.reason`,
 * `result.result.error`, or `result.result.reason`.
 *
 * @param {object|null|undefined} result - Value returned by the send call.
 * @param {string} [label] - Prefix for the error message.
 * @returns {object} The same `result` object when delivery succeeded.
 * @throws {Error} With `code === 'MESSAGING_DELIVERY_FAILED'` and
 *   `deliveryResult` set to `result` (or `null`) when delivery is unconfirmed.
 */
function requireSuccessfulMessagingDelivery(result, label = 'Messaging delivery') {
  if (result?.success === true && result?.suppressed !== true) {
    return result;
  }

  // Turn one candidate into display text. Candidates are trimmed BEFORE one is
  // chosen so a whitespace-only value cannot win and produce an empty reason,
  // and Error/error-like objects contribute their `message` instead of
  // "Error: ..." or "[object Object]".
  const describe = (value) => {
    if (!value) return '';
    const text = typeof value === 'object' && typeof value.message === 'string'
      ? value.message
      : String(value);
    const trimmed = text.trim();
    return trimmed === '[object Object]' ? '' : trimmed;
  };

  const candidates = [
    result?.error,
    result?.reason,
    result?.result?.error,
    result?.result?.reason,
  ];
  const reason = candidates.map(describe).find(Boolean)
    || 'the platform did not confirm delivery';

  const error = new Error(`${label} failed: ${reason}`);
  error.code = 'MESSAGING_DELIVERY_FAILED';
  error.deliveryResult = result || null;
  throw error;
}
232
+
190
233
  function normalizeGoalCriteria(value) {
191
234
  if (!Array.isArray(value)) return [];
192
235
  const seen = new Set();
@@ -257,7 +300,7 @@ function mergeGoalContracts(existing = null, patch = null) {
257
300
  const nextPatch = normalizeGoalContract(patch) || null;
258
301
  if (!current && !nextPatch) return null;
259
302
 
260
- const goal = String(nextPatch?.goal || current?.goal || '').trim();
303
+ const goal = String(current?.goal || nextPatch?.goal || '').trim();
261
304
  const successCriteria = normalizeGoalCriteria([
262
305
  ...(current?.successCriteria || []),
263
306
  ...(nextPatch?.successCriteria || []),
@@ -363,7 +406,6 @@ function resolveRunGoalContext(runMeta, analysis = null, plan = null) {
363
406
  }
364
407
 
365
408
  function buildCompletionDecisionPrompt({
366
- mode,
367
409
  triggerSource,
368
410
  messagingSent = false,
369
411
  goalContext,
@@ -373,52 +415,28 @@ function buildCompletionDecisionPrompt({
373
415
  lastReply,
374
416
  iteration,
375
417
  maxIterations,
376
- progressSummary = '',
377
- platform = null,
378
418
  }) {
379
- const draftReply = mode === 'messaging'
380
- ? (normalizeOutgoingMessage(lastReply || '', platform, { collapseWhitespace: false })
381
- ? String(lastReply || '').trim()
382
- : '')
383
- : normalizeOutgoingMessage(lastReply) || '';
419
+ const draftReply = normalizeOutgoingMessage(lastReply) || '';
384
420
  const lines = [
385
421
  'Return JSON only.',
422
+ 'Decide whether this run should continue autonomously or stop now.',
423
+ 'Schema: {"status":"continue|complete|blocked","reason":"short concrete reason"}',
424
+ 'Rules:',
425
+ '- Use "continue" whenever any safe next step remains in this same run.',
426
+ '- Use "complete" only when the requested outcome is actually achieved and the latest draft is the finished user-facing answer.',
427
+ '- Use "blocked" only when a specific external dependency, missing user input, or permission outside this run is required and the latest draft is the blocker reply.',
428
+ '- If the latest draft asks the user for a missing required value, confirmation, or choice needed to proceed, use "blocked" so the run waits instead of repeating the same ask.',
429
+ '- A progress note, next-step note, apology, plan, or promise to investigate is "continue", not "complete".',
430
+ '- A single failed tool attempt is not blocked if another safe retry, verification step, or alternative path remains.',
431
+ '- A tool-specific API error, timeout, rate limit, or missing result inside this run is usually "continue", not "blocked", if any other available tool could still make progress.',
432
+ `- If completion_confidence_required is ${goalContext.effectiveCompletionConfidence} and the latest draft depends on unverified assumptions, use "continue" so the run can gather evidence, inspect state, or narrow the reply.`,
433
+ triggerSource === 'messaging' && messagingSent
434
+ ? '- A final reply was already delivered via send_message. Use "complete" unless concrete task work remains.'
435
+ : triggerSource === 'messaging'
436
+ ? '- For messaging, do not stop on a partial status message. Continue unless the task is actually complete or externally blocked.'
437
+ : '- Do not stop just because you wrote a status update. Continue unless the task is actually complete or externally blocked.',
386
438
  ];
387
439
 
388
- if (mode === 'messaging') {
389
- lines.push(
390
- 'A messaging run is about to stop after sending user-visible progress, but no final delivery has happened yet.',
391
- 'Decide whether the run should keep working, finish with the completed result now, or stop with one blocker reply now.',
392
- 'Schema: {"status":"continue|complete|blocked","reason":"short concrete reason","final_reply":"string"}',
393
- 'Rules:',
394
- '- Use "continue" whenever any safe next step remains in this same run.',
395
- '- Use "complete" only when the requested outcome is actually achieved and final_reply is the finished user-facing answer to send now.',
396
- '- Use "blocked" only when a specific external dependency, missing user input, or permission outside this run is required and final_reply is the concise blocker reply to send now.',
397
- '- A progress note, next-step note, apology, plan, or "I will investigate" draft is "continue", not "complete" and not "blocked".',
398
- '- If user-visible progress was already sent and no final delivery exists yet, do not stop silently and do not stop on a status-only draft.',
399
- '- final_reply must be empty when status is "continue".',
400
- );
401
- } else {
402
- lines.push(
403
- 'Decide whether this run should continue autonomously or stop now.',
404
- 'Schema: {"status":"continue|complete|blocked","reason":"short concrete reason"}',
405
- 'Rules:',
406
- '- Use "continue" whenever any safe next step remains in this same run.',
407
- '- Use "complete" only when the requested outcome is actually achieved or a truthful final user reply is already ready now.',
408
- '- Use "blocked" only when a specific external dependency outside this run is required.',
409
- '- If the latest draft asks the user for a missing required value, confirmation, or choice needed to proceed, use "blocked" so the run waits instead of repeating the same ask.',
410
- '- A progress update is not complete.',
411
- '- A single failed tool attempt is not blocked if another safe retry, verification step, or alternative path remains.',
412
- '- A tool-specific API error, timeout, rate limit, or missing result inside this run is usually "continue", not "blocked", if any other available tool could still make progress.',
413
- `- If completion_confidence_required is ${goalContext.effectiveCompletionConfidence} and the latest draft depends on unverified assumptions, use "continue" so the run can gather evidence, inspect state, or narrow the reply.`,
414
- triggerSource === 'messaging' && messagingSent
415
- ? '- A reply was already delivered to the user via send_message. Use "complete" unless there is concrete remaining work (e.g., a tool call you still need to make) before the task is truly done. Do not send follow-up elaborations or re-introductions.'
416
- : triggerSource === 'messaging'
417
- ? '- For messaging, do not stop on a partial status message. Continue unless the task is actually complete or externally blocked. If you already asked for missing user input, choose "blocked" and wait.'
418
- : '- Do not stop just because you wrote a status update. Continue unless the task is actually complete or externally blocked.',
419
- );
420
- }
421
-
422
440
  lines.push(
423
441
  goalContext.effectiveGoal ? `Goal: ${goalContext.effectiveGoal}` : '',
424
442
  goalContext.persistedGoalPrompt,
@@ -428,44 +446,14 @@ function buildCompletionDecisionPrompt({
428
446
  : '',
429
447
  `Current iteration: ${iteration} of ${maxIterations}.`,
430
448
  `Available tools in this run: ${summarizeAvailableTools(tools) || 'none'}`,
431
- mode === 'messaging' && progressSummary ? `Progress ledger: ${progressSummary}` : '',
432
449
  `Recent tool evidence:\n${summarizeToolExecutions(toolExecutions, 8) || 'none'}`,
433
450
  `Latest draft reply:\n${draftReply || '(empty)'}`,
434
- mode === 'messaging' ? buildPlatformFormattingGuide(platform) : '',
435
451
  );
436
452
  return lines.filter(Boolean).join('\n');
437
453
  }
438
454
 
439
- function normalizeCompletionDecision(raw, {
440
- mode,
441
- fallbackStatus = 'continue',
442
- platform = null,
443
- draftReply = '',
444
- }) {
455
+ function normalizeCompletionDecision(raw, fallbackStatus = 'continue') {
445
456
  const allowed = new Set(['continue', 'complete', 'blocked']);
446
- if (mode === 'messaging') {
447
- let status = allowed.has(String(raw.status || '').trim().toLowerCase())
448
- ? String(raw.status || '').trim().toLowerCase()
449
- : 'continue';
450
- let finalReply = normalizeOutgoingMessage(raw.final_reply || '', platform, {
451
- collapseWhitespace: false,
452
- })
453
- ? String(raw.final_reply || '').trim()
454
- : '';
455
- if (status === 'continue') {
456
- finalReply = '';
457
- } else if (!finalReply && draftReply) {
458
- finalReply = draftReply;
459
- } else if (!finalReply) {
460
- status = 'continue';
461
- }
462
- return {
463
- status,
464
- reason: String(raw.reason || '').trim().slice(0, 400),
465
- final_reply: finalReply,
466
- };
467
- }
468
-
469
457
  const requestedStatus = String(raw.status || '').trim().toLowerCase();
470
458
  return {
471
459
  status: allowed.has(requestedStatus) ? requestedStatus : fallbackStatus,
@@ -473,16 +461,6 @@ function normalizeCompletionDecision(raw, {
473
461
  };
474
462
  }
475
463
 
476
- function shouldRequireMessagingFinalityCheck(runMeta) {
477
- return Boolean(
478
- runMeta
479
- && runMeta.triggerSource === 'messaging'
480
- && runMeta.finalDeliverySent !== true
481
- && !runMeta.terminalInterim
482
- && hasVisibleInterimActivity(runMeta)
483
- );
484
- }
485
-
486
464
  function planningDepthForForceMode(forceMode) {
487
465
  return forceMode === 'plan_execute' ? 'deep' : 'light';
488
466
  }
@@ -706,6 +684,7 @@ class AgentEngine {
706
684
  this.taskRuntime = services.taskRuntime || null;
707
685
  this.memoryManager = services.memoryManager || null;
708
686
  this.voiceRuntimeManager = services.voiceRuntimeManager || null;
687
+ this.messagingDeliveryRetry = services.messagingDeliveryRetry || {};
709
688
  }
710
689
 
711
690
  async buildSystemPrompt(userId, context = {}) {
@@ -926,21 +905,6 @@ class AgentEngine {
926
905
  .run(JSON.stringify(next), runId);
927
906
  }
928
907
 
929
- replaceLatestConversationAssistantMessage(conversationId, content) {
930
- if (!conversationId) return false;
931
- const messageId = db.prepare(
932
- `SELECT id
933
- FROM conversation_messages
934
- WHERE conversation_id = ? AND role = 'assistant'
935
- ORDER BY id DESC
936
- LIMIT 1`
937
- ).get(conversationId)?.id;
938
- if (!messageId) return false;
939
- db.prepare('UPDATE conversation_messages SET content = ? WHERE id = ?')
940
- .run(content, messageId);
941
- return true;
942
- }
943
-
944
908
  updateRunGoalContract(runId, patch = {}, options = {}) {
945
909
  const runMeta = this.getRunMeta(runId);
946
910
  if (!runMeta) return null;
@@ -1031,6 +995,7 @@ class AgentEngine {
1031
995
  markRunFinalDelivery(runId, content = '', timestamp = isoNow()) {
1032
996
  const runMeta = this.getRunMeta(runId);
1033
997
  if (!runMeta) return null;
998
+ runMeta.messagingSent = true;
1034
999
  runMeta.finalDeliverySent = true;
1035
1000
  runMeta.lastSentMessage = String(content || '').trim() || runMeta.lastSentMessage || '';
1036
1001
  const ledger = this.updateRunProgress(runId, {
@@ -1142,13 +1107,14 @@ class AgentEngine {
1142
1107
  if (!platform || !chatId || !this.messagingManager) {
1143
1108
  return { sent: false, skipped: true, reason: 'Messaging context is not available.' };
1144
1109
  }
1145
- await this.messagingManager.sendMessage(userId, platform, chatId, normalizedContent, {
1110
+ const deliveryResult = await this.messagingManager.sendMessage(userId, platform, chatId, normalizedContent, {
1146
1111
  agentId,
1147
1112
  runId,
1148
1113
  persistConversation: true,
1149
1114
  metadata,
1150
1115
  deliveryKind: 'interim',
1151
1116
  });
1117
+ requireSuccessfulMessagingDelivery(deliveryResult, 'Interim messaging delivery');
1152
1118
  } else if (triggerSource === 'voice_live') {
1153
1119
  const voiceSessionId = runMeta.voiceSessionId || null;
1154
1120
  const manager = this.voiceRuntimeManager || this.app?.locals?.voiceRuntimeManager || null;
@@ -1242,42 +1208,72 @@ class AgentEngine {
1242
1208
  phase = 'structured',
1243
1209
  }) {
1244
1210
  const startedAt = Date.now();
1245
- const response = await withProviderRetry(
1246
- () => provider.chat(
1247
- sanitizeConversationMessages([
1248
- ...messages,
1249
- { role: 'system', content: prompt },
1250
- ]),
1251
- [],
1252
- {
1253
- model,
1254
- maxTokens,
1255
- reasoningEffort: reasoningEffort || this.getReasoningEffort(providerName, {}),
1256
- }
1257
- ),
1258
- { label: `Engine ${model} (structured)` }
1259
- );
1260
- if (telemetry?.runId && telemetry?.userId) {
1261
- recordModelUsage({
1262
- runId: telemetry.runId,
1263
- stepId: telemetry.stepId || null,
1264
- userId: telemetry.userId,
1265
- agentId: telemetry.agentId || null,
1266
- provider: providerName,
1267
- model,
1268
- phase,
1269
- usage: response.usage,
1270
- latencyMs: Date.now() - startedAt,
1211
+ const structuredStep = `model:${phase}`;
1212
+ if (telemetry?.runId) {
1213
+ this.updateRunProgress(telemetry.runId, {
1214
+ currentPhase: 'model',
1215
+ currentStep: structuredStep,
1216
+ currentTool: null,
1217
+ currentStepStartedAt: isoNow(),
1271
1218
  });
1272
1219
  }
1273
1220
 
1274
- const parsed = parseJsonObject(response.content || '');
1275
- const normalizedUsage = normalizeUsage(response.usage);
1276
- return {
1277
- value: normalize(parsed || {}, fallback),
1278
- raw: response.content || '',
1279
- usage: normalizedUsage?.totalTokens || 0,
1280
- };
1221
+ let completed = false;
1222
+ try {
1223
+ const response = await withProviderRetry(
1224
+ () => withModelCallTimeout(
1225
+ provider.chat(
1226
+ sanitizeConversationMessages([
1227
+ ...messages,
1228
+ { role: 'system', content: prompt },
1229
+ ]),
1230
+ [],
1231
+ {
1232
+ model,
1233
+ maxTokens,
1234
+ reasoningEffort: reasoningEffort || this.getReasoningEffort(providerName, {}),
1235
+ }
1236
+ ),
1237
+ telemetry || {},
1238
+ `${phase} model call`,
1239
+ ),
1240
+ { label: `Engine ${model} (structured)` }
1241
+ );
1242
+ completed = true;
1243
+ if (telemetry?.runId && telemetry?.userId) {
1244
+ recordModelUsage({
1245
+ runId: telemetry.runId,
1246
+ stepId: telemetry.stepId || null,
1247
+ userId: telemetry.userId,
1248
+ agentId: telemetry.agentId || null,
1249
+ provider: providerName,
1250
+ model,
1251
+ phase,
1252
+ usage: response.usage,
1253
+ latencyMs: Date.now() - startedAt,
1254
+ });
1255
+ }
1256
+
1257
+ const parsed = parseJsonObject(response.content || '');
1258
+ const normalizedUsage = normalizeUsage(response.usage);
1259
+ return {
1260
+ value: normalize(parsed || {}, fallback),
1261
+ raw: response.content || '',
1262
+ usage: normalizedUsage?.totalTokens || 0,
1263
+ };
1264
+ } finally {
1265
+ const runMeta = telemetry?.runId ? this.getRunMeta(telemetry.runId) : null;
1266
+ if (runMeta?.progressLedger?.currentStep === structuredStep) {
1267
+ this.updateRunProgress(telemetry.runId, {
1268
+ currentPhase: 'idle',
1269
+ currentStep: null,
1270
+ currentTool: null,
1271
+ currentStepStartedAt: null,
1272
+ }, {
1273
+ verified: completed,
1274
+ });
1275
+ }
1276
+ }
1281
1277
  }
1282
1278
 
1283
1279
  async requestModelResponse({
@@ -1304,8 +1300,16 @@ class AgentEngine {
1304
1300
  if (options.stream !== false) {
1305
1301
  let emittedContent = false;
1306
1302
  const stream = provider.stream(requestMessages, tools, callOptions);
1303
+ const iterator = stream[Symbol.asyncIterator]();
1307
1304
  try {
1308
- for await (const chunk of stream) {
1305
+ while (true) {
1306
+ const next = await withModelCallTimeout(
1307
+ iterator.next(),
1308
+ options,
1309
+ `Model stream iteration ${iteration}`,
1310
+ );
1311
+ if (next.done) break;
1312
+ const chunk = next.value;
1309
1313
  if (chunk.type === 'content') {
1310
1314
  emittedContent = true;
1311
1315
  streamContent += chunk.content;
@@ -1329,13 +1333,18 @@ class AgentEngine {
1329
1333
  }
1330
1334
  }
1331
1335
  } catch (err) {
1336
+ Promise.resolve(iterator.return?.()).catch(() => {});
1332
1337
  // Once tokens have streamed to the client a retry would duplicate
1333
1338
  // output, so only the pre-stream window is safe to replay.
1334
1339
  if (emittedContent) err.__providerRetryUnsafe = true;
1335
1340
  throw err;
1336
1341
  }
1337
1342
  } else {
1338
- response = await provider.chat(requestMessages, tools, callOptions);
1343
+ response = await withModelCallTimeout(
1344
+ provider.chat(requestMessages, tools, callOptions),
1345
+ options,
1346
+ `Model iteration ${iteration}`,
1347
+ );
1339
1348
  }
1340
1349
 
1341
1350
  return { response, streamContent };
@@ -1485,7 +1494,6 @@ class AgentEngine {
1485
1494
  model,
1486
1495
  messages,
1487
1496
  prompt: buildCompletionDecisionPrompt({
1488
- mode: 'loop',
1489
1497
  triggerSource,
1490
1498
  messagingSent,
1491
1499
  goalContext,
@@ -1497,10 +1505,7 @@ class AgentEngine {
1497
1505
  maxIterations,
1498
1506
  }),
1499
1507
  maxTokens: 320,
1500
- normalize: (raw) => normalizeCompletionDecision(raw, {
1501
- mode: 'loop',
1502
- fallbackStatus,
1503
- }),
1508
+ normalize: (raw) => normalizeCompletionDecision(raw, fallbackStatus),
1504
1509
  fallback: { status: fallbackStatus },
1505
1510
  reasoningEffort: this.getReasoningEffort(providerName, options),
1506
1511
  telemetry: options,
@@ -1513,6 +1518,67 @@ class AgentEngine {
1513
1518
  };
1514
1519
  }
1515
1520
 
1521
+ async evaluateTaskCompleteSignal({
1522
+ provider,
1523
+ providerName,
1524
+ model,
1525
+ messages,
1526
+ tools,
1527
+ analysis,
1528
+ plan,
1529
+ toolExecutions,
1530
+ finalMessage,
1531
+ confidence,
1532
+ triggerSource,
1533
+ messagingSent,
1534
+ iteration,
1535
+ maxIterations,
1536
+ options,
1537
+ }) {
1538
+ const runMeta = options?.runId ? this.getRunMeta(options.runId) : null;
1539
+ const requiredConfidence = resolveRunGoalContext(runMeta, analysis, plan)
1540
+ .effectiveCompletionConfidence;
1541
+ const confidenceDecision = shouldAcceptTaskComplete({
1542
+ confidence,
1543
+ requiredConfidence,
1544
+ iteration,
1545
+ maxIterations,
1546
+ });
1547
+ if (!confidenceDecision.accept) {
1548
+ return {
1549
+ decision: {
1550
+ status: 'continue',
1551
+ reason: confidenceDecision.reason,
1552
+ },
1553
+ requiredConfidence,
1554
+ usage: 0,
1555
+ };
1556
+ }
1557
+
1558
+ const loopState = await this.decideLoopState({
1559
+ provider,
1560
+ providerName,
1561
+ model,
1562
+ messages,
1563
+ tools,
1564
+ analysis,
1565
+ plan,
1566
+ toolExecutions,
1567
+ lastReply: finalMessage,
1568
+ triggerSource,
1569
+ messagingSent,
1570
+ iteration,
1571
+ maxIterations,
1572
+ options,
1573
+ fallbackStatus: 'continue',
1574
+ });
1575
+ return {
1576
+ decision: loopState.decision,
1577
+ requiredConfidence,
1578
+ usage: loopState.usage || 0,
1579
+ };
1580
+ }
1581
+
1516
1582
  async verifyFinalResponse({
1517
1583
  provider,
1518
1584
  providerName,
@@ -1623,11 +1689,15 @@ class AgentEngine {
1623
1689
  }
1624
1690
  ];
1625
1691
 
1626
- const response = await provider.chat(promptMessages, [], {
1627
- model,
1628
- maxTokens: 800,
1629
- reasoningEffort: this.getReasoningEffort(providerName, options),
1630
- });
1692
+ const response = await withModelCallTimeout(
1693
+ provider.chat(promptMessages, [], {
1694
+ model,
1695
+ maxTokens: 800,
1696
+ reasoningEffort: this.getReasoningEffort(providerName, options),
1697
+ }),
1698
+ options,
1699
+ 'Conversation state refresh',
1700
+ );
1631
1701
  const parsed = parseJsonObject(response.content || '') || {};
1632
1702
  const nextState = {
1633
1703
  summary: String(parsed.summary || existingState?.summary || '').trim(),
@@ -1684,19 +1754,23 @@ class AgentEngine {
1684
1754
  `[Run ${shortenRunId(runId)}] blank_reply_recovery attempt=${attempt} model=${model}`
1685
1755
  );
1686
1756
  try {
1687
- const response = await provider.chat(
1688
- sanitizeConversationMessages([
1689
- ...messages,
1757
+ const response = await withModelCallTimeout(
1758
+ provider.chat(
1759
+ sanitizeConversationMessages([
1760
+ ...messages,
1761
+ {
1762
+ role: 'system',
1763
+ content: buildBlankMessagingReplyPrompt(attempt, options?.source || null)
1764
+ }
1765
+ ]),
1766
+ [],
1690
1767
  {
1691
- role: 'system',
1692
- content: buildBlankMessagingReplyPrompt(attempt, options?.source || null)
1768
+ model,
1769
+ reasoningEffort: this.getReasoningEffort(providerName, options)
1693
1770
  }
1694
- ]),
1695
- [],
1696
- {
1697
- model,
1698
- reasoningEffort: this.getReasoningEffort(providerName, options)
1699
- }
1771
+ ),
1772
+ options,
1773
+ `Blank messaging reply recovery ${attempt}`,
1700
1774
  );
1701
1775
  totalTokens += response.usage?.totalTokens || 0;
1702
1776
  recoveredContent = sanitizeModelOutput(response.content || '', { model });
@@ -2127,169 +2201,6 @@ class AgentEngine {
2127
2201
  return { messages, appliedCount: queued.length };
2128
2202
  }
2129
2203
 
2130
- async decideMessagingCompletionState({
2131
- provider,
2132
- providerName,
2133
- model,
2134
- messages,
2135
- analysis,
2136
- plan,
2137
- tools,
2138
- toolExecutions,
2139
- lastReply,
2140
- iteration,
2141
- maxIterations,
2142
- runId,
2143
- options,
2144
- }) {
2145
- const runMeta = this.getRunMeta(runId);
2146
- const goalContext = resolveRunGoalContext(runMeta, analysis, plan);
2147
- const platform = options?.source || null;
2148
- const normalizedDraft = normalizeOutgoingMessage(lastReply || '', platform, {
2149
- collapseWhitespace: false,
2150
- });
2151
- const draftReply = normalizedDraft ? String(lastReply || '').trim() : '';
2152
- const ledger = runMeta?.progressLedger || null;
2153
- const progressSummary = [
2154
- `progress_state=${ledger?.progressState || 'active'}`,
2155
- `current_phase=${ledger?.currentPhase || 'idle'}`,
2156
- `current_tool=${ledger?.currentTool || 'none'}`,
2157
- `heartbeat_count=${Number(ledger?.heartbeatCount || 0)}`,
2158
- `last_visible_update=${ledger?.lastUserVisibleUpdateAt || 'none'}`,
2159
- `last_verified_progress=${ledger?.lastVerifiedProgressAt || 'none'}`,
2160
- `last_final_delivery=${ledger?.lastFinalDeliveryAt || 'none'}`,
2161
- ].join('; ');
2162
-
2163
- const response = await this.requestStructuredJson({
2164
- provider,
2165
- providerName,
2166
- model,
2167
- messages,
2168
- prompt: buildCompletionDecisionPrompt({
2169
- mode: 'messaging',
2170
- goalContext,
2171
- parallelWork: analysis?.parallel_work === true,
2172
- tools,
2173
- toolExecutions,
2174
- lastReply: draftReply,
2175
- iteration,
2176
- maxIterations,
2177
- progressSummary,
2178
- platform,
2179
- }),
2180
- maxTokens: 480,
2181
- normalize: (raw) => normalizeCompletionDecision(raw, {
2182
- mode: 'messaging',
2183
- platform,
2184
- draftReply,
2185
- }),
2186
- fallback: {
2187
- status: 'continue',
2188
- reason: '',
2189
- final_reply: '',
2190
- },
2191
- reasoningEffort: this.getReasoningEffort(providerName, options),
2192
- telemetry: options,
2193
- phase: 'messaging_completion',
2194
- });
2195
-
2196
- return {
2197
- decision: response.value,
2198
- usage: response.usage,
2199
- };
2200
- }
2201
-
2202
- async resolveMessagingCompletionDecision({
2203
- provider,
2204
- providerName,
2205
- model,
2206
- messages,
2207
- analysis,
2208
- plan,
2209
- tools,
2210
- toolExecutions,
2211
- lastReply,
2212
- iteration,
2213
- maxIterations,
2214
- runId,
2215
- conversationId,
2216
- options,
2217
- }) {
2218
- const runMeta = this.getRunMeta(runId);
2219
- if (!shouldRequireMessagingFinalityCheck(runMeta)) {
2220
- return {
2221
- action: 'none',
2222
- content: lastReply,
2223
- reason: '',
2224
- usage: 0,
2225
- };
2226
- }
2227
-
2228
- let completionDecision;
2229
- try {
2230
- completionDecision = await this.decideMessagingCompletionState({
2231
- provider,
2232
- providerName,
2233
- model,
2234
- messages,
2235
- analysis,
2236
- plan,
2237
- tools,
2238
- toolExecutions,
2239
- lastReply,
2240
- iteration,
2241
- maxIterations,
2242
- runId,
2243
- options,
2244
- });
2245
- } catch (error) {
2246
- if (iteration >= maxIterations) {
2247
- const wrapped = new Error(
2248
- `Messaging completion check failed after visible progress: ${error?.message || error}`,
2249
- );
2250
- wrapped.disableAutonomousRetry = error?.disableAutonomousRetry === true;
2251
- throw wrapped;
2252
- }
2253
- return {
2254
- action: 'continue',
2255
- content: '',
2256
- reason: 'The run still needs an explicit final result or blocker decision.',
2257
- usage: 0,
2258
- };
2259
- }
2260
-
2261
- const decision = completionDecision.decision || { status: 'continue', reason: '' };
2262
- if (decision.status === 'continue') {
2263
- if (iteration >= maxIterations) {
2264
- throw new Error(
2265
- 'Messaging run reached the iteration limit before producing a final answer or blocker after visible progress.',
2266
- );
2267
- }
2268
- return {
2269
- action: 'continue',
2270
- content: '',
2271
- reason: decision.reason || 'The current draft is still only progress.',
2272
- usage: completionDecision.usage || 0,
2273
- };
2274
- }
2275
-
2276
- const finalContent = String(decision.final_reply || lastReply || '').trim();
2277
- if (finalContent && messages[messages.length - 1]?.role === 'assistant') {
2278
- messages[messages.length - 1] = {
2279
- ...messages[messages.length - 1],
2280
- content: finalContent,
2281
- };
2282
- this.replaceLatestConversationAssistantMessage(conversationId, finalContent);
2283
- }
2284
-
2285
- return {
2286
- action: decision.status === 'blocked' ? 'blocked' : 'complete',
2287
- content: finalContent,
2288
- reason: decision.reason || '',
2289
- usage: completionDecision.usage || 0,
2290
- };
2291
- }
2292
-
2293
2204
  buildMessagingHeartbeatText(runMeta, options = {}) {
2294
2205
  const stalled = options.stalled === true;
2295
2206
  const now = Date.now();
@@ -2327,7 +2238,7 @@ class AgentEngine {
2327
2238
 
2328
2239
  const createdAt = isoNow();
2329
2240
  const content = this.buildMessagingHeartbeatText(runMeta, options);
2330
- await this.messagingManager.sendMessage(
2241
+ const deliveryResult = await this.messagingManager.sendMessage(
2331
2242
  runMeta.userId,
2332
2243
  runMeta.messagingContext.platform,
2333
2244
  runMeta.messagingContext.chatId,
@@ -2345,6 +2256,7 @@ class AgentEngine {
2345
2256
  deliveryKind: 'interim',
2346
2257
  },
2347
2258
  );
2259
+ requireSuccessfulMessagingDelivery(deliveryResult, 'Messaging heartbeat delivery');
2348
2260
 
2349
2261
  runMeta.lastInterimMessage = content;
2350
2262
  if (!Array.isArray(runMeta.interimMessages)) {
@@ -2421,9 +2333,31 @@ class AgentEngine {
2421
2333
  await this.messagingManager.sendTyping(userId, platform, chatId, true, { agentId }).catch(() => {});
2422
2334
  await new Promise((resolve) => setTimeout(resolve, delay));
2423
2335
  }
2424
- await this.messagingManager.sendMessage(userId, platform, chatId, chunks[i], { runId, agentId }).catch((err) =>
2425
- console.error('[Engine] Auto-reply fallback failed:', err.message)
2426
- );
2336
+ try {
2337
+ await withProviderRetry(async () => {
2338
+ const deliveryResult = await this.messagingManager.sendMessage(
2339
+ userId,
2340
+ platform,
2341
+ chatId,
2342
+ chunks[i],
2343
+ { runId, agentId },
2344
+ );
2345
+ return requireSuccessfulMessagingDelivery(deliveryResult, 'Final messaging delivery');
2346
+ }, {
2347
+ ...this.messagingDeliveryRetry,
2348
+ label: `MessagingDelivery ${platform}`,
2349
+ isRetryable: (error) => (
2350
+ error?.retryable !== false
2351
+ && (
2352
+ error?.code === 'MESSAGING_DELIVERY_FAILED'
2353
+ || isTransientError(error)
2354
+ )
2355
+ ),
2356
+ });
2357
+ } catch (error) {
2358
+ error.disableAutonomousRetry = true;
2359
+ throw error;
2360
+ }
2427
2361
  }
2428
2362
 
2429
2363
  runMeta.lastSentMessage = chunks[chunks.length - 1] || cleanedContent;
@@ -2474,7 +2408,10 @@ class AgentEngine {
2474
2408
  return { sent: false, skipped: true };
2475
2409
  }
2476
2410
 
2477
- if (ledger.currentPhase === 'tool' && ledger.currentStepStartedAt) {
2411
+ if (
2412
+ (ledger.currentPhase === 'tool' || ledger.currentPhase === 'model')
2413
+ && ledger.currentStepStartedAt
2414
+ ) {
2478
2415
  return this.sendRuntimeMessagingHeartbeat(runId, { stalled });
2479
2416
  }
2480
2417
 
@@ -2788,7 +2725,12 @@ class AgentEngine {
2788
2725
  const carriedExplicitMessageSent = retryMessagingState.explicitMessageSent === true;
2789
2726
  const carriedInterimHistory = cloneInterimHistory(retryMessagingState.interimHistory);
2790
2727
  const carriedLastInterimMessage = carriedInterimHistory[carriedInterimHistory.length - 1]?.content || '';
2791
- const carriedGoalContract = normalizeGoalContract(retryMessagingState.goalContract);
2728
+ const carriedGoalContract = mergeGoalContracts(
2729
+ normalizeGoalContract({
2730
+ goal: clampRunContext(userMessage, 1200),
2731
+ }),
2732
+ retryMessagingState.goalContract,
2733
+ );
2792
2734
  const startedAtIso = isoNow();
2793
2735
  const progressLedger = buildInitialProgressLedger({
2794
2736
  startedAt: startedAtIso,
@@ -3223,6 +3165,37 @@ class AgentEngine {
3223
3165
  db.prepare('INSERT INTO conversation_messages (conversation_id, role, content, tokens) VALUES (?, ?, ?, ?)')
3224
3166
  .run(conversationId, 'assistant', lastContent, analysisUsage);
3225
3167
  }
3168
+ const directAnswerDecision = await runWithModelFallback(
3169
+ 'direct answer completion decision',
3170
+ () => this.decideLoopState({
3171
+ provider,
3172
+ providerName,
3173
+ model,
3174
+ messages,
3175
+ tools,
3176
+ analysis,
3177
+ plan,
3178
+ toolExecutions,
3179
+ lastReply: lastContent,
3180
+ triggerSource,
3181
+ messagingSent: false,
3182
+ iteration,
3183
+ maxIterations,
3184
+ options: { ...options, runId, userId, agentId },
3185
+ fallbackStatus: 'continue',
3186
+ }),
3187
+ );
3188
+ totalTokens += directAnswerDecision.usage || 0;
3189
+ if (directAnswerDecision.decision.status === 'continue') {
3190
+ messages.push({
3191
+ role: 'system',
3192
+ content: directAnswerDecision.decision.reason
3193
+ ? `Continue working: ${directAnswerDecision.decision.reason}.`
3194
+ : 'The initial draft is not a finished answer. Continue working autonomously.',
3195
+ });
3196
+ lastContent = '';
3197
+ directAnswerEligible = false;
3198
+ }
3226
3199
  }
3227
3200
 
3228
3201
  // BUG FIX: consecutiveToolFailures was previously declared INSIDE the
@@ -3248,14 +3221,16 @@ class AgentEngine {
3248
3221
  currentStep: `model:${iteration}`,
3249
3222
  currentTool: null,
3250
3223
  currentStepStartedAt: isoNow(),
3251
- }, {
3252
- verified: true,
3253
3224
  });
3254
3225
 
3255
3226
  let metrics = this.estimatePromptMetrics(messages, tools);
3256
3227
  const contextWindow = provider.getContextWindow(model);
3257
3228
  if (metrics.totalEstimatedTokens > contextWindow * loopPolicy.compactionThreshold) {
3258
- messages = await compact(messages, provider, model, contextWindow);
3229
+ messages = await withModelCallTimeout(
3230
+ compact(messages, provider, model, contextWindow),
3231
+ options,
3232
+ `Context compaction before iteration ${iteration}`,
3233
+ );
3259
3234
  messages = sanitizeConversationMessages(messages);
3260
3235
  this.emit(userId, 'run:compaction', { runId, iteration });
3261
3236
  metrics = this.estimatePromptMetrics(messages, tools);
@@ -3393,6 +3368,9 @@ class AgentEngine {
3393
3368
  toolCallCount: response.toolCalls?.length || 0,
3394
3369
  contentPreview: String(lastContent || streamContent || '').slice(0, 240),
3395
3370
  }, { agentId });
3371
+ this.updateRunProgress(runId, {}, {
3372
+ verified: true,
3373
+ });
3396
3374
 
3397
3375
  const assistantMessage = { role: 'assistant', content: lastContent };
3398
3376
  if (response.toolCalls?.length) assistantMessage.tool_calls = response.toolCalls;
@@ -3416,8 +3394,6 @@ class AgentEngine {
3416
3394
  currentStep: null,
3417
3395
  currentTool: null,
3418
3396
  currentStepStartedAt: null,
3419
- }, {
3420
- verified: true,
3421
3397
  });
3422
3398
  const systemSteeringAfterResponse = this.applyQueuedSystemSteering(runId, messages);
3423
3399
  messages = systemSteeringAfterResponse.messages;
@@ -3446,88 +3422,54 @@ class AgentEngine {
3446
3422
  })) {
3447
3423
  break;
3448
3424
  }
3449
- const runMetaAfterResponse = this.getRunMeta(runId);
3450
- if (shouldRequireMessagingFinalityCheck(runMetaAfterResponse)) {
3451
- const messagingCompletion = await this.resolveMessagingCompletionDecision({
3452
- provider,
3453
- providerName,
3454
- model,
3455
- messages,
3456
- analysis,
3457
- plan,
3458
- tools,
3459
- toolExecutions,
3460
- lastReply: lastContent,
3461
- iteration,
3462
- maxIterations,
3463
- runId,
3464
- conversationId,
3465
- options: { ...options, runId, userId, agentId },
3466
- });
3467
- totalTokens += messagingCompletion.usage || 0;
3468
- if (messagingCompletion.action === 'continue') {
3469
- messages.push({
3470
- role: 'system',
3471
- content: [
3472
- messagingCompletion.reason
3473
- ? `Continue working: ${messagingCompletion.reason}.`
3474
- : 'Continue working autonomously.',
3475
- 'The messaging user has already seen progress. Do not stop until you either have the finished answer now or a concrete blocker reply now.',
3476
- ].join(' ')
3477
- });
3478
- lastContent = '';
3479
- continue;
3480
- }
3481
- if (typeof messagingCompletion.content === 'string') {
3482
- lastContent = messagingCompletion.content;
3483
- }
3484
- break;
3485
- }
3486
- if (iteration < maxIterations) {
3487
- const proactiveRunNeedsDecision = (
3488
- (triggerSource === 'schedule' || triggerSource === 'tasks')
3489
- && this.activeRuns.get(runId)?.noResponse !== true
3490
- && options.deliveryState?.noResponse !== true
3491
- );
3492
- const visibleInterimActivity = hasVisibleInterimActivity(this.activeRuns.get(runId));
3493
- const fallbackStatus = (
3494
- proactiveRunNeedsDecision
3495
- || toolExecutions.length > 0
3496
- || failedStepCount > 0
3497
- || messagingSent
3498
- || visibleInterimActivity
3499
- ) ? 'continue' : 'complete';
3500
- const loopState = await runWithModelFallback('loop decision', () => this.decideLoopState({
3501
- provider,
3502
- providerName,
3503
- model,
3504
- messages,
3505
- tools,
3506
- analysis,
3507
- plan,
3508
- toolExecutions,
3509
- lastReply: lastContent,
3510
- triggerSource,
3511
- messagingSent,
3512
- iteration,
3513
- maxIterations,
3514
- options: { ...options, runId, userId, agentId },
3515
- fallbackStatus,
3516
- }));
3517
- totalTokens += loopState.usage || 0;
3518
- if (loopState.decision.status === 'continue') {
3519
- messages.push({
3520
- role: 'system',
3521
- content: [
3522
- loopState.decision.reason ? `Continue working: ${loopState.decision.reason}.` : 'Continue working autonomously.',
3523
- messagingSent
3524
- ? 'You already sent a user-facing message in this run. Keep working silently unless you have a materially new finished result or a real external blocker.'
3525
- : 'Use send_interim_update sparingly if a short real update or question would help. Otherwise keep working until you have the result or a real blocker.',
3526
- ].join(' ')
3527
- });
3528
- lastContent = '';
3529
- continue;
3425
+ const proactiveRunNeedsDecision = (
3426
+ (triggerSource === 'schedule' || triggerSource === 'tasks')
3427
+ && this.activeRuns.get(runId)?.noResponse !== true
3428
+ && options.deliveryState?.noResponse !== true
3429
+ );
3430
+ const visibleInterimActivity = hasVisibleInterimActivity(this.activeRuns.get(runId));
3431
+ const fallbackStatus = (
3432
+ proactiveRunNeedsDecision
3433
+ || toolExecutions.length > 0
3434
+ || failedStepCount > 0
3435
+ || messagingSent
3436
+ || visibleInterimActivity
3437
+ ) ? 'continue' : 'complete';
3438
+ const loopState = await runWithModelFallback('loop decision', () => this.decideLoopState({
3439
+ provider,
3440
+ providerName,
3441
+ model,
3442
+ messages,
3443
+ tools,
3444
+ analysis,
3445
+ plan,
3446
+ toolExecutions,
3447
+ lastReply: lastContent,
3448
+ triggerSource,
3449
+ messagingSent,
3450
+ iteration,
3451
+ maxIterations,
3452
+ options: { ...options, runId, userId, agentId },
3453
+ fallbackStatus,
3454
+ }));
3455
+ totalTokens += loopState.usage || 0;
3456
+ if (loopState.decision.status === 'continue') {
3457
+ if (iteration >= maxIterations) {
3458
+ throw new Error(
3459
+ `Completion judge found unfinished work at the iteration limit after ${maxIterations} iterations.`,
3460
+ );
3530
3461
  }
3462
+ messages.push({
3463
+ role: 'system',
3464
+ content: [
3465
+ loopState.decision.reason ? `Continue working: ${loopState.decision.reason}.` : 'Continue working autonomously.',
3466
+ messagingSent
3467
+ ? 'You already sent a user-facing message in this run. Keep working silently unless you have a materially new finished result or a real external blocker.'
3468
+ : 'Use send_interim_update sparingly if a short real update or question would help. Otherwise keep working until you have the result or a real blocker.',
3469
+ ].join(' ')
3470
+ });
3471
+ lastContent = '';
3472
+ continue;
3531
3473
  }
3532
3474
  break;
3533
3475
  }
@@ -3537,6 +3479,15 @@ class AgentEngine {
3537
3479
  && response.toolCalls.every((toolCall) => this.isReadOnlyToolCall(toolCall))
3538
3480
  );
3539
3481
  if (canRunParallelBatch) {
3482
+ const parallelToolNames = response.toolCalls
3483
+ .map((toolCall) => toolCall.function?.name)
3484
+ .filter(Boolean);
3485
+ this.updateRunProgress(runId, {
3486
+ currentPhase: 'tool',
3487
+ currentStep: `parallel:${iteration}`,
3488
+ currentTool: parallelToolNames.join(', ') || 'parallel tools',
3489
+ currentStepStartedAt: isoNow(),
3490
+ });
3540
3491
  const batch = await this.executeReadOnlyBatch(response.toolCalls, {
3541
3492
  userId,
3542
3493
  runId,
@@ -3588,6 +3539,14 @@ class AgentEngine {
3588
3539
  deliverableArtifacts,
3589
3540
  compactionMetrics: compactionMetrics.slice(-20),
3590
3541
  });
3542
+ this.updateRunProgress(runId, {
3543
+ currentPhase: 'idle',
3544
+ currentStep: null,
3545
+ currentTool: null,
3546
+ currentStepStartedAt: null,
3547
+ }, {
3548
+ verified: true,
3549
+ });
3591
3550
  continue;
3592
3551
  }
3593
3552
 
@@ -3610,23 +3569,51 @@ class AgentEngine {
3610
3569
  if (toolName === 'task_complete') {
3611
3570
  const finalMessage = String(toolArgs.message || '').trim();
3612
3571
  const confidence = normalizeCompletionConfidence(toolArgs.confidence || 'medium');
3613
- const completionDecision = shouldAcceptTaskComplete({
3614
- confidence,
3615
- requiredConfidence: analysis?.completion_confidence_required || 'medium',
3616
- iteration,
3617
- maxIterations,
3618
- });
3572
+ const messagingSent = this.getRunMeta(runId)?.messagingSent === true;
3573
+ const completionResult = await runWithModelFallback(
3574
+ 'task completion decision',
3575
+ () => this.evaluateTaskCompleteSignal({
3576
+ provider,
3577
+ providerName,
3578
+ model,
3579
+ messages,
3580
+ tools,
3581
+ analysis,
3582
+ plan,
3583
+ toolExecutions,
3584
+ finalMessage,
3585
+ confidence,
3586
+ triggerSource,
3587
+ messagingSent,
3588
+ iteration,
3589
+ maxIterations,
3590
+ options: { ...options, runId, userId, agentId },
3591
+ }),
3592
+ );
3593
+ totalTokens += completionResult.usage || 0;
3594
+ const completionDecision = completionResult.decision || {
3595
+ status: 'continue',
3596
+ reason: 'The completion signal could not be verified.',
3597
+ };
3598
+ const accepted = completionDecision.status !== 'continue';
3619
3599
  this.recordRunEvent(userId, runId, 'task_complete_signaled', {
3620
3600
  confidence,
3621
- requiredConfidence: analysis?.completion_confidence_required || 'medium',
3622
- accepted: completionDecision.accept,
3601
+ requiredConfidence: completionResult.requiredConfidence,
3602
+ accepted,
3603
+ judgeStatus: completionDecision.status,
3604
+ judgeReason: completionDecision.reason || '',
3623
3605
  iteration,
3624
3606
  messageLength: finalMessage.length,
3625
3607
  }, { agentId });
3626
3608
  console.info(
3627
- `[Run ${shortenRunId(runId)}] task_complete signaled at iteration=${iteration} confidence=${confidence} accepted=${completionDecision.accept}`
3609
+ `[Run ${shortenRunId(runId)}] task_complete signaled at iteration=${iteration} confidence=${confidence} judge=${completionDecision.status} accepted=${accepted}`
3628
3610
  );
3629
- if (!completionDecision.accept) {
3611
+ if (!accepted) {
3612
+ if (iteration >= maxIterations) {
3613
+ throw new Error(
3614
+ `Completion judge rejected task_complete at the iteration limit after ${maxIterations} iterations.`,
3615
+ );
3616
+ }
3630
3617
  messages.push({
3631
3618
  role: 'tool',
3632
3619
  name: toolName,
@@ -3634,13 +3621,14 @@ class AgentEngine {
3634
3621
  content: JSON.stringify({
3635
3622
  status: 'continue',
3636
3623
  reason: completionDecision.reason,
3637
- required_confidence: analysis?.completion_confidence_required || 'medium',
3624
+ required_confidence: completionResult.requiredConfidence,
3638
3625
  }),
3639
3626
  });
3640
3627
  messages.push({
3641
3628
  role: 'system',
3642
3629
  content: `${completionDecision.reason} Do not ask the user to decide the next step unless external input is truly required.`
3643
3630
  });
3631
+ lastContent = '';
3644
3632
  continue;
3645
3633
  }
3646
3634
  if (completionDecision.reason) {
@@ -3712,7 +3700,6 @@ class AgentEngine {
3712
3700
  currentTool: toolName,
3713
3701
  currentStepStartedAt: isoNow(),
3714
3702
  }, {
3715
- verified: true,
3716
3703
  stepId,
3717
3704
  });
3718
3705
 
@@ -4139,20 +4126,6 @@ class AgentEngine {
4139
4126
  refreshConversationSummary(conversationId, provider, model, historyWindow).catch((err) => {
4140
4127
  console.error('[AI] Conversation summary refresh failed:', err.message);
4141
4128
  });
4142
- await this.refreshConversationState({
4143
- conversationId,
4144
- runId,
4145
- provider,
4146
- providerName,
4147
- model,
4148
- finalReply: finalResponseText,
4149
- analysis,
4150
- verification,
4151
- historyWindow,
4152
- options: { ...options, userId, agentId },
4153
- }).catch((err) => {
4154
- console.error('[AI] Conversation working state refresh failed:', err.message);
4155
- });
4156
4129
  }
4157
4130
  }
4158
4131
 
@@ -4186,6 +4159,23 @@ class AgentEngine {
4186
4159
  }
4187
4160
  }
4188
4161
 
4162
+ if (conversationId && options.skipConversationMaintenance !== true) {
4163
+ await this.refreshConversationState({
4164
+ conversationId,
4165
+ runId,
4166
+ provider,
4167
+ providerName,
4168
+ model,
4169
+ finalReply: finalResponseText,
4170
+ analysis,
4171
+ verification,
4172
+ historyWindow,
4173
+ options: { ...options, userId, agentId },
4174
+ }).catch((err) => {
4175
+ console.error('[AI] Conversation working state refresh failed:', err.message);
4176
+ });
4177
+ }
4178
+
4189
4179
  console.info(
4190
4180
  `[Run ${shortenRunId(runId)}] completed trigger=${triggerSource} steps=${stepIndex} tokens=${totalTokens} durationMs=${runMeta?.startedAt ? Date.now() - runMeta.startedAt : 0} finalResponse=${finalResponseText ? 'yes' : 'no'} sentMessages=${runMeta?.sentMessages?.length || 0}`
4191
4181
  );
@@ -4272,6 +4262,8 @@ class AgentEngine {
4272
4262
  triggerSource === 'messaging'
4273
4263
  && options.source
4274
4264
  && options.chatId
4265
+ && runMeta?.finalDeliverySent !== true
4266
+ && runMeta?.messagingSent !== true
4275
4267
  && err?.disableAutonomousRetry !== true
4276
4268
  && !isRateLimitError
4277
4269
  && retryCount < this.getMessagingRetryLimit(maxIterations)
@@ -4342,7 +4334,7 @@ class AgentEngine {
4342
4334
  let messagingFailureContent = '';
4343
4335
  let sendSucceeded = false;
4344
4336
  if (triggerSource === 'messaging' && options.source && options.chatId) {
4345
- if (!runMeta?.messagingSent) {
4337
+ if (!runMeta?.finalDeliverySent && !runMeta?.messagingSent) {
4346
4338
  const manager = this.messagingManager;
4347
4339
  if (manager) {
4348
4340
  const failureScenario = buildMessagingFailureScenario({
@@ -4359,10 +4351,14 @@ class AgentEngine {
4359
4351
  content: `The run encountered a runtime error and cannot continue reliably. Use the actual run scenario below to explain the blocker naturally.\n\nScenario:\n${failureScenario || 'No additional scenario details were captured.'}\n\nDo not call tools. Write exactly one short user message. Do not ask the user to resend or restate the same task. Only ask the user for something if a specific external input, permission, or configuration change is actually required. Do not promise future work unless it will happen automatically before this reply is sent.\n\n${buildPlatformFormattingGuide(options?.source || null)}`
4360
4352
  }
4361
4353
  ]);
4362
- const modelReply = await provider.chat(failedMessage, [], {
4363
- model,
4364
- reasoningEffort: this.getReasoningEffort(providerName, options)
4365
- });
4354
+ const modelReply = await withModelCallTimeout(
4355
+ provider.chat(failedMessage, [], {
4356
+ model,
4357
+ reasoningEffort: this.getReasoningEffort(providerName, options)
4358
+ }),
4359
+ options,
4360
+ 'Messaging failure reply',
4361
+ );
4366
4362
  const drafted = sanitizeModelOutput(modelReply.content || '', { model });
4367
4363
  if (normalizeOutgoingMessage(drafted, options?.source || null)) {
4368
4364
  messagingFailureContent = drafted.trim();
@@ -4381,7 +4377,14 @@ class AgentEngine {
4381
4377
  }
4382
4378
 
4383
4379
  try {
4384
- await manager.sendMessage(userId, options.source, options.chatId, messagingFailureContent, { runId, agentId });
4380
+ const deliveryResult = await manager.sendMessage(
4381
+ userId,
4382
+ options.source,
4383
+ options.chatId,
4384
+ messagingFailureContent,
4385
+ { runId, agentId },
4386
+ );
4387
+ requireSuccessfulMessagingDelivery(deliveryResult, 'Messaging failure delivery');
4385
4388
  sendSucceeded = true;
4386
4389
  if (runMeta) {
4387
4390
  runMeta.lastSentMessage = messagingFailureContent;
@@ -9,6 +9,7 @@ const {
9
9
  normalizeOutgoingMessageForPlatform,
10
10
  } = require('../messaging/formatting_guides');
11
11
  const { INTERIM_KINDS, normalizeInterimKind } = require('./interim');
12
+ const { normalizeWhatsAppId } = require('../../utils/whatsapp');
12
13
  const {
13
14
  executeIntegratedTool,
14
15
  getIntegratedToolDefinitions,
@@ -320,6 +321,31 @@ function normalizeMessagingTarget(target = {}) {
320
321
  return { platform, to };
321
322
  }
322
323
 
324
+ function canonicalMessagingAddress(platform, value) { // Builds a string form of a chat address for equality comparison; returns '' when it cannot.
325
+ const normalizedPlatform = String(platform || '').trim().toLowerCase(); // platform name compared case-insensitively
326
+ const raw = String(value || '').trim(); // any falsy value (including 0) becomes ''
327
+ if (!normalizedPlatform || !raw) return ''; // missing platform or address: nothing to compare
328
+ if (normalizedPlatform !== 'whatsapp') return raw; // other platforms: trimmed address, case preserved
329
+
330
+ const lower = raw.toLowerCase();
331
+ const normalizedId = normalizeWhatsAppId(lower); // NOTE(review): helper imported from ../../utils/whatsapp; presumably reduces a JID to its bare id - confirm
332
+ if (!normalizedId) return ''; // helper produced nothing usable
333
+ if (lower.includes('@g.us')) return `group:${normalizedId}`; // the kind prefix keeps equal ids of different address kinds from comparing equal
334
+ if (lower.includes('@lid')) return `lid:${normalizedId}`;
335
+ return `direct:${normalizedId}`; // neither '@g.us' nor '@lid' present
336
+ }
337
+
338
+ function isOriginMessagingDelivery({ triggerSource, source, chatId, platform, to }) { // Reports whether a send to (platform, to) targets the chat (source, chatId) the run came from.
339
+ if (triggerSource !== 'messaging') return true; // runs with any other trigger source always pass this check
340
+ const originPlatform = String(source || '').trim().toLowerCase();
341
+ const targetPlatform = String(platform || '').trim().toLowerCase();
342
+ if (!originPlatform || !targetPlatform || originPlatform !== targetPlatform) return false; // missing or differing platform: not the origin chat
343
+
344
+ const originAddress = canonicalMessagingAddress(originPlatform, chatId);
345
+ const targetAddress = canonicalMessagingAddress(targetPlatform, to);
346
+ return Boolean(originAddress && targetAddress && originAddress === targetAddress); // both must be non-empty and equal; two '' results do not count as a match
347
+ }
348
+
323
349
  function buildAndroidUiMatchProperties(extra = {}) {
324
350
  return {
325
351
  x: { type: 'number', description: 'Absolute X coordinate' },
@@ -2244,7 +2270,18 @@ async function executeTool(toolName, args, context, engine) {
2244
2270
  persistConversation: triggerSource === 'schedule' || triggerSource === 'tasks'
2245
2271
  });
2246
2272
  // Track that the agent explicitly sent a message during this run
2247
- if (!suppressReply && sendResult?.suppressed !== true) {
2273
+ if (
2274
+ !suppressReply
2275
+ && sendResult?.success === true
2276
+ && sendResult?.suppressed !== true
2277
+ && isOriginMessagingDelivery({
2278
+ triggerSource,
2279
+ source: context.source,
2280
+ chatId: context.chatId,
2281
+ platform: args.platform,
2282
+ to: args.to,
2283
+ })
2284
+ ) {
2248
2285
  markProactiveMessageSent({ runState, deliveryState, content: normalizedMessage });
2249
2286
  if (runState && triggerSource === 'messaging') {
2250
2287
  runState.explicitMessageSent = true;
@@ -515,6 +515,13 @@ class MessagingManager extends EventEmitter {
515
515
  }
516
516
 
517
517
  const result = await platform.sendMessage(to, normalizedContent, sendOptions);
518
+ if (result?.success === false) {
519
+ const reason = result.error || result.reason || 'platform rejected the message';
520
+ const error = new Error(`Platform ${platformName} delivery failed: ${reason}`);
521
+ error.code = 'MESSAGING_DELIVERY_FAILED';
522
+ error.deliveryResult = result;
523
+ throw error;
524
+ }
518
525
 
519
526
  db.prepare('INSERT INTO messages (user_id, agent_id, run_id, role, content, platform, platform_chat_id, media_path, metadata) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)')
520
527
  .run(userId, agentId, runId, 'assistant', normalizedContent, platformName, to, mediaPath, metadata ? JSON.stringify(metadata) : null);