neoagent 2.5.2-beta.2 → 2.5.2-beta.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -116,6 +116,8 @@ const MESSAGING_PROGRESS_FIRST_UPDATE_MS = 60 * 1000;
116
116
  const MESSAGING_PROGRESS_REPEAT_MS = 90 * 1000;
117
117
  const MESSAGING_PROGRESS_STALL_MS = 240 * 1000;
118
118
  const MESSAGING_PROGRESS_TICK_MS = 15 * 1000;
119
+ const GOAL_CONTRACT_SUCCESS_CRITERIA_LIMIT = 12;
120
+ const MODEL_CALL_TIMEOUT_MS = 5 * 60 * 1000;
119
121
 
120
122
  function isoNow() {
121
123
  return new Date().toISOString();
@@ -135,6 +137,31 @@ function formatElapsedDuration(durationMs) {
135
137
  return `${minutes}m ${seconds}s`;
136
138
  }
137
139
 
140
/**
 * Work out how long (in milliseconds) a single model call may run.
 *
 * A caller-supplied `options.modelCallTimeoutMs` wins when it coerces to a
 * finite, positive number; it is floored at 10 ms. Anything else falls back
 * to the module default `MODEL_CALL_TIMEOUT_MS`.
 *
 * @param {object} [options] - Call options; only `modelCallTimeoutMs` is read.
 * @returns {number} Timeout in milliseconds.
 */
function resolveModelCallTimeoutMs(options = {}) {
  const overrideMs = Number(options?.modelCallTimeoutMs);
  const hasUsableOverride = Number.isFinite(overrideMs) && overrideMs > 0;
  // The 10 ms floor keeps a tiny override from producing a near-zero timer.
  return hasUsableOverride ? Math.max(10, overrideMs) : MODEL_CALL_TIMEOUT_MS;
}
147
+
148
/**
 * Await `promise`, but give up once the resolved model-call timeout elapses.
 *
 * The timeout only stops waiting: nothing here cancels the underlying
 * operation. The timer is always cleared once the race settles.
 *
 * @param {Promise|*} promise - Pending work (non-promises are wrapped).
 * @param {object} [options] - Forwarded to `resolveModelCallTimeoutMs`.
 * @param {string} [label] - Prefix used in the timeout error message.
 * @returns {Promise<*>} The settled value of `promise`.
 * @throws {Error} With `code === 'MODEL_CALL_TIMEOUT'` when the timer fires first.
 */
async function withModelCallTimeout(promise, options = {}, label = 'Model call') {
  const limitMs = resolveModelCallTimeoutMs(options);
  let timerHandle = null;

  const deadline = new Promise((_resolve, reject) => {
    const onExpire = () => {
      const timeoutError = new Error(`${label} timed out after ${formatElapsedDuration(limitMs)}.`);
      timeoutError.code = 'MODEL_CALL_TIMEOUT';
      reject(timeoutError);
    };
    timerHandle = setTimeout(onExpire, limitMs);
  });

  try {
    const settled = await Promise.race([Promise.resolve(promise), deadline]);
    return settled;
  } finally {
    // Clear the timer on every exit path so it cannot keep the process alive.
    if (timerHandle) clearTimeout(timerHandle);
  }
}
164
+
138
165
  function cloneInterimHistory(history = []) {
139
166
  if (!Array.isArray(history)) return [];
140
167
  return history.map((item) => ({
@@ -186,6 +213,254 @@ function hasVisibleInterimActivity(runMeta) {
186
213
  );
187
214
  }
188
215
 
216
/**
 * Assert that a messaging send was actually delivered.
 *
 * A result counts as delivered only when `success === true` and it was not
 * flagged `suppressed`. Otherwise an error is thrown describing the failure.
 *
 * The failure reason is the first candidate among `result.error`,
 * `result.reason`, `result.result.error` and `result.result.reason` that
 * yields non-blank text. Whitespace-only values are skipped (they used to
 * produce an empty reason), and objects carrying a string `message` (such as
 * Error instances) contribute that message rather than their default string
 * form.
 *
 * @param {object|null|undefined} result - Value returned by the send call.
 * @param {string} [label] - Prefix for the thrown error message.
 * @returns {object} The same `result` when delivery is confirmed.
 * @throws {Error} With `code === 'MESSAGING_DELIVERY_FAILED'` and
 *   `deliveryResult` set to `result` (or `null`).
 */
function requireSuccessfulMessagingDelivery(result, label = 'Messaging delivery') {
  if (result?.success === true && result?.suppressed !== true) {
    return result;
  }

  // Turn one candidate into trimmed text; '' means "no usable reason here".
  const describe = (candidate) => {
    if (!candidate) return '';
    if (typeof candidate === 'object' && typeof candidate.message === 'string') {
      const message = candidate.message.trim();
      if (message) return message;
    }
    return String(candidate).trim();
  };

  const candidates = [
    result?.error,
    result?.reason,
    result?.result?.error,
    result?.result?.reason,
  ];
  let reason = '';
  for (const candidate of candidates) {
    reason = describe(candidate);
    if (reason) break;
  }
  if (!reason) reason = 'the platform did not confirm delivery';

  const error = new Error(`${label} failed: ${reason}`);
  error.code = 'MESSAGING_DELIVERY_FAILED';
  error.deliveryResult = result || null;
  throw error;
}
232
+
233
/**
 * Clean up a list of success criteria.
 *
 * Entries are stringified and trimmed; blank entries and case-insensitive
 * duplicates are dropped (the first spelling is kept). Collection stops once
 * `GOAL_CONTRACT_SUCCESS_CRITERIA_LIMIT` items have been gathered.
 *
 * @param {*} value - Candidate list; anything that is not an array yields [].
 * @returns {string[]} Ordered, de-duplicated criteria.
 */
function normalizeGoalCriteria(value) {
  if (!Array.isArray(value)) return [];

  const seenSignatures = new Set();
  const criteria = [];
  for (let index = 0; index < value.length; index += 1) {
    const criterion = String(value[index] || '').trim();
    const signature = criterion.toLowerCase();
    if (criterion === '' || seenSignatures.has(signature)) continue;
    seenSignatures.add(signature);
    criteria.push(criterion);
    if (criteria.length >= GOAL_CONTRACT_SUCCESS_CRITERIA_LIMIT) break;
  }
  return criteria;
}
248
+
249
/**
 * Coerce a loosely shaped goal contract into its canonical form.
 *
 * Each field may be supplied in camelCase or snake_case; the camelCase key is
 * consulted first. Enum-like fields are trimmed and lower-cased, and become ''
 * when the value is not one of the allowed tokens.
 *
 * @param {object|null} [raw] - Candidate contract.
 * @returns {{goal: string, successCriteria: string[], completionConfidenceRequired: string,
 *   progressUpdatePolicy: string, autonomyLevel: string, complexity: string}|null}
 *   The normalized contract, or null when `raw` is not an object or carries
 *   no usable field at all.
 */
function normalizeGoalContract(raw = null) {
  if (!raw || typeof raw !== 'object') return null;

  // Lower-case/trim a candidate and keep it only if it is an allowed token.
  const pickAllowed = (candidate, allowedTokens) => {
    const token = String(candidate || '').trim().toLowerCase();
    return allowedTokens.includes(token) ? token : '';
  };

  const goal = String(raw.goal || '').trim();
  const successCriteria = normalizeGoalCriteria(
    raw.successCriteria || raw.success_criteria || [],
  );

  const confidenceText = String(
    raw.completionConfidenceRequired || raw.completion_confidence_required || '',
  ).trim();
  let completionConfidenceRequired = '';
  if (confidenceText) {
    completionConfidenceRequired = normalizeCompletionConfidence(confidenceText);
  }

  const progressUpdatePolicy = pickAllowed(
    raw.progressUpdatePolicy || raw.progress_update_policy,
    ['none', 'optional', 'required'],
  );
  const autonomyLevel = pickAllowed(
    raw.autonomyLevel || raw.autonomy_level,
    ['minimal', 'normal', 'high'],
  );
  const complexity = pickAllowed(raw.complexity, ['simple', 'standard', 'complex']);

  const hasNothingUsable = !goal
    && successCriteria.length === 0
    && !completionConfidenceRequired
    && !progressUpdatePolicy
    && !autonomyLevel
    && !complexity;
  if (hasNothingUsable) return null;

  return {
    goal,
    successCriteria,
    completionConfidenceRequired,
    progressUpdatePolicy,
    autonomyLevel,
    complexity,
  };
}
297
+
298
/**
 * Combine an existing goal contract with a patch.
 *
 * Merge rules, as implemented below:
 * - `goal`: the existing goal is kept when present; the patch only fills a gap.
 * - `successCriteria`: existing criteria first, then the patch's, re-normalized.
 * - every other field: the patch wins, then the existing value, then a default
 *   ('medium' for completion confidence, '' otherwise).
 *
 * @param {object|null} [existing] - Contract already attached to the run.
 * @param {object|null} [patch] - New information to fold in.
 * @returns {object|null} Normalized merged contract, or null when both inputs
 *   normalize to nothing.
 */
function mergeGoalContracts(existing = null, patch = null) {
  const base = normalizeGoalContract(existing) || null;
  const incoming = normalizeGoalContract(patch) || null;
  if (!base && !incoming) return null;

  // Patch value first, then the existing one, then the supplied default.
  const preferPatch = (field, fallback) => incoming?.[field] || base?.[field] || fallback;

  const goal = String(base?.goal || incoming?.goal || '').trim();
  const successCriteria = normalizeGoalCriteria([
    ...(base?.successCriteria || []),
    ...(incoming?.successCriteria || []),
  ]);

  return normalizeGoalContract({
    goal,
    successCriteria,
    completionConfidenceRequired: preferPatch('completionConfidenceRequired', 'medium'),
    progressUpdatePolicy: preferPatch('progressUpdatePolicy', ''),
    autonomyLevel: preferPatch('autonomyLevel', ''),
    complexity: preferPatch('complexity', ''),
  });
}
330
+
331
/**
 * Build a goal contract from a task-analysis object.
 *
 * Reads the snake_case analysis fields and hands them to
 * `normalizeGoalContract` under their camelCase names.
 *
 * @param {object|null} [analysis] - Analysis result; non-objects yield null.
 * @returns {object|null} Whatever `normalizeGoalContract` returns.
 */
function goalContractFromAnalysis(analysis = null) {
  const isUsable = Boolean(analysis) && typeof analysis === 'object';
  if (!isUsable) return null;

  const candidate = {
    goal: analysis.goal,
    successCriteria: analysis.success_criteria,
    completionConfidenceRequired: analysis.completion_confidence_required,
    progressUpdatePolicy: analysis.progress_update_policy,
    autonomyLevel: analysis.autonomy_level,
    complexity: analysis.complexity,
  };
  return normalizeGoalContract(candidate);
}
342
+
343
/**
 * Build a goal contract from a plan object.
 *
 * Only the plan's `success_criteria` is carried over; every other contract
 * field is left for `normalizeGoalContract` to default.
 *
 * @param {object|null} [plan] - Plan result; non-objects yield null.
 * @returns {object|null} Whatever `normalizeGoalContract` returns.
 */
function goalContractFromPlan(plan = null) {
  const isUsable = Boolean(plan) && typeof plan === 'object';
  if (!isUsable) return null;

  const { success_criteria: successCriteria } = plan;
  return normalizeGoalContract({ successCriteria });
}
349
+
350
/**
 * Resolve the goal contract for a run by layering, in order: the contract
 * stored on `runMeta`, the contract derived from `analysis`, and the contract
 * derived from `plan`.
 *
 * @param {object|null|undefined} runMeta - Run metadata; only `goalContract` is read.
 * @param {object|null} [analysis] - Task analysis, if available.
 * @param {object|null} [plan] - Execution plan, if available.
 * @returns {object|null} The merged contract, as produced by `mergeGoalContracts`.
 */
function buildResolvedGoalContract(runMeta, analysis = null, plan = null) {
  const storedContract = runMeta?.goalContract || null;
  const withAnalysis = mergeGoalContracts(storedContract, goalContractFromAnalysis(analysis));
  return mergeGoalContracts(withAnalysis, goalContractFromPlan(plan));
}
355
+
356
/**
 * Render a goal contract as prompt text.
 *
 * Emits up to three newline-separated sections, each only when it has
 * content: the goal (prefixed with `label`), a bulleted list of success
 * criteria, and a one-line summary of the autonomy settings.
 *
 * @param {object|null} contract - Contract to render (normalized here first).
 * @param {string} [label] - Heading used for the goal line.
 * @returns {string} Prompt text, or '' when the contract normalizes to nothing.
 */
function buildGoalContractPrompt(contract, label = 'Persistent run goal') {
  const normalized = normalizeGoalContract(contract);
  if (!normalized) return '';

  const {
    goal,
    successCriteria,
    complexity,
    autonomyLevel,
    progressUpdatePolicy,
    completionConfidenceRequired,
  } = normalized;

  const sections = [];
  if (goal) sections.push(`${label}: ${goal}`);
  if (successCriteria.length > 0) {
    sections.push(`Persistent success criteria:\n- ${successCriteria.join('\n- ')}`);
  }

  // Only settings that actually carry a value appear in the summary line.
  const settingsSummary = [
    ['complexity', complexity],
    ['autonomy_level', autonomyLevel],
    ['progress_update_policy', progressUpdatePolicy],
    ['completion_confidence_required', completionConfidenceRequired],
  ]
    .filter(([, settingValue]) => Boolean(settingValue))
    .map(([settingName, settingValue]) => `${settingName}=${settingValue}`)
    .join('; ');
  if (settingsSummary) {
    sections.push(`Persistent autonomy contract: ${settingsSummary}`);
  }

  return sections.join('\n');
}
377
+
378
/**
 * Gather everything the completion logic needs to know about a run's goal.
 *
 * The resolved goal contract is the primary source. Individual fields fall
 * back to the raw `analysis` values and finally to fixed defaults
 * ('standard', 'normal', 'optional', 'medium'). At most six success criteria
 * are returned.
 *
 * @param {object|null|undefined} runMeta - Run metadata holding any stored contract.
 * @param {object|null} [analysis] - Task analysis, if available.
 * @param {object|null} [plan] - Execution plan, if available.
 * @returns {{goalContract: object|null, successCriteria: string[], effectiveGoal: string,
 *   effectiveComplexity: string, effectiveAutonomyLevel: string, effectiveProgressPolicy: string,
 *   effectiveCompletionConfidence: string, persistedGoalPrompt: string}}
 */
function resolveRunGoalContext(runMeta, analysis = null, plan = null) {
  const goalContract = buildResolvedGoalContract(runMeta, analysis, plan);

  let successCriteria;
  if (goalContract?.successCriteria?.length) {
    successCriteria = goalContract.successCriteria.slice(0, 6);
  } else if (Array.isArray(plan?.success_criteria)) {
    // Fallback: read the plan's own criteria when the contract supplied none.
    successCriteria = plan.success_criteria
      .map((item) => String(item || '').trim())
      .filter(Boolean)
      .slice(0, 6);
  } else {
    successCriteria = [];
  }

  return {
    goalContract,
    successCriteria,
    effectiveGoal: goalContract?.goal || analysis?.goal || '',
    effectiveComplexity: goalContract?.complexity || analysis?.complexity || 'standard',
    effectiveAutonomyLevel: goalContract?.autonomyLevel || analysis?.autonomy_level || 'normal',
    effectiveProgressPolicy: goalContract?.progressUpdatePolicy
      || analysis?.progress_update_policy
      || 'optional',
    effectiveCompletionConfidence: goalContract?.completionConfidenceRequired
      || analysis?.completion_confidence_required
      || 'medium',
    persistedGoalPrompt: buildGoalContractPrompt(goalContract),
  };
}
407
+
408
/**
 * Assemble the prompt that asks the model whether a run should continue,
 * complete, or report itself blocked.
 *
 * The prompt is a newline-joined list: fixed decision rules, one rule that
 * depends on the trigger source and on whether a message was already sent,
 * then run context (goal, autonomy settings, success criteria, iteration
 * counter, available tools, recent tool evidence, and the latest draft reply).
 * Empty entries are dropped.
 *
 * @param {object} params
 * @param {string} params.triggerSource - Origin of the run (e.g. 'messaging').
 * @param {boolean} [params.messagingSent] - Whether a reply was already delivered.
 * @param {object} params.goalContext - Shape returned by `resolveRunGoalContext`.
 * @param {boolean} [params.parallelWork] - Rendered as `parallel_work=true|false`.
 * @param {*} params.tools - Passed to `summarizeAvailableTools`.
 * @param {*} params.toolExecutions - Passed to `summarizeToolExecutions`.
 * @param {*} params.lastReply - Latest draft, cleaned via `normalizeOutgoingMessage`.
 * @param {number} params.iteration - Current loop iteration.
 * @param {number} params.maxIterations - Iteration budget.
 * @returns {string} The full decision prompt.
 */
function buildCompletionDecisionPrompt({
  triggerSource,
  messagingSent = false,
  goalContext,
  parallelWork = false,
  tools,
  toolExecutions,
  lastReply,
  iteration,
  maxIterations,
}) {
  const draftReply = normalizeOutgoingMessage(lastReply) || '';
  const requiredConfidence = goalContext.effectiveCompletionConfidence;

  // Exactly one stop rule applies, chosen by how the run was triggered.
  let stopRule;
  if (triggerSource === 'messaging' && messagingSent) {
    stopRule = '- A final reply was already delivered via send_message. Use "complete" unless concrete task work remains.';
  } else if (triggerSource === 'messaging') {
    stopRule = '- For messaging, do not stop on a partial status message. Continue unless the task is actually complete or externally blocked.';
  } else {
    stopRule = '- Do not stop just because you wrote a status update. Continue unless the task is actually complete or externally blocked.';
  }

  const ruleLines = [
    'Return JSON only.',
    'Decide whether this run should continue autonomously or stop now.',
    'Schema: {"status":"continue|complete|blocked","reason":"short concrete reason"}',
    'Rules:',
    '- Use "continue" whenever any safe next step remains in this same run.',
    '- Use "complete" only when the requested outcome is actually achieved and the latest draft is the finished user-facing answer.',
    '- Use "blocked" only when a specific external dependency, missing user input, or permission outside this run is required and the latest draft is the blocker reply.',
    '- If the latest draft asks the user for a missing required value, confirmation, or choice needed to proceed, use "blocked" so the run waits instead of repeating the same ask.',
    '- A progress note, next-step note, apology, plan, or promise to investigate is "continue", not "complete".',
    '- A single failed tool attempt is not blocked if another safe retry, verification step, or alternative path remains.',
    '- A tool-specific API error, timeout, rate limit, or missing result inside this run is usually "continue", not "blocked", if any other available tool could still make progress.',
    `- If completion_confidence_required is ${requiredConfidence} and the latest draft depends on unverified assumptions, use "continue" so the run can gather evidence, inspect state, or narrow the reply.`,
    stopRule,
  ];

  const goalLine = goalContext.effectiveGoal ? `Goal: ${goalContext.effectiveGoal}` : '';
  const autonomyLine = `Autonomy contract: complexity=${goalContext.effectiveComplexity}; autonomy_level=${goalContext.effectiveAutonomyLevel}; progress_update_policy=${goalContext.effectiveProgressPolicy}; parallel_work=${parallelWork === true}; completion_confidence_required=${goalContext.effectiveCompletionConfidence}.`;

  let criteriaBlock = '';
  if (goalContext.successCriteria.length > 0) {
    const numbered = goalContext.successCriteria
      .map((item, index) => `${index + 1}. ${item}`)
      .join('\n');
    criteriaBlock = `Success criteria:\n${numbered}`;
  }

  const contextLines = [
    goalLine,
    goalContext.persistedGoalPrompt,
    autonomyLine,
    criteriaBlock,
    `Current iteration: ${iteration} of ${maxIterations}.`,
    `Available tools in this run: ${summarizeAvailableTools(tools) || 'none'}`,
    `Recent tool evidence:\n${summarizeToolExecutions(toolExecutions, 8) || 'none'}`,
    `Latest draft reply:\n${draftReply || '(empty)'}`,
  ];

  return [...ruleLines, ...contextLines].filter(Boolean).join('\n');
}
454
+
455
/**
 * Validate a model-produced completion decision.
 *
 * Tolerates a missing or non-object `raw` (e.g. when the model reply could
 * not be parsed) instead of throwing on property access. A fallback status
 * that is not itself one of the allowed values is replaced with 'continue'.
 *
 * @param {object|null|undefined} raw - Parsed decision, ideally `{status, reason}`.
 * @param {string} [fallbackStatus] - Status used when `raw.status` is missing
 *   or not one of continue/complete/blocked.
 * @returns {{status: string, reason: string}} `status` is the lower-cased,
 *   validated status (or the fallback); `reason` is trimmed and capped at 400
 *   characters.
 */
function normalizeCompletionDecision(raw, fallbackStatus = 'continue') {
  const allowed = new Set(['continue', 'complete', 'blocked']);
  // A null/primitive payload is treated as an empty decision.
  const source = raw !== null && typeof raw === 'object' ? raw : {};
  const safeFallback = allowed.has(fallbackStatus) ? fallbackStatus : 'continue';
  const requestedStatus = String(source.status || '').trim().toLowerCase();
  return {
    status: allowed.has(requestedStatus) ? requestedStatus : safeFallback,
    reason: String(source.reason || '').trim().slice(0, 400),
  };
}
463
+
189
464
  function planningDepthForForceMode(forceMode) {
190
465
  return forceMode === 'plan_execute' ? 'deep' : 'light';
191
466
  }
@@ -409,6 +684,7 @@ class AgentEngine {
409
684
  this.taskRuntime = services.taskRuntime || null;
410
685
  this.memoryManager = services.memoryManager || null;
411
686
  this.voiceRuntimeManager = services.voiceRuntimeManager || null;
687
+ this.messagingDeliveryRetry = services.messagingDeliveryRetry || {};
412
688
  }
413
689
 
414
690
  async buildSystemPrompt(userId, context = {}) {
@@ -629,6 +905,18 @@ class AgentEngine {
629
905
  .run(JSON.stringify(next), runId);
630
906
  }
631
907
 
908
+ updateRunGoalContract(runId, patch = {}, options = {}) {
909
+ const runMeta = this.getRunMeta(runId);
910
+ if (!runMeta) return null;
911
+ runMeta.goalContract = mergeGoalContracts(runMeta.goalContract, patch);
912
+ if (options.persist !== false) {
913
+ this.persistRunMetadata(runId, {
914
+ goalContract: runMeta.goalContract,
915
+ });
916
+ }
917
+ return runMeta.goalContract;
918
+ }
919
+
632
920
  buildProgressLedgerSnapshot(runMeta) {
633
921
  if (!runMeta?.progressLedger) return null;
634
922
  return {
@@ -707,6 +995,7 @@ class AgentEngine {
707
995
  markRunFinalDelivery(runId, content = '', timestamp = isoNow()) {
708
996
  const runMeta = this.getRunMeta(runId);
709
997
  if (!runMeta) return null;
998
+ runMeta.messagingSent = true;
710
999
  runMeta.finalDeliverySent = true;
711
1000
  runMeta.lastSentMessage = String(content || '').trim() || runMeta.lastSentMessage || '';
712
1001
  const ledger = this.updateRunProgress(runId, {
@@ -818,13 +1107,14 @@ class AgentEngine {
818
1107
  if (!platform || !chatId || !this.messagingManager) {
819
1108
  return { sent: false, skipped: true, reason: 'Messaging context is not available.' };
820
1109
  }
821
- await this.messagingManager.sendMessage(userId, platform, chatId, normalizedContent, {
1110
+ const deliveryResult = await this.messagingManager.sendMessage(userId, platform, chatId, normalizedContent, {
822
1111
  agentId,
823
1112
  runId,
824
1113
  persistConversation: true,
825
1114
  metadata,
826
1115
  deliveryKind: 'interim',
827
1116
  });
1117
+ requireSuccessfulMessagingDelivery(deliveryResult, 'Interim messaging delivery');
828
1118
  } else if (triggerSource === 'voice_live') {
829
1119
  const voiceSessionId = runMeta.voiceSessionId || null;
830
1120
  const manager = this.voiceRuntimeManager || this.app?.locals?.voiceRuntimeManager || null;
@@ -918,42 +1208,72 @@ class AgentEngine {
918
1208
  phase = 'structured',
919
1209
  }) {
920
1210
  const startedAt = Date.now();
921
- const response = await withProviderRetry(
922
- () => provider.chat(
923
- sanitizeConversationMessages([
924
- ...messages,
925
- { role: 'system', content: prompt },
926
- ]),
927
- [],
928
- {
929
- model,
930
- maxTokens,
931
- reasoningEffort: reasoningEffort || this.getReasoningEffort(providerName, {}),
932
- }
933
- ),
934
- { label: `Engine ${model} (structured)` }
935
- );
936
- if (telemetry?.runId && telemetry?.userId) {
937
- recordModelUsage({
938
- runId: telemetry.runId,
939
- stepId: telemetry.stepId || null,
940
- userId: telemetry.userId,
941
- agentId: telemetry.agentId || null,
942
- provider: providerName,
943
- model,
944
- phase,
945
- usage: response.usage,
946
- latencyMs: Date.now() - startedAt,
1211
+ const structuredStep = `model:${phase}`;
1212
+ if (telemetry?.runId) {
1213
+ this.updateRunProgress(telemetry.runId, {
1214
+ currentPhase: 'model',
1215
+ currentStep: structuredStep,
1216
+ currentTool: null,
1217
+ currentStepStartedAt: isoNow(),
947
1218
  });
948
1219
  }
949
1220
 
950
- const parsed = parseJsonObject(response.content || '');
951
- const normalizedUsage = normalizeUsage(response.usage);
952
- return {
953
- value: normalize(parsed || {}, fallback),
954
- raw: response.content || '',
955
- usage: normalizedUsage?.totalTokens || 0,
956
- };
1221
+ let completed = false;
1222
+ try {
1223
+ const response = await withProviderRetry(
1224
+ () => withModelCallTimeout(
1225
+ provider.chat(
1226
+ sanitizeConversationMessages([
1227
+ ...messages,
1228
+ { role: 'system', content: prompt },
1229
+ ]),
1230
+ [],
1231
+ {
1232
+ model,
1233
+ maxTokens,
1234
+ reasoningEffort: reasoningEffort || this.getReasoningEffort(providerName, {}),
1235
+ }
1236
+ ),
1237
+ telemetry || {},
1238
+ `${phase} model call`,
1239
+ ),
1240
+ { label: `Engine ${model} (structured)` }
1241
+ );
1242
+ completed = true;
1243
+ if (telemetry?.runId && telemetry?.userId) {
1244
+ recordModelUsage({
1245
+ runId: telemetry.runId,
1246
+ stepId: telemetry.stepId || null,
1247
+ userId: telemetry.userId,
1248
+ agentId: telemetry.agentId || null,
1249
+ provider: providerName,
1250
+ model,
1251
+ phase,
1252
+ usage: response.usage,
1253
+ latencyMs: Date.now() - startedAt,
1254
+ });
1255
+ }
1256
+
1257
+ const parsed = parseJsonObject(response.content || '');
1258
+ const normalizedUsage = normalizeUsage(response.usage);
1259
+ return {
1260
+ value: normalize(parsed || {}, fallback),
1261
+ raw: response.content || '',
1262
+ usage: normalizedUsage?.totalTokens || 0,
1263
+ };
1264
+ } finally {
1265
+ const runMeta = telemetry?.runId ? this.getRunMeta(telemetry.runId) : null;
1266
+ if (runMeta?.progressLedger?.currentStep === structuredStep) {
1267
+ this.updateRunProgress(telemetry.runId, {
1268
+ currentPhase: 'idle',
1269
+ currentStep: null,
1270
+ currentTool: null,
1271
+ currentStepStartedAt: null,
1272
+ }, {
1273
+ verified: completed,
1274
+ });
1275
+ }
1276
+ }
957
1277
  }
958
1278
 
959
1279
  async requestModelResponse({
@@ -980,8 +1300,16 @@ class AgentEngine {
980
1300
  if (options.stream !== false) {
981
1301
  let emittedContent = false;
982
1302
  const stream = provider.stream(requestMessages, tools, callOptions);
1303
+ const iterator = stream[Symbol.asyncIterator]();
983
1304
  try {
984
- for await (const chunk of stream) {
1305
+ while (true) {
1306
+ const next = await withModelCallTimeout(
1307
+ iterator.next(),
1308
+ options,
1309
+ `Model stream iteration ${iteration}`,
1310
+ );
1311
+ if (next.done) break;
1312
+ const chunk = next.value;
985
1313
  if (chunk.type === 'content') {
986
1314
  emittedContent = true;
987
1315
  streamContent += chunk.content;
@@ -1005,13 +1333,18 @@ class AgentEngine {
1005
1333
  }
1006
1334
  }
1007
1335
  } catch (err) {
1336
+ Promise.resolve(iterator.return?.()).catch(() => {});
1008
1337
  // Once tokens have streamed to the client a retry would duplicate
1009
1338
  // output, so only the pre-stream window is safe to replay.
1010
1339
  if (emittedContent) err.__providerRetryUnsafe = true;
1011
1340
  throw err;
1012
1341
  }
1013
1342
  } else {
1014
- response = await provider.chat(requestMessages, tools, callOptions);
1343
+ response = await withModelCallTimeout(
1344
+ provider.chat(requestMessages, tools, callOptions),
1345
+ options,
1346
+ `Model iteration ${iteration}`,
1347
+ );
1015
1348
  }
1016
1349
 
1017
1350
  return { response, streamContent };
@@ -1152,53 +1485,27 @@ class AgentEngine {
1152
1485
  options,
1153
1486
  fallbackStatus,
1154
1487
  }) {
1155
- const successCriteria = Array.isArray(plan?.success_criteria)
1156
- ? plan.success_criteria
1157
- .map((item) => String(item || '').trim())
1158
- .filter(Boolean)
1159
- .slice(0, 6)
1160
- : [];
1488
+ const runMeta = options?.runId ? this.getRunMeta(options.runId) : null;
1489
+ const goalContext = resolveRunGoalContext(runMeta, analysis, plan);
1161
1490
 
1162
1491
  const response = await this.requestStructuredJson({
1163
1492
  provider,
1164
1493
  providerName,
1165
1494
  model,
1166
1495
  messages,
1167
- prompt: [
1168
- 'Return JSON only.',
1169
- 'Decide whether this run should continue autonomously or stop now.',
1170
- 'Schema: {"status":"continue|complete|blocked","reason":"short concrete reason"}',
1171
- 'Rules:',
1172
- '- Use "continue" whenever any safe next step remains in this same run.',
1173
- '- Use "complete" only when the requested outcome is actually achieved or a truthful final user reply is already ready now.',
1174
- '- Use "blocked" only when a specific external dependency outside this run is required.',
1175
- '- If the latest draft asks the user for a missing required value, confirmation, or choice needed to proceed, use "blocked" so the run waits instead of repeating the same ask.',
1176
- '- A progress update is not complete.',
1177
- '- A single failed tool attempt is not blocked if another safe retry, verification step, or alternative path remains.',
1178
- '- A tool-specific API error, timeout, rate limit, or missing result inside this run is usually "continue", not "blocked", if any other available tool could still make progress.',
1179
- '- If completion_confidence_required is high and the latest draft depends on unverified assumptions, use "continue" so the run can gather evidence, inspect state, or narrow the reply.',
1180
- triggerSource === 'messaging' && messagingSent
1181
- ? '- A reply was already delivered to the user via send_message. Use "complete" unless there is concrete remaining work (e.g., a tool call you still need to make) before the task is truly done. Do not send follow-up elaborations or re-introductions.'
1182
- : triggerSource === 'messaging'
1183
- ? '- For messaging, do not stop on a partial status message. Continue unless the task is actually complete or externally blocked. If you already asked for missing user input, choose "blocked" and wait.'
1184
- : '- Do not stop just because you wrote a status update. Continue unless the task is actually complete or externally blocked.',
1185
- analysis?.goal ? `Goal: ${analysis.goal}` : '',
1186
- `Autonomy contract: complexity=${analysis?.complexity || 'standard'}; autonomy_level=${analysis?.autonomy_level || 'normal'}; progress_update_policy=${analysis?.progress_update_policy || 'optional'}; parallel_work=${analysis?.parallel_work === true}; completion_confidence_required=${analysis?.completion_confidence_required || 'medium'}.`,
1187
- successCriteria.length > 0 ? `Success criteria:\n${successCriteria.map((item, index) => `${index + 1}. ${item}`).join('\n')}` : '',
1188
- `Current iteration: ${iteration} of ${maxIterations}.`,
1189
- `Available tools in this run: ${summarizeAvailableTools(tools) || 'none'}`,
1190
- `Recent tool evidence:\n${summarizeToolExecutions(toolExecutions, 8) || 'none'}`,
1191
- `Latest draft reply:\n${normalizeOutgoingMessage(lastReply) || '(empty)'}`,
1192
- ].filter(Boolean).join('\n'),
1496
+ prompt: buildCompletionDecisionPrompt({
1497
+ triggerSource,
1498
+ messagingSent,
1499
+ goalContext,
1500
+ parallelWork: analysis?.parallel_work === true,
1501
+ tools,
1502
+ toolExecutions,
1503
+ lastReply,
1504
+ iteration,
1505
+ maxIterations,
1506
+ }),
1193
1507
  maxTokens: 320,
1194
- normalize: (raw) => {
1195
- const allowed = new Set(['continue', 'complete', 'blocked']);
1196
- const requestedStatus = String(raw.status || '').trim().toLowerCase();
1197
- return {
1198
- status: allowed.has(requestedStatus) ? requestedStatus : fallbackStatus,
1199
- reason: String(raw.reason || '').trim().slice(0, 400),
1200
- };
1201
- },
1508
+ normalize: (raw) => normalizeCompletionDecision(raw, fallbackStatus),
1202
1509
  fallback: { status: fallbackStatus },
1203
1510
  reasoningEffort: this.getReasoningEffort(providerName, options),
1204
1511
  telemetry: options,
@@ -1211,6 +1518,67 @@ class AgentEngine {
1211
1518
  };
1212
1519
  }
1213
1520
 
1521
+ async evaluateTaskCompleteSignal({
1522
+ provider,
1523
+ providerName,
1524
+ model,
1525
+ messages,
1526
+ tools,
1527
+ analysis,
1528
+ plan,
1529
+ toolExecutions,
1530
+ finalMessage,
1531
+ confidence,
1532
+ triggerSource,
1533
+ messagingSent,
1534
+ iteration,
1535
+ maxIterations,
1536
+ options,
1537
+ }) {
1538
+ const runMeta = options?.runId ? this.getRunMeta(options.runId) : null;
1539
+ const requiredConfidence = resolveRunGoalContext(runMeta, analysis, plan)
1540
+ .effectiveCompletionConfidence;
1541
+ const confidenceDecision = shouldAcceptTaskComplete({
1542
+ confidence,
1543
+ requiredConfidence,
1544
+ iteration,
1545
+ maxIterations,
1546
+ });
1547
+ if (!confidenceDecision.accept) {
1548
+ return {
1549
+ decision: {
1550
+ status: 'continue',
1551
+ reason: confidenceDecision.reason,
1552
+ },
1553
+ requiredConfidence,
1554
+ usage: 0,
1555
+ };
1556
+ }
1557
+
1558
+ const loopState = await this.decideLoopState({
1559
+ provider,
1560
+ providerName,
1561
+ model,
1562
+ messages,
1563
+ tools,
1564
+ analysis,
1565
+ plan,
1566
+ toolExecutions,
1567
+ lastReply: finalMessage,
1568
+ triggerSource,
1569
+ messagingSent,
1570
+ iteration,
1571
+ maxIterations,
1572
+ options,
1573
+ fallbackStatus: 'continue',
1574
+ });
1575
+ return {
1576
+ decision: loopState.decision,
1577
+ requiredConfidence,
1578
+ usage: loopState.usage || 0,
1579
+ };
1580
+ }
1581
+
1214
1582
  async verifyFinalResponse({
1215
1583
  provider,
1216
1584
  providerName,
@@ -1321,11 +1689,15 @@ class AgentEngine {
1321
1689
  }
1322
1690
  ];
1323
1691
 
1324
- const response = await provider.chat(promptMessages, [], {
1325
- model,
1326
- maxTokens: 800,
1327
- reasoningEffort: this.getReasoningEffort(providerName, options),
1328
- });
1692
+ const response = await withModelCallTimeout(
1693
+ provider.chat(promptMessages, [], {
1694
+ model,
1695
+ maxTokens: 800,
1696
+ reasoningEffort: this.getReasoningEffort(providerName, options),
1697
+ }),
1698
+ options,
1699
+ 'Conversation state refresh',
1700
+ );
1329
1701
  const parsed = parseJsonObject(response.content || '') || {};
1330
1702
  const nextState = {
1331
1703
  summary: String(parsed.summary || existingState?.summary || '').trim(),
@@ -1382,19 +1754,23 @@ class AgentEngine {
1382
1754
  `[Run ${shortenRunId(runId)}] blank_reply_recovery attempt=${attempt} model=${model}`
1383
1755
  );
1384
1756
  try {
1385
- const response = await provider.chat(
1386
- sanitizeConversationMessages([
1387
- ...messages,
1757
+ const response = await withModelCallTimeout(
1758
+ provider.chat(
1759
+ sanitizeConversationMessages([
1760
+ ...messages,
1761
+ {
1762
+ role: 'system',
1763
+ content: buildBlankMessagingReplyPrompt(attempt, options?.source || null)
1764
+ }
1765
+ ]),
1766
+ [],
1388
1767
  {
1389
- role: 'system',
1390
- content: buildBlankMessagingReplyPrompt(attempt, options?.source || null)
1768
+ model,
1769
+ reasoningEffort: this.getReasoningEffort(providerName, options)
1391
1770
  }
1392
- ]),
1393
- [],
1394
- {
1395
- model,
1396
- reasoningEffort: this.getReasoningEffort(providerName, options)
1397
- }
1771
+ ),
1772
+ options,
1773
+ `Blank messaging reply recovery ${attempt}`,
1398
1774
  );
1399
1775
  totalTokens += response.usage?.totalTokens || 0;
1400
1776
  recoveredContent = sanitizeModelOutput(response.content || '', { model });
@@ -1827,21 +2203,27 @@ class AgentEngine {
1827
2203
 
1828
2204
  buildMessagingHeartbeatText(runMeta, options = {}) {
1829
2205
  const stalled = options.stalled === true;
1830
- const fallbackStartedAtMs = Number.isFinite(runMeta?.startedAt) ? runMeta.startedAt : Date.now();
1831
- const startedAtMs = timestampMs(
2206
+ const now = Date.now();
2207
+ const runStartedAtMs = Number.isFinite(runMeta?.startedAt) ? runMeta.startedAt : now;
2208
+ const stepStartedAtMs = timestampMs(
1832
2209
  runMeta?.progressLedger?.currentStepStartedAt,
1833
- fallbackStartedAtMs,
2210
+ 0,
1834
2211
  );
1835
- const elapsed = formatElapsedDuration(Date.now() - startedAtMs);
2212
+ const runElapsed = formatElapsedDuration(now - runStartedAtMs);
2213
+ const stepElapsed = formatElapsedDuration(now - (stepStartedAtMs || runStartedAtMs));
2214
+ const unverifiedElapsed = formatElapsedDuration(now - timestampMs(
2215
+ runMeta?.progressLedger?.lastVerifiedProgressAt,
2216
+ runStartedAtMs,
2217
+ ));
1836
2218
  const currentTool = String(runMeta?.progressLedger?.currentTool || '').trim();
1837
2219
  if (currentTool) {
1838
2220
  return stalled
1839
- ? `Still working on ${currentTool}. This run has not made verified progress for ${elapsed}.`
1840
- : `Still working on ${currentTool}. ${elapsed} elapsed so far.`;
2221
+ ? `Still working on ${currentTool}. Run active ${runElapsed}; no verified progress for ${unverifiedElapsed}.`
2222
+ : `Still working on ${currentTool}. Run active ${runElapsed}; current step ${stepElapsed} so far.`;
1841
2223
  }
1842
2224
  return stalled
1843
- ? `Still working on this. This run has not made verified progress for ${elapsed}.`
1844
- : `Still working on this. ${elapsed} elapsed so far.`;
2225
+ ? `Still working on this. Run active ${runElapsed}; no verified progress for ${unverifiedElapsed}.`
2226
+ : `Still working on this. Run active ${runElapsed}.`;
1845
2227
  }
1846
2228
 
1847
2229
  async sendRuntimeMessagingHeartbeat(runId, options = {}) {
@@ -1856,7 +2238,7 @@ class AgentEngine {
1856
2238
 
1857
2239
  const createdAt = isoNow();
1858
2240
  const content = this.buildMessagingHeartbeatText(runMeta, options);
1859
- await this.messagingManager.sendMessage(
2241
+ const deliveryResult = await this.messagingManager.sendMessage(
1860
2242
  runMeta.userId,
1861
2243
  runMeta.messagingContext.platform,
1862
2244
  runMeta.messagingContext.chatId,
@@ -1874,6 +2256,7 @@ class AgentEngine {
1874
2256
  deliveryKind: 'interim',
1875
2257
  },
1876
2258
  );
2259
+ requireSuccessfulMessagingDelivery(deliveryResult, 'Messaging heartbeat delivery');
1877
2260
 
1878
2261
  runMeta.lastInterimMessage = content;
1879
2262
  if (!Array.isArray(runMeta.interimMessages)) {
@@ -1950,9 +2333,31 @@ class AgentEngine {
1950
2333
  await this.messagingManager.sendTyping(userId, platform, chatId, true, { agentId }).catch(() => {});
1951
2334
  await new Promise((resolve) => setTimeout(resolve, delay));
1952
2335
  }
1953
- await this.messagingManager.sendMessage(userId, platform, chatId, chunks[i], { runId, agentId }).catch((err) =>
1954
- console.error('[Engine] Auto-reply fallback failed:', err.message)
1955
- );
2336
+ try {
2337
+ await withProviderRetry(async () => {
2338
+ const deliveryResult = await this.messagingManager.sendMessage(
2339
+ userId,
2340
+ platform,
2341
+ chatId,
2342
+ chunks[i],
2343
+ { runId, agentId },
2344
+ );
2345
+ return requireSuccessfulMessagingDelivery(deliveryResult, 'Final messaging delivery');
2346
+ }, {
2347
+ ...this.messagingDeliveryRetry,
2348
+ label: `MessagingDelivery ${platform}`,
2349
+ isRetryable: (error) => (
2350
+ error?.retryable !== false
2351
+ && (
2352
+ error?.code === 'MESSAGING_DELIVERY_FAILED'
2353
+ || isTransientError(error)
2354
+ )
2355
+ ),
2356
+ });
2357
+ } catch (error) {
2358
+ error.disableAutonomousRetry = true;
2359
+ throw error;
2360
+ }
1956
2361
  }
1957
2362
 
1958
2363
  runMeta.lastSentMessage = chunks[chunks.length - 1] || cleanedContent;
@@ -2003,7 +2408,10 @@ class AgentEngine {
2003
2408
  return { sent: false, skipped: true };
2004
2409
  }
2005
2410
 
2006
- if (ledger.currentPhase === 'tool' && ledger.currentStepStartedAt) {
2411
+ if (
2412
+ (ledger.currentPhase === 'tool' || ledger.currentPhase === 'model')
2413
+ && ledger.currentStepStartedAt
2414
+ ) {
2007
2415
  return this.sendRuntimeMessagingHeartbeat(runId, { stalled });
2008
2416
  }
2009
2417
 
@@ -2317,6 +2725,12 @@ class AgentEngine {
2317
2725
  const carriedExplicitMessageSent = retryMessagingState.explicitMessageSent === true;
2318
2726
  const carriedInterimHistory = cloneInterimHistory(retryMessagingState.interimHistory);
2319
2727
  const carriedLastInterimMessage = carriedInterimHistory[carriedInterimHistory.length - 1]?.content || '';
2728
+ const carriedGoalContract = mergeGoalContracts(
2729
+ normalizeGoalContract({
2730
+ goal: clampRunContext(userMessage, 1200),
2731
+ }),
2732
+ retryMessagingState.goalContract,
2733
+ );
2320
2734
  const startedAtIso = isoNow();
2321
2735
  const progressLedger = buildInitialProgressLedger({
2322
2736
  startedAt: startedAtIso,
@@ -2358,10 +2772,12 @@ class AgentEngine {
2358
2772
  chatId: options.chatId || null,
2359
2773
  }
2360
2774
  : null,
2775
+ goalContract: carriedGoalContract,
2361
2776
  progressLedger,
2362
2777
  });
2363
2778
  this.persistRunMetadata(runId, {
2364
2779
  progressLedger,
2780
+ goalContract: carriedGoalContract,
2365
2781
  });
2366
2782
  this.startMessagingProgressSupervisor(runId);
2367
2783
  this.emit(userId, 'run:start', { runId, agentId, title: runTitle, model, triggerType, triggerSource });
@@ -2459,6 +2875,12 @@ class AgentEngine {
2459
2875
  if (threadStateMessage) {
2460
2876
  messages.push({ role: 'system', content: threadStateMessage });
2461
2877
  }
2878
+ if (carriedGoalContract) {
2879
+ messages.push({
2880
+ role: 'system',
2881
+ content: buildGoalContractPrompt(carriedGoalContract, 'Persisted run goal'),
2882
+ });
2883
+ }
2462
2884
  this.recordRunEvent(userId, runId, 'memory_injected', {
2463
2885
  hasRecallContext: Boolean(recallMsg),
2464
2886
  hasThreadState: Boolean(threadStateMessage),
@@ -2537,6 +2959,7 @@ class AgentEngine {
2537
2959
  taskAnalysis: analysis,
2538
2960
  capabilityHealth,
2539
2961
  });
2962
+ this.updateRunGoalContract(runId, goalContractFromAnalysis(analysis));
2540
2963
  this.emit(userId, 'run:analysis', {
2541
2964
  runId,
2542
2965
  ...analysis,
@@ -2655,6 +3078,9 @@ class AgentEngine {
2655
3078
  plan: deliverablePlan,
2656
3079
  },
2657
3080
  });
3081
+ this.updateRunGoalContract(runId, {
3082
+ goal: deliverableWorkflow.selection.goal,
3083
+ });
2658
3084
  this.recordRunEvent(userId, runId, 'deliverable_workflow_selected', {
2659
3085
  type: deliverableWorkflow.selection.type,
2660
3086
  confidence: deliverableWorkflow.selection.confidence,
@@ -2691,6 +3117,7 @@ class AgentEngine {
2691
3117
  JSON.stringify(plan).slice(0, 20000)
2692
3118
  );
2693
3119
  this.persistRunMetadata(runId, { executionPlan: plan });
3120
+ this.updateRunGoalContract(runId, goalContractFromPlan(plan));
2694
3121
  this.emit(userId, 'run:plan', {
2695
3122
  runId,
2696
3123
  steps: plan.steps,
@@ -2699,6 +3126,13 @@ class AgentEngine {
2699
3126
  });
2700
3127
  }
2701
3128
 
3129
+ const runGoalContract = this.getRunMeta(runId)?.goalContract || null;
3130
+ if (runGoalContract) {
3131
+ messages.push({
3132
+ role: 'system',
3133
+ content: buildGoalContractPrompt(runGoalContract, 'Run goal contract'),
3134
+ });
3135
+ }
2702
3136
  messages.push({
2703
3137
  role: 'system',
2704
3138
  content: buildExecutionGuidance({
@@ -2731,6 +3165,37 @@ class AgentEngine {
2731
3165
  db.prepare('INSERT INTO conversation_messages (conversation_id, role, content, tokens) VALUES (?, ?, ?, ?)')
2732
3166
  .run(conversationId, 'assistant', lastContent, analysisUsage);
2733
3167
  }
3168
+ const directAnswerDecision = await runWithModelFallback(
3169
+ 'direct answer completion decision',
3170
+ () => this.decideLoopState({
3171
+ provider,
3172
+ providerName,
3173
+ model,
3174
+ messages,
3175
+ tools,
3176
+ analysis,
3177
+ plan,
3178
+ toolExecutions,
3179
+ lastReply: lastContent,
3180
+ triggerSource,
3181
+ messagingSent: false,
3182
+ iteration,
3183
+ maxIterations,
3184
+ options: { ...options, runId, userId, agentId },
3185
+ fallbackStatus: 'continue',
3186
+ }),
3187
+ );
3188
+ totalTokens += directAnswerDecision.usage || 0;
3189
+ if (directAnswerDecision.decision.status === 'continue') {
3190
+ messages.push({
3191
+ role: 'system',
3192
+ content: directAnswerDecision.decision.reason
3193
+ ? `Continue working: ${directAnswerDecision.decision.reason}.`
3194
+ : 'The initial draft is not a finished answer. Continue working autonomously.',
3195
+ });
3196
+ lastContent = '';
3197
+ directAnswerEligible = false;
3198
+ }
2734
3199
  }
2735
3200
 
2736
3201
  // BUG FIX: consecutiveToolFailures was previously declared INSIDE the
@@ -2756,14 +3221,16 @@ class AgentEngine {
2756
3221
  currentStep: `model:${iteration}`,
2757
3222
  currentTool: null,
2758
3223
  currentStepStartedAt: isoNow(),
2759
- }, {
2760
- verified: true,
2761
3224
  });
2762
3225
 
2763
3226
  let metrics = this.estimatePromptMetrics(messages, tools);
2764
3227
  const contextWindow = provider.getContextWindow(model);
2765
3228
  if (metrics.totalEstimatedTokens > contextWindow * loopPolicy.compactionThreshold) {
2766
- messages = await compact(messages, provider, model, contextWindow);
3229
+ messages = await withModelCallTimeout(
3230
+ compact(messages, provider, model, contextWindow),
3231
+ options,
3232
+ `Context compaction before iteration ${iteration}`,
3233
+ );
2767
3234
  messages = sanitizeConversationMessages(messages);
2768
3235
  this.emit(userId, 'run:compaction', { runId, iteration });
2769
3236
  metrics = this.estimatePromptMetrics(messages, tools);
@@ -2901,6 +3368,9 @@ class AgentEngine {
2901
3368
  toolCallCount: response.toolCalls?.length || 0,
2902
3369
  contentPreview: String(lastContent || streamContent || '').slice(0, 240),
2903
3370
  }, { agentId });
3371
+ this.updateRunProgress(runId, {}, {
3372
+ verified: true,
3373
+ });
2904
3374
 
2905
3375
  const assistantMessage = { role: 'assistant', content: lastContent };
2906
3376
  if (response.toolCalls?.length) assistantMessage.tool_calls = response.toolCalls;
@@ -2924,8 +3394,6 @@ class AgentEngine {
2924
3394
  currentStep: null,
2925
3395
  currentTool: null,
2926
3396
  currentStepStartedAt: null,
2927
- }, {
2928
- verified: true,
2929
3397
  });
2930
3398
  const systemSteeringAfterResponse = this.applyQueuedSystemSteering(runId, messages);
2931
3399
  messages = systemSteeringAfterResponse.messages;
@@ -2954,51 +3422,54 @@ class AgentEngine {
2954
3422
  })) {
2955
3423
  break;
2956
3424
  }
2957
- if (iteration < maxIterations) {
2958
- const proactiveRunNeedsDecision = (
2959
- (triggerSource === 'schedule' || triggerSource === 'tasks')
2960
- && this.activeRuns.get(runId)?.noResponse !== true
2961
- && options.deliveryState?.noResponse !== true
2962
- );
2963
- const visibleInterimActivity = hasVisibleInterimActivity(this.activeRuns.get(runId));
2964
- const fallbackStatus = (
2965
- proactiveRunNeedsDecision
2966
- || toolExecutions.length > 0
2967
- || failedStepCount > 0
2968
- || messagingSent
2969
- || visibleInterimActivity
2970
- ) ? 'continue' : 'complete';
2971
- const loopState = await runWithModelFallback('loop decision', () => this.decideLoopState({
2972
- provider,
2973
- providerName,
2974
- model,
2975
- messages,
2976
- tools,
2977
- analysis,
2978
- plan,
2979
- toolExecutions,
2980
- lastReply: lastContent,
2981
- triggerSource,
2982
- messagingSent,
2983
- iteration,
2984
- maxIterations,
2985
- options: { ...options, runId, userId, agentId },
2986
- fallbackStatus,
2987
- }));
2988
- totalTokens += loopState.usage || 0;
2989
- if (loopState.decision.status === 'continue') {
2990
- messages.push({
2991
- role: 'system',
2992
- content: [
2993
- loopState.decision.reason ? `Continue working: ${loopState.decision.reason}.` : 'Continue working autonomously.',
2994
- messagingSent
2995
- ? 'You already sent a user-facing message in this run. Keep working silently unless you have a materially new finished result or a real external blocker.'
2996
- : 'Use send_interim_update sparingly if a short real update or question would help. Otherwise keep working until you have the result or a real blocker.',
2997
- ].join(' ')
2998
- });
2999
- lastContent = '';
3000
- continue;
3425
+ const proactiveRunNeedsDecision = (
3426
+ (triggerSource === 'schedule' || triggerSource === 'tasks')
3427
+ && this.activeRuns.get(runId)?.noResponse !== true
3428
+ && options.deliveryState?.noResponse !== true
3429
+ );
3430
+ const visibleInterimActivity = hasVisibleInterimActivity(this.activeRuns.get(runId));
3431
+ const fallbackStatus = (
3432
+ proactiveRunNeedsDecision
3433
+ || toolExecutions.length > 0
3434
+ || failedStepCount > 0
3435
+ || messagingSent
3436
+ || visibleInterimActivity
3437
+ ) ? 'continue' : 'complete';
3438
+ const loopState = await runWithModelFallback('loop decision', () => this.decideLoopState({
3439
+ provider,
3440
+ providerName,
3441
+ model,
3442
+ messages,
3443
+ tools,
3444
+ analysis,
3445
+ plan,
3446
+ toolExecutions,
3447
+ lastReply: lastContent,
3448
+ triggerSource,
3449
+ messagingSent,
3450
+ iteration,
3451
+ maxIterations,
3452
+ options: { ...options, runId, userId, agentId },
3453
+ fallbackStatus,
3454
+ }));
3455
+ totalTokens += loopState.usage || 0;
3456
+ if (loopState.decision.status === 'continue') {
3457
+ if (iteration >= maxIterations) {
3458
+ throw new Error(
3459
+ `Completion judge found unfinished work at the iteration limit after ${maxIterations} iterations.`,
3460
+ );
3001
3461
  }
3462
+ messages.push({
3463
+ role: 'system',
3464
+ content: [
3465
+ loopState.decision.reason ? `Continue working: ${loopState.decision.reason}.` : 'Continue working autonomously.',
3466
+ messagingSent
3467
+ ? 'You already sent a user-facing message in this run. Keep working silently unless you have a materially new finished result or a real external blocker.'
3468
+ : 'Use send_interim_update sparingly if a short real update or question would help. Otherwise keep working until you have the result or a real blocker.',
3469
+ ].join(' ')
3470
+ });
3471
+ lastContent = '';
3472
+ continue;
3002
3473
  }
3003
3474
  break;
3004
3475
  }
@@ -3008,6 +3479,15 @@ class AgentEngine {
3008
3479
  && response.toolCalls.every((toolCall) => this.isReadOnlyToolCall(toolCall))
3009
3480
  );
3010
3481
  if (canRunParallelBatch) {
3482
+ const parallelToolNames = response.toolCalls
3483
+ .map((toolCall) => toolCall.function?.name)
3484
+ .filter(Boolean);
3485
+ this.updateRunProgress(runId, {
3486
+ currentPhase: 'tool',
3487
+ currentStep: `parallel:${iteration}`,
3488
+ currentTool: parallelToolNames.join(', ') || 'parallel tools',
3489
+ currentStepStartedAt: isoNow(),
3490
+ });
3011
3491
  const batch = await this.executeReadOnlyBatch(response.toolCalls, {
3012
3492
  userId,
3013
3493
  runId,
@@ -3059,6 +3539,14 @@ class AgentEngine {
3059
3539
  deliverableArtifacts,
3060
3540
  compactionMetrics: compactionMetrics.slice(-20),
3061
3541
  });
3542
+ this.updateRunProgress(runId, {
3543
+ currentPhase: 'idle',
3544
+ currentStep: null,
3545
+ currentTool: null,
3546
+ currentStepStartedAt: null,
3547
+ }, {
3548
+ verified: true,
3549
+ });
3062
3550
  continue;
3063
3551
  }
3064
3552
 
@@ -3081,23 +3569,51 @@ class AgentEngine {
3081
3569
  if (toolName === 'task_complete') {
3082
3570
  const finalMessage = String(toolArgs.message || '').trim();
3083
3571
  const confidence = normalizeCompletionConfidence(toolArgs.confidence || 'medium');
3084
- const completionDecision = shouldAcceptTaskComplete({
3085
- confidence,
3086
- requiredConfidence: analysis?.completion_confidence_required || 'medium',
3087
- iteration,
3088
- maxIterations,
3089
- });
3572
+ const messagingSent = this.getRunMeta(runId)?.messagingSent === true;
3573
+ const completionResult = await runWithModelFallback(
3574
+ 'task completion decision',
3575
+ () => this.evaluateTaskCompleteSignal({
3576
+ provider,
3577
+ providerName,
3578
+ model,
3579
+ messages,
3580
+ tools,
3581
+ analysis,
3582
+ plan,
3583
+ toolExecutions,
3584
+ finalMessage,
3585
+ confidence,
3586
+ triggerSource,
3587
+ messagingSent,
3588
+ iteration,
3589
+ maxIterations,
3590
+ options: { ...options, runId, userId, agentId },
3591
+ }),
3592
+ );
3593
+ totalTokens += completionResult.usage || 0;
3594
+ const completionDecision = completionResult.decision || {
3595
+ status: 'continue',
3596
+ reason: 'The completion signal could not be verified.',
3597
+ };
3598
+ const accepted = completionDecision.status !== 'continue';
3090
3599
  this.recordRunEvent(userId, runId, 'task_complete_signaled', {
3091
3600
  confidence,
3092
- requiredConfidence: analysis?.completion_confidence_required || 'medium',
3093
- accepted: completionDecision.accept,
3601
+ requiredConfidence: completionResult.requiredConfidence,
3602
+ accepted,
3603
+ judgeStatus: completionDecision.status,
3604
+ judgeReason: completionDecision.reason || '',
3094
3605
  iteration,
3095
3606
  messageLength: finalMessage.length,
3096
3607
  }, { agentId });
3097
3608
  console.info(
3098
- `[Run ${shortenRunId(runId)}] task_complete signaled at iteration=${iteration} confidence=${confidence} accepted=${completionDecision.accept}`
3609
+ `[Run ${shortenRunId(runId)}] task_complete signaled at iteration=${iteration} confidence=${confidence} judge=${completionDecision.status} accepted=${accepted}`
3099
3610
  );
3100
- if (!completionDecision.accept) {
3611
+ if (!accepted) {
3612
+ if (iteration >= maxIterations) {
3613
+ throw new Error(
3614
+ `Completion judge rejected task_complete at the iteration limit after ${maxIterations} iterations.`,
3615
+ );
3616
+ }
3101
3617
  messages.push({
3102
3618
  role: 'tool',
3103
3619
  name: toolName,
@@ -3105,13 +3621,14 @@ class AgentEngine {
3105
3621
  content: JSON.stringify({
3106
3622
  status: 'continue',
3107
3623
  reason: completionDecision.reason,
3108
- required_confidence: analysis?.completion_confidence_required || 'medium',
3624
+ required_confidence: completionResult.requiredConfidence,
3109
3625
  }),
3110
3626
  });
3111
3627
  messages.push({
3112
3628
  role: 'system',
3113
3629
  content: `${completionDecision.reason} Do not ask the user to decide the next step unless external input is truly required.`
3114
3630
  });
3631
+ lastContent = '';
3115
3632
  continue;
3116
3633
  }
3117
3634
  if (completionDecision.reason) {
@@ -3183,7 +3700,6 @@ class AgentEngine {
3183
3700
  currentTool: toolName,
3184
3701
  currentStepStartedAt: isoNow(),
3185
3702
  }, {
3186
- verified: true,
3187
3703
  stepId,
3188
3704
  });
3189
3705
 
@@ -3610,20 +4126,6 @@ class AgentEngine {
3610
4126
  refreshConversationSummary(conversationId, provider, model, historyWindow).catch((err) => {
3611
4127
  console.error('[AI] Conversation summary refresh failed:', err.message);
3612
4128
  });
3613
- await this.refreshConversationState({
3614
- conversationId,
3615
- runId,
3616
- provider,
3617
- providerName,
3618
- model,
3619
- finalReply: finalResponseText,
3620
- analysis,
3621
- verification,
3622
- historyWindow,
3623
- options: { ...options, userId, agentId },
3624
- }).catch((err) => {
3625
- console.error('[AI] Conversation working state refresh failed:', err.message);
3626
- });
3627
4129
  }
3628
4130
  }
3629
4131
 
@@ -3657,6 +4159,23 @@ class AgentEngine {
3657
4159
  }
3658
4160
  }
3659
4161
 
4162
+ if (conversationId && options.skipConversationMaintenance !== true) {
4163
+ await this.refreshConversationState({
4164
+ conversationId,
4165
+ runId,
4166
+ provider,
4167
+ providerName,
4168
+ model,
4169
+ finalReply: finalResponseText,
4170
+ analysis,
4171
+ verification,
4172
+ historyWindow,
4173
+ options: { ...options, userId, agentId },
4174
+ }).catch((err) => {
4175
+ console.error('[AI] Conversation working state refresh failed:', err.message);
4176
+ });
4177
+ }
4178
+
3660
4179
  console.info(
3661
4180
  `[Run ${shortenRunId(runId)}] completed trigger=${triggerSource} steps=${stepIndex} tokens=${totalTokens} durationMs=${runMeta?.startedAt ? Date.now() - runMeta.startedAt : 0} finalResponse=${finalResponseText ? 'yes' : 'no'} sentMessages=${runMeta?.sentMessages?.length || 0}`
3662
4181
  );
@@ -3743,6 +4262,8 @@ class AgentEngine {
3743
4262
  triggerSource === 'messaging'
3744
4263
  && options.source
3745
4264
  && options.chatId
4265
+ && runMeta?.finalDeliverySent !== true
4266
+ && runMeta?.messagingSent !== true
3746
4267
  && err?.disableAutonomousRetry !== true
3747
4268
  && !isRateLimitError
3748
4269
  && retryCount < this.getMessagingRetryLimit(maxIterations)
@@ -3784,6 +4305,10 @@ class AgentEngine {
3784
4305
  ...(Array.isArray(options?.messagingRetryState?.interimHistory) ? options.messagingRetryState.interimHistory : []),
3785
4306
  ...(Array.isArray(runMeta?.interimMessages) ? runMeta.interimMessages : []),
3786
4307
  ]),
4308
+ goalContract: mergeGoalContracts(
4309
+ options?.messagingRetryState?.goalContract || null,
4310
+ runMeta?.goalContract || null,
4311
+ ),
3787
4312
  lastUserVisibleUpdateAt: runMeta?.progressLedger?.lastUserVisibleUpdateAt || options?.messagingRetryState?.lastUserVisibleUpdateAt || null,
3788
4313
  lastFinalDeliveryAt: runMeta?.progressLedger?.lastFinalDeliveryAt || options?.messagingRetryState?.lastFinalDeliveryAt || null,
3789
4314
  heartbeatCount: Number(runMeta?.progressLedger?.heartbeatCount || options?.messagingRetryState?.heartbeatCount || 0),
@@ -3809,7 +4334,7 @@ class AgentEngine {
3809
4334
  let messagingFailureContent = '';
3810
4335
  let sendSucceeded = false;
3811
4336
  if (triggerSource === 'messaging' && options.source && options.chatId) {
3812
- if (!runMeta?.messagingSent) {
4337
+ if (!runMeta?.finalDeliverySent && !runMeta?.messagingSent) {
3813
4338
  const manager = this.messagingManager;
3814
4339
  if (manager) {
3815
4340
  const failureScenario = buildMessagingFailureScenario({
@@ -3826,10 +4351,14 @@ class AgentEngine {
3826
4351
  content: `The run encountered a runtime error and cannot continue reliably. Use the actual run scenario below to explain the blocker naturally.\n\nScenario:\n${failureScenario || 'No additional scenario details were captured.'}\n\nDo not call tools. Write exactly one short user message. Do not ask the user to resend or restate the same task. Only ask the user for something if a specific external input, permission, or configuration change is actually required. Do not promise future work unless it will happen automatically before this reply is sent.\n\n${buildPlatformFormattingGuide(options?.source || null)}`
3827
4352
  }
3828
4353
  ]);
3829
- const modelReply = await provider.chat(failedMessage, [], {
3830
- model,
3831
- reasoningEffort: this.getReasoningEffort(providerName, options)
3832
- });
4354
+ const modelReply = await withModelCallTimeout(
4355
+ provider.chat(failedMessage, [], {
4356
+ model,
4357
+ reasoningEffort: this.getReasoningEffort(providerName, options)
4358
+ }),
4359
+ options,
4360
+ 'Messaging failure reply',
4361
+ );
3833
4362
  const drafted = sanitizeModelOutput(modelReply.content || '', { model });
3834
4363
  if (normalizeOutgoingMessage(drafted, options?.source || null)) {
3835
4364
  messagingFailureContent = drafted.trim();
@@ -3848,7 +4377,14 @@ class AgentEngine {
3848
4377
  }
3849
4378
 
3850
4379
  try {
3851
- await manager.sendMessage(userId, options.source, options.chatId, messagingFailureContent, { runId, agentId });
4380
+ const deliveryResult = await manager.sendMessage(
4381
+ userId,
4382
+ options.source,
4383
+ options.chatId,
4384
+ messagingFailureContent,
4385
+ { runId, agentId },
4386
+ );
4387
+ requireSuccessfulMessagingDelivery(deliveryResult, 'Messaging failure delivery');
3852
4388
  sendSucceeded = true;
3853
4389
  if (runMeta) {
3854
4390
  runMeta.lastSentMessage = messagingFailureContent;