npm - neoagent - Versions diffs - 2.5.2-beta.2 → 2.5.2-beta.4 - Mend

neoagent 2.5.2-beta.2 → 2.5.2-beta.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8):

package/package.json +1 -1
package/server/public/.last_build_id +1 -1
package/server/public/flutter_bootstrap.js +1 -1
package/server/public/main.dart.js +4 -4
package/server/services/ai/deliverables/artifact_helpers.js +1 -0
package/server/services/ai/engine.js +722 -186
package/server/services/ai/tools.js +38 -1
package/server/services/messaging/manager.js +7 -0

package/server/services/ai/engine.js CHANGED

@@ -116,6 +116,8 @@ const MESSAGING_PROGRESS_FIRST_UPDATE_MS = 60 * 1000;
 const MESSAGING_PROGRESS_REPEAT_MS = 90 * 1000;
 const MESSAGING_PROGRESS_STALL_MS = 240 * 1000;
 const MESSAGING_PROGRESS_TICK_MS = 15 * 1000;
+const GOAL_CONTRACT_SUCCESS_CRITERIA_LIMIT = 12;
+const MODEL_CALL_TIMEOUT_MS = 5 * 60 * 1000;
 function isoNow() {
   return new Date().toISOString();
@@ -135,6 +137,31 @@ function formatElapsedDuration(durationMs) {
   return `${minutes}m ${seconds}s`;
 }
+function resolveModelCallTimeoutMs(options = {}) {
+  const requested = Number(options?.modelCallTimeoutMs);
+  if (Number.isFinite(requested) && requested > 0) {
+    return Math.max(10, requested);
+  }
+  return MODEL_CALL_TIMEOUT_MS;
+}
+async function withModelCallTimeout(promise, options = {}, label = 'Model call') {
+  const timeoutMs = resolveModelCallTimeoutMs(options);
+  let timer = null;
+  const timeout = new Promise((_, reject) => {
+    timer = setTimeout(() => {
+      const error = new Error(`${label} timed out after ${formatElapsedDuration(timeoutMs)}.`);
+      error.code = 'MODEL_CALL_TIMEOUT';
+      reject(error);
+    }, timeoutMs);
+  });
+  try {
+    return await Promise.race([Promise.resolve(promise), timeout]);
+  } finally {
+    if (timer) clearTimeout(timer);
+  }
+}
 function cloneInterimHistory(history = []) {
   if (!Array.isArray(history)) return [];
   return history.map((item) => ({
@@ -186,6 +213,254 @@ function hasVisibleInterimActivity(runMeta) {
   );
 }
+function requireSuccessfulMessagingDelivery(result, label = 'Messaging delivery') {
+  if (result?.success === true && result?.suppressed !== true) {
+    return result;
+  }
+  const reason = String(
+    result?.error
+    || result?.reason
+    || result?.result?.error
+    || result?.result?.reason
+    || 'the platform did not confirm delivery',
+  ).trim();
+  const error = new Error(`${label} failed: ${reason}`);
+  error.code = 'MESSAGING_DELIVERY_FAILED';
+  error.deliveryResult = result || null;
+  throw error;
+}
+function normalizeGoalCriteria(value) {
+  if (!Array.isArray(value)) return [];
+  const seen = new Set();
+  const items = [];
+  for (const entry of value) {
+    const text = String(entry || '').trim();
+    if (!text) continue;
+    const signature = text.toLowerCase();
+    if (seen.has(signature)) continue;
+    seen.add(signature);
+    items.push(text);
+    if (items.length >= GOAL_CONTRACT_SUCCESS_CRITERIA_LIMIT) break;
+  }
+  return items;
+}
+function normalizeGoalContract(raw = null) {
+  if (!raw || typeof raw !== 'object') return null;
+  const goal = String(raw.goal || '').trim();
+  const successCriteria = normalizeGoalCriteria(
+    raw.successCriteria || raw.success_criteria || [],
+  );
+  const rawCompletionConfidence = String(
+    raw.completionConfidenceRequired || raw.completion_confidence_required || '',
+  ).trim();
+  const completionConfidenceRequired = rawCompletionConfidence
+    ? normalizeCompletionConfidence(rawCompletionConfidence)
+    : '';
+  const progressUpdatePolicy = ['none', 'optional', 'required'].includes(String(
+    raw.progressUpdatePolicy || raw.progress_update_policy || '',
+  ).trim().toLowerCase())
+    ? String(raw.progressUpdatePolicy || raw.progress_update_policy || '').trim().toLowerCase()
+    : '';
+  const autonomyLevel = ['minimal', 'normal', 'high'].includes(String(
+    raw.autonomyLevel || raw.autonomy_level || '',
+  ).trim().toLowerCase())
+    ? String(raw.autonomyLevel || raw.autonomy_level || '').trim().toLowerCase()
+    : '';
+  const complexity = ['simple', 'standard', 'complex'].includes(String(
+    raw.complexity || '',
+  ).trim().toLowerCase())
+    ? String(raw.complexity || '').trim().toLowerCase()
+    : '';
+  if (
+    !goal
+    && successCriteria.length === 0
+    && !completionConfidenceRequired
+    && !progressUpdatePolicy
+    && !autonomyLevel
+    && !complexity
+  ) {
+    return null;
+  }
+  return {
+    goal,
+    successCriteria,
+    completionConfidenceRequired,
+    progressUpdatePolicy: progressUpdatePolicy || '',
+    autonomyLevel: autonomyLevel || '',
+    complexity: complexity || '',
+  };
+}
+function mergeGoalContracts(existing = null, patch = null) {
+  const current = normalizeGoalContract(existing) || null;
+  const nextPatch = normalizeGoalContract(patch) || null;
+  if (!current && !nextPatch) return null;
+  const goal = String(current?.goal || nextPatch?.goal || '').trim();
+  const successCriteria = normalizeGoalCriteria([
+    ...(current?.successCriteria || []),
+    ...(nextPatch?.successCriteria || []),
+  ]);
+  const completionConfidenceRequired = nextPatch?.completionConfidenceRequired
+    || current?.completionConfidenceRequired
+    || 'medium';
+  const progressUpdatePolicy = nextPatch?.progressUpdatePolicy
+    || current?.progressUpdatePolicy
+    || '';
+  const autonomyLevel = nextPatch?.autonomyLevel
+    || current?.autonomyLevel
+    || '';
+  const complexity = nextPatch?.complexity
+    || current?.complexity
+    || '';
+  return normalizeGoalContract({
+    goal,
+    successCriteria,
+    completionConfidenceRequired,
+    progressUpdatePolicy,
+    autonomyLevel,
+    complexity,
+  });
+}
+function goalContractFromAnalysis(analysis = null) {
+  if (!analysis || typeof analysis !== 'object') return null;
+  return normalizeGoalContract({
+    goal: analysis.goal,
+    successCriteria: analysis.success_criteria,
+    completionConfidenceRequired: analysis.completion_confidence_required,
+    progressUpdatePolicy: analysis.progress_update_policy,
+    autonomyLevel: analysis.autonomy_level,
+    complexity: analysis.complexity,
+  });
+}
+function goalContractFromPlan(plan = null) {
+  if (!plan || typeof plan !== 'object') return null;
+  return normalizeGoalContract({
+    successCriteria: plan.success_criteria,
+  });
+}
+function buildResolvedGoalContract(runMeta, analysis = null, plan = null) {
+  let contract = mergeGoalContracts(runMeta?.goalContract || null, goalContractFromAnalysis(analysis));
+  contract = mergeGoalContracts(contract, goalContractFromPlan(plan));
+  return contract;
+}
+function buildGoalContractPrompt(contract, label = 'Persistent run goal') {
+  const normalized = normalizeGoalContract(contract);
+  if (!normalized) return '';
+  const lines = [];
+  if (normalized.goal) {
+    lines.push(`${label}: ${normalized.goal}`);
+  }
+  if (normalized.successCriteria.length > 0) {
+    lines.push(`Persistent success criteria:\n- ${normalized.successCriteria.join('\n- ')}`);
+  }
+  const contractLine = [
+    normalized.complexity ? `complexity=${normalized.complexity}` : '',
+    normalized.autonomyLevel ? `autonomy_level=${normalized.autonomyLevel}` : '',
+    normalized.progressUpdatePolicy ? `progress_update_policy=${normalized.progressUpdatePolicy}` : '',
+    normalized.completionConfidenceRequired ? `completion_confidence_required=${normalized.completionConfidenceRequired}` : '',
+  ].filter(Boolean).join('; ');
+  if (contractLine) {
+    lines.push(`Persistent autonomy contract: ${contractLine}`);
+  }
+  return lines.join('\n');
+}
+function resolveRunGoalContext(runMeta, analysis = null, plan = null) {
+  const goalContract = buildResolvedGoalContract(runMeta, analysis, plan);
+  const successCriteria = goalContract?.successCriteria?.length
+    ? goalContract.successCriteria.slice(0, 6)
+    : (Array.isArray(plan?.success_criteria)
+      ? plan.success_criteria
+        .map((item) => String(item || '').trim())
+        .filter(Boolean)
+        .slice(0, 6)
+      : []);
+  const effectiveGoal = goalContract?.goal || analysis?.goal || '';
+  const effectiveComplexity = goalContract?.complexity || analysis?.complexity || 'standard';
+  const effectiveAutonomyLevel = goalContract?.autonomyLevel || analysis?.autonomy_level || 'normal';
+  const effectiveProgressPolicy = goalContract?.progressUpdatePolicy || analysis?.progress_update_policy || 'optional';
+  const effectiveCompletionConfidence = goalContract?.completionConfidenceRequired
+    || analysis?.completion_confidence_required
+    || 'medium';
+  const persistedGoalPrompt = buildGoalContractPrompt(goalContract);
+  return {
+    goalContract,
+    successCriteria,
+    effectiveGoal,
+    effectiveComplexity,
+    effectiveAutonomyLevel,
+    effectiveProgressPolicy,
+    effectiveCompletionConfidence,
+    persistedGoalPrompt,
+  };
+}
+function buildCompletionDecisionPrompt({
+  triggerSource,
+  messagingSent = false,
+  goalContext,
+  parallelWork = false,
+  tools,
+  toolExecutions,
+  lastReply,
+  iteration,
+  maxIterations,
+}) {
+  const draftReply = normalizeOutgoingMessage(lastReply) || '';
+  const lines = [
+    'Return JSON only.',
+    'Decide whether this run should continue autonomously or stop now.',
+    'Schema: {"status":"continue|complete|blocked","reason":"short concrete reason"}',
+    'Rules:',
+    '- Use "continue" whenever any safe next step remains in this same run.',
+    '- Use "complete" only when the requested outcome is actually achieved and the latest draft is the finished user-facing answer.',
+    '- Use "blocked" only when a specific external dependency, missing user input, or permission outside this run is required and the latest draft is the blocker reply.',
+    '- If the latest draft asks the user for a missing required value, confirmation, or choice needed to proceed, use "blocked" so the run waits instead of repeating the same ask.',
+    '- A progress note, next-step note, apology, plan, or promise to investigate is "continue", not "complete".',
+    '- A single failed tool attempt is not blocked if another safe retry, verification step, or alternative path remains.',
+    '- A tool-specific API error, timeout, rate limit, or missing result inside this run is usually "continue", not "blocked", if any other available tool could still make progress.',
+    `- If completion_confidence_required is ${goalContext.effectiveCompletionConfidence} and the latest draft depends on unverified assumptions, use "continue" so the run can gather evidence, inspect state, or narrow the reply.`,
+    triggerSource === 'messaging' && messagingSent
+      ? '- A final reply was already delivered via send_message. Use "complete" unless concrete task work remains.'
+      : triggerSource === 'messaging'
+        ? '- For messaging, do not stop on a partial status message. Continue unless the task is actually complete or externally blocked.'
+        : '- Do not stop just because you wrote a status update. Continue unless the task is actually complete or externally blocked.',
+  ];
+  lines.push(
+    goalContext.effectiveGoal ? `Goal: ${goalContext.effectiveGoal}` : '',
+    goalContext.persistedGoalPrompt,
+    `Autonomy contract: complexity=${goalContext.effectiveComplexity}; autonomy_level=${goalContext.effectiveAutonomyLevel}; progress_update_policy=${goalContext.effectiveProgressPolicy}; parallel_work=${parallelWork === true}; completion_confidence_required=${goalContext.effectiveCompletionConfidence}.`,
+    goalContext.successCriteria.length > 0
+      ? `Success criteria:\n${goalContext.successCriteria.map((item, index) => `${index + 1}. ${item}`).join('\n')}`
+      : '',
+    `Current iteration: ${iteration} of ${maxIterations}.`,
+    `Available tools in this run: ${summarizeAvailableTools(tools) || 'none'}`,
+    `Recent tool evidence:\n${summarizeToolExecutions(toolExecutions, 8) || 'none'}`,
+    `Latest draft reply:\n${draftReply || '(empty)'}`,
+  );
+  return lines.filter(Boolean).join('\n');
+}
+function normalizeCompletionDecision(raw, fallbackStatus = 'continue') {
+  const allowed = new Set(['continue', 'complete', 'blocked']);
+  const requestedStatus = String(raw.status || '').trim().toLowerCase();
+  return {
+    status: allowed.has(requestedStatus) ? requestedStatus : fallbackStatus,
+    reason: String(raw.reason || '').trim().slice(0, 400),
+  };
+}
 function planningDepthForForceMode(forceMode) {
   return forceMode === 'plan_execute' ? 'deep' : 'light';
 }
@@ -409,6 +684,7 @@ class AgentEngine {
     this.taskRuntime = services.taskRuntime || null;
     this.memoryManager = services.memoryManager || null;
     this.voiceRuntimeManager = services.voiceRuntimeManager || null;
+    this.messagingDeliveryRetry = services.messagingDeliveryRetry || {};
   }
   async buildSystemPrompt(userId, context = {}) {
@@ -629,6 +905,18 @@ class AgentEngine {
       .run(JSON.stringify(next), runId);
   }
+  updateRunGoalContract(runId, patch = {}, options = {}) {
+    const runMeta = this.getRunMeta(runId);
+    if (!runMeta) return null;
+    runMeta.goalContract = mergeGoalContracts(runMeta.goalContract, patch);
+    if (options.persist !== false) {
+      this.persistRunMetadata(runId, {
+        goalContract: runMeta.goalContract,
+      });
+    }
+    return runMeta.goalContract;
+  }
   buildProgressLedgerSnapshot(runMeta) {
     if (!runMeta?.progressLedger) return null;
     return {
@@ -707,6 +995,7 @@ class AgentEngine {
   markRunFinalDelivery(runId, content = '', timestamp = isoNow()) {
     const runMeta = this.getRunMeta(runId);
     if (!runMeta) return null;
+    runMeta.messagingSent = true;
     runMeta.finalDeliverySent = true;
     runMeta.lastSentMessage = String(content || '').trim() || runMeta.lastSentMessage || '';
     const ledger = this.updateRunProgress(runId, {
@@ -818,13 +1107,14 @@ class AgentEngine {
       if (!platform || !chatId || !this.messagingManager) {
         return { sent: false, skipped: true, reason: 'Messaging context is not available.' };
       }
-      await this.messagingManager.sendMessage(userId, platform, chatId, normalizedContent, {
+      const deliveryResult = await this.messagingManager.sendMessage(userId, platform, chatId, normalizedContent, {
         agentId,
         runId,
         persistConversation: true,
         metadata,
         deliveryKind: 'interim',
       });
+      requireSuccessfulMessagingDelivery(deliveryResult, 'Interim messaging delivery');
     } else if (triggerSource === 'voice_live') {
       const voiceSessionId = runMeta.voiceSessionId || null;
       const manager = this.voiceRuntimeManager || this.app?.locals?.voiceRuntimeManager || null;
@@ -918,42 +1208,72 @@ class AgentEngine {
     phase = 'structured',
   }) {
     const startedAt = Date.now();
-    const response = await withProviderRetry(
-      () => provider.chat(
-        sanitizeConversationMessages([
-          ...messages,
-          { role: 'system', content: prompt },
-        ]),
-        [],
-        {
-          model,
-          maxTokens,
-          reasoningEffort: reasoningEffort || this.getReasoningEffort(providerName, {}),
-        }
-      ),
-      { label: `Engine ${model} (structured)` }
-    );
-    if (telemetry?.runId && telemetry?.userId) {
-      recordModelUsage({
-        runId: telemetry.runId,
-        stepId: telemetry.stepId || null,
-        userId: telemetry.userId,
-        agentId: telemetry.agentId || null,
-        provider: providerName,
-        model,
-        phase,
-        usage: response.usage,
-        latencyMs: Date.now() - startedAt,
+    const structuredStep = `model:${phase}`;
+    if (telemetry?.runId) {
+      this.updateRunProgress(telemetry.runId, {
+        currentPhase: 'model',
+        currentStep: structuredStep,
+        currentTool: null,
+        currentStepStartedAt: isoNow(),
       });
     }
-    const parsed = parseJsonObject(response.content || '');
-    const normalizedUsage = normalizeUsage(response.usage);
-    return {
-      value: normalize(parsed || {}, fallback),
-      raw: response.content || '',
-      usage: normalizedUsage?.totalTokens || 0,
-    };
+    let completed = false;
+    try {
+      const response = await withProviderRetry(
+        () => withModelCallTimeout(
+          provider.chat(
+            sanitizeConversationMessages([
+              ...messages,
+              { role: 'system', content: prompt },
+            ]),
+            [],
+            {
+              model,
+              maxTokens,
+              reasoningEffort: reasoningEffort || this.getReasoningEffort(providerName, {}),
+            }
+          ),
+          telemetry || {},
+          `${phase} model call`,
+        ),
+        { label: `Engine ${model} (structured)` }
+      );
+      completed = true;
+      if (telemetry?.runId && telemetry?.userId) {
+        recordModelUsage({
+          runId: telemetry.runId,
+          stepId: telemetry.stepId || null,
+          userId: telemetry.userId,
+          agentId: telemetry.agentId || null,
+          provider: providerName,
+          model,
+          phase,
+          usage: response.usage,
+          latencyMs: Date.now() - startedAt,
+        });
+      }
+      const parsed = parseJsonObject(response.content || '');
+      const normalizedUsage = normalizeUsage(response.usage);
+      return {
+        value: normalize(parsed || {}, fallback),
+        raw: response.content || '',
+        usage: normalizedUsage?.totalTokens || 0,
+      };
+    } finally {
+      const runMeta = telemetry?.runId ? this.getRunMeta(telemetry.runId) : null;
+      if (runMeta?.progressLedger?.currentStep === structuredStep) {
+        this.updateRunProgress(telemetry.runId, {
+          currentPhase: 'idle',
+          currentStep: null,
+          currentTool: null,
+          currentStepStartedAt: null,
+        }, {
+          verified: completed,
+        });
+      }
+    }
   }
   async requestModelResponse({
@@ -980,8 +1300,16 @@ class AgentEngine {
       if (options.stream !== false) {
         let emittedContent = false;
         const stream = provider.stream(requestMessages, tools, callOptions);
+        const iterator = stream[Symbol.asyncIterator]();
         try {
-          for await (const chunk of stream) {
+          while (true) {
+            const next = await withModelCallTimeout(
+              iterator.next(),
+              options,
+              `Model stream iteration ${iteration}`,
+            );
+            if (next.done) break;
+            const chunk = next.value;
             if (chunk.type === 'content') {
               emittedContent = true;
               streamContent += chunk.content;
@@ -1005,13 +1333,18 @@ class AgentEngine {
             }
           }
         } catch (err) {
+          Promise.resolve(iterator.return?.()).catch(() => {});
           // Once tokens have streamed to the client a retry would duplicate
           // output, so only the pre-stream window is safe to replay.
           if (emittedContent) err.__providerRetryUnsafe = true;
           throw err;
         }
       } else {
-        response = await provider.chat(requestMessages, tools, callOptions);
+        response = await withModelCallTimeout(
+          provider.chat(requestMessages, tools, callOptions),
+          options,
+          `Model iteration ${iteration}`,
+        );
       }
       return { response, streamContent };
@@ -1152,53 +1485,27 @@ class AgentEngine {
     options,
     fallbackStatus,
   }) {
-    const successCriteria = Array.isArray(plan?.success_criteria)
-      ? plan.success_criteria
-        .map((item) => String(item || '').trim())
-        .filter(Boolean)
-        .slice(0, 6)
-      : [];
+    const runMeta = options?.runId ? this.getRunMeta(options.runId) : null;
+    const goalContext = resolveRunGoalContext(runMeta, analysis, plan);
     const response = await this.requestStructuredJson({
       provider,
       providerName,
       model,
       messages,
-      prompt: [
-        'Return JSON only.',
-        'Decide whether this run should continue autonomously or stop now.',
-        'Schema: {"status":"continue|complete|blocked","reason":"short concrete reason"}',
-        'Rules:',
-        '- Use "continue" whenever any safe next step remains in this same run.',
-        '- Use "complete" only when the requested outcome is actually achieved or a truthful final user reply is already ready now.',
-        '- Use "blocked" only when a specific external dependency outside this run is required.',
-        '- If the latest draft asks the user for a missing required value, confirmation, or choice needed to proceed, use "blocked" so the run waits instead of repeating the same ask.',
-        '- A progress update is not complete.',
-        '- A single failed tool attempt is not blocked if another safe retry, verification step, or alternative path remains.',
-        '- A tool-specific API error, timeout, rate limit, or missing result inside this run is usually "continue", not "blocked", if any other available tool could still make progress.',
-        '- If completion_confidence_required is high and the latest draft depends on unverified assumptions, use "continue" so the run can gather evidence, inspect state, or narrow the reply.',
-        triggerSource === 'messaging' && messagingSent
-          ? '- A reply was already delivered to the user via send_message. Use "complete" unless there is concrete remaining work (e.g., a tool call you still need to make) before the task is truly done. Do not send follow-up elaborations or re-introductions.'
-          : triggerSource === 'messaging'
-            ? '- For messaging, do not stop on a partial status message. Continue unless the task is actually complete or externally blocked. If you already asked for missing user input, choose "blocked" and wait.'
-            : '- Do not stop just because you wrote a status update. Continue unless the task is actually complete or externally blocked.',
-        analysis?.goal ? `Goal: ${analysis.goal}` : '',
-        `Autonomy contract: complexity=${analysis?.complexity || 'standard'}; autonomy_level=${analysis?.autonomy_level || 'normal'}; progress_update_policy=${analysis?.progress_update_policy || 'optional'}; parallel_work=${analysis?.parallel_work === true}; completion_confidence_required=${analysis?.completion_confidence_required || 'medium'}.`,
-        successCriteria.length > 0 ? `Success criteria:\n${successCriteria.map((item, index) => `${index + 1}. ${item}`).join('\n')}` : '',
-        `Current iteration: ${iteration} of ${maxIterations}.`,
-        `Available tools in this run: ${summarizeAvailableTools(tools) || 'none'}`,
-        `Recent tool evidence:\n${summarizeToolExecutions(toolExecutions, 8) || 'none'}`,
-        `Latest draft reply:\n${normalizeOutgoingMessage(lastReply) || '(empty)'}`,
-      ].filter(Boolean).join('\n'),
+      prompt: buildCompletionDecisionPrompt({
+        triggerSource,
+        messagingSent,
+        goalContext,
+        parallelWork: analysis?.parallel_work === true,
+        tools,
+        toolExecutions,
+        lastReply,
+        iteration,
+        maxIterations,
+      }),
       maxTokens: 320,
-      normalize: (raw) => {
-        const allowed = new Set(['continue', 'complete', 'blocked']);
-        const requestedStatus = String(raw.status || '').trim().toLowerCase();
-        return {
-          status: allowed.has(requestedStatus) ? requestedStatus : fallbackStatus,
-          reason: String(raw.reason || '').trim().slice(0, 400),
-        };
-      },
+      normalize: (raw) => normalizeCompletionDecision(raw, fallbackStatus),
       fallback: { status: fallbackStatus },
       reasoningEffort: this.getReasoningEffort(providerName, options),
       telemetry: options,
@@ -1211,6 +1518,67 @@ class AgentEngine {
     };
   }
+  async evaluateTaskCompleteSignal({
+    provider,
+    providerName,
+    model,
+    messages,
+    tools,
+    analysis,
+    plan,
+    toolExecutions,
+    finalMessage,
+    confidence,
+    triggerSource,
+    messagingSent,
+    iteration,
+    maxIterations,
+    options,
+  }) {
+    const runMeta = options?.runId ? this.getRunMeta(options.runId) : null;
+    const requiredConfidence = resolveRunGoalContext(runMeta, analysis, plan)
+      .effectiveCompletionConfidence;
+    const confidenceDecision = shouldAcceptTaskComplete({
+      confidence,
+      requiredConfidence,
+      iteration,
+      maxIterations,
+    });
+    if (!confidenceDecision.accept) {
+      return {
+        decision: {
+          status: 'continue',
+          reason: confidenceDecision.reason,
+        },
+        requiredConfidence,
+        usage: 0,
+      };
+    }
+    const loopState = await this.decideLoopState({
+      provider,
+      providerName,
+      model,
+      messages,
+      tools,
+      analysis,
+      plan,
+      toolExecutions,
+      lastReply: finalMessage,
+      triggerSource,
+      messagingSent,
+      iteration,
+      maxIterations,
+      options,
+      fallbackStatus: 'continue',
+    });
+    return {
+      decision: loopState.decision,
+      requiredConfidence,
+      usage: loopState.usage || 0,
+    };
+  }
   async verifyFinalResponse({
     provider,
     providerName,
@@ -1321,11 +1689,15 @@ class AgentEngine {
       }
     ];
-    const response = await provider.chat(promptMessages, [], {
-      model,
-      maxTokens: 800,
-      reasoningEffort: this.getReasoningEffort(providerName, options),
-    });
+    const response = await withModelCallTimeout(
+      provider.chat(promptMessages, [], {
+        model,
+        maxTokens: 800,
+        reasoningEffort: this.getReasoningEffort(providerName, options),
+      }),
+      options,
+      'Conversation state refresh',
+    );
     const parsed = parseJsonObject(response.content || '') || {};
     const nextState = {
       summary: String(parsed.summary || existingState?.summary || '').trim(),
@@ -1382,19 +1754,23 @@ class AgentEngine {
         `[Run ${shortenRunId(runId)}] blank_reply_recovery attempt=${attempt} model=${model}`
       );
       try {
-        const response = await provider.chat(
-          sanitizeConversationMessages([
-            ...messages,
+        const response = await withModelCallTimeout(
+          provider.chat(
+            sanitizeConversationMessages([
+              ...messages,
+              {
+                role: 'system',
+                content: buildBlankMessagingReplyPrompt(attempt, options?.source || null)
+              }
+            ]),
+            [],
             {
-              role: 'system',
-              content: buildBlankMessagingReplyPrompt(attempt, options?.source || null)
+              model,
+              reasoningEffort: this.getReasoningEffort(providerName, options)
             }
-          ]),
-          [],
-          {
-            model,
-            reasoningEffort: this.getReasoningEffort(providerName, options)
-          }
+          ),
+          options,
+          `Blank messaging reply recovery ${attempt}`,
         );
         totalTokens += response.usage?.totalTokens || 0;
         recoveredContent = sanitizeModelOutput(response.content || '', { model });
@@ -1827,21 +2203,27 @@ class AgentEngine {
   buildMessagingHeartbeatText(runMeta, options = {}) {
     const stalled = options.stalled === true;
-    const fallbackStartedAtMs = Number.isFinite(runMeta?.startedAt) ? runMeta.startedAt : Date.now();
-    const startedAtMs = timestampMs(
+    const now = Date.now();
+    const runStartedAtMs = Number.isFinite(runMeta?.startedAt) ? runMeta.startedAt : now;
+    const stepStartedAtMs = timestampMs(
       runMeta?.progressLedger?.currentStepStartedAt,
-      fallbackStartedAtMs,
+      0,
     );
-    const elapsed = formatElapsedDuration(Date.now() - startedAtMs);
+    const runElapsed = formatElapsedDuration(now - runStartedAtMs);
+    const stepElapsed = formatElapsedDuration(now - (stepStartedAtMs || runStartedAtMs));
+    const unverifiedElapsed = formatElapsedDuration(now - timestampMs(
+      runMeta?.progressLedger?.lastVerifiedProgressAt,
+      runStartedAtMs,
+    ));
     const currentTool = String(runMeta?.progressLedger?.currentTool || '').trim();
     if (currentTool) {
       return stalled
-        ? `Still working on ${currentTool}. This run has not made verified progress for ${elapsed}.`
-        : `Still working on ${currentTool}. ${elapsed} elapsed so far.`;
+        ? `Still working on ${currentTool}. Run active ${runElapsed}; no verified progress for ${unverifiedElapsed}.`
+        : `Still working on ${currentTool}. Run active ${runElapsed}; current step ${stepElapsed} so far.`;
     }
     return stalled
-      ? `Still working on this. This run has not made verified progress for ${elapsed}.`
-      : `Still working on this. ${elapsed} elapsed so far.`;
+      ? `Still working on this. Run active ${runElapsed}; no verified progress for ${unverifiedElapsed}.`
+      : `Still working on this. Run active ${runElapsed}.`;
   }
   async sendRuntimeMessagingHeartbeat(runId, options = {}) {
@@ -1856,7 +2238,7 @@ class AgentEngine {
     const createdAt = isoNow();
     const content = this.buildMessagingHeartbeatText(runMeta, options);
-    await this.messagingManager.sendMessage(
+    const deliveryResult = await this.messagingManager.sendMessage(
       runMeta.userId,
       runMeta.messagingContext.platform,
       runMeta.messagingContext.chatId,
@@ -1874,6 +2256,7 @@ class AgentEngine {
         deliveryKind: 'interim',
       },
     );
+    requireSuccessfulMessagingDelivery(deliveryResult, 'Messaging heartbeat delivery');
     runMeta.lastInterimMessage = content;
     if (!Array.isArray(runMeta.interimMessages)) {
@@ -1950,9 +2333,31 @@ class AgentEngine {
         await this.messagingManager.sendTyping(userId, platform, chatId, true, { agentId }).catch(() => {});
         await new Promise((resolve) => setTimeout(resolve, delay));
       }
-      await this.messagingManager.sendMessage(userId, platform, chatId, chunks[i], { runId, agentId }).catch((err) =>
-        console.error('[Engine] Auto-reply fallback failed:', err.message)
-      );
+      try {
+        await withProviderRetry(async () => {
+          const deliveryResult = await this.messagingManager.sendMessage(
+            userId,
+            platform,
+            chatId,
+            chunks[i],
+            { runId, agentId },
+          );
+          return requireSuccessfulMessagingDelivery(deliveryResult, 'Final messaging delivery');
+        }, {
+          ...this.messagingDeliveryRetry,
+          label: `MessagingDelivery ${platform}`,
+          isRetryable: (error) => (
+            error?.retryable !== false
+            && (
+              error?.code === 'MESSAGING_DELIVERY_FAILED'
+              || isTransientError(error)
+            )
+          ),
+        });
+      } catch (error) {
+        error.disableAutonomousRetry = true;
+        throw error;
+      }
     }
     runMeta.lastSentMessage = chunks[chunks.length - 1] || cleanedContent;
@@ -2003,7 +2408,10 @@ class AgentEngine {
       return { sent: false, skipped: true };
     }
-    if (ledger.currentPhase === 'tool' && ledger.currentStepStartedAt) {
+    if (
+      (ledger.currentPhase === 'tool' || ledger.currentPhase === 'model')
+      && ledger.currentStepStartedAt
+    ) {
       return this.sendRuntimeMessagingHeartbeat(runId, { stalled });
     }
@@ -2317,6 +2725,12 @@ class AgentEngine {
     const carriedExplicitMessageSent = retryMessagingState.explicitMessageSent === true;
     const carriedInterimHistory = cloneInterimHistory(retryMessagingState.interimHistory);
     const carriedLastInterimMessage = carriedInterimHistory[carriedInterimHistory.length - 1]?.content || '';
+    const carriedGoalContract = mergeGoalContracts(
+      normalizeGoalContract({
+        goal: clampRunContext(userMessage, 1200),
+      }),
+      retryMessagingState.goalContract,
+    );
     const startedAtIso = isoNow();
     const progressLedger = buildInitialProgressLedger({
       startedAt: startedAtIso,
@@ -2358,10 +2772,12 @@ class AgentEngine {
           chatId: options.chatId || null,
         }
         : null,
+      goalContract: carriedGoalContract,
       progressLedger,
     });
     this.persistRunMetadata(runId, {
       progressLedger,
+      goalContract: carriedGoalContract,
     });
     this.startMessagingProgressSupervisor(runId);
     this.emit(userId, 'run:start', { runId, agentId, title: runTitle, model, triggerType, triggerSource });
@@ -2459,6 +2875,12 @@ class AgentEngine {
     if (threadStateMessage) {
       messages.push({ role: 'system', content: threadStateMessage });
     }
+    if (carriedGoalContract) {
+      messages.push({
+        role: 'system',
+        content: buildGoalContractPrompt(carriedGoalContract, 'Persisted run goal'),
+      });
+    }
     this.recordRunEvent(userId, runId, 'memory_injected', {
       hasRecallContext: Boolean(recallMsg),
       hasThreadState: Boolean(threadStateMessage),
@@ -2537,6 +2959,7 @@ class AgentEngine {
           taskAnalysis: analysis,
           capabilityHealth,
         });
+        this.updateRunGoalContract(runId, goalContractFromAnalysis(analysis));
         this.emit(userId, 'run:analysis', {
           runId,
           ...analysis,
@@ -2655,6 +3078,9 @@ class AgentEngine {
               plan: deliverablePlan,
             },
           });
+          this.updateRunGoalContract(runId, {
+            goal: deliverableWorkflow.selection.goal,
+          });
           this.recordRunEvent(userId, runId, 'deliverable_workflow_selected', {
             type: deliverableWorkflow.selection.type,
             confidence: deliverableWorkflow.selection.confidence,
@@ -2691,6 +3117,7 @@ class AgentEngine {
             JSON.stringify(plan).slice(0, 20000)
           );
         this.persistRunMetadata(runId, { executionPlan: plan });
+        this.updateRunGoalContract(runId, goalContractFromPlan(plan));
         this.emit(userId, 'run:plan', {
           runId,
           steps: plan.steps,
@@ -2699,6 +3126,13 @@ class AgentEngine {
         });
       }
+      const runGoalContract = this.getRunMeta(runId)?.goalContract || null;
+      if (runGoalContract) {
+        messages.push({
+          role: 'system',
+          content: buildGoalContractPrompt(runGoalContract, 'Run goal contract'),
+        });
+      }
       messages.push({
         role: 'system',
         content: buildExecutionGuidance({
@@ -2731,6 +3165,37 @@ class AgentEngine {
           db.prepare('INSERT INTO conversation_messages (conversation_id, role, content, tokens) VALUES (?, ?, ?, ?)')
             .run(conversationId, 'assistant', lastContent, analysisUsage);
         }
+        const directAnswerDecision = await runWithModelFallback(
+          'direct answer completion decision',
+          () => this.decideLoopState({
+            provider,
+            providerName,
+            model,
+            messages,
+            tools,
+            analysis,
+            plan,
+            toolExecutions,
+            lastReply: lastContent,
+            triggerSource,
+            messagingSent: false,
+            iteration,
+            maxIterations,
+            options: { ...options, runId, userId, agentId },
+            fallbackStatus: 'continue',
+          }),
+        );
+        totalTokens += directAnswerDecision.usage || 0;
+        if (directAnswerDecision.decision.status === 'continue') {
+          messages.push({
+            role: 'system',
+            content: directAnswerDecision.decision.reason
+              ? `Continue working: ${directAnswerDecision.decision.reason}.`
+              : 'The initial draft is not a finished answer. Continue working autonomously.',
+          });
+          lastContent = '';
+          directAnswerEligible = false;
+        }
       }
       // BUG FIX: consecutiveToolFailures was previously declared INSIDE the
@@ -2756,14 +3221,16 @@ class AgentEngine {
           currentStep: `model:${iteration}`,
           currentTool: null,
           currentStepStartedAt: isoNow(),
-        }, {
-          verified: true,
         });
         let metrics = this.estimatePromptMetrics(messages, tools);
         const contextWindow = provider.getContextWindow(model);
         if (metrics.totalEstimatedTokens > contextWindow * loopPolicy.compactionThreshold) {
-          messages = await compact(messages, provider, model, contextWindow);
+          messages = await withModelCallTimeout(
+            compact(messages, provider, model, contextWindow),
+            options,
+            `Context compaction before iteration ${iteration}`,
+          );
           messages = sanitizeConversationMessages(messages);
           this.emit(userId, 'run:compaction', { runId, iteration });
           metrics = this.estimatePromptMetrics(messages, tools);
@@ -2901,6 +3368,9 @@ class AgentEngine {
           toolCallCount: response.toolCalls?.length || 0,
           contentPreview: String(lastContent || streamContent || '').slice(0, 240),
         }, { agentId });
+        this.updateRunProgress(runId, {}, {
+          verified: true,
+        });
         const assistantMessage = { role: 'assistant', content: lastContent };
         if (response.toolCalls?.length) assistantMessage.tool_calls = response.toolCalls;
@@ -2924,8 +3394,6 @@ class AgentEngine {
             currentStep: null,
             currentTool: null,
             currentStepStartedAt: null,
-          }, {
-            verified: true,
           });
           const systemSteeringAfterResponse = this.applyQueuedSystemSteering(runId, messages);
           messages = systemSteeringAfterResponse.messages;
@@ -2954,51 +3422,54 @@ class AgentEngine {
           })) {
             break;
           }
-          if (iteration < maxIterations) {
-            const proactiveRunNeedsDecision = (
-              (triggerSource === 'schedule' || triggerSource === 'tasks')
-              && this.activeRuns.get(runId)?.noResponse !== true
-              && options.deliveryState?.noResponse !== true
-            );
-            const visibleInterimActivity = hasVisibleInterimActivity(this.activeRuns.get(runId));
-            const fallbackStatus = (
-              proactiveRunNeedsDecision
-              || toolExecutions.length > 0
-              || failedStepCount > 0
-              || messagingSent
-              || visibleInterimActivity
-            ) ? 'continue' : 'complete';
-            const loopState = await runWithModelFallback('loop decision', () => this.decideLoopState({
-              provider,
-              providerName,
-              model,
-              messages,
-              tools,
-              analysis,
-              plan,
-              toolExecutions,
-              lastReply: lastContent,
-              triggerSource,
-              messagingSent,
-              iteration,
-              maxIterations,
-              options: { ...options, runId, userId, agentId },
-              fallbackStatus,
-            }));
-            totalTokens += loopState.usage || 0;
-            if (loopState.decision.status === 'continue') {
-              messages.push({
-                role: 'system',
-                content: [
-                  loopState.decision.reason ? `Continue working: ${loopState.decision.reason}.` : 'Continue working autonomously.',
-                  messagingSent
-                    ? 'You already sent a user-facing message in this run. Keep working silently unless you have a materially new finished result or a real external blocker.'
-                    : 'Use send_interim_update sparingly if a short real update or question would help. Otherwise keep working until you have the result or a real blocker.',
-                ].join(' ')
-              });
-              lastContent = '';
-              continue;
+          const proactiveRunNeedsDecision = (
+            (triggerSource === 'schedule' || triggerSource === 'tasks')
+            && this.activeRuns.get(runId)?.noResponse !== true
+            && options.deliveryState?.noResponse !== true
+          );
+          const visibleInterimActivity = hasVisibleInterimActivity(this.activeRuns.get(runId));
+          const fallbackStatus = (
+            proactiveRunNeedsDecision
+            || toolExecutions.length > 0
+            || failedStepCount > 0
+            || messagingSent
+            || visibleInterimActivity
+          ) ? 'continue' : 'complete';
+          const loopState = await runWithModelFallback('loop decision', () => this.decideLoopState({
+            provider,
+            providerName,
+            model,
+            messages,
+            tools,
+            analysis,
+            plan,
+            toolExecutions,
+            lastReply: lastContent,
+            triggerSource,
+            messagingSent,
+            iteration,
+            maxIterations,
+            options: { ...options, runId, userId, agentId },
+            fallbackStatus,
+          }));
+          totalTokens += loopState.usage || 0;
+          if (loopState.decision.status === 'continue') {
+            if (iteration >= maxIterations) {
+              throw new Error(
+                `Completion judge found unfinished work at the iteration limit after ${maxIterations} iterations.`,
+              );
             }
+            messages.push({
+              role: 'system',
+              content: [
+                loopState.decision.reason ? `Continue working: ${loopState.decision.reason}.` : 'Continue working autonomously.',
+                messagingSent
+                  ? 'You already sent a user-facing message in this run. Keep working silently unless you have a materially new finished result or a real external blocker.'
+                  : 'Use send_interim_update sparingly if a short real update or question would help. Otherwise keep working until you have the result or a real blocker.',
+              ].join(' ')
+            });
+            lastContent = '';
+            continue;
           }
           break;
         }
@@ -3008,6 +3479,15 @@ class AgentEngine {
           && response.toolCalls.every((toolCall) => this.isReadOnlyToolCall(toolCall))
         );
         if (canRunParallelBatch) {
+          const parallelToolNames = response.toolCalls
+            .map((toolCall) => toolCall.function?.name)
+            .filter(Boolean);
+          this.updateRunProgress(runId, {
+            currentPhase: 'tool',
+            currentStep: `parallel:${iteration}`,
+            currentTool: parallelToolNames.join(', ') || 'parallel tools',
+            currentStepStartedAt: isoNow(),
+          });
           const batch = await this.executeReadOnlyBatch(response.toolCalls, {
             userId,
             runId,
@@ -3059,6 +3539,14 @@ class AgentEngine {
             deliverableArtifacts,
             compactionMetrics: compactionMetrics.slice(-20),
           });
+          this.updateRunProgress(runId, {
+            currentPhase: 'idle',
+            currentStep: null,
+            currentTool: null,
+            currentStepStartedAt: null,
+          }, {
+            verified: true,
+          });
           continue;
         }
@@ -3081,23 +3569,51 @@ class AgentEngine {
           if (toolName === 'task_complete') {
             const finalMessage = String(toolArgs.message || '').trim();
             const confidence = normalizeCompletionConfidence(toolArgs.confidence || 'medium');
-            const completionDecision = shouldAcceptTaskComplete({
-              confidence,
-              requiredConfidence: analysis?.completion_confidence_required || 'medium',
-              iteration,
-              maxIterations,
-            });
+            const messagingSent = this.getRunMeta(runId)?.messagingSent === true;
+            const completionResult = await runWithModelFallback(
+              'task completion decision',
+              () => this.evaluateTaskCompleteSignal({
+                provider,
+                providerName,
+                model,
+                messages,
+                tools,
+                analysis,
+                plan,
+                toolExecutions,
+                finalMessage,
+                confidence,
+                triggerSource,
+                messagingSent,
+                iteration,
+                maxIterations,
+                options: { ...options, runId, userId, agentId },
+              }),
+            );
+            totalTokens += completionResult.usage || 0;
+            const completionDecision = completionResult.decision || {
+              status: 'continue',
+              reason: 'The completion signal could not be verified.',
+            };
+            const accepted = completionDecision.status !== 'continue';
             this.recordRunEvent(userId, runId, 'task_complete_signaled', {
               confidence,
-              requiredConfidence: analysis?.completion_confidence_required || 'medium',
-              accepted: completionDecision.accept,
+              requiredConfidence: completionResult.requiredConfidence,
+              accepted,
+              judgeStatus: completionDecision.status,
+              judgeReason: completionDecision.reason || '',
               iteration,
               messageLength: finalMessage.length,
             }, { agentId });
             console.info(
-              `[Run ${shortenRunId(runId)}] task_complete signaled at iteration=${iteration} confidence=${confidence} accepted=${completionDecision.accept}`
+              `[Run ${shortenRunId(runId)}] task_complete signaled at iteration=${iteration} confidence=${confidence} judge=${completionDecision.status} accepted=${accepted}`
             );
-            if (!completionDecision.accept) {
+            if (!accepted) {
+              if (iteration >= maxIterations) {
+                throw new Error(
+                  `Completion judge rejected task_complete at the iteration limit after ${maxIterations} iterations.`,
+                );
+              }
               messages.push({
                 role: 'tool',
                 name: toolName,
@@ -3105,13 +3621,14 @@ class AgentEngine {
                 content: JSON.stringify({
                   status: 'continue',
                   reason: completionDecision.reason,
-                  required_confidence: analysis?.completion_confidence_required || 'medium',
+                  required_confidence: completionResult.requiredConfidence,
                 }),
               });
               messages.push({
                 role: 'system',
                 content: `${completionDecision.reason} Do not ask the user to decide the next step unless external input is truly required.`
               });
+              lastContent = '';
               continue;
             }
             if (completionDecision.reason) {
@@ -3183,7 +3700,6 @@ class AgentEngine {
             currentTool: toolName,
             currentStepStartedAt: isoNow(),
           }, {
-            verified: true,
             stepId,
           });
@@ -3610,20 +4126,6 @@ class AgentEngine {
           refreshConversationSummary(conversationId, provider, model, historyWindow).catch((err) => {
             console.error('[AI] Conversation summary refresh failed:', err.message);
           });
-          await this.refreshConversationState({
-            conversationId,
-            runId,
-            provider,
-            providerName,
-            model,
-            finalReply: finalResponseText,
-            analysis,
-            verification,
-            historyWindow,
-            options: { ...options, userId, agentId },
-          }).catch((err) => {
-            console.error('[AI] Conversation working state refresh failed:', err.message);
-          });
         }
       }
@@ -3657,6 +4159,23 @@ class AgentEngine {
         }
       }
+      if (conversationId && options.skipConversationMaintenance !== true) {
+        await this.refreshConversationState({
+          conversationId,
+          runId,
+          provider,
+          providerName,
+          model,
+          finalReply: finalResponseText,
+          analysis,
+          verification,
+          historyWindow,
+          options: { ...options, userId, agentId },
+        }).catch((err) => {
+          console.error('[AI] Conversation working state refresh failed:', err.message);
+        });
+      }
       console.info(
         `[Run ${shortenRunId(runId)}] completed trigger=${triggerSource} steps=${stepIndex} tokens=${totalTokens} durationMs=${runMeta?.startedAt ? Date.now() - runMeta.startedAt : 0} finalResponse=${finalResponseText ? 'yes' : 'no'} sentMessages=${runMeta?.sentMessages?.length || 0}`
       );
@@ -3743,6 +4262,8 @@ class AgentEngine {
         triggerSource === 'messaging'
         && options.source
         && options.chatId
+        && runMeta?.finalDeliverySent !== true
+        && runMeta?.messagingSent !== true
         && err?.disableAutonomousRetry !== true
         && !isRateLimitError
         && retryCount < this.getMessagingRetryLimit(maxIterations)
@@ -3784,6 +4305,10 @@ class AgentEngine {
               ...(Array.isArray(options?.messagingRetryState?.interimHistory) ? options.messagingRetryState.interimHistory : []),
               ...(Array.isArray(runMeta?.interimMessages) ? runMeta.interimMessages : []),
             ]),
+            goalContract: mergeGoalContracts(
+              options?.messagingRetryState?.goalContract || null,
+              runMeta?.goalContract || null,
+            ),
             lastUserVisibleUpdateAt: runMeta?.progressLedger?.lastUserVisibleUpdateAt || options?.messagingRetryState?.lastUserVisibleUpdateAt || null,
             lastFinalDeliveryAt: runMeta?.progressLedger?.lastFinalDeliveryAt || options?.messagingRetryState?.lastFinalDeliveryAt || null,
             heartbeatCount: Number(runMeta?.progressLedger?.heartbeatCount || options?.messagingRetryState?.heartbeatCount || 0),
@@ -3809,7 +4334,7 @@ class AgentEngine {
       let messagingFailureContent = '';
       let sendSucceeded = false;
       if (triggerSource === 'messaging' && options.source && options.chatId) {
-        if (!runMeta?.messagingSent) {
+        if (!runMeta?.finalDeliverySent && !runMeta?.messagingSent) {
           const manager = this.messagingManager;
           if (manager) {
             const failureScenario = buildMessagingFailureScenario({
@@ -3826,10 +4351,14 @@ class AgentEngine {
                   content: `The run encountered a runtime error and cannot continue reliably. Use the actual run scenario below to explain the blocker naturally.\n\nScenario:\n${failureScenario || 'No additional scenario details were captured.'}\n\nDo not call tools. Write exactly one short user message. Do not ask the user to resend or restate the same task. Only ask the user for something if a specific external input, permission, or configuration change is actually required. Do not promise future work unless it will happen automatically before this reply is sent.\n\n${buildPlatformFormattingGuide(options?.source || null)}`
                 }
               ]);
-              const modelReply = await provider.chat(failedMessage, [], {
-                model,
-                reasoningEffort: this.getReasoningEffort(providerName, options)
-              });
+              const modelReply = await withModelCallTimeout(
+                provider.chat(failedMessage, [], {
+                  model,
+                  reasoningEffort: this.getReasoningEffort(providerName, options)
+                }),
+                options,
+                'Messaging failure reply',
+              );
               const drafted = sanitizeModelOutput(modelReply.content || '', { model });
               if (normalizeOutgoingMessage(drafted, options?.source || null)) {
                 messagingFailureContent = drafted.trim();
@@ -3848,7 +4377,14 @@ class AgentEngine {
             }
             try {
-              await manager.sendMessage(userId, options.source, options.chatId, messagingFailureContent, { runId, agentId });
+              const deliveryResult = await manager.sendMessage(
+                userId,
+                options.source,
+                options.chatId,
+                messagingFailureContent,
+                { runId, agentId },
+              );
+              requireSuccessfulMessagingDelivery(deliveryResult, 'Messaging failure delivery');
               sendSucceeded = true;
               if (runMeta) {
                 runMeta.lastSentMessage = messagingFailureContent;