@cereworker/core 26.330.2 → 26.330.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -13,7 +13,10 @@ const TASK_COMPLETE_TOOL = 'task_complete';
13
13
  const TASK_BLOCKED_TOOL = 'task_blocked';
14
14
  const TASK_CHECKPOINT_TOOL = 'task_checkpoint';
15
15
  const INTERNAL_TASK_TOOL_NAMES = new Set([TASK_COMPLETE_TOOL, TASK_BLOCKED_TOOL, TASK_CHECKPOINT_TOOL]);
16
- const COMPLETION_RETRY_PROMPT = '[Cerebellum] Your last turn ended without a final answer. Continue from where you left off and end by calling task_complete or task_blocked before your final answer.';
16
+ const SYSTEM_FALLBACK_COMPLETION_PROMPT = '[System fallback] The last turn ended without a final answer. Continue from the last verified state and end by calling task_complete or task_blocked before your final answer.';
17
+ const SYSTEM_FALLBACK_STALL_PROMPT = '[System fallback] The stalled turn is being retried from the last verified state.';
18
+ const DEBUG_TOOL_OUTPUT_MAX_CHARS = 8_000;
19
+ const DEBUG_TOOL_STRUCTURED_MAX_CHARS = 16_000;
17
20
  const READ_ONLY_TOOL_NAMES = new Set([
18
21
  'browserGetText',
19
22
  'browserGetUrl',
@@ -64,6 +67,7 @@ export class Orchestrator extends TypedEventEmitter {
64
67
  lastStreamActivityAt = 0;
65
68
  streamWatchdog = null;
66
69
  streamNudgeCount = 0;
70
+ streamDeferredUntil = 0;
67
71
  streamStallThreshold = 30_000;
68
72
  maxNudgeRetries = 2;
69
73
  maxCompletionRetries = 2;
@@ -71,6 +75,8 @@ export class Orchestrator extends TypedEventEmitter {
71
75
  activeToolCall = null;
72
76
  currentStreamTurn = null;
73
77
  currentAttemptCompletionState = null;
78
+ currentPartialContent = '';
79
+ pendingRecoveryDecision = null;
74
80
  streamAbortGraceMs = 1_000;
75
81
  taskConversations = new Map();
76
82
  taskRunning = new Set();
@@ -680,6 +686,8 @@ export class Orchestrator extends TypedEventEmitter {
680
686
  resetStreamState() {
681
687
  this.streamPhase = 'idle';
682
688
  this.activeToolCall = null;
689
+ this.streamDeferredUntil = 0;
690
+ this.currentPartialContent = '';
683
691
  }
684
692
  getStreamDiagnostics(elapsedSeconds) {
685
693
  return {
@@ -787,112 +795,184 @@ export class Orchestrator extends TypedEventEmitter {
787
795
  browserState: {},
788
796
  };
789
797
  }
790
- buildStallRetrySnapshot(params) {
798
+ buildRecoveryRequest(params) {
791
799
  const partialContent = this.truncateResumeText(params.partialContent, 600);
792
800
  const continuity = params.completionState.continuity;
793
- if (!partialContent
794
- && continuity.progressLedger.length === 0
795
- && continuity.taskCheckpoints.length === 0
796
- && !params.activeToolName
797
- && !continuity.browserState.currentUrl
798
- && !continuity.browserState.activeTabId) {
799
- return null;
800
- }
801
801
  return {
802
- cause: 'stall',
802
+ conversationId: this.currentStreamTurn?.conversationId ?? '',
803
+ turnId: this.currentStreamTurn?.turnId ?? '',
803
804
  attempt: params.attempt,
804
- phase: params.phase,
805
- activeToolName: params.activeToolName,
806
- activeToolCallId: params.activeToolCallId,
805
+ cause: params.cause,
806
+ phase: this.streamPhase,
807
+ activeToolName: this.activeToolCall?.name,
808
+ activeToolCallId: this.activeToolCall?.id,
809
+ stallRetryCount: this.streamNudgeCount,
810
+ completionRetryCount: params.completionRetryCount ?? 0,
811
+ finishReason: params.finishMeta?.finishReason ?? params.finishMeta?.stepFinishReasons.at(-1),
812
+ elapsedSeconds: params.elapsedSeconds,
807
813
  partialContent: partialContent || undefined,
808
- progressEntries: continuity.progressLedger.slice(-20),
809
- taskCheckpoints: continuity.taskCheckpoints.slice(-8),
814
+ latestUserMessage: params.latestUserMessage ? this.truncateResumeText(params.latestUserMessage, 600) : undefined,
815
+ progressEntries: continuity.progressLedger.slice(-50).map((entry) => ({ ...entry })),
816
+ taskCheckpoints: continuity.taskCheckpoints.map((checkpoint) => ({ ...checkpoint })),
810
817
  browserState: this.cloneBrowserState(continuity.browserState),
811
818
  };
812
819
  }
813
- buildCompletionRetrySnapshot(params) {
814
- const partialContent = this.truncateResumeText(params.partialContent, 600);
815
- const continuity = params.completionState.continuity;
816
- const finishReason = params.finishMeta?.finishReason ?? params.finishMeta?.stepFinishReasons.at(-1);
817
- if (!partialContent
818
- && continuity.progressLedger.length === 0
819
- && continuity.taskCheckpoints.length === 0
820
- && !continuity.browserState.currentUrl
821
- && !continuity.browserState.activeTabId
822
- && !finishReason) {
823
- return null;
820
+ emitRecoveryTrace(cause, source, assessment, level = 'info') {
821
+ if (!this.currentStreamTurn)
822
+ return;
823
+ const payload = {
824
+ type: 'cerebellum:recovery',
825
+ cause,
826
+ action: assessment.action,
827
+ turnId: this.currentStreamTurn.turnId,
828
+ attempt: this.currentStreamTurn.attempt,
829
+ conversationId: this.currentStreamTurn.conversationId,
830
+ message: assessment.operatorMessage,
831
+ operatorMessage: assessment.operatorMessage,
832
+ diagnosis: assessment.diagnosis,
833
+ nextStep: assessment.nextStep,
834
+ completedSteps: assessment.completedSteps,
835
+ waitSeconds: assessment.waitSeconds,
836
+ source,
837
+ ...this.getStreamDiagnostics(),
838
+ };
839
+ switch (level) {
840
+ case 'debug':
841
+ log.debug('cerebellum_recovery', payload);
842
+ break;
843
+ case 'warn':
844
+ log.warn('cerebellum_recovery', payload);
845
+ break;
846
+ case 'error':
847
+ log.error('cerebellum_recovery', payload);
848
+ break;
849
+ default:
850
+ log.info('cerebellum_recovery', payload);
851
+ break;
852
+ }
853
+ this.emit(payload);
854
+ }
855
+ async assessTurnRecovery(request) {
856
+ log.debug('turn_recovery_request', {
857
+ turnId: request.turnId,
858
+ attempt: request.attempt,
859
+ conversationId: request.conversationId,
860
+ cause: request.cause,
861
+ phase: request.phase,
862
+ activeToolName: request.activeToolName,
863
+ activeToolCallId: request.activeToolCallId,
864
+ stallRetryCount: request.stallRetryCount,
865
+ completionRetryCount: request.completionRetryCount,
866
+ finishReason: request.finishReason,
867
+ elapsedSeconds: request.elapsedSeconds,
868
+ hasPartialContent: Boolean(request.partialContent),
869
+ latestUserMessage: request.latestUserMessage ? this.truncateResumeText(request.latestUserMessage, 300) : '',
870
+ browserState: request.browserState,
871
+ progressEntries: request.progressEntries,
872
+ taskCheckpoints: request.taskCheckpoints,
873
+ });
874
+ if (this.cerebellum?.isConnected() && this.cerebellum.assessTurnRecovery) {
875
+ try {
876
+ const assessment = await this.cerebellum.assessTurnRecovery(request);
877
+ if (assessment) {
878
+ if (request.cause === 'completion' && assessment.action === 'wait') {
879
+ return {
880
+ source: 'cerebellum',
881
+ assessment: {
882
+ ...assessment,
883
+ action: 'retry',
884
+ waitSeconds: undefined,
885
+ },
886
+ };
887
+ }
888
+ return { source: 'cerebellum', assessment };
889
+ }
890
+ }
891
+ catch (error) {
892
+ log.warn('Turn recovery assessment failed', {
893
+ turnId: request.turnId,
894
+ attempt: request.attempt,
895
+ conversationId: request.conversationId,
896
+ cause: request.cause,
897
+ error: error instanceof Error ? error.message : String(error),
898
+ });
899
+ }
824
900
  }
825
901
  return {
826
- cause: 'completion',
827
- attempt: params.attempt,
828
- finishReason,
829
- partialContent: partialContent || undefined,
830
- progressEntries: continuity.progressLedger.slice(-20),
831
- taskCheckpoints: continuity.taskCheckpoints.slice(-8),
832
- browserState: this.cloneBrowserState(continuity.browserState),
902
+ source: 'fallback',
903
+ assessment: this.buildFallbackRecoveryAssessment(request),
833
904
  };
834
905
  }
835
- buildRetryContextMessage(snapshot) {
836
- if (!snapshot)
837
- return null;
838
- const isStall = snapshot.cause === 'stall';
839
- const header = isStall ? '[Watchdog resume context]' : '[Completion resume context]';
840
- const lines = [
841
- header,
842
- isStall
843
- ? `The previous attempt (${snapshot.attempt}) was interrupted after stalling while ${this.describeStreamLocation(snapshot.phase, snapshot.activeToolName)}.`
844
- : `The previous attempt (${snapshot.attempt}) ended without a valid completion${snapshot.finishReason ? ` (finish reason: ${snapshot.finishReason})` : ''}.`,
845
- 'IMPORTANT: The tool call history from the failed attempt has been removed from this conversation. The ledger below is the authoritative record of what was already verified.',
846
- 'Do NOT repeat completed steps unless the current page state clearly contradicts this ledger.',
847
- 'Continue from the NEXT incomplete step, then either finish the task or report a concrete blocker.',
848
- ];
849
- if (snapshot.browserState.currentUrl || snapshot.browserState.activeTabId || snapshot.browserState.tabs?.length) {
850
- lines.push('', 'Last known browser state:');
851
- if (snapshot.browserState.currentUrl) {
852
- lines.push(`- Current URL: ${snapshot.browserState.currentUrl}`);
853
- }
854
- if (snapshot.browserState.activeTabId) {
855
- lines.push(`- Active tab: ${snapshot.browserState.activeTabId}`);
856
- }
857
- if (snapshot.browserState.tabs?.length) {
858
- const visibleTabs = snapshot.browserState.tabs.slice(0, 6);
859
- for (const tab of visibleTabs) {
860
- lines.push(`- Tab ${tab.id}${tab.active ? ' [active]' : ''}: ${tab.url}${tab.title ? ` (${tab.title})` : ''}`);
861
- }
862
- if (snapshot.browserState.tabs.length > visibleTabs.length) {
863
- lines.push(`- ... ${snapshot.browserState.tabs.length - visibleTabs.length} more tab(s)`);
864
- }
906
+ deriveCompletedSteps(request) {
907
+ const completed = new Set();
908
+ for (const checkpoint of request.taskCheckpoints) {
909
+ if (checkpoint.status === 'done') {
910
+ completed.add(checkpoint.summary);
865
911
  }
866
912
  }
867
- if (snapshot.taskCheckpoints.length > 0) {
868
- lines.push('', 'Recorded task checkpoints:');
869
- for (const checkpoint of snapshot.taskCheckpoints) {
870
- lines.push(`- ${checkpoint.summary}`);
913
+ for (const entry of request.progressEntries) {
914
+ if (entry.source === 'tool' && entry.stateChanging && !entry.isError) {
915
+ completed.add(entry.summary);
871
916
  }
872
917
  }
873
- const toolEntries = snapshot.progressEntries.filter((entry) => entry.source === 'tool');
874
- if (toolEntries.length > 0) {
875
- lines.push('', 'Confirmed actions from the previous attempt:');
876
- for (const entry of toolEntries) {
877
- const prefix = entry.isError ? '[error]' : entry.stateChanging ? '[done]' : '[seen]';
878
- lines.push(`- ${prefix} ${entry.summary}`);
879
- }
918
+ return Array.from(completed).slice(-10);
919
+ }
920
+ buildFallbackRecoveryAssessment(request, options) {
921
+ const completedSteps = this.deriveCompletedSteps(request);
922
+ const browserHints = [];
923
+ if (request.browserState.currentUrl)
924
+ browserHints.push(`Current URL: ${request.browserState.currentUrl}`);
925
+ if (request.browserState.activeTabId)
926
+ browserHints.push(`Active tab: ${request.browserState.activeTabId}`);
927
+ const diagnosis = options?.reason
928
+ ?? (request.cause === 'stall'
929
+ ? `Recovery guidance is unavailable while the stream is stalled in ${this.describeStreamLocation(request.phase, request.activeToolName)}.`
930
+ : `Recovery guidance is unavailable after the turn ended with ${request.finishReason ?? 'no final answer'}.`);
931
+ const nextStep = request.cause === 'stall'
932
+ ? 'Resume from the last verified browser state and continue with the next unfinished step.'
933
+ : 'Use the verified progress below to continue from the next unfinished step and avoid repeating confirmed work.';
934
+ const lines = [
935
+ '[System fallback recovery]',
936
+ diagnosis,
937
+ 'The failed attempt tool history has been removed; rely on this verified summary instead.',
938
+ ];
939
+ if (completedSteps.length > 0) {
940
+ lines.push('', 'Completed steps:');
941
+ for (const step of completedSteps)
942
+ lines.push(`- ${step}`);
880
943
  }
881
- if (snapshot.cause === 'stall' && snapshot.activeToolName) {
882
- lines.push('', `The attempt was last waiting on: ${snapshot.activeToolName}${snapshot.activeToolCallId ? ` (${snapshot.activeToolCallId})` : ''}.`);
944
+ if (browserHints.length > 0) {
945
+ lines.push('', 'Last known browser state:');
946
+ for (const hint of browserHints)
947
+ lines.push(`- ${hint}`);
883
948
  }
884
- if (snapshot.partialContent) {
885
- lines.push('', 'Partial assistant text emitted before the attempt ended:', snapshot.partialContent);
949
+ if (request.partialContent) {
950
+ lines.push('', 'Partial assistant text from the failed attempt:', request.partialContent);
886
951
  }
887
- lines.push('', 'End your final answer by calling task_complete or task_blocked.');
952
+ lines.push('', `Next step: ${nextStep}`);
953
+ lines.push('Only repeat a completed action if the current page state clearly contradicts this summary.');
954
+ lines.push('End your final answer by calling task_complete or task_blocked.');
955
+ return {
956
+ action: options?.action ?? 'retry',
957
+ operatorMessage: request.cause === 'stall'
958
+ ? SYSTEM_FALLBACK_STALL_PROMPT
959
+ : SYSTEM_FALLBACK_COMPLETION_PROMPT,
960
+ modelMessage: lines.join('\n'),
961
+ diagnosis,
962
+ nextStep,
963
+ completedSteps,
964
+ };
965
+ }
966
+ buildRetryContextMessage(cause, attempt, modelMessage, source) {
888
967
  return {
889
- id: `system:${snapshot.cause}-retry:${snapshot.attempt}`,
968
+ id: `system:${cause}-retry:${attempt}`,
890
969
  role: 'system',
891
- content: lines.join('\n'),
970
+ content: modelMessage,
892
971
  timestamp: 0,
893
972
  metadata: {
894
973
  transient: true,
895
- source: snapshot.cause === 'stall' ? 'watchdog-resume' : 'completion-resume',
974
+ source: cause === 'stall' ? 'watchdog-resume' : 'completion-resume',
975
+ recoverySource: source,
896
976
  },
897
977
  };
898
978
  }
@@ -907,6 +987,53 @@ export class Orchestrator extends TypedEventEmitter {
907
987
  return normalized;
908
988
  return `${normalized.slice(0, Math.max(0, maxChars - 1)).trimEnd()}…`;
909
989
  }
990
+ serializeDebugValue(value, maxChars) {
991
+ const raw = typeof value === 'string'
992
+ ? value
993
+ : JSON.stringify(value, null, 2) ?? String(value);
994
+ if (raw.length <= maxChars) {
995
+ return { value: raw, truncated: false };
996
+ }
997
+ return {
998
+ value: `${raw.slice(0, Math.max(0, maxChars - 1)).trimEnd()}…`,
999
+ truncated: true,
1000
+ };
1001
+ }
1002
+ buildToolDebugPayload(toolCall, result, toolName) {
1003
+ const argsPreview = this.serializeDebugValue(toolCall.args, DEBUG_TOOL_STRUCTURED_MAX_CHARS);
1004
+ if (!result) {
1005
+ return {
1006
+ requestedToolName: toolCall.name,
1007
+ toolName: toolName ?? (toolCall.name.trim() || toolCall.name),
1008
+ toolCallId: toolCall.id,
1009
+ toolArgs: argsPreview.value,
1010
+ debugPayloadTruncated: argsPreview.truncated,
1011
+ };
1012
+ }
1013
+ const outputPreview = this.serializeDebugValue(result.output, DEBUG_TOOL_OUTPUT_MAX_CHARS);
1014
+ const detailsPreview = result.details
1015
+ ? this.serializeDebugValue(result.details, DEBUG_TOOL_STRUCTURED_MAX_CHARS)
1016
+ : null;
1017
+ const resumeMetadata = result.metadata && typeof result.metadata === 'object'
1018
+ ? (result.metadata.resume ?? null)
1019
+ : null;
1020
+ const resumePreview = resumeMetadata
1021
+ ? this.serializeDebugValue(resumeMetadata, DEBUG_TOOL_STRUCTURED_MAX_CHARS)
1022
+ : null;
1023
+ return {
1024
+ requestedToolName: toolCall.name,
1025
+ toolName: toolName ?? (toolCall.name.trim() || toolCall.name),
1026
+ toolCallId: toolCall.id,
1027
+ toolArgs: argsPreview.value,
1028
+ toolOutput: outputPreview.value,
1029
+ toolDetails: detailsPreview?.value ?? null,
1030
+ toolResume: resumePreview?.value ?? null,
1031
+ isError: result.isError,
1032
+ warnings: result.warnings ?? [],
1033
+ truncated: result.truncated ?? false,
1034
+ debugPayloadTruncated: argsPreview.truncated || outputPreview.truncated || Boolean(detailsPreview?.truncated) || Boolean(resumePreview?.truncated),
1035
+ };
1036
+ }
910
1037
  recordCheckpoint(continuity, step, status, evidence) {
911
1038
  const checkpoint = {
912
1039
  step,
@@ -979,7 +1106,7 @@ export class Orchestrator extends TypedEventEmitter {
979
1106
  return;
980
1107
  }
981
1108
  continuity.progressLedger.push(entry);
982
- while (continuity.progressLedger.length > 20) {
1109
+ while (continuity.progressLedger.length > 50) {
983
1110
  const removableIndex = continuity.progressLedger.findIndex((candidate) => candidate.source === 'tool' && !candidate.stateChanging);
984
1111
  continuity.progressLedger.splice(removableIndex >= 0 ? removableIndex : 0, 1);
985
1112
  }
@@ -1098,10 +1225,15 @@ export class Orchestrator extends TypedEventEmitter {
1098
1225
  streamPromise.then(settleResolve, settleReject);
1099
1226
  });
1100
1227
  }
1101
- startStreamWatchdog() {
1228
+ startStreamWatchdog(latestUserMessage) {
1102
1229
  this.stopStreamWatchdog();
1103
1230
  this.markStreamWaitingModel();
1231
+ this.streamDeferredUntil = 0;
1104
1232
  this.streamWatchdog = setInterval(() => {
1233
+ if (!this.currentAttemptCompletionState || !this.currentStreamTurn)
1234
+ return;
1235
+ if (this.streamDeferredUntil > Date.now())
1236
+ return;
1105
1237
  const elapsed = Date.now() - this.lastStreamActivityAt;
1106
1238
  const stallThresholdMs = this.getCurrentStallThresholdMs();
1107
1239
  if (elapsed < stallThresholdMs)
@@ -1115,31 +1247,60 @@ export class Orchestrator extends TypedEventEmitter {
1115
1247
  this.emitWatchdog('stalled', `Stalled after ${elapsedSeconds}s while ${this.describeStreamLocation()}.`, { level: 'warn', elapsedSeconds });
1116
1248
  this.emit({ type: 'cerebrum:stall', ...diagnostics });
1117
1249
  if (!this.cerebellum?.isConnected()) {
1118
- // Cerebellum dropped mid-stream — abort the current turn
1119
1250
  this.emitWatchdog('abort_issued', 'Cerebellum disconnected during an active stream; aborting the turn.', { level: 'warn', elapsedSeconds });
1120
1251
  this.abortController?.abort();
1121
1252
  return;
1122
1253
  }
1123
1254
  this._nudgeInFlight = true;
1124
- const doNudge = () => {
1125
- this.streamNudgeCount++;
1126
- this.emitWatchdog('nudge_requested', `Cerebellum requested nudge ${this.streamNudgeCount}/${this.maxNudgeRetries} after ${elapsedSeconds}s while ${this.describeStreamLocation()}.`, { level: 'info', elapsedSeconds });
1127
- this.emit({ type: 'cerebrum:stall:nudge', attempt: this.streamNudgeCount, ...diagnostics });
1128
- this.emitWatchdog('abort_issued', `Aborting stalled stream attempt ${this.currentStreamTurn?.attempt ?? 0}.`, { level: 'warn', elapsedSeconds });
1129
- this.abortController?.abort();
1130
- };
1131
1255
  void (async () => {
1132
1256
  try {
1133
- const result = await this.cerebellum.verifyToolResult('stream_watchdog', { action: 'check_stall', elapsed: String(elapsedSeconds) }, `Stream silent for ${elapsedSeconds}s — no chunks or tool calls received`, false);
1134
- // Cerebellum decides: passed=false → nudge. passed=true → wait.
1135
- // null (disconnected mid-call) → nudge as safety fallback.
1136
- if (!result || !result.passed) {
1137
- doNudge();
1257
+ const request = this.buildRecoveryRequest({
1258
+ cause: 'stall',
1259
+ attempt: this.currentStreamTurn.attempt,
1260
+ partialContent: this.currentPartialContent,
1261
+ completionState: this.currentAttemptCompletionState,
1262
+ latestUserMessage,
1263
+ elapsedSeconds,
1264
+ });
1265
+ const { source, assessment } = await this.assessTurnRecovery(request);
1266
+ this.emitRecoveryTrace('stall', source, assessment, assessment.action === 'stop' ? 'warn' : 'info');
1267
+ if (assessment.action === 'wait') {
1268
+ const waitSeconds = Math.max(15, assessment.waitSeconds ?? this.streamStallThreshold / 1000);
1269
+ this.streamDeferredUntil = Date.now() + (waitSeconds * 1000);
1270
+ return;
1271
+ }
1272
+ if (assessment.action === 'retry') {
1273
+ this.streamNudgeCount++;
1274
+ this.pendingRecoveryDecision = { cause: 'stall', source, assessment };
1275
+ this.emitWatchdog('nudge_requested', `Cerebellum requested nudge ${this.streamNudgeCount}/${this.maxNudgeRetries} after ${elapsedSeconds}s while ${this.describeStreamLocation()}.`, { level: 'info', elapsedSeconds });
1276
+ this.emit({ type: 'cerebrum:stall:nudge', attempt: this.streamNudgeCount, ...diagnostics });
1277
+ this.emitWatchdog('abort_issued', `Aborting stalled stream attempt ${this.currentStreamTurn?.attempt ?? 0}.`, { level: 'warn', elapsedSeconds });
1278
+ this.abortController?.abort();
1279
+ return;
1138
1280
  }
1281
+ this.pendingRecoveryDecision = { cause: 'stall', source, assessment };
1282
+ this.emitWatchdog('abort_issued', 'Aborting stalled stream because recovery guidance requested stop.', { level: 'warn', elapsedSeconds });
1283
+ this.abortController?.abort();
1139
1284
  }
1140
1285
  catch {
1141
- // gRPC error (including deadline exceeded) → nudge
1142
- doNudge();
1286
+ const request = this.buildRecoveryRequest({
1287
+ cause: 'stall',
1288
+ attempt: this.currentStreamTurn.attempt,
1289
+ partialContent: this.currentPartialContent,
1290
+ completionState: this.currentAttemptCompletionState,
1291
+ latestUserMessage,
1292
+ elapsedSeconds,
1293
+ });
1294
+ const assessment = this.buildFallbackRecoveryAssessment(request, {
1295
+ reason: `Recovery assessment failed after ${elapsedSeconds}s while ${this.describeStreamLocation()}.`,
1296
+ });
1297
+ this.pendingRecoveryDecision = { cause: 'stall', source: 'fallback', assessment };
1298
+ this.emitRecoveryTrace('stall', 'fallback', assessment, 'warn');
1299
+ this.streamNudgeCount++;
1300
+ this.emitWatchdog('nudge_requested', `Fallback retry ${this.streamNudgeCount}/${this.maxNudgeRetries} after ${elapsedSeconds}s while ${this.describeStreamLocation()}.`, { level: 'info', elapsedSeconds });
1301
+ this.emit({ type: 'cerebrum:stall:nudge', attempt: this.streamNudgeCount, ...diagnostics });
1302
+ this.emitWatchdog('abort_issued', `Aborting stalled stream attempt ${this.currentStreamTurn?.attempt ?? 0}.`, { level: 'warn', elapsedSeconds });
1303
+ this.abortController?.abort();
1143
1304
  }
1144
1305
  finally {
1145
1306
  this._nudgeInFlight = false;
@@ -1168,6 +1329,9 @@ export class Orchestrator extends TypedEventEmitter {
1168
1329
  const userMessage = this.conversations.appendMessage(convId, 'user', content);
1169
1330
  this.emit({ type: 'message:user', message: userMessage });
1170
1331
  }
1332
+ const latestUserMessage = content
1333
+ || [...this.conversations.getMessages(convId)].reverse().find((message) => message.role === 'user')?.content
1334
+ || '';
1171
1335
  this.streamNudgeCount = 0;
1172
1336
  let completionRetryCount = 0;
1173
1337
  let nextRetryContext = null;
@@ -1196,6 +1360,8 @@ export class Orchestrator extends TypedEventEmitter {
1196
1360
  attempt: attemptNumber,
1197
1361
  conversationId: convId,
1198
1362
  };
1363
+ this.currentPartialContent = '';
1364
+ this.pendingRecoveryDecision = null;
1199
1365
  log.info('stream_started', {
1200
1366
  turnId,
1201
1367
  attempt: attemptNumber,
@@ -1205,7 +1371,7 @@ export class Orchestrator extends TypedEventEmitter {
1205
1371
  retryCause,
1206
1372
  });
1207
1373
  this.emit({ type: 'message:cerebrum:start', conversationId: convId });
1208
- this.startStreamWatchdog();
1374
+ this.startStreamWatchdog(latestUserMessage);
1209
1375
  let messages = this.conversations.getMessages(convId);
1210
1376
  // On retry: exclude failed attempts' messages from history.
1211
1377
  // The resume context already summarizes what happened — sending the raw tool calls
@@ -1274,6 +1440,8 @@ export class Orchestrator extends TypedEventEmitter {
1274
1440
  ];
1275
1441
  const toolDefs = Object.fromEntries(allTools);
1276
1442
  let fullContent = '';
1443
+ let finalDisplayContent = '';
1444
+ let attemptFinishMeta;
1277
1445
  const throwIfToolAttemptAborted = () => {
1278
1446
  if (!isCurrentAttempt()) {
1279
1447
  throw createAbortError('Tool execution aborted');
@@ -1286,15 +1454,13 @@ export class Orchestrator extends TypedEventEmitter {
1286
1454
  if (!isCurrentAttempt() || abortController.signal.aborted)
1287
1455
  return;
1288
1456
  fullContent += chunk;
1457
+ this.currentPartialContent = fullContent;
1289
1458
  this.markStreamWaitingModel();
1290
1459
  this.emit({ type: 'message:cerebrum:chunk', chunk });
1291
1460
  },
1292
1461
  onToolCall: async (toolCall) => {
1293
1462
  throwIfToolAttemptAborted();
1294
- this.logStreamDebug('tool_callback_started', {
1295
- toolName: toolCall.name.trim() || toolCall.name,
1296
- toolCallId: toolCall.id,
1297
- });
1463
+ this.logStreamDebug('tool_callback_started', this.buildToolDebugPayload(toolCall));
1298
1464
  this.markStreamWaitingTool(toolCall);
1299
1465
  const requestedToolName = toolCall.name;
1300
1466
  const normalizedToolName = requestedToolName.trim() || requestedToolName;
@@ -1305,7 +1471,13 @@ export class Orchestrator extends TypedEventEmitter {
1305
1471
  else {
1306
1472
  completionState.externalToolCallCount++;
1307
1473
  this.emit({ type: 'message:cerebrum:toolcall', toolCall: { ...toolCall, name: normalizedToolName } });
1308
- this.emit({ type: 'tool:start', callId: toolCall.id, name: normalizedToolName });
1474
+ this.emit({
1475
+ type: 'tool:start',
1476
+ callId: toolCall.id,
1477
+ name: normalizedToolName,
1478
+ requestedName: requestedToolName !== normalizedToolName ? requestedToolName : undefined,
1479
+ args: toolCall.args,
1480
+ });
1309
1481
  }
1310
1482
  const { toolName, result } = await this.toolRuntime.execute({
1311
1483
  toolCall,
@@ -1315,15 +1487,18 @@ export class Orchestrator extends TypedEventEmitter {
1315
1487
  scopeKey: convId,
1316
1488
  abortSignal: abortController.signal,
1317
1489
  });
1318
- this.logStreamDebug('tool_callback_finished', {
1319
- toolName,
1320
- toolCallId: toolCall.id,
1321
- isError: result.isError,
1322
- });
1490
+ this.logStreamDebug('tool_callback_finished', this.buildToolDebugPayload(toolCall, result, toolName));
1323
1491
  throwIfAborted(abortController.signal, 'Tool execution aborted');
1324
1492
  this.markStreamWaitingModel();
1325
1493
  if (!isInternalTaskSignal) {
1326
- this.emit({ type: 'tool:end', result });
1494
+ this.emit({
1495
+ type: 'tool:end',
1496
+ callId: toolCall.id,
1497
+ name: toolName,
1498
+ requestedName: requestedToolName !== toolName ? requestedToolName : undefined,
1499
+ args: toolCall.args,
1500
+ result,
1501
+ });
1327
1502
  }
1328
1503
  if (!isInternalTaskSignal && !result.isError) {
1329
1504
  completionState.successfulExternalToolCount++;
@@ -1382,6 +1557,8 @@ export class Orchestrator extends TypedEventEmitter {
1382
1557
  return;
1383
1558
  this.stopStreamWatchdog();
1384
1559
  let displayContent = content;
1560
+ finalDisplayContent = content;
1561
+ attemptFinishMeta = finishMeta;
1385
1562
  const visibleToolCalls = toolCalls?.filter((toolCall) => !this.isInternalTaskSignalTool(toolCall.name));
1386
1563
  log.info('stream_finish_observed', {
1387
1564
  turnId,
@@ -1402,6 +1579,7 @@ export class Orchestrator extends TypedEventEmitter {
1402
1579
  displayContent = content
1403
1580
  .replace(/<discovery_complete>[\s\S]*?<\/discovery_complete>/g, '')
1404
1581
  .trim();
1582
+ finalDisplayContent = displayContent;
1405
1583
  if (parsed && this.onDiscoveryComplete) {
1406
1584
  this.discoveryMode = false;
1407
1585
  this.onDiscoveryComplete(parsed);
@@ -1411,27 +1589,7 @@ export class Orchestrator extends TypedEventEmitter {
1411
1589
  const guardFailure = this.evaluateCompletionGuard(displayContent, finishMeta, completionState);
1412
1590
  if (guardFailure) {
1413
1591
  completionGuardFailure = guardFailure;
1414
- nextRetryContext = this.buildRetryContextMessage(this.buildCompletionRetrySnapshot({
1415
- attempt: attemptNumber,
1416
- partialContent: fullContent || displayContent,
1417
- completionState,
1418
- finishMeta,
1419
- }));
1420
- if (nextRetryContext) {
1421
- log.info('completion_retry_context_prepared', {
1422
- turnId,
1423
- attempt: attemptNumber,
1424
- conversationId: convId,
1425
- finishReason: finishMeta?.finishReason,
1426
- rawFinishReason: finishMeta?.rawFinishReason,
1427
- hasPartialContent: (fullContent || displayContent).trim().length > 0,
1428
- progressEntries: completionState.continuity.progressLedger.length,
1429
- taskCheckpoints: completionState.continuity.taskCheckpoints.length,
1430
- hasBrowserState: Boolean(completionState.continuity.browserState.currentUrl
1431
- || completionState.continuity.browserState.activeTabId
1432
- || completionState.continuity.browserState.tabs?.length),
1433
- });
1434
- }
1592
+ finalDisplayContent = displayContent;
1435
1593
  this.emitCompletionTrace('guard_triggered', guardFailure.message, guardFailure.signal, 'warn');
1436
1594
  log.warn('completion_guard_triggered', {
1437
1595
  turnId,
@@ -1487,9 +1645,50 @@ export class Orchestrator extends TypedEventEmitter {
1487
1645
  const completionFailure = completionGuardFailure;
1488
1646
  if (completionFailure !== null) {
1489
1647
  const completionSignal = completionFailure.signal;
1648
+ const recoveryRequest = this.buildRecoveryRequest({
1649
+ cause: 'completion',
1650
+ attempt: attemptNumber,
1651
+ partialContent: fullContent || finalDisplayContent,
1652
+ completionState,
1653
+ latestUserMessage,
1654
+ completionRetryCount,
1655
+ finishMeta: attemptFinishMeta,
1656
+ });
1657
+ const { source, assessment } = await this.assessTurnRecovery(recoveryRequest);
1658
+ this.emitRecoveryTrace('completion', source, assessment, assessment.action === 'stop' ? 'warn' : 'info');
1659
+ nextRetryContext = this.buildRetryContextMessage('completion', attemptNumber, assessment.modelMessage, source);
1660
+ log.info('completion_retry_context_prepared', {
1661
+ turnId,
1662
+ attempt: attemptNumber,
1663
+ conversationId: convId,
1664
+ source,
1665
+ action: assessment.action,
1666
+ finishReason: attemptFinishMeta?.finishReason,
1667
+ rawFinishReason: attemptFinishMeta?.rawFinishReason,
1668
+ hasPartialContent: (fullContent || finalDisplayContent).trim().length > 0,
1669
+ progressEntries: completionState.continuity.progressLedger.length,
1670
+ taskCheckpoints: completionState.continuity.taskCheckpoints.length,
1671
+ completedSteps: assessment.completedSteps,
1672
+ nextStep: assessment.nextStep,
1673
+ });
1674
+ if (assessment.action === 'stop') {
1675
+ failedAttemptMessageIds.push(...attemptMessageIds);
1676
+ const diagnosticMessage = this.conversations.appendMessage(convId, 'system', assessment.operatorMessage);
1677
+ this.emit({ type: 'message:system', message: diagnosticMessage });
1678
+ this.emitCompletionTrace('retry_failed', assessment.diagnosis, completionSignal, 'error');
1679
+ this.emit({
1680
+ type: 'error',
1681
+ error: new Error(assessment.diagnosis || 'Turn ended without a valid completion signal or final answer.'),
1682
+ });
1683
+ if (failedAttemptMessageIds.length > 0) {
1684
+ this.conversations.deleteMessages(convId, failedAttemptMessageIds);
1685
+ }
1686
+ loopTerminated = true;
1687
+ break;
1688
+ }
1490
1689
  if (completionRetryCount < this.maxCompletionRetries) {
1491
1690
  completionRetryCount++;
1492
- const systemMessage = this.conversations.appendMessage(convId, 'system', COMPLETION_RETRY_PROMPT);
1691
+ const systemMessage = this.conversations.appendMessage(convId, 'system', assessment.operatorMessage);
1493
1692
  attemptMessageIds.push(systemMessage.id);
1494
1693
  failedAttemptMessageIds.push(...attemptMessageIds);
1495
1694
  this.emit({ type: 'message:system', message: systemMessage });
@@ -1498,12 +1697,14 @@ export class Orchestrator extends TypedEventEmitter {
1498
1697
  continue;
1499
1698
  }
1500
1699
  failedAttemptMessageIds.push(...attemptMessageIds);
1501
- const diagnosticMessage = this.conversations.appendMessage(convId, 'system', '[Cerebellum] The turn ended repeatedly without a valid completion signal or final answer.');
1700
+ const diagnosticMessage = this.conversations.appendMessage(convId, 'system', source === 'cerebellum'
1701
+ ? '[Cerebellum] The turn ended repeatedly without a valid completion signal or final answer.'
1702
+ : '[System fallback] The turn ended repeatedly without a valid completion signal or final answer.');
1502
1703
  this.emit({ type: 'message:system', message: diagnosticMessage });
1503
- this.emitCompletionTrace('retry_failed', `Completion retries exhausted after ${completionRetryCount}/${this.maxCompletionRetries}: ${completionFailure.message}`, completionSignal, 'error');
1704
+ this.emitCompletionTrace('retry_failed', `Completion retries exhausted after ${completionRetryCount}/${this.maxCompletionRetries}: ${assessment.diagnosis || completionFailure.message}`, completionSignal, 'error');
1504
1705
  this.emit({
1505
1706
  type: 'error',
1506
- error: new Error('Turn ended without a valid completion signal or final answer.'),
1707
+ error: new Error(assessment.diagnosis || 'Turn ended without a valid completion signal or final answer.'),
1507
1708
  });
1508
1709
  // Clean up all failed attempt messages on exhaustion
1509
1710
  if (failedAttemptMessageIds.length > 0) {
@@ -1519,21 +1720,17 @@ export class Orchestrator extends TypedEventEmitter {
1519
1720
  const failureState = this.getStreamState();
1520
1721
  this.stopStreamWatchdog();
1521
1722
  failedAttemptMessageIds.push(...attemptMessageIds);
1522
- // Check if this was a nudge-abort (not emergency stop, not a real error)
1523
- const isNudgeAbort = abortController.signal.aborted
1723
+ const recoveryDecision = this.pendingRecoveryDecision;
1724
+ this.pendingRecoveryDecision = null;
1725
+ const stallRecovery = recoveryDecision;
1726
+ const isRecoveryRetryAbort = abortController.signal.aborted
1727
+ && stallRecovery !== null
1728
+ && stallRecovery.assessment.action === 'retry'
1524
1729
  && this.streamNudgeCount > stallRetryCountAtStart
1525
1730
  && this.streamNudgeCount <= this.maxNudgeRetries;
1526
- if (isNudgeAbort) {
1527
- nextRetryContext = this.buildRetryContextMessage(this.buildStallRetrySnapshot({
1528
- attempt: attemptNumber,
1529
- phase: failureState.phase,
1530
- activeToolName: failureState.activeToolName,
1531
- activeToolCallId: failureState.activeToolCallId,
1532
- partialContent: fullContent,
1533
- completionState,
1534
- }));
1535
- // Inject nudge message and retry via the loop
1536
- const systemMessage = this.conversations.appendMessage(convId, 'system', '[Cerebellum] You stopped mid-response. Continue from where you left off.');
1731
+ if (isRecoveryRetryAbort && stallRecovery) {
1732
+ nextRetryContext = this.buildRetryContextMessage('stall', attemptNumber, stallRecovery.assessment.modelMessage, stallRecovery.source);
1733
+ const systemMessage = this.conversations.appendMessage(convId, 'system', stallRecovery.assessment.operatorMessage);
1537
1734
  attemptMessageIds.push(systemMessage.id);
1538
1735
  failedAttemptMessageIds.push(...attemptMessageIds);
1539
1736
  this.emit({ type: 'message:system', message: systemMessage });
@@ -1541,6 +1738,18 @@ export class Orchestrator extends TypedEventEmitter {
1541
1738
  nextRetryCause = 'stall';
1542
1739
  continue; // retry loop
1543
1740
  }
1741
+ if (abortController.signal.aborted
1742
+ && stallRecovery !== null
1743
+ && stallRecovery.assessment.action === 'stop') {
1744
+ const systemMessage = this.conversations.appendMessage(convId, 'system', stallRecovery.assessment.operatorMessage);
1745
+ this.emit({ type: 'message:system', message: systemMessage });
1746
+ this.emit({ type: 'error', error: new Error(stallRecovery.assessment.diagnosis) });
1747
+ if (failedAttemptMessageIds.length > 0) {
1748
+ this.conversations.deleteMessages(convId, failedAttemptMessageIds);
1749
+ }
1750
+ loopTerminated = true;
1751
+ break;
1752
+ }
1544
1753
  // Check if Cerebellum dropped mid-stream
1545
1754
  if (this.cerebellum && !this.cerebellum.isConnected() && abortController.signal.aborted) {
1546
1755
  const err = new Error('Cerebellum disconnected during active response. Restart it with: docker compose up -d cerebellum');