@pellux/goodvibes-tui 0.22.0 → 0.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79) hide show
  1. package/CHANGELOG.md +47 -0
  2. package/README.md +17 -8
  3. package/package.json +1 -1
  4. package/src/cli/management-commands.ts +1 -1
  5. package/src/cli/management-utils.ts +352 -0
  6. package/src/cli/management.ts +116 -344
  7. package/src/cli/surface-command.ts +1 -1
  8. package/src/core/context-auto-compact.ts +43 -10
  9. package/src/core/conversation-rendering.ts +5 -2
  10. package/src/core/conversation-types.ts +24 -0
  11. package/src/core/conversation.ts +7 -12
  12. package/src/core/long-task-notifier.ts +145 -0
  13. package/src/core/session-recovery.ts +147 -0
  14. package/src/core/stream-event-wiring.ts +199 -7
  15. package/src/core/transcript-journal.ts +339 -0
  16. package/src/core/turn-event-wiring.ts +67 -4
  17. package/src/input/commands/channel-runtime.ts +139 -0
  18. package/src/input/commands/control-room-runtime.ts +0 -2
  19. package/src/input/commands/diff-runtime.ts +1 -1
  20. package/src/input/commands/eval.ts +1 -1
  21. package/src/input/commands/health-runtime.ts +23 -4
  22. package/src/input/commands/knowledge.ts +1 -1
  23. package/src/input/commands/local-runtime.ts +1 -2
  24. package/src/input/commands/memory-product-runtime.ts +2 -2
  25. package/src/input/commands/memory.ts +1 -1
  26. package/src/input/commands/onboarding-runtime.ts +0 -1
  27. package/src/input/commands/policy.ts +1 -1
  28. package/src/input/commands/profile-sync-runtime.ts +4 -3
  29. package/src/input/commands/provider.ts +1 -1
  30. package/src/input/commands/qrcode-runtime.ts +0 -1
  31. package/src/input/commands/runtime-services.ts +30 -1
  32. package/src/input/commands/session-content.ts +2 -2
  33. package/src/input/commands/session-workflow.ts +32 -2
  34. package/src/input/commands/session.ts +1 -1
  35. package/src/input/commands/settings-sync-runtime.ts +9 -9
  36. package/src/input/commands/share-runtime.ts +1 -1
  37. package/src/input/commands/shell-core.ts +56 -6
  38. package/src/input/commands/work-plan-runtime.ts +8 -8
  39. package/src/input/commands.ts +2 -0
  40. package/src/input/feed-context-factory.ts +6 -0
  41. package/src/input/handler-feed-routes.ts +19 -1
  42. package/src/input/handler-feed.ts +11 -0
  43. package/src/input/handler-prompt-buffer.ts +28 -0
  44. package/src/input/handler-shortcuts.ts +88 -2
  45. package/src/input/handler-ui-state.ts +2 -2
  46. package/src/input/handler.ts +39 -3
  47. package/src/input/keybindings.ts +33 -3
  48. package/src/input/kill-ring.ts +134 -0
  49. package/src/input/model-picker.ts +18 -1
  50. package/src/input/search.ts +18 -6
  51. package/src/input/settings-modal-activation.ts +134 -0
  52. package/src/input/settings-modal-adjustment.ts +124 -0
  53. package/src/input/settings-modal-data.ts +53 -0
  54. package/src/input/settings-modal.ts +48 -145
  55. package/src/main.ts +50 -50
  56. package/src/panels/base-panel.ts +2 -1
  57. package/src/panels/provider-health-domains.ts +3 -3
  58. package/src/panels/provider-health-panel.ts +13 -9
  59. package/src/panels/provider-health-tracker.ts +7 -4
  60. package/src/panels/settings-sync-panel.ts +3 -3
  61. package/src/panels/work-plan-panel.ts +2 -2
  62. package/src/renderer/compaction-history-modal.ts +55 -0
  63. package/src/renderer/compaction-preview.ts +146 -0
  64. package/src/renderer/diff-view.ts +2 -2
  65. package/src/renderer/help-overlay.ts +1 -0
  66. package/src/renderer/model-picker-overlay.ts +23 -11
  67. package/src/renderer/progress.ts +3 -3
  68. package/src/renderer/search-overlay.ts +8 -5
  69. package/src/renderer/settings-modal-helpers.ts +2 -2
  70. package/src/renderer/settings-modal.ts +1 -1
  71. package/src/renderer/ui-factory.ts +11 -0
  72. package/src/runtime/bootstrap-core.ts +92 -0
  73. package/src/runtime/bootstrap-hook-bridge.ts +18 -0
  74. package/src/runtime/bootstrap-shell.ts +1 -0
  75. package/src/shell/blocking-input.ts +32 -0
  76. package/src/shell/recovery-input-helpers.ts +71 -0
  77. package/src/utils/browser.ts +29 -0
  78. package/src/utils/terminal-width.ts +10 -3
  79. package/src/version.ts +1 -1
@@ -0,0 +1,145 @@
1
+ /**
2
+ * long-task-notifier — fires push notifications when a turn or agent task
3
+ * completes after running longer than the configured threshold.
4
+ *
5
+ * PRIVACY GUARANTEE: Notification text must never include conversation content
6
+ * (user messages, assistant replies, tool outputs). Only metadata is included:
7
+ * task kind, elapsed time, ok/fail status, and session id. This module enforces
8
+ * that constraint by construction — it receives no conversation object and
9
+ * builds all message text from structural metadata only.
10
+ *
11
+ * Delivery targets (both are attempted, in this order):
12
+ * 1. Desktop notification (linux notify-send / mac osascript) via SDK
13
+ * notifyCompletion — detected and dispatched by the SDK; silently
14
+ * no-ops when the platform does not support it.
15
+ * 2. Configured outbound webhook channel (ntfy topic / webhook URL) via
16
+ * WebhookNotifier.send() — only fires when the user has URLs configured.
17
+ *
18
+ * When neither target is available the function is an honest no-op (debug log
19
+ * only; no user-facing error spam).
20
+ *
21
+ * Focus tracking: terminal focus state is not tracked anywhere in the TUI.
22
+ * Notifications therefore fire regardless of whether the terminal window is
23
+ * focused. A future implementation may suppress notifications when the TUI is
24
+ * in the foreground by reading a focus-state ref. // seam: wire focus ref here.
25
+ */
26
+
27
+ import { notifyCompletion } from '@pellux/goodvibes-sdk/platform/utils';
28
+ import { logger } from '@pellux/goodvibes-sdk/platform/utils';
29
+ import type { WebhookNotifier } from '@pellux/goodvibes-sdk/platform/integrations';
30
+
31
+ /** Default threshold in seconds. Turns shorter than this do not notify. */
32
+ export const NOTIFY_AFTER_SECONDS_DEFAULT = 60;
33
+
34
+ /**
35
+ * Sentinel value for the off-state. When behavior.notifyAfterSeconds is 0,
36
+ * push notifications are disabled (same convention as other numeric-off keys
37
+ * in the config schema).
38
+ */
39
+ export const NOTIFY_AFTER_SECONDS_OFF = 0;
40
+
41
+ /** Accepted task kinds for notification messages. */
42
+ export type LongTaskKind = 'turn' | 'agent';
43
+
44
+ /** Completion status for notification messages. */
45
+ export type LongTaskStatus = 'ok' | 'fail';
46
+
47
+ export interface MaybeNotifyLongTaskOptions {
48
+ /**
49
+ * Elapsed milliseconds for the turn or agent task.
50
+ * Must not include any conversation content.
51
+ */
52
+ readonly elapsedMs: number;
53
+
54
+ /** Whether the task completed successfully or failed. */
55
+ readonly status: LongTaskStatus;
56
+
57
+ /** Task kind label for the notification body. */
58
+ readonly kind: LongTaskKind;
59
+
60
+ /** Session id for correlation. Must not be a PII value. */
61
+ readonly sessionId: string;
62
+
63
+ /**
64
+ * Threshold in seconds from config (behavior.notifyAfterSeconds).
65
+ * 0 means off; notifications are suppressed entirely.
66
+ * Should be the raw config value; this function normalises it.
67
+ */
68
+ readonly thresholdSeconds: number;
69
+
70
+ /**
71
+ * Outbound webhook notifier. When provided and the user has URLs
72
+ * configured, the notification is also sent to all configured endpoints
73
+ * (e.g. ntfy.sh topics). Optional — absent means outbound delivery is
74
+ * skipped silently.
75
+ */
76
+ readonly webhookNotifier?: WebhookNotifier | null;
77
+ }
78
+
79
/**
 * Fires push notifications for a completed long task once its elapsed time
 * reaches the configured threshold.
 *
 * Threshold normalisation (the caller passes the raw config value):
 * - `NOTIFY_AFTER_SECONDS_OFF` (0) disables notifications entirely.
 * - A non-finite or negative threshold is not a meaningful config value, so
 *   `NOTIFY_AFTER_SECONDS_DEFAULT` is used instead. Without this, a NaN or
 *   negative threshold would make the gate comparison always pass and every
 *   task would notify.
 * - A non-finite or negative `elapsedMs` is treated as "no measurement" and
 *   the call is a no-op rather than producing a "NaNs" notification.
 *
 * Delivery: the desktop notification is attempted first, then the outbound
 * webhook when a notifier with at least one URL is supplied. Failures of
 * either target are logged at debug level and never propagate.
 *
 * PRIVACY: builds message text from structural metadata only (kind, elapsed,
 * status, first 8 characters of sessionId). Never includes conversation content.
 *
 * @param opts - Task metadata, raw threshold, and optional webhook notifier.
 * @returns true when the gate passed and delivery was attempted, false when
 *   the call was a no-op (off-state, invalid elapsed time, or below threshold).
 */
export function maybeNotifyLongTask(opts: MaybeNotifyLongTaskOptions): boolean {
  const { elapsedMs, status, kind, sessionId, thresholdSeconds, webhookNotifier } = opts;

  // Off-state: 0 disables notifications entirely.
  if (thresholdSeconds === NOTIFY_AFTER_SECONDS_OFF) {
    logger.debug('long-task-notifier: disabled (threshold=0)');
    return false;
  }

  // Normalise the raw config value: anything that is not a positive finite
  // number falls back to the default instead of defeating the gate below.
  const effectiveThreshold =
    Number.isFinite(thresholdSeconds) && thresholdSeconds > 0
      ? thresholdSeconds
      : NOTIFY_AFTER_SECONDS_DEFAULT;

  // A missing/garbled duration must not produce a notification.
  if (!Number.isFinite(elapsedMs) || elapsedMs < 0) {
    logger.debug('long-task-notifier: invalid elapsed time, skipping', { elapsedMs });
    return false;
  }

  // Gate: only notify when the task ran at least as long as the threshold.
  const elapsedSeconds = Math.floor(elapsedMs / 1000);
  if (elapsedSeconds < effectiveThreshold) {
    logger.debug('long-task-notifier: below threshold', {
      elapsedSeconds,
      thresholdSeconds: effectiveThreshold,
    });
    return false;
  }

  // Build concise, metadata-only message. No conversation text.
  const statusLabel = status === 'ok' ? 'completed' : 'failed';
  const title = `GoodVibes — ${kind} ${statusLabel}`;
  // PRIVACY: message contains only structural metadata, never conversation content.
  const message = `${kind} ${statusLabel} in ${elapsedSeconds}s · session ${sessionId.slice(0, 8)}`;

  // Delivery 1: desktop notification. The try/catch is defensive; a failure
  // here must not prevent the webhook delivery below.
  try {
    notifyCompletion(title, message, elapsedMs);
  } catch (err) {
    logger.debug('long-task-notifier: desktop notify error', { error: String(err) });
  }

  // Delivery 2: outbound webhook (ntfy / generic endpoint) if configured.
  if (webhookNotifier) {
    try {
      if (webhookNotifier.getUrls().length > 0) {
        // Fire-and-forget: rejection is logged, never surfaced to the user.
        webhookNotifier.send(message).catch((err: unknown) => {
          logger.debug('long-task-notifier: webhook send error', { error: String(err) });
        });
      } else {
        logger.debug('long-task-notifier: no webhook URLs configured, skipping outbound delivery');
      }
    } catch (err) {
      // getUrls()/send() threw synchronously — keep the best-effort contract.
      logger.debug('long-task-notifier: webhook send error', { error: String(err) });
    }
  }

  return true;
}
133
+
134
/**
 * Read behavior.notifyAfterSeconds through a config getter.
 *
 * Only real numbers and non-blank numeric strings are accepted. Everything
 * else (undefined, null, '', booleans, arrays, objects) counts as "absent or
 * invalid" and yields NOTIFY_AFTER_SECONDS_DEFAULT. Coercing such values with
 * `Number()` would turn null / '' / false / [] into 0 and silently disable
 * notifications, which is why they are rejected up front.
 *
 * @param configGet - Getter invoked with the key 'behavior.notifyAfterSeconds'.
 * @returns NOTIFY_AFTER_SECONDS_OFF (0) when the value is explicitly zero
 *   (number 0 or a string that parses to 0), the parsed value when it is a
 *   finite non-negative number, otherwise NOTIFY_AFTER_SECONDS_DEFAULT.
 */
export function readNotifyAfterSeconds(configGet: (key: string) => unknown): number {
  const raw = configGet('behavior.notifyAfterSeconds');

  let parsed: number;
  if (typeof raw === 'number') {
    parsed = raw;
  } else if (typeof raw === 'string' && raw.trim() !== '') {
    parsed = Number(raw);
  } else {
    // Absent or a type that cannot meaningfully express a threshold.
    return NOTIFY_AFTER_SECONDS_DEFAULT;
  }

  if (!Number.isFinite(parsed) || parsed < 0) return NOTIFY_AFTER_SECONDS_DEFAULT;
  return parsed === 0 ? NOTIFY_AFTER_SECONDS_OFF : parsed;
}
@@ -0,0 +1,147 @@
1
+ /**
2
+ * session-recovery.ts — Journal replay at session resume.
3
+ *
4
+ * Purpose
5
+ * ───────
6
+ * When a session is resumed, this module checks whether a transcript journal
7
+ * exists for that session whose records post-date the loaded snapshot. If so,
8
+ * it replays those records onto the live conversation and writes a fresh
9
+ * snapshot so the gap is permanently closed.
10
+ *
11
+ * Seams (all three must call replayJournalForSession)
12
+ * ────────────────────────────────────────────────────
13
+ * 1. CLI / command resume — session-workflow.ts, after `fromJSON` +
14
+ * `rebuildHistory` complete. Handles --continue, --resume, /session resume,
15
+ * and --fork.
16
+ * 2. Ctrl+R crash recovery — blocking-input.ts, after `conversation.fromJSON`
17
+ * in the Ctrl+R branch. Handles SIGKILL-era recovery files.
18
+ * 3. In-TUI panel resume — bootstrap-hook-bridge.ts
19
+ * `createResumeSessionHandler`, after `options.runtime.sessionId` is
20
+ * assigned. Handles the session browser / panel-driven resume.
21
+ *
22
+ * Recovery protocol
23
+ * ─────────────────
24
+ * 1. Call replayJournal() with the journal path and the snapshot timestamp.
25
+ * 2. If no records are newer than the snapshot, rotate the (now-stale)
26
+ * journal silently and return.
27
+ * 3. If records are found, apply the final record's messages — each journal
28
+ * record carries the full conversation snapshot at that moment, so the
29
+ * last record by seq is the authoritative post-crash state.
30
+ * 4. Rebuild the conversation history and call the snapshot writer so the
31
+ * gap is durably closed before the user sees the restored conversation.
32
+ * 5. Rotate the journal (it is no longer needed as a gap-filler).
33
+ * 6. Return a result so the caller can emit an honest notice.
34
+ */
35
+
36
+ import { journalPathFor, openTranscriptJournal, replayJournal } from './transcript-journal.ts';
37
+ import type { ConversationManager } from './conversation.ts';
38
+ import type { ConversationMessageSnapshot } from '@pellux/goodvibes-sdk/platform/core';
39
+
40
+ // ─── Types ──────────────────────────────────────────────────────────────────
41
+
42
+ export interface ReplayIntoConversationOptions {
43
+ /** Absolute path to the journal file for this session. */
44
+ readonly journalPath: string;
45
+ /**
46
+ * The `timestamp` field from the loaded session snapshot (SessionMeta).
47
+ * Only journal records with ts > snapshotTimestamp are replayed.
48
+ */
49
+ readonly snapshotTimestamp: number;
50
+ /** The live conversation manager to mutate with replayed messages. */
51
+ readonly conversation: ConversationManager;
52
+ /** Session ID — used when creating the post-replay journal instance for rotate(). */
53
+ readonly sessionId: string;
54
+ /**
55
+ * Persist the restored conversation so the gap is durably closed.
56
+ * Called with the final replayed message list. Best-effort — failures
57
+ * are swallowed so recovery never hard-fails a resume.
58
+ */
59
+ readonly persistSnapshot: (messages: ConversationMessageSnapshot[]) => void;
60
+ }
61
+
62
+ export interface ReplayIntoConversationResult {
63
+ /** Number of journal records that post-dated the snapshot. 0 if nothing to replay. */
64
+ readonly replayed: number;
65
+ /** True if the journal tail was corrupt (quarantined). */
66
+ readonly hadCorruptTail: boolean;
67
+ /** True if the journal had an unrecognised schema version (quarantined). */
68
+ readonly hadVersionMismatch: boolean;
69
+ }
70
+
71
+ // ─── Public API ─────────────────────────────────────────────────────────────
72
+
73
/**
 * Replay journal records newer than `snapshotTimestamp` onto `conversation`.
 *
 * Steps:
 * 1. Read the records via replayJournal().
 * 2. No records: rotate the stale journal and report `replayed: 0`.
 * 3. Otherwise apply the LAST record's messages (the last element of the
 *    array returned by replayJournal is taken as the most recent state),
 *    rebuild history, and ask the caller to persist a fresh snapshot.
 * 4. Rotate the journal ONLY when the snapshot was persisted. If persistence
 *    failed, the journal is still the only durable copy of the replayed
 *    messages, so it is left in place for the next resume to replay again.
 *
 * Never throws — every error is swallowed to preserve the best-effort
 * recovery contract; a failure yields an all-zero/false result.
 *
 * @param options - Journal location, snapshot timestamp, live conversation,
 *   session id, and the snapshot writer.
 * @returns Replayed record count plus corruption flags. `hadVersionMismatch`
 *   is a heuristic: it is true when the journal reported a corrupt tail AND
 *   produced zero records. This function does not inspect the journal header,
 *   so it cannot tell a version mismatch apart from a corrupt journal that
 *   yielded no usable records.
 */
export function replayJournalIntoConversation(
  options: ReplayIntoConversationOptions,
): ReplayIntoConversationResult {
  const { journalPath, snapshotTimestamp, conversation, sessionId, persistSnapshot } = options;

  try {
    const { records, hadCorruptTail } = replayJournal(journalPath, snapshotTimestamp);
    const journal = openTranscriptJournal(journalPath, sessionId);

    const lastRecord = records[records.length - 1];
    if (lastRecord === undefined) {
      // Nothing to replay — rotate the (now-stale) journal silently.
      // "Corrupt tail with zero records" is surfaced as a possible version
      // mismatch so the caller can pick a distinct notice.
      journal.rotate();
      return { replayed: 0, hadCorruptTail, hadVersionMismatch: hadCorruptTail };
    }

    // Each record carries the full conversation at that moment, so applying
    // the final one restores the most recent captured state.
    const replayedMessages = lastRecord.messages as ConversationMessageSnapshot[];
    conversation.fromJSON({
      messages: replayedMessages as never[],
    });
    conversation.rebuildHistory();

    // Write a fresh snapshot so the gap is durably closed even if the
    // process is killed again before the next turn-complete snapshot.
    let snapshotPersisted = false;
    try {
      persistSnapshot(replayedMessages);
      snapshotPersisted = true;
    } catch {
      // Best-effort — never hard-fail recovery due to snapshot write failure.
    }

    // Only discard the journal once its content is safely in a snapshot.
    if (snapshotPersisted) {
      journal.rotate();
    }

    return { replayed: records.length, hadCorruptTail, hadVersionMismatch: false };
  } catch {
    // Absolute last-resort guard — recovery must never crash a resume.
    return { replayed: 0, hadCorruptTail: false, hadVersionMismatch: false };
  }
}
132
+
133
/**
 * Resolve the journal path from `homeDirectory` + `sessionId` and delegate to
 * replayJournalIntoConversation().
 *
 * Exists so callers can trigger a replay without importing journalPathFor
 * themselves. All supplied options are forwarded unchanged, with the resolved
 * `journalPath` added.
 */
export function replayJournalForSession(
  options: Omit<ReplayIntoConversationOptions, 'journalPath'> & {
    readonly homeDirectory: string;
  },
): ReplayIntoConversationResult {
  const { homeDirectory, sessionId } = options;
  const resolvedPath = journalPathFor(homeDirectory, sessionId);
  const replayOptions = { ...options, journalPath: resolvedPath };
  return replayJournalIntoConversation(replayOptions);
}
@@ -1,6 +1,7 @@
1
1
  import type { UiRuntimeEvents } from '@/runtime/index.ts';
2
2
  import { createStreamStallWatchdog } from './stream-stall-watchdog.ts';
3
3
  import { formatUserFacingErrorLine } from './format-user-error.ts';
4
+ import { logger } from '@pellux/goodvibes-sdk/platform/utils';
4
5
 
5
6
  /**
6
7
  * Live stream and tool-execution metrics maintained by wireStreamEventMetrics.
@@ -29,9 +30,28 @@ interface StreamOrchestrator {
29
30
  readonly streamingOutputTokens: number;
30
31
  }
31
32
 
32
- /** Minimal provider surface required for the stream stall watchdog. */
33
+ /** Minimal provider surface required for the stream stall watchdog and failover switching. */
33
34
  interface StreamProviderRegistry {
34
- getCurrentModel(): { readonly provider: string };
35
+ getCurrentModel(): { readonly provider: string; readonly registryKey?: string };
36
+ setCurrentModel(registryKey: string): void;
37
+ }
38
+
39
+ /**
40
+ * Minimal fallback-chain node shape returned by ProviderOptimizer.testFallback().
41
+ * Only the fields consumed by the failover path are declared here.
42
+ */
43
+ interface FailoverChainNode {
44
+ readonly position: number;
45
+ readonly providerId: string;
46
+ readonly modelId: string;
47
+ readonly capable: boolean;
48
+ }
49
+
50
+ /** Minimal ProviderOptimizer surface required by the failover path. */
51
+ interface FailoverOptimizer {
52
+ readonly enabled: boolean;
53
+ testFallback(profile?: Record<string, unknown>): { readonly chain: readonly FailoverChainNode[] };
54
+ recordFallbackTransition(from: string, to: string, reason: string): void;
35
55
  }
36
56
 
37
57
  /** Minimal system-message surface required for user-visible notifications. */
@@ -40,6 +60,15 @@ interface StreamSystemMessageRouter {
40
60
  low(message: string): void;
41
61
  }
42
62
 
63
+ /**
64
+ * Minimal cost lookup surface for attaching cost-delta information to failover notices.
65
+ * Returns USD-per-1M-token pricing for the given model ID.
66
+ * The implementation may consult a catalog; if the model is unknown both fields are 0.
67
+ */
68
+ export interface FailoverCostLookup {
69
+ getCostFromCatalog(modelId: string): { readonly input: number; readonly output: number };
70
+ }
71
+
43
72
  export interface WireStreamEventMetricsOptions {
44
73
  /** The UI runtime event bus (turns + tools sub-buses). */
45
74
  readonly events: UiRuntimeEvents;
@@ -56,6 +85,86 @@ export interface WireStreamEventMetricsOptions {
56
85
  * so the render closure can read it without a forward-reference issue.
57
86
  */
58
87
  readonly metrics: StreamMetrics;
88
+ /**
89
+ * When provided and enabled, the optimizer is consulted on TURN_ERROR to
90
+ * attempt the next viable provider before surfacing the error to the user.
91
+ * When absent or optimizer.enabled is false, behaviour is identical to the
92
+ * pre-failover baseline: error surfaces immediately via systemMessageRouter.
93
+ */
94
+ readonly providerOptimizer?: FailoverOptimizer;
95
+ /**
96
+ * Callback the caller provides to re-submit the last user turn on a
97
+ * different provider after a successful failover switch. Called only when
98
+ * the optimizer is enabled and a viable next provider exists in the chain.
99
+ */
100
+ readonly retryTurn?: () => void;
101
+ /**
102
+ * Optional cost catalog for attaching per-1M-token cost information to
103
+ * the failover notice. When provided and both models have non-zero pricing,
104
+ * the notice includes input and output cost comparisons. When the lookup is absent,
105
+ * no cost suffix is added; when pricing is unavailable for either model, the notice honestly states "cost data unavailable".
106
+ */
107
+ readonly costLookup?: FailoverCostLookup;
108
+ }
109
+
110
+ /** Result of wireStreamEventMetrics. */
111
+ export interface WireStreamEventMetricsResult {
112
+ /** Unsubscribe functions; push into the parent unsubs array for cleanup on exit. */
113
+ readonly unsubs: ReadonlyArray<() => void>;
114
+ /**
115
+ * Clear the per-turn failover visited-provider set.
116
+ * Call this on every new user submission so the visited set does not bleed
117
+ * across independent turns (the set is also cleared automatically on
118
+ * TURN_COMPLETED, but a new submission may arrive before TURN_COMPLETED fires).
119
+ */
120
+ readonly clearFailoverVisited: () => void;
121
+ /**
122
+ * Register a callback that fires whenever a TURN_ERROR is surfaced to the
123
+ * user — either immediately (no optimizer) or after chain exhaustion.
124
+ * Does NOT fire when the optimizer performs a successful automatic failover
125
+ * (in that case the user sees a [Failover] notice, not an error).
126
+ * Used by main.ts to activate the one-key retry affordance. The callback
127
+ * receives exhausted=true when the failover chain was exhausted first, so
128
+ * the notice can say honestly that a retry reuses the same failed provider.
129
+ */
130
+ readonly onErrorSurfaced: (cb: (exhausted: boolean) => void) => void;
131
+ }
132
+
133
/**
 * Build the cost-delta suffix for a failover notice.
 *
 * Extracts the model ID from each registry key (format: `provider:modelId`,
 * where modelId may itself contain `:`), queries the cost catalog for both
 * models, and formats a bracketed comparison.
 *
 * The suffix is purely informational, so this function never throws: a
 * catalog that raises is reported as "cost data unavailable" instead of
 * interrupting the failover that is already in progress.
 *
 * @param lookup - Optional cost catalog; when absent no suffix is produced.
 * @param fromRegistryKey - Registry key of the provider being abandoned (may be undefined).
 * @param toRegistryKey - Registry key of the provider being selected.
 * @returns '' when `lookup` is absent; ' [cost data unavailable]' when either
 *   model ID cannot be derived, either model has no positive pricing, or the
 *   catalog throws; otherwise ' [cost/1M: input $a→$b, output $c→$d]'.
 */
function buildCostDeltaSuffix(
  lookup: FailoverCostLookup | undefined,
  fromRegistryKey: string | undefined,
  toRegistryKey: string,
): string {
  if (!lookup) return '';

  const unavailable = ' [cost data unavailable]';

  // Everything after the FIRST ':' is the model ID; no ':' means no model ID.
  const modelIdOf = (registryKey: string | undefined): string => {
    if (!registryKey) return '';
    const separator = registryKey.indexOf(':');
    return separator === -1 ? '' : registryKey.slice(separator + 1);
  };

  const fromModelId = modelIdOf(fromRegistryKey);
  const toModelId = modelIdOf(toRegistryKey);
  // Without both model IDs there is nothing meaningful to compare.
  if (!fromModelId || !toModelId) return unavailable;

  let fromCost: { readonly input: number; readonly output: number };
  let toCost: { readonly input: number; readonly output: number };
  try {
    fromCost = lookup.getCostFromCatalog(fromModelId);
    toCost = lookup.getCostFromCatalog(toModelId);
  } catch {
    // A broken catalog must not break the failover notice.
    return unavailable;
  }

  // The catalog signals "unknown model" with zero pricing on both fields.
  const hasPricing = (cost: { readonly input: number; readonly output: number }): boolean =>
    cost.input > 0 || cost.output > 0;
  if (!hasPricing(fromCost) || !hasPricing(toCost)) return unavailable;

  const fmt = (n: number): string => `$${n.toFixed(2)}`;
  return ` [cost/1M: input ${fmt(fromCost.input)}→${fmt(toCost.input)}, output ${fmt(fromCost.output)}→${fmt(toCost.output)}]`;
}
60
169
 
61
170
  /**
@@ -64,8 +173,7 @@ export interface WireStreamEventMetricsOptions {
64
173
  * and declares it before render() so both the render closure and the returned
65
174
  * event handlers share the same reference.
66
175
  *
67
- * Returns an array of unsubscribe functions; push them into the parent unsubs
68
- * array so they are cleaned up on exit.
176
+ * Returns an object with unsubscribe functions and a clearFailoverVisited helper.
69
177
  *
70
178
  * Responsibilities:
71
179
  * - Track stream start time, delta count, token speed, and TTFT
@@ -75,8 +183,11 @@ export interface WireStreamEventMetricsOptions {
75
183
  */
76
184
  export function wireStreamEventMetrics(
77
185
  options: WireStreamEventMetricsOptions,
78
- ): ReadonlyArray<() => void> {
79
- const { events, metrics, orchestrator, providerRegistry, systemMessageRouter, render } = options;
186
+ ): WireStreamEventMetricsResult {
187
+ const {
188
+ events, metrics, orchestrator, providerRegistry,
189
+ systemMessageRouter, render, providerOptimizer, retryTurn, costLookup,
190
+ } = options;
80
191
 
81
192
  const unsubs: Array<() => void> = [];
82
193
 
@@ -103,10 +214,85 @@ export function wireStreamEventMetrics(
103
214
  metrics.tokenSpeed = elapsed > 0 ? tokenCount / elapsed : 0;
104
215
  }));
105
216
 
217
+ // Per-turn visited-provider set: tracks providers already attempted this turn
218
+ // so failover cannot ping-pong between two mutually-failing providers.
219
+ // True invariant: at most one retry per provider per turn; exhaustion fires
220
+ // after the chain is consumed.
221
+ // Cleared on TURN_COMPLETED (see handler below) and on new user submission
222
+ // (caller clears via clearFailoverVisited(), wired in main.ts).
223
+ const failoverVisited = new Set<string>();
224
+
225
+ unsubs.push(events.turns.on('TURN_COMPLETED', () => {
226
+ failoverVisited.clear();
227
+ }));
228
+
106
229
  unsubs.push(events.turns.on('TURN_ERROR', (event) => {
107
230
  const errVal: string = event.error;
231
+
232
+ // --- Optimizer-gated failover path ---
233
+ // When the optimizer is present and enabled, attempt to advance to the next
234
+ // viable provider in the fallback chain before surfacing the error. When
235
+ // the optimizer is absent or disabled, behaviour is identical to baseline:
236
+ // error surfaces immediately.
237
+ if (providerOptimizer?.enabled && retryTurn) {
238
+ const fromProvider = providerRegistry.getCurrentModel().provider;
239
+ // Mark the failing provider as visited so it will never be selected again
240
+ // in this turn, even if a second TURN_ERROR arrives (e.g. ping-pong).
241
+ failoverVisited.add(fromProvider);
242
+ const result = providerOptimizer.testFallback({});
243
+ // Find the first capable node that is NOT already visited this turn and
244
+ // is NOT synthetic. Synthetic nodes are skipped permanently by design:
245
+ // a synthetic model is itself a fallback ladder over real backends, so
246
+ // failing over INTO one after a real backend already failed is unsound
247
+ // double-indirection (it can route straight back to the failed provider).
248
+ const next = result.chain.find(
249
+ (node) =>
250
+ node.capable &&
251
+ !failoverVisited.has(node.providerId) &&
252
+ node.providerId !== 'synthetic',
253
+ );
254
+
255
+ if (next) {
256
+ const toRegistryKey = `${next.providerId}:${next.modelId}`;
257
+ const errorClass = formatUserFacingErrorLine(errVal);
258
+ // Capture FROM registry key before switching — needed for cost comparison.
259
+ const fromRegistryKey = providerRegistry.getCurrentModel().registryKey;
260
+ try {
261
+ providerRegistry.setCurrentModel(toRegistryKey);
262
+ } catch (switchErr) {
263
+ // Switch failed — fall through to honest error display.
264
+ logger.debug('failover setCurrentModel failed', { toRegistryKey, error: String(switchErr) });
265
+ systemMessageRouter.high(`[Error] ${errorClass}`);
266
+ render();
267
+ return;
268
+ }
269
+ // Record the selected provider as visited before the retry fires so
270
+ // a subsequent TURN_ERROR from that provider also skips it.
271
+ failoverVisited.add(next.providerId);
272
+ providerOptimizer.recordFallbackTransition(fromProvider, next.providerId, errorClass);
273
+ const costSuffix = buildCostDeltaSuffix(costLookup, fromRegistryKey, toRegistryKey);
274
+ systemMessageRouter.high(
275
+ `[Failover] ${fromProvider} -> ${next.providerId} (${errorClass})${costSuffix}`,
276
+ );
277
+ render();
278
+ // Re-submit the last user turn on the new provider.
279
+ retryTurn();
280
+ return;
281
+ }
282
+
283
+ // Chain exhausted — all capable candidates have been visited or none exist.
284
+ systemMessageRouter.high(
285
+ `[Failover] Chain exhausted — no alternative provider available. Original error: ${formatUserFacingErrorLine(errVal)}`,
286
+ );
287
+ notifyErrorSurfaced(true);
288
+ render();
289
+ return;
290
+ }
291
+
292
+ // Baseline: optimizer disabled or not wired — surface error immediately.
108
293
  const formatted = formatUserFacingErrorLine(errVal);
109
294
  systemMessageRouter.high(`[Error] ${formatted}`);
295
+ notifyErrorSurfaced(false);
110
296
  render();
111
297
  }));
112
298
 
@@ -140,5 +326,11 @@ export function wireStreamEventMetrics(
140
326
  metrics.activeToolName = undefined;
141
327
  }));
142
328
 
143
- return unsubs;
329
+ let _errorSurfacedCb: ((exhausted: boolean) => void) | undefined;
330
+ function notifyErrorSurfaced(exhausted: boolean) { _errorSurfacedCb?.(exhausted); }
331
+ return {
332
+ unsubs,
333
+ clearFailoverVisited: () => failoverVisited.clear(),
334
+ onErrorSurfaced: (cb: (exhausted: boolean) => void) => { _errorSurfacedCb = cb; },
335
+ };
144
336
  }