@pellux/goodvibes-tui 0.22.0 → 0.24.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +47 -0
- package/README.md +17 -8
- package/package.json +1 -1
- package/src/cli/management-commands.ts +1 -1
- package/src/cli/management-utils.ts +352 -0
- package/src/cli/management.ts +116 -344
- package/src/cli/surface-command.ts +1 -1
- package/src/core/context-auto-compact.ts +43 -10
- package/src/core/conversation-rendering.ts +5 -2
- package/src/core/conversation-types.ts +24 -0
- package/src/core/conversation.ts +7 -12
- package/src/core/long-task-notifier.ts +145 -0
- package/src/core/session-recovery.ts +147 -0
- package/src/core/stream-event-wiring.ts +199 -7
- package/src/core/transcript-journal.ts +339 -0
- package/src/core/turn-event-wiring.ts +67 -4
- package/src/input/commands/channel-runtime.ts +139 -0
- package/src/input/commands/control-room-runtime.ts +0 -2
- package/src/input/commands/diff-runtime.ts +1 -1
- package/src/input/commands/eval.ts +1 -1
- package/src/input/commands/health-runtime.ts +23 -4
- package/src/input/commands/knowledge.ts +1 -1
- package/src/input/commands/local-runtime.ts +1 -2
- package/src/input/commands/memory-product-runtime.ts +2 -2
- package/src/input/commands/memory.ts +1 -1
- package/src/input/commands/onboarding-runtime.ts +0 -1
- package/src/input/commands/policy.ts +1 -1
- package/src/input/commands/profile-sync-runtime.ts +4 -3
- package/src/input/commands/provider.ts +1 -1
- package/src/input/commands/qrcode-runtime.ts +0 -1
- package/src/input/commands/runtime-services.ts +30 -1
- package/src/input/commands/session-content.ts +2 -2
- package/src/input/commands/session-workflow.ts +32 -2
- package/src/input/commands/session.ts +1 -1
- package/src/input/commands/settings-sync-runtime.ts +9 -9
- package/src/input/commands/share-runtime.ts +1 -1
- package/src/input/commands/shell-core.ts +56 -6
- package/src/input/commands/work-plan-runtime.ts +8 -8
- package/src/input/commands.ts +2 -0
- package/src/input/feed-context-factory.ts +6 -0
- package/src/input/handler-feed-routes.ts +19 -1
- package/src/input/handler-feed.ts +11 -0
- package/src/input/handler-prompt-buffer.ts +28 -0
- package/src/input/handler-shortcuts.ts +88 -2
- package/src/input/handler-ui-state.ts +2 -2
- package/src/input/handler.ts +39 -3
- package/src/input/keybindings.ts +33 -3
- package/src/input/kill-ring.ts +134 -0
- package/src/input/model-picker.ts +18 -1
- package/src/input/search.ts +18 -6
- package/src/input/settings-modal-activation.ts +134 -0
- package/src/input/settings-modal-adjustment.ts +124 -0
- package/src/input/settings-modal-data.ts +53 -0
- package/src/input/settings-modal.ts +48 -145
- package/src/main.ts +50 -50
- package/src/panels/base-panel.ts +2 -1
- package/src/panels/provider-health-domains.ts +3 -3
- package/src/panels/provider-health-panel.ts +13 -9
- package/src/panels/provider-health-tracker.ts +7 -4
- package/src/panels/settings-sync-panel.ts +3 -3
- package/src/panels/work-plan-panel.ts +2 -2
- package/src/renderer/compaction-history-modal.ts +55 -0
- package/src/renderer/compaction-preview.ts +146 -0
- package/src/renderer/diff-view.ts +2 -2
- package/src/renderer/help-overlay.ts +1 -0
- package/src/renderer/model-picker-overlay.ts +23 -11
- package/src/renderer/progress.ts +3 -3
- package/src/renderer/search-overlay.ts +8 -5
- package/src/renderer/settings-modal-helpers.ts +2 -2
- package/src/renderer/settings-modal.ts +1 -1
- package/src/renderer/ui-factory.ts +11 -0
- package/src/runtime/bootstrap-core.ts +92 -0
- package/src/runtime/bootstrap-hook-bridge.ts +18 -0
- package/src/runtime/bootstrap-shell.ts +1 -0
- package/src/shell/blocking-input.ts +32 -0
- package/src/shell/recovery-input-helpers.ts +71 -0
- package/src/utils/browser.ts +29 -0
- package/src/utils/terminal-width.ts +10 -3
- package/src/version.ts +1 -1
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* long-task-notifier — fires push notifications when a turn or agent task
|
|
3
|
+
* completes after running longer than the configured threshold.
|
|
4
|
+
*
|
|
5
|
+
* PRIVACY GUARANTEE: Notification text must never include conversation content
|
|
6
|
+
* (user messages, assistant replies, tool outputs). Only metadata is included:
|
|
7
|
+
* task kind, elapsed time, ok/fail status, and session id. This module enforces
|
|
8
|
+
* that constraint by construction — it receives no conversation object and
|
|
9
|
+
* builds all message text from structural metadata only.
|
|
10
|
+
*
|
|
11
|
+
* Delivery targets (in preference order):
|
|
12
|
+
* 1. Desktop notification (linux notify-send / mac osascript) via SDK
|
|
13
|
+
* notifyCompletion — detected and dispatched by the SDK; silently
|
|
14
|
+
* no-ops when the platform does not support it.
|
|
15
|
+
* 2. Configured outbound webhook channel (ntfy topic / webhook URL) via
|
|
16
|
+
* WebhookNotifier.send() — only fires when the user has URLs configured.
|
|
17
|
+
*
|
|
18
|
+
* When neither target is available the function is an honest no-op (debug log
|
|
19
|
+
* only; no user-facing error spam).
|
|
20
|
+
*
|
|
21
|
+
* Focus tracking: terminal focus state is not tracked anywhere in the TUI.
|
|
22
|
+
* Notifications therefore fire regardless of whether the terminal window is
|
|
23
|
+
* focused. A future implementation may suppress notifications when the TUI is
|
|
24
|
+
* in the foreground by reading a focus-state ref. // seam: wire focus ref here.
|
|
25
|
+
*/
|
|
26
|
+
|
|
27
|
+
import { notifyCompletion } from '@pellux/goodvibes-sdk/platform/utils';
|
|
28
|
+
import { logger } from '@pellux/goodvibes-sdk/platform/utils';
|
|
29
|
+
import type { WebhookNotifier } from '@pellux/goodvibes-sdk/platform/integrations';
|
|
30
|
+
|
|
31
|
+
/** Threshold, in seconds, used when the config supplies no usable value. */
export const NOTIFY_AFTER_SECONDS_DEFAULT = 60;

/**
 * Off-state sentinel: a behavior.notifyAfterSeconds of 0 disables push
 * notifications (the same convention as other numeric-off keys in the
 * config schema).
 */
export const NOTIFY_AFTER_SECONDS_OFF = 0;

/** Task kinds that may be named in a notification message. */
export type LongTaskKind = 'turn' | 'agent';

/** Outcome of the task as reported in a notification message. */
export type LongTaskStatus = 'ok' | 'fail';
|
|
46
|
+
|
|
47
|
+
export interface MaybeNotifyLongTaskOptions {
  /**
   * How long the turn or agent task ran, in milliseconds.
   * Metadata only; never conversation content.
   */
  readonly elapsedMs: number;

  /** Outcome of the task: succeeded ('ok') or failed ('fail'). */
  readonly status: LongTaskStatus;

  /** Which kind of task finished; used as the label in the notification text. */
  readonly kind: LongTaskKind;

  /**
   * Session id used for correlation. Must not be a PII value — a prefix of
   * it is placed in the notification text.
   */
  readonly sessionId: string;

  /**
   * Notification threshold in seconds (behavior.notifyAfterSeconds).
   * A value of 0 means off: no notification is sent at all.
   */
  readonly thresholdSeconds: number;

  /**
   * Optional outbound webhook notifier (e.g. ntfy.sh topics). It is used only
   * when it reports at least one configured URL; when it is absent or null,
   * outbound delivery is skipped silently.
   */
  readonly webhookNotifier?: WebhookNotifier | null;
}
|
|
78
|
+
|
|
79
|
+
/**
 * Sends push notifications for a finished turn or agent task that ran for at
 * least the configured threshold.
 *
 * Delivery is best-effort. The desktop notification is attempted first, then
 * the outbound webhook when a notifier reporting at least one URL is supplied.
 * Failures in either channel are debug-logged and never propagate to the
 * caller.
 *
 * PRIVACY: the notification text is assembled only from kind, status, elapsed
 * seconds, and the first 8 characters of the session id. No conversation
 * content is available to this function.
 *
 * @param opts - Task metadata, threshold, and optional webhook notifier.
 * @returns true once the threshold gate is passed and delivery was attempted;
 *   false when notifications are off (threshold 0), the task finished below
 *   the threshold, or elapsedMs is not a finite number.
 */
export function maybeNotifyLongTask(opts: MaybeNotifyLongTaskOptions): boolean {
  const { elapsedMs, status, kind, sessionId, thresholdSeconds, webhookNotifier } = opts;

  // Off-state: 0 disables notifications entirely.
  if (thresholdSeconds === NOTIFY_AFTER_SECONDS_OFF) {
    logger.debug('long-task-notifier: disabled (threshold=0)');
    return false;
  }

  // A NaN, infinite, or negative threshold would make the gate below always
  // pass (every task would notify), so fall back to the default instead.
  const effectiveThresholdSeconds =
    Number.isFinite(thresholdSeconds) && thresholdSeconds > 0
      ? thresholdSeconds
      : NOTIFY_AFTER_SECONDS_DEFAULT;

  // Gate: compare in milliseconds so fractional thresholds are honoured; for
  // whole-second thresholds this is identical to comparing floored seconds.
  // A non-finite elapsed time cannot be reported honestly, so it is suppressed.
  const elapsedSeconds = Math.floor(elapsedMs / 1000);
  if (!Number.isFinite(elapsedMs) || elapsedMs < effectiveThresholdSeconds * 1000) {
    logger.debug('long-task-notifier: below threshold', {
      elapsedSeconds,
      thresholdSeconds: effectiveThresholdSeconds,
    });
    return false;
  }

  // PRIVACY: message contains only structural metadata, never conversation content.
  const statusLabel = status === 'ok' ? 'completed' : 'failed';
  const title = `GoodVibes — ${kind} ${statusLabel}`;
  const message = `${kind} ${statusLabel} in ${elapsedSeconds}s · session ${sessionId.slice(0, 8)}`;

  // Delivery 1: desktop notification. Guarded so an unexpected throw cannot
  // prevent the webhook attempt or reach the caller.
  try {
    notifyCompletion(title, message, elapsedMs);
  } catch (err) {
    logger.debug('long-task-notifier: desktop notify error', { error: String(err) });
  }

  // Delivery 2: outbound webhook, only when URLs are configured. The try/catch
  // covers synchronous throws from getUrls()/send(); the .catch covers an
  // asynchronous rejection of the fire-and-forget send.
  if (webhookNotifier) {
    try {
      if (webhookNotifier.getUrls().length > 0) {
        void webhookNotifier.send(message).catch((err: unknown) => {
          logger.debug('long-task-notifier: webhook send error', { error: String(err) });
        });
      } else {
        logger.debug('long-task-notifier: no webhook URLs configured, skipping outbound delivery');
      }
    } catch (err) {
      logger.debug('long-task-notifier: webhook send error', { error: String(err) });
    }
  }

  return true;
}
|
|
133
|
+
|
|
134
|
+
/**
 * Read behavior.notifyAfterSeconds through the supplied config getter.
 *
 * Only a number, or a non-blank string that parses to a number, is accepted.
 * Other value types (undefined, null, booleans, arrays, objects) are treated
 * as absent rather than being coerced with Number(), because that coercion
 * turns null / '' / [] / false into 0 and would silently switch notifications
 * off.
 *
 * @param configGet - Lookup function for a config key.
 * @returns NOTIFY_AFTER_SECONDS_OFF (0) when the value is explicitly 0;
 *   the parsed value when it is finite and non-negative;
 *   NOTIFY_AFTER_SECONDS_DEFAULT when the key is absent or invalid.
 */
export function readNotifyAfterSeconds(configGet: (key: string) => unknown): number {
  const raw = configGet('behavior.notifyAfterSeconds');

  let parsed: number;
  if (typeof raw === 'number') {
    parsed = raw;
  } else if (typeof raw === 'string' && raw.trim() !== '') {
    parsed = Number(raw);
  } else {
    // Absent or a non-numeric type: never coerce, use the default.
    return NOTIFY_AFTER_SECONDS_DEFAULT;
  }

  if (parsed === 0) return NOTIFY_AFTER_SECONDS_OFF;
  return Number.isFinite(parsed) && parsed > 0 ? parsed : NOTIFY_AFTER_SECONDS_DEFAULT;
}
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* session-recovery.ts — Journal replay at session resume.
|
|
3
|
+
*
|
|
4
|
+
* Purpose
|
|
5
|
+
* ───────
|
|
6
|
+
* When a session is resumed, this module checks whether a transcript journal
|
|
7
|
+
* exists for that session whose records post-date the loaded snapshot. If so,
|
|
8
|
+
* it replays those records onto the live conversation and writes a fresh
|
|
9
|
+
* snapshot so the gap is permanently closed.
|
|
10
|
+
*
|
|
11
|
+
* Seams (all three must call replayJournalForSession)
|
|
12
|
+
* ────────────────────────────────────────────────────
|
|
13
|
+
* 1. CLI / command resume — session-workflow.ts, after `fromJSON` +
|
|
14
|
+
* `rebuildHistory` complete. Handles --continue, --resume, /session resume,
|
|
15
|
+
* and --fork.
|
|
16
|
+
* 2. Ctrl+R crash recovery — blocking-input.ts, after `conversation.fromJSON`
|
|
17
|
+
* in the Ctrl+R branch. Handles SIGKILL-era recovery files.
|
|
18
|
+
* 3. In-TUI panel resume — bootstrap-hook-bridge.ts
|
|
19
|
+
* `createResumeSessionHandler`, after `options.runtime.sessionId` is
|
|
20
|
+
* assigned. Handles the session browser / panel-driven resume.
|
|
21
|
+
*
|
|
22
|
+
* Recovery protocol
|
|
23
|
+
* ─────────────────
|
|
24
|
+
* 1. Call replayJournal() with the journal path and the snapshot timestamp.
|
|
25
|
+
* 2. If no records are newer than the snapshot, rotate the (now-stale)
|
|
26
|
+
* journal silently and return.
|
|
27
|
+
* 3. If records are found, apply the final record's messages — each journal
|
|
28
|
+
* record carries the full conversation snapshot at that moment, so the
|
|
29
|
+
* last record by seq is the authoritative post-crash state.
|
|
30
|
+
* 4. Rebuild the conversation history and call the snapshot writer so the
|
|
31
|
+
* gap is durably closed before the user sees the restored conversation.
|
|
32
|
+
* 5. Rotate the journal (it is no longer needed as a gap-filler).
|
|
33
|
+
* 6. Return a result so the caller can emit an honest notice.
|
|
34
|
+
*/
|
|
35
|
+
|
|
36
|
+
import { journalPathFor, openTranscriptJournal, replayJournal } from './transcript-journal.ts';
|
|
37
|
+
import type { ConversationManager } from './conversation.ts';
|
|
38
|
+
import type { ConversationMessageSnapshot } from '@pellux/goodvibes-sdk/platform/core';
|
|
39
|
+
|
|
40
|
+
// ─── Types ──────────────────────────────────────────────────────────────────
|
|
41
|
+
|
|
42
|
+
export interface ReplayIntoConversationOptions {
  /** Absolute path of this session's journal file. */
  readonly journalPath: string;

  /**
   * `timestamp` of the session snapshot that was just loaded (SessionMeta).
   * Only journal records with ts > snapshotTimestamp are replayed.
   */
  readonly snapshotTimestamp: number;

  /** Live conversation manager that receives the replayed messages. */
  readonly conversation: ConversationManager;

  /** Session id, passed along when the journal is opened so it can be rotated. */
  readonly sessionId: string;

  /**
   * Writes the restored conversation back to durable storage so the gap is
   * closed. Receives the final replayed message list. Best-effort: anything
   * it throws is swallowed so recovery never hard-fails a resume.
   */
  readonly persistSnapshot: (messages: ConversationMessageSnapshot[]) => void;
}
|
|
61
|
+
|
|
62
|
+
export interface ReplayIntoConversationResult {
  /** Count of journal records newer than the snapshot; 0 when nothing was replayed. */
  readonly replayed: number;

  /** True when the journal replay reported a corrupt (quarantined) tail. */
  readonly hadCorruptTail: boolean;

  /**
   * True when the journal appears to have had an unrecognised schema version.
   * This is inferred rather than read from the header: it is set when the
   * replay reported a corrupt tail and yielded no records.
   */
  readonly hadVersionMismatch: boolean;
}
|
|
70
|
+
|
|
71
|
+
// ─── Public API ─────────────────────────────────────────────────────────────
|
|
72
|
+
|
|
73
|
+
/**
 * Replay journal records newer than `snapshotTimestamp` onto `conversation`.
 *
 * Steps:
 *  1. Read the journal records that post-date the snapshot.
 *  2. With no such records, rotate the stale journal and report 0 replayed.
 *  3. Otherwise apply the last record's messages (each record carries the
 *     full conversation state), rebuild history, persist a fresh snapshot
 *     (best-effort), and rotate the journal.
 *
 * Never throws. The returned result always reflects the furthest state that
 * was actually reached: if a late step such as journal rotation fails after
 * the conversation has been restored, the replay is still reported instead
 * of being masked as "nothing replayed".
 *
 * @param options - Journal location, snapshot timestamp, target conversation,
 *   session id, and the snapshot writer.
 * @returns Replay count plus corrupt-tail / version-mismatch flags.
 */
export function replayJournalIntoConversation(
  options: ReplayIntoConversationOptions,
): ReplayIntoConversationResult {
  const { journalPath, snapshotTimestamp, conversation, sessionId, persistSnapshot } = options;

  // Best-known outcome so far; the catch below returns whatever was reached.
  let outcome: ReplayIntoConversationResult = {
    replayed: 0,
    hadCorruptTail: false,
    hadVersionMismatch: false,
  };

  try {
    const { records, hadCorruptTail } = replayJournal(journalPath, snapshotTimestamp);

    // A wrong header version is reported by replayJournal as a corrupt tail
    // with zero records; the journal has been quarantined by then, so the
    // header cannot be inspected here. hadVersionMismatch is derived from
    // that combination to give the caller a distinct notice option.
    outcome = {
      replayed: 0,
      hadCorruptTail,
      hadVersionMismatch: hadCorruptTail && records.length === 0,
    };

    const journal = openTranscriptJournal(journalPath, sessionId);

    if (records.length === 0) {
      // Nothing to replay — rotate the (now-stale) journal silently.
      journal.rotate();
      return outcome;
    }

    // The last record (highest seq) holds the most recent full conversation
    // state captured before the crash. Apply it.
    const lastRecord = records[records.length - 1]!;
    const replayedMessages = lastRecord.messages as ConversationMessageSnapshot[];

    conversation.fromJSON({
      messages: replayedMessages as never[],
    });
    conversation.rebuildHistory();

    // The conversation is restored from here on; later failures must not
    // hide that from the caller.
    outcome = { replayed: records.length, hadCorruptTail, hadVersionMismatch: false };

    // Write a fresh snapshot so the gap is durably closed even if the
    // process is killed again before the next turn-complete snapshot.
    try {
      persistSnapshot(replayedMessages);
    } catch {
      // Best-effort — never hard-fail recovery due to snapshot write failure.
    }

    // Rotate the journal — it is no longer needed as a gap-filler.
    journal.rotate();

    return outcome;
  } catch {
    // Last-resort guard — recovery must never crash a resume.
    return outcome;
  }
}
|
|
132
|
+
|
|
133
|
+
/**
 * Convenience entry point: derives the journal path from the home directory
 * and session id, then delegates to replayJournalIntoConversation().
 *
 * Lets callers such as session-workflow.ts trigger a replay without importing
 * journalPathFor themselves.
 *
 * @param options - Everything replayJournalIntoConversation() needs except
 *   `journalPath`, plus the `homeDirectory` the path is derived from.
 * @returns The result produced by replayJournalIntoConversation().
 */
export function replayJournalForSession(
  options: Omit<ReplayIntoConversationOptions, 'journalPath'> & {
    readonly homeDirectory: string;
  },
): ReplayIntoConversationResult {
  const { homeDirectory, sessionId } = options;
  return replayJournalIntoConversation({
    ...options,
    journalPath: journalPathFor(homeDirectory, sessionId),
  });
}
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import type { UiRuntimeEvents } from '@/runtime/index.ts';
|
|
2
2
|
import { createStreamStallWatchdog } from './stream-stall-watchdog.ts';
|
|
3
3
|
import { formatUserFacingErrorLine } from './format-user-error.ts';
|
|
4
|
+
import { logger } from '@pellux/goodvibes-sdk/platform/utils';
|
|
4
5
|
|
|
5
6
|
/**
|
|
6
7
|
* Live stream and tool-execution metrics maintained by wireStreamEventMetrics.
|
|
@@ -29,9 +30,28 @@ interface StreamOrchestrator {
|
|
|
29
30
|
readonly streamingOutputTokens: number;
|
|
30
31
|
}
|
|
31
32
|
|
|
32
|
-
/** Minimal provider surface required for the stream stall watchdog. */
|
|
33
|
+
/** Minimal provider surface required for the stream stall watchdog and failover switching. */
|
|
33
34
|
interface StreamProviderRegistry {
|
|
34
|
-
getCurrentModel(): { readonly provider: string };
|
|
35
|
+
getCurrentModel(): { readonly provider: string; readonly registryKey?: string };
|
|
36
|
+
setCurrentModel(registryKey: string): void;
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
/**
 * One entry of the fallback chain returned by ProviderOptimizer.testFallback().
 * Declares only the fields the failover path needs, not the full node shape.
 */
interface FailoverChainNode {
  /** Position of this node within the chain. */
  readonly position: number;
  /** Provider id; combined with `modelId` as `provider:modelId` to form a registry key. */
  readonly providerId: string;
  /** Model id on that provider. */
  readonly modelId: string;
  /** Nodes with `capable` false are never selected as a failover target. */
  readonly capable: boolean;
}
|
|
49
|
+
|
|
50
|
+
/** The slice of ProviderOptimizer that the failover path depends on. */
interface FailoverOptimizer {
  /** Failover is attempted only while this is true. */
  readonly enabled: boolean;
  /** Produces the fallback chain that is searched for the next provider. */
  testFallback(profile?: Record<string, unknown>): {
    readonly chain: ReadonlyArray<FailoverChainNode>;
  };
  /** Records a provider switch from `from` to `to`, with the reason for it. */
  recordFallbackTransition(from: string, to: string, reason: string): void;
}
|
|
36
56
|
|
|
37
57
|
/** Minimal system-message surface required for user-visible notifications. */
|
|
@@ -40,6 +60,15 @@ interface StreamSystemMessageRouter {
|
|
|
40
60
|
low(message: string): void;
|
|
41
61
|
}
|
|
42
62
|
|
|
63
|
+
/**
 * Cost lookup used to attach cost-delta information to failover notices.
 *
 * `getCostFromCatalog` returns USD-per-1M-token pricing for a model id. An
 * implementation may consult a catalog; for an unknown model both fields
 * are 0.
 */
export interface FailoverCostLookup {
  getCostFromCatalog(modelId: string): {
    readonly input: number;
    readonly output: number;
  };
}
|
|
71
|
+
|
|
43
72
|
export interface WireStreamEventMetricsOptions {
|
|
44
73
|
/** The UI runtime event bus (turns + tools sub-buses). */
|
|
45
74
|
readonly events: UiRuntimeEvents;
|
|
@@ -56,6 +85,86 @@ export interface WireStreamEventMetricsOptions {
|
|
|
56
85
|
* so the render closure can read it without a forward-reference issue.
|
|
57
86
|
*/
|
|
58
87
|
readonly metrics: StreamMetrics;
|
|
88
|
+
/**
|
|
89
|
+
* When provided and enabled, the optimizer is consulted on TURN_ERROR to
|
|
90
|
+
* attempt the next viable provider before surfacing the error to the user.
|
|
91
|
+
* When absent or optimizer.enabled is false, behaviour is identical to the
|
|
92
|
+
* pre-failover baseline: error surfaces immediately via systemMessageRouter.
|
|
93
|
+
*/
|
|
94
|
+
readonly providerOptimizer?: FailoverOptimizer;
|
|
95
|
+
/**
|
|
96
|
+
* Callback the caller provides to re-submit the last user turn on a
|
|
97
|
+
* different provider after a successful failover switch. Called only when
|
|
98
|
+
* the optimizer is enabled and a viable next provider exists in the chain.
|
|
99
|
+
*/
|
|
100
|
+
readonly retryTurn?: () => void;
|
|
101
|
+
/**
|
|
102
|
+
* Optional cost catalog for attaching per-1M-token cost information to
|
|
103
|
+
* the failover notice. When provided and both models have non-zero pricing,
|
|
104
|
+
* the notice includes input and output cost comparisons. When absent or pricing is
|
|
105
|
+
* unavailable for either model, the notice honestly states "cost data unavailable".
|
|
106
|
+
*/
|
|
107
|
+
readonly costLookup?: FailoverCostLookup;
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
/** What wireStreamEventMetrics hands back to its caller. */
export interface WireStreamEventMetricsResult {
  /** Unsubscribe functions; add them to the parent unsubs array so they run on exit. */
  readonly unsubs: readonly (() => void)[];

  /**
   * Empties the per-turn set of providers already tried by failover.
   * Call it on every new user submission so the set does not carry over
   * between independent turns. The set is also cleared on TURN_COMPLETED,
   * but a new submission may arrive before that event fires.
   */
  readonly clearFailoverVisited: () => void;

  /**
   * Registers the callback invoked when a TURN_ERROR is surfaced to the user,
   * either immediately (no optimizer) or after the failover chain is
   * exhausted. It is NOT invoked when an automatic failover succeeds; the
   * user then sees a [Failover] notice instead of an error.
   *
   * main.ts uses it to activate the one-key retry affordance. The callback
   * receives exhausted=true when the chain was exhausted first, so the notice
   * can state honestly that a retry reuses the same failed provider.
   */
  readonly onErrorSurfaced: (cb: (exhausted: boolean) => void) => void;
}
|
|
132
|
+
|
|
133
|
+
/**
 * Build the cost-delta suffix appended to a failover notice.
 *
 * The model id is whatever follows the first `:` of a registry key (format
 * `provider:modelId`; the model id may itself contain `:`). Pricing for both
 * models is read from the catalog and compared.
 *
 * @param lookup - Optional cost catalog.
 * @param fromRegistryKey - Registry key of the provider being abandoned (may be undefined).
 * @param toRegistryKey - Registry key of the provider being selected.
 * @returns
 *   - `''` when no lookup is supplied;
 *   - `' [cost data unavailable]'` when either model has no positive input or
 *     output price (including a missing or unparseable source key), so that
 *     no values are fabricated;
 *   - otherwise `' [cost/1M: input $a→$b, output $c→$d]'`, prices to 2 decimals.
 */
function buildCostDeltaSuffix(
  lookup: FailoverCostLookup | undefined,
  fromRegistryKey: string | undefined,
  toRegistryKey: string,
): string {
  if (!lookup) {
    return '';
  }

  // Everything after the first ':'; a key without ':' has no model id.
  const modelIdOf = (registryKey: string): string => {
    const separator = registryKey.indexOf(':');
    return separator < 0 ? '' : registryKey.slice(separator + 1);
  };
  const hasPricing = (cost: { readonly input: number; readonly output: number }): boolean =>
    cost.input > 0 || cost.output > 0;
  const usd = (amount: number): string => `$${amount.toFixed(2)}`;

  const sourceModelId = fromRegistryKey ? modelIdOf(fromRegistryKey) : '';
  const targetModelId = modelIdOf(toRegistryKey);

  // The source catalog is only consulted when a source model id exists; the
  // target is always looked up.
  const sourceCost = sourceModelId
    ? lookup.getCostFromCatalog(sourceModelId)
    : { input: 0, output: 0 };
  const targetCost = lookup.getCostFromCatalog(targetModelId);

  // Zero pricing on either side means the model is unknown to the catalog.
  if (!(hasPricing(sourceCost) && hasPricing(targetCost))) {
    return ' [cost data unavailable]';
  }

  const inputPart = `input ${usd(sourceCost.input)}→${usd(targetCost.input)}`;
  const outputPart = `output ${usd(sourceCost.output)}→${usd(targetCost.output)}`;
  return ` [cost/1M: ${inputPart}, ${outputPart}]`;
}
|
|
60
169
|
|
|
61
170
|
/**
|
|
@@ -64,8 +173,7 @@ export interface WireStreamEventMetricsOptions {
|
|
|
64
173
|
* and declares it before render() so both the render closure and the returned
|
|
65
174
|
* event handlers share the same reference.
|
|
66
175
|
*
|
|
67
|
-
* Returns an
|
|
68
|
-
* array so they are cleaned up on exit.
|
|
176
|
+
* Returns an object with unsubscribe functions and a clearFailoverVisited helper.
|
|
69
177
|
*
|
|
70
178
|
* Responsibilities:
|
|
71
179
|
* - Track stream start time, delta count, token speed, and TTFT
|
|
@@ -75,8 +183,11 @@ export interface WireStreamEventMetricsOptions {
|
|
|
75
183
|
*/
|
|
76
184
|
export function wireStreamEventMetrics(
|
|
77
185
|
options: WireStreamEventMetricsOptions,
|
|
78
|
-
):
|
|
79
|
-
const {
|
|
186
|
+
): WireStreamEventMetricsResult {
|
|
187
|
+
const {
|
|
188
|
+
events, metrics, orchestrator, providerRegistry,
|
|
189
|
+
systemMessageRouter, render, providerOptimizer, retryTurn, costLookup,
|
|
190
|
+
} = options;
|
|
80
191
|
|
|
81
192
|
const unsubs: Array<() => void> = [];
|
|
82
193
|
|
|
@@ -103,10 +214,85 @@ export function wireStreamEventMetrics(
|
|
|
103
214
|
metrics.tokenSpeed = elapsed > 0 ? tokenCount / elapsed : 0;
|
|
104
215
|
}));
|
|
105
216
|
|
|
217
|
+
// Per-turn visited-provider set: tracks providers already attempted this turn
|
|
218
|
+
// so failover cannot ping-pong between two mutually-failing providers.
|
|
219
|
+
// True invariant: at most one retry per provider per turn; exhaustion fires
|
|
220
|
+
// after the chain is consumed.
|
|
221
|
+
// Cleared on TURN_COMPLETED (see handler below) and on new user submission
|
|
222
|
+
// (caller clears via clearFailoverVisited(), wired in main.ts).
|
|
223
|
+
const failoverVisited = new Set<string>();
|
|
224
|
+
|
|
225
|
+
unsubs.push(events.turns.on('TURN_COMPLETED', () => {
|
|
226
|
+
failoverVisited.clear();
|
|
227
|
+
}));
|
|
228
|
+
|
|
106
229
|
unsubs.push(events.turns.on('TURN_ERROR', (event) => {
|
|
107
230
|
const errVal: string = event.error;
|
|
231
|
+
|
|
232
|
+
// --- Optimizer-gated failover path ---
|
|
233
|
+
// When the optimizer is present and enabled, attempt to advance to the next
|
|
234
|
+
// viable provider in the fallback chain before surfacing the error. When
|
|
235
|
+
// the optimizer is absent or disabled, behaviour is identical to baseline:
|
|
236
|
+
// error surfaces immediately.
|
|
237
|
+
if (providerOptimizer?.enabled && retryTurn) {
|
|
238
|
+
const fromProvider = providerRegistry.getCurrentModel().provider;
|
|
239
|
+
// Mark the failing provider as visited so it will never be selected again
|
|
240
|
+
// in this turn, even if a second TURN_ERROR arrives (e.g. ping-pong).
|
|
241
|
+
failoverVisited.add(fromProvider);
|
|
242
|
+
const result = providerOptimizer.testFallback({});
|
|
243
|
+
// Find the first capable node that is NOT already visited this turn and
|
|
244
|
+
// is NOT synthetic. Synthetic nodes are skipped permanently by design:
|
|
245
|
+
// a synthetic model is itself a fallback ladder over real backends, so
|
|
246
|
+
// failing over INTO one after a real backend already failed is unsound
|
|
247
|
+
// double-indirection (it can route straight back to the failed provider).
|
|
248
|
+
const next = result.chain.find(
|
|
249
|
+
(node) =>
|
|
250
|
+
node.capable &&
|
|
251
|
+
!failoverVisited.has(node.providerId) &&
|
|
252
|
+
node.providerId !== 'synthetic',
|
|
253
|
+
);
|
|
254
|
+
|
|
255
|
+
if (next) {
|
|
256
|
+
const toRegistryKey = `${next.providerId}:${next.modelId}`;
|
|
257
|
+
const errorClass = formatUserFacingErrorLine(errVal);
|
|
258
|
+
// Capture FROM registry key before switching — needed for cost comparison.
|
|
259
|
+
const fromRegistryKey = providerRegistry.getCurrentModel().registryKey;
|
|
260
|
+
try {
|
|
261
|
+
providerRegistry.setCurrentModel(toRegistryKey);
|
|
262
|
+
} catch (switchErr) {
|
|
263
|
+
// Switch failed — fall through to honest error display.
|
|
264
|
+
logger.debug('failover setCurrentModel failed', { toRegistryKey, error: String(switchErr) });
|
|
265
|
+
systemMessageRouter.high(`[Error] ${errorClass}`);
|
|
266
|
+
render();
|
|
267
|
+
return;
|
|
268
|
+
}
|
|
269
|
+
// Record the selected provider as visited before the retry fires so
|
|
270
|
+
// a subsequent TURN_ERROR from that provider also skips it.
|
|
271
|
+
failoverVisited.add(next.providerId);
|
|
272
|
+
providerOptimizer.recordFallbackTransition(fromProvider, next.providerId, errorClass);
|
|
273
|
+
const costSuffix = buildCostDeltaSuffix(costLookup, fromRegistryKey, toRegistryKey);
|
|
274
|
+
systemMessageRouter.high(
|
|
275
|
+
`[Failover] ${fromProvider} -> ${next.providerId} (${errorClass})${costSuffix}`,
|
|
276
|
+
);
|
|
277
|
+
render();
|
|
278
|
+
// Re-submit the last user turn on the new provider.
|
|
279
|
+
retryTurn();
|
|
280
|
+
return;
|
|
281
|
+
}
|
|
282
|
+
|
|
283
|
+
// Chain exhausted — all capable candidates have been visited or none exist.
|
|
284
|
+
systemMessageRouter.high(
|
|
285
|
+
`[Failover] Chain exhausted — no alternative provider available. Original error: ${formatUserFacingErrorLine(errVal)}`,
|
|
286
|
+
);
|
|
287
|
+
notifyErrorSurfaced(true);
|
|
288
|
+
render();
|
|
289
|
+
return;
|
|
290
|
+
}
|
|
291
|
+
|
|
292
|
+
// Baseline: optimizer disabled or not wired — surface error immediately.
|
|
108
293
|
const formatted = formatUserFacingErrorLine(errVal);
|
|
109
294
|
systemMessageRouter.high(`[Error] ${formatted}`);
|
|
295
|
+
notifyErrorSurfaced(false);
|
|
110
296
|
render();
|
|
111
297
|
}));
|
|
112
298
|
|
|
@@ -140,5 +326,11 @@ export function wireStreamEventMetrics(
|
|
|
140
326
|
metrics.activeToolName = undefined;
|
|
141
327
|
}));
|
|
142
328
|
|
|
143
|
-
|
|
329
|
+
let _errorSurfacedCb: ((exhausted: boolean) => void) | undefined;
|
|
330
|
+
function notifyErrorSurfaced(exhausted: boolean) { _errorSurfacedCb?.(exhausted); }
|
|
331
|
+
return {
|
|
332
|
+
unsubs,
|
|
333
|
+
clearFailoverVisited: () => failoverVisited.clear(),
|
|
334
|
+
onErrorSurfaced: (cb: (exhausted: boolean) => void) => { _errorSurfacedCb = cb; },
|
|
335
|
+
};
|
|
144
336
|
}
|