@iletai/nzb 1.7.3 → 1.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -77,8 +77,23 @@ app.get("/stream", (req, res) => {
77
77
  sseClients.set(connectionId, res);
78
78
  // Heartbeat to keep connection alive
79
79
  const heartbeat = setInterval(() => {
80
- res.write(`:ping\n\n`);
80
+ if (res.writableEnded || res.closed) {
81
+ clearInterval(heartbeat);
82
+ sseClients.delete(connectionId);
83
+ return;
84
+ }
85
+ try {
86
+ res.write(`:ping\n\n`);
87
+ }
88
+ catch {
89
+ clearInterval(heartbeat);
90
+ sseClients.delete(connectionId);
91
+ }
81
92
  }, 20_000);
93
+ res.on("error", () => {
94
+ clearInterval(heartbeat);
95
+ sseClients.delete(connectionId);
96
+ });
82
97
  req.on("close", () => {
83
98
  clearInterval(heartbeat);
84
99
  sseClients.delete(connectionId);
@@ -196,6 +211,13 @@ app.post("/send-photo", async (req, res) => {
196
211
  res.status(500).json({ error: msg });
197
212
  }
198
213
  });
214
// Global error handler — catch unhandled Express errors.
// Declared with four parameters so Express treats it as error-handling middleware.
app.use((err, _req, res, next) => {
    // Errors passed to next() are not guaranteed to be Error instances.
    console.error("[nzb] Express error:", err instanceof Error ? err.message : err);
    // Once the response has started we can no longer send a JSON body;
    // delegate to Express's default handler so the request is failed and the
    // connection closed instead of being left hanging.
    if (res.headersSent) {
        next(err);
        return;
    }
    res.status(500).json({ error: "Internal server error" });
});
199
221
  export function startApiServer() {
200
222
  return new Promise((resolve, reject) => {
201
223
  const server = app.listen(config.apiPort, "127.0.0.1", () => {
package/dist/cli.js CHANGED
@@ -45,6 +45,7 @@ Commands:
45
45
  tui Connect to the daemon via terminal UI
46
46
  setup Interactive first-run configuration
47
47
  update Check for updates and install the latest version
48
+ cron Manage scheduled cron jobs
48
49
  help Show this help message
49
50
 
50
51
  Flags (start):
@@ -98,6 +99,101 @@ switch (command) {
98
99
  }
99
100
  break;
100
101
  }
102
+ case "cron": {
103
+ const subcommand = args[1] || "list";
104
+ const { listCronJobs, createCronJob, deleteCronJob, updateCronJob } = await import("./store/cron-store.js");
105
+ switch (subcommand) {
106
+ case "list": {
107
+ const jobs = listCronJobs();
108
+ if (jobs.length === 0) {
109
+ console.log("No cron jobs configured.");
110
+ }
111
+ else {
112
+ for (const job of jobs) {
113
+ const status = job.enabled ? "✅" : "⏸️";
114
+ console.log(`${status} ${job.id} — ${job.name} [${job.taskType}] ${job.cronExpression}`);
115
+ }
116
+ }
117
+ break;
118
+ }
119
+ case "add": {
120
+ const id = args[2];
121
+ const name = args[3];
122
+ const cronExpr = args[4];
123
+ const taskType = args[5];
124
+ if (!id || !name || !cronExpr || !taskType) {
125
+ console.error("Usage: nzb cron add <id> <name> <cron-expression> <task-type> [payload-json]");
126
+ console.error("Task types: prompt, health_check, backup, notification, webhook");
127
+ process.exit(1);
128
+ }
129
+ const validTypes = ["prompt", "health_check", "backup", "notification", "webhook"];
130
+ if (!validTypes.includes(taskType)) {
131
+ console.error(`Invalid task type: ${taskType}. Valid: ${validTypes.join(", ")}`);
132
+ process.exit(1);
133
+ }
134
+ const { Cron } = await import("croner");
135
+ try {
136
+ new Cron(cronExpr);
137
+ }
138
+ catch {
139
+ console.error(`Invalid cron expression: ${cronExpr}`);
140
+ process.exit(1);
141
+ }
142
+ const payload = args[6] || "{}";
143
+ try {
144
+ const job = createCronJob({
145
+ id,
146
+ name,
147
+ cronExpression: cronExpr,
148
+ taskType: taskType,
149
+ payload,
150
+ });
151
+ console.log(`Created cron job '${job.id}' (${job.name}): ${job.cronExpression}`);
152
+ console.log("Note: The job will be scheduled when the daemon starts.");
153
+ }
154
+ catch (err) {
155
+ console.error("Error:", err instanceof Error ? err.message : err);
156
+ process.exit(1);
157
+ }
158
+ break;
159
+ }
160
+ case "remove": {
161
+ const removeId = args[2];
162
+ if (!removeId) {
163
+ console.error("Usage: nzb cron remove <id>");
164
+ process.exit(1);
165
+ }
166
+ const deleted = deleteCronJob(removeId);
167
+ console.log(deleted ? `Deleted cron job '${removeId}'.` : `Job '${removeId}' not found.`);
168
+ break;
169
+ }
170
+ case "enable": {
171
+ const enableId = args[2];
172
+ if (!enableId) {
173
+ console.error("Usage: nzb cron enable <id>");
174
+ process.exit(1);
175
+ }
176
+ const enabled = updateCronJob(enableId, { enabled: true });
177
+ console.log(enabled ? `Enabled cron job '${enableId}'.` : `Job '${enableId}' not found.`);
178
+ break;
179
+ }
180
+ case "disable": {
181
+ const disableId = args[2];
182
+ if (!disableId) {
183
+ console.error("Usage: nzb cron disable <id>");
184
+ process.exit(1);
185
+ }
186
+ const disabled = updateCronJob(disableId, { enabled: false });
187
+ console.log(disabled ? `Disabled cron job '${disableId}'.` : `Job '${disableId}' not found.`);
188
+ break;
189
+ }
190
+ default:
191
+ console.error(`Unknown cron subcommand: ${subcommand}`);
192
+ console.error("Available: list, add, remove, enable, disable");
193
+ process.exit(1);
194
+ }
195
+ break;
196
+ }
101
197
  case "help":
102
198
  case "--help":
103
199
  case "-h":
package/dist/config.js CHANGED
@@ -82,6 +82,10 @@ export const config = {
82
82
  groupMentionOnly: process.env.GROUP_MENTION_ONLY !== "false",
83
83
  /** Reasoning effort: low | medium | high */
84
84
  reasoningEffort: validateEnum(process.env.REASONING_EFFORT, ["low", "medium", "high"], "medium", "REASONING_EFFORT"),
85
+ /** Model failover chain: comma-separated list of fallback models */
86
+ modelFailoverChain: process.env.MODEL_FAILOVER_CHAIN?.split(",").map((s) => s.trim()).filter(Boolean) ?? [],
87
+ /** Cooldown duration (ms) for a model after failure before retrying it */
88
+ modelCooldownMs: parseInt(process.env.MODEL_COOLDOWN_MS ?? "60000"),
85
89
  };
86
90
  /** Persist an env variable to ~/.nzb/.env */
87
91
  export function persistEnvVar(key, value) {
@@ -0,0 +1,154 @@
1
+ /**
2
+ * Model Failover Manager — tracks model health and selects fallback models
3
+ * when the primary model encounters errors (rate limits, timeouts, etc.).
4
+ *
5
+ * When MODEL_FAILOVER_CHAIN is empty, this module is a no-op:
6
+ * selectModel() and getNextFallback() both return undefined, so callers keep using the configured primary model.
7
+ */
8
/**
 * Detect the provider from a model name string.
 *
 * Matching is case-insensitive and looks only at the start of the name:
 *   - "claude-…"                                   → "anthropic"
 *   - "gpt-…" or an o-series name ("o1", "o3-mini") → "openai"
 *   - "gemini-…"                                   → "google"
 *
 * @param {string} model - Model identifier, e.g. "claude-sonnet-4" or "o3".
 * @returns {"anthropic" | "openai" | "google" | "unknown"} The detected
 *   provider; "unknown" for unrecognised names and for non-string input.
 */
export function detectProvider(model) {
    if (typeof model !== "string")
        return "unknown";
    const lower = model.toLowerCase();
    if (lower.startsWith("claude-"))
        return "anthropic";
    // o-series names may be bare ("o1", "o3") or suffixed ("o4-mini"), so
    // accept "o<digits>" followed by either a dash or the end of the name.
    if (lower.startsWith("gpt-") || /^o\d+(?:-|$)/.test(lower))
        return "openai";
    if (lower.startsWith("gemini-"))
        return "google";
    return "unknown";
}
19
/** Number of consecutive failures before a model is considered "degraded". */
const DEGRADED_THRESHOLD = 3;
/** Cooldown used when the caller does not supply a positive numeric duration. */
const DEFAULT_COOLDOWN_MS = 60_000;
/**
 * Error-message patterns that indicate a model-level failure.
 * "429" must not be part of a longer digit run, so port numbers or
 * millisecond counts that merely contain "429" do not trigger failover.
 */
const MODEL_ERROR_PATTERN = /(?<!\d)429(?!\d)|rate.?limit|too many requests|quota|capacity|overloaded|model.*not.*available|model.*error|resource.*exhausted/i;
/**
 * Tracks per-model health (failure count, cooldown deadline, success count)
 * for an ordered failover chain and picks which model to use next.
 *
 * With an empty chain the manager is disabled: `enabled` is false and both
 * `selectModel()` and `getNextFallback()` return undefined.
 */
export class ModelFailoverManager {
    /** Ordered list of model names; earlier entries are preferred. */
    chain;
    /** Milliseconds a model is skipped after a recorded failure. */
    cooldownMs;
    /** model name → { failures, lastFailure, cooldownUntil, successCount } */
    health = new Map();
    /**
     * @param {string[]} chain - Ordered model names. Anything that is not an
     *   array is treated as an empty chain. The array is copied, so later
     *   mutation by the caller does not affect this manager.
     * @param {number} cooldownMs - Cooldown after a failure, in milliseconds.
     *   Missing, non-numeric, zero or negative values fall back to 60 000.
     */
    constructor(chain, cooldownMs) {
        this.chain = Array.isArray(chain) ? [...chain] : [];
        // Coerce first so a numeric string cannot turn `Date.now() + cooldownMs`
        // into string concatenation; NaN and non-positive values use the default.
        const ms = Number(cooldownMs);
        this.cooldownMs = ms > 0 ? ms : DEFAULT_COOLDOWN_MS;
        // Initialise health entries for every model in the chain
        for (const model of this.chain) {
            this.getOrCreate(model);
        }
    }
    /** True when at least one fallback model is configured. */
    get enabled() {
        return this.chain.length > 0;
    }
    /**
     * Select the best model to use right now.
     *
     * @returns {string | undefined} The first model in chain order that is not
     *   on cooldown. If every model is on cooldown, the one whose cooldown
     *   expires soonest. Undefined when the chain is empty (caller should fall
     *   back to `config.copilotModel`).
     */
    selectModel() {
        if (this.chain.length === 0)
            return undefined;
        const now = Date.now();
        let earliest;
        let earliestTime = Infinity;
        for (const model of this.chain) {
            const { cooldownUntil } = this.getOrCreate(model);
            if (now >= cooldownUntil)
                return model;
            // Remember the soonest-expiring model in case none is healthy.
            if (cooldownUntil < earliestTime) {
                earliestTime = cooldownUntil;
                earliest = model;
            }
        }
        return earliest;
    }
    /**
     * Record a successful request for `model`: resets its failure counter,
     * clears its cooldown and increments its success count.
     *
     * @param {string} model
     */
    recordSuccess(model) {
        const h = this.getOrCreate(model);
        h.failures = 0;
        h.cooldownUntil = 0;
        h.successCount++;
    }
    /**
     * Record a failed request for `model`.
     *
     * The cooldown is applied immediately on every failure (not only after
     * DEGRADED_THRESHOLD failures) so the next selection tries a different model.
     *
     * @param {string} model
     */
    recordFailure(model) {
        const h = this.getOrCreate(model);
        const now = Date.now();
        h.failures++;
        h.lastFailure = now;
        h.cooldownUntil = now + this.cooldownMs;
    }
    /**
     * Get the next fallback model after `currentModel`.
     * Prefers a model from a DIFFERENT provider to maximise availability.
     *
     * @param {string} currentModel - The model that just failed; never returned.
     * @returns {string | undefined} A model that is not on cooldown, or
     *   undefined when the chain is empty or no other model is available.
     */
    getNextFallback(currentModel) {
        if (this.chain.length === 0)
            return undefined;
        const now = Date.now();
        const currentProvider = detectProvider(currentModel);
        // Candidates: every other model that is not on cooldown, in chain order.
        const healthy = this.chain.filter((model) => model !== currentModel && now >= this.getOrCreate(model).cooldownUntil);
        // First choice: a different provider; otherwise any healthy model.
        const otherProvider = healthy.find((model) => detectProvider(model) !== currentProvider);
        return otherProvider ?? healthy[0];
    }
    /**
     * Detect whether an error is a model-level error that warrants failover
     * (as opposed to a generic connectivity issue that warrants simple retry).
     *
     * @param {unknown} err - An Error (its message is inspected) or any other
     *   value (its string form is inspected).
     * @returns {boolean}
     */
    isModelError(err) {
        const msg = err instanceof Error ? err.message : String(err);
        return MODEL_ERROR_PATTERN.test(msg);
    }
    /**
     * Return a snapshot of health status for every model in the chain.
     *
     * Status is "degraded" once failures reach DEGRADED_THRESHOLD, otherwise
     * "cooldown" while the cooldown deadline is in the future, otherwise
     * "healthy".
     *
     * @returns {Array<{model: string, provider: string, status: string,
     *   failures: number, successCount: number, lastFailure: string | undefined}>}
     *   `lastFailure` is an ISO-8601 timestamp, or undefined if none was recorded.
     */
    getHealthStatus() {
        const now = Date.now();
        return this.chain.map((model) => {
            const h = this.getOrCreate(model);
            let status;
            if (h.failures >= DEGRADED_THRESHOLD) {
                status = "degraded";
            }
            else if (now < h.cooldownUntil) {
                status = "cooldown";
            }
            else {
                status = "healthy";
            }
            return {
                model,
                provider: detectProvider(model),
                status,
                failures: h.failures,
                successCount: h.successCount,
                lastFailure: h.lastFailure ? new Date(h.lastFailure).toISOString() : undefined,
            };
        });
    }
    /**
     * Return the health record for `model`, creating a zeroed one on first use
     * (this also covers models that are not part of the chain).
     *
     * @param {string} model
     */
    getOrCreate(model) {
        let h = this.health.get(model);
        if (!h) {
            h = { failures: 0, lastFailure: undefined, cooldownUntil: 0, successCount: 0 };
            this.health.set(model, h);
        }
        return h;
    }
}
154
+ //# sourceMappingURL=model-failover.js.map
@@ -8,6 +8,7 @@ import { completeTeam, updateTeamMemberResult } from "../store/team-store.js";
8
8
  import { formatAge, withTimeout } from "../utils.js";
9
9
  import { resetClient } from "./client.js";
10
10
  import { loadMcpConfig } from "./mcp-config.js";
11
+ import { ModelFailoverManager } from "./model-failover.js";
11
12
  import { getSkillDirectories } from "./skills.js";
12
13
  import { getOrchestratorSystemMessage } from "./system-message.js";
13
14
  import { createTools } from "./tools.js";
@@ -32,6 +33,8 @@ const workers = new Map();
32
33
  const teams = new Map();
33
34
  let healthCheckTimer;
34
35
  let workerReaperTimer;
36
+ // Model failover manager — initialised lazily in initOrchestrator
37
+ let failoverManager;
35
38
  // Persistent orchestrator session
36
39
  let orchestratorSession;
37
40
  // Coalesces concurrent ensureOrchestratorSession calls
@@ -155,6 +158,8 @@ function startHealthCheck() {
155
158
  return;
156
159
  if (healthCheckRunning)
157
160
  return;
161
+ if (processing)
162
+ return; // Don't interfere while processing messages
158
163
  healthCheckRunning = true;
159
164
  try {
160
165
  const state = copilotClient.getState();
@@ -188,17 +193,17 @@ export function stopHealthCheck() {
188
193
  function startWorkerReaper() {
189
194
  if (workerReaperTimer)
190
195
  return;
191
- workerReaperTimer = setInterval(() => {
196
+ workerReaperTimer = setInterval(async () => {
192
197
  const maxAge = config.workerTimeoutMs * 2;
193
198
  const now = Date.now();
194
199
  for (const [name, worker] of workers) {
195
200
  if (worker.startedAt && now - worker.startedAt > maxAge) {
196
201
  console.log(`[nzb] Reaping stuck worker '${name}' (age: ${formatAge(worker.startedAt)})`);
197
202
  try {
198
- worker.session.disconnect().catch(() => { });
203
+ await withTimeout(worker.session.disconnect(), 5_000, `reaper: worker '${name}'`);
199
204
  }
200
- catch {
201
- // Session may already be destroyed
205
+ catch (err) {
206
+ console.error(`[nzb] Reaper: worker '${name}' disconnect failed:`, err instanceof Error ? err.message : err);
202
207
  }
203
208
  workers.delete(name);
204
209
  feedBackgroundResult(name, `⚠ Worker '${name}' was automatically killed after exceeding timeout.`);
@@ -334,6 +339,11 @@ async function createOrResumeSession() {
334
339
  export async function initOrchestrator(client) {
335
340
  copilotClient = client;
336
341
  const { mcpServers, skillDirectories } = getSessionConfig();
342
+ // Initialise failover manager from config
343
+ failoverManager = new ModelFailoverManager(config.modelFailoverChain, config.modelCooldownMs);
344
+ if (failoverManager.enabled) {
345
+ console.log(`[nzb] Model failover chain: ${config.modelFailoverChain.join(" → ")}`);
346
+ }
337
347
  // Validate configured model against available models (skip for default — saves 1-3s startup)
338
348
  if (config.copilotModel !== DEFAULT_MODEL) {
339
349
  try {
@@ -522,7 +532,10 @@ async function processQueue() {
522
532
  }
523
533
  // Re-check for messages that arrived during the last executeOnSession call
524
534
  if (messageQueue.length > 0) {
525
- void processQueue();
535
+ processQueue().catch((err) => {
536
+ console.error("[nzb] processQueue re-check failed:", err instanceof Error ? err.message : err);
537
+ processing = false;
538
+ });
526
539
  }
527
540
  }
528
541
  function isRecoverableError(err) {
@@ -560,104 +573,172 @@ export async function sendToOrchestrator(prompt, source, callback, onToolEvent,
560
573
  const sourceChannel = source.type === "telegram" ? "telegram" : source.type === "tui" ? "tui" : undefined;
561
574
  // Enqueue with priority — user messages go before background messages
562
575
  void (async () => {
563
- for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
564
- try {
565
- const finalContent = await new Promise((resolve, reject) => {
566
- const item = {
567
- prompt: taggedPrompt,
568
- attachments,
569
- callback,
570
- onToolEvent,
571
- onUsage,
572
- sourceChannel,
573
- resolve,
574
- reject,
575
- };
576
- if (source.type === "background") {
577
- // Background results go to the back of the queue
578
- messageQueue.push(item);
579
- }
580
- else {
581
- // User messages inserted before any background messages (priority)
582
- const bgIndex = messageQueue.findIndex(isBackgroundMessage);
583
- if (bgIndex >= 0) {
584
- messageQueue.splice(bgIndex, 0, item);
576
+ // Safety timeout for entire message processing chain.
577
+ // Uses a flag to prevent double-callback if timeout fires while processing completes.
578
+ const GLOBAL_MSG_TIMEOUT_MS = 300_000; // 5 minutes
579
+ let globalTimedOut = false;
580
+ const globalTimer = setTimeout(() => {
581
+ globalTimedOut = true;
582
+ console.error("[nzb] Global message processing timeout (5 min). Force-failing.");
583
+ Promise.resolve(callback("Error: Message processing timed out after 5 minutes. Please try again.", true)).catch(() => { });
584
+ }, GLOBAL_MSG_TIMEOUT_MS);
585
+ try {
586
+ for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
587
+ try {
588
+ const finalContent = await new Promise((resolve, reject) => {
589
+ const item = {
590
+ prompt: taggedPrompt,
591
+ attachments,
592
+ callback,
593
+ onToolEvent,
594
+ onUsage,
595
+ sourceChannel,
596
+ resolve,
597
+ reject,
598
+ };
599
+ if (source.type === "background") {
600
+ // Background results go to the back of the queue
601
+ messageQueue.push(item);
585
602
  }
586
603
  else {
587
- messageQueue.push(item);
604
+ // User messages inserted before any background messages (priority)
605
+ const bgIndex = messageQueue.findIndex(isBackgroundMessage);
606
+ if (bgIndex >= 0) {
607
+ messageQueue.splice(bgIndex, 0, item);
608
+ }
609
+ else {
610
+ messageQueue.push(item);
611
+ }
588
612
  }
613
+ processQueue();
614
+ });
615
+ // Deliver response to user FIRST, then log best-effort
616
+ // Record success for failover tracking
617
+ if (failoverManager?.enabled) {
618
+ failoverManager.recordSuccess(config.copilotModel);
589
619
  }
590
- processQueue();
591
- });
592
- // Deliver response to user FIRST, then log best-effort
593
- try {
594
- logMessage("out", sourceLabel, finalContent);
595
- }
596
- catch {
597
- /* best-effort */
598
- }
599
- // Log both sides of the conversation before delivery so we have the row ID
600
- let assistantLogId;
601
- try {
602
- const telegramMsgId = source.type === "telegram" ? source.messageId : undefined;
603
- logConversation(logRole, prompt, sourceLabel, telegramMsgId);
604
- }
605
- catch {
606
- /* best-effort */
607
- }
608
- try {
609
- assistantLogId = logConversation("assistant", finalContent, sourceLabel);
610
- }
611
- catch {
612
- /* best-effort */
613
- }
614
- await callback(finalContent, true, { assistantLogId });
615
- // Auto-continue: if the response was cut short by timeout, automatically
616
- // send a follow-up "Continue" message so the user doesn't have to
617
- if (finalContent.includes("⏱ Response was cut short (timeout)") && _autoContinueCount < MAX_AUTO_CONTINUE) {
618
- console.log(`[nzb] Auto-continuing after timeout (${_autoContinueCount + 1}/${MAX_AUTO_CONTINUE})…`);
619
- await sleep(1000);
620
- void sendToOrchestrator("Continue from where you left off. Do not repeat what was already said.", source, callback, onToolEvent, onUsage, _autoContinueCount + 1);
621
- }
622
- return;
623
- }
624
- catch (err) {
625
- const msg = err instanceof Error ? err.message : String(err);
626
- // Don't retry cancelled messages
627
- if (/cancelled|abort/i.test(msg)) {
628
- return;
629
- }
630
- // Vision not supported — strip attachments and retry with text-only prompt.
631
- // executeOnSession already destroyed the tainted session.
632
- if (/not supported for vision/i.test(msg)) {
633
- console.log(`[nzb] Vision not supported — retrying without attachments`);
634
- attachments = undefined;
635
- taggedPrompt =
636
- `[System: The current model '${config.copilotModel}' does not support image/vision analysis. ` +
637
- `The image path is already included in the user's message below. ` +
638
- `Please inform the user that the current model doesn't support direct image analysis, ` +
639
- `and suggest switching to a vision-capable model (e.g. gpt-4o, claude-sonnet-4, gemini-2.0-flash) ` +
640
- `using the /model command.]\n\n${taggedPrompt}`;
641
- continue;
642
- }
643
- if (isRecoverableError(err) && attempt < MAX_RETRIES) {
644
- const delay = RECONNECT_DELAYS_MS[Math.min(attempt, RECONNECT_DELAYS_MS.length - 1)];
645
- console.error(`[nzb] Recoverable error: ${msg}. Retry ${attempt + 1}/${MAX_RETRIES} after ${delay}ms…`);
646
- await sleep(delay);
647
- // Reset client before retry in case the connection is stale
648
620
  try {
649
- await ensureClient();
621
+ logMessage("out", sourceLabel, finalContent);
650
622
  }
651
623
  catch {
652
- /* will fail again on next attempt */
624
+ /* best-effort */
653
625
  }
654
- continue;
626
+ // Log both sides of the conversation before delivery so we have the row ID
627
+ let assistantLogId;
628
+ try {
629
+ const telegramMsgId = source.type === "telegram" ? source.messageId : undefined;
630
+ logConversation(logRole, prompt, sourceLabel, telegramMsgId);
631
+ }
632
+ catch {
633
+ /* best-effort */
634
+ }
635
+ try {
636
+ assistantLogId = logConversation("assistant", finalContent, sourceLabel);
637
+ }
638
+ catch {
639
+ /* best-effort */
640
+ }
641
+ try {
642
+ if (!globalTimedOut) {
643
+ await callback(finalContent, true, { assistantLogId });
644
+ }
645
+ }
646
+ catch (callbackErr) {
647
+ console.error("[nzb] Callback error after successful response:", callbackErr instanceof Error ? callbackErr.message : callbackErr);
648
+ }
649
+ // Auto-continue: if the response was cut short by timeout, automatically
650
+ // send a follow-up "Continue" message so the user doesn't have to
651
+ if (finalContent.includes("⏱ Response was cut short (timeout)") && _autoContinueCount < MAX_AUTO_CONTINUE) {
652
+ console.log(`[nzb] Auto-continuing after timeout (${_autoContinueCount + 1}/${MAX_AUTO_CONTINUE})…`);
653
+ await sleep(1000);
654
+ void sendToOrchestrator("Continue from where you left off. Do not repeat what was already said.", source, callback, onToolEvent, onUsage, _autoContinueCount + 1);
655
+ }
656
+ return;
657
+ }
658
+ catch (err) {
659
+ const msg = err instanceof Error ? err.message : String(err);
660
+ // Don't retry cancelled messages
661
+ if (/cancelled|abort/i.test(msg)) {
662
+ if (!globalTimedOut) {
663
+ try {
664
+ await callback("Request was cancelled.", true);
665
+ }
666
+ catch { /* best-effort */ }
667
+ }
668
+ return;
669
+ }
670
+ // Vision not supported — strip attachments and retry with text-only prompt.
671
+ // executeOnSession already destroyed the tainted session.
672
+ if (/not supported for vision/i.test(msg)) {
673
+ console.log(`[nzb] Vision not supported — retrying without attachments`);
674
+ attachments = undefined;
675
+ taggedPrompt =
676
+ `[System: The current model '${config.copilotModel}' does not support image/vision analysis. ` +
677
+ `The image path is already included in the user's message below. ` +
678
+ `Please inform the user that the current model doesn't support direct image analysis, ` +
679
+ `and suggest switching to a vision-capable model (e.g. gpt-4o, claude-sonnet-4, gemini-2.0-flash) ` +
680
+ `using the /model command.]\n\n${taggedPrompt}`;
681
+ continue;
682
+ }
683
+ if (isRecoverableError(err) && attempt < MAX_RETRIES) {
684
+ // Model failover: if it's a model-level error and we have fallbacks, switch model
685
+ if (failoverManager?.enabled && failoverManager.isModelError(err)) {
686
+ const failedModel = config.copilotModel;
687
+ failoverManager.recordFailure(failedModel);
688
+ const fallback = failoverManager.getNextFallback(failedModel);
689
+ if (fallback) {
690
+ console.log(`[nzb] Model failover: ${failedModel} → ${fallback} (${msg})`);
691
+ config.copilotModel = fallback;
692
+ // Force session recreation with the new model
693
+ orchestratorSession = undefined;
694
+ sessionCreatedAt = undefined;
695
+ deleteState(ORCHESTRATOR_SESSION_KEY);
696
+ }
697
+ }
698
+ const delay = RECONNECT_DELAYS_MS[Math.min(attempt, RECONNECT_DELAYS_MS.length - 1)];
699
+ console.error(`[nzb] Recoverable error: ${msg}. Retry ${attempt + 1}/${MAX_RETRIES} after ${delay}ms…`);
700
+ await sleep(delay);
701
+ // Reset client before retry in case the connection is stale
702
+ try {
703
+ await ensureClient();
704
+ }
705
+ catch {
706
+ /* will fail again on next attempt */
707
+ }
708
+ continue;
709
+ }
710
+ // Model-level error with failover available — try switching model even if not normally recoverable
711
+ if (failoverManager?.enabled && failoverManager.isModelError(err) && attempt < MAX_RETRIES) {
712
+ const failedModel = config.copilotModel;
713
+ failoverManager.recordFailure(failedModel);
714
+ const fallback = failoverManager.getNextFallback(failedModel);
715
+ if (fallback) {
716
+ console.log(`[nzb] Model failover: ${failedModel} → ${fallback} (${msg})`);
717
+ config.copilotModel = fallback;
718
+ orchestratorSession = undefined;
719
+ sessionCreatedAt = undefined;
720
+ deleteState(ORCHESTRATOR_SESSION_KEY);
721
+ await sleep(RECONNECT_DELAYS_MS[0]);
722
+ try {
723
+ await ensureClient();
724
+ }
725
+ catch {
726
+ /* will fail again on next attempt */
727
+ }
728
+ continue;
729
+ }
730
+ }
731
+ console.error(`[nzb] Error processing message: ${msg}`);
732
+ if (!globalTimedOut) {
733
+ await callback(`Error: ${msg}`, true);
734
+ }
735
+ return;
655
736
  }
656
- console.error(`[nzb] Error processing message: ${msg}`);
657
- await callback(`Error: ${msg}`, true);
658
- return;
659
737
  }
660
738
  }
739
+ finally {
740
+ clearTimeout(globalTimer);
741
+ }
661
742
  })().catch((err) => {
662
743
  console.error(`[nzb] Unhandled error in sendToOrchestrator: ${err instanceof Error ? err.message : String(err)}`);
663
744
  });
@@ -733,4 +814,8 @@ export async function compactSession() {
733
814
  return `Compaction failed: ${err instanceof Error ? err.message : String(err)}`;
734
815
  }
735
816
  }
817
+ /** Return the module-level ModelFailoverManager (undefined until initOrchestrator() has assigned it) so tools can query model health status. */
818
+ export function getFailoverManager() {
819
+ return failoverManager;
820
+ }
736
821
  //# sourceMappingURL=orchestrator.js.map