@link-assistant/hive-mind 1.23.5 → 1.23.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,11 @@
1
1
  # @link-assistant/hive-mind
2
2
 
3
+ ## 1.23.6
4
+
5
+ ### Patch Changes
6
+
7
+ - 0a7dbcf: Add exponential backoff retry when bot launch fails with 409 Conflict error (e.g., due to restart overlap, stale connections, or network issues). Retry schedule: 1s, 2s, 4s, ... up to 10 minutes max. Non-retryable errors (401 Unauthorized) still cause immediate exit.
8
+
3
9
  ## 1.23.5
4
10
 
5
11
  ### Patch Changes
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@link-assistant/hive-mind",
3
- "version": "1.23.5",
3
+ "version": "1.23.6",
4
4
  "description": "AI-powered issue solver and hive mind for collaborative problem solving",
5
5
  "main": "src/hive.mjs",
6
6
  "type": "module",
@@ -13,7 +13,7 @@
13
13
  "hive-telegram-bot": "./src/telegram-bot.mjs"
14
14
  },
15
15
  "scripts": {
16
- "test": "node tests/solve-queue.test.mjs && node tests/limits-display.test.mjs && node tests/test-usage-limit.mjs && node tests/test-telegram-message-filters.mjs && node tests/test-solve-queue-command.mjs && node tests/test-queue-display-1267.mjs",
16
+ "test": "node tests/solve-queue.test.mjs && node tests/limits-display.test.mjs && node tests/test-usage-limit.mjs && node tests/test-telegram-message-filters.mjs && node tests/test-solve-queue-command.mjs && node tests/test-queue-display-1267.mjs && node tests/test-telegram-bot-launcher.mjs",
17
17
  "test:queue": "node tests/solve-queue.test.mjs",
18
18
  "test:limits-display": "node tests/limits-display.test.mjs",
19
19
  "test:usage-limit": "node tests/test-usage-limit.mjs",
@@ -0,0 +1,190 @@
1
+ /**
2
+ * Bot launcher with exponential backoff retry for Telegraf polling mode.
3
+ *
4
+ * Handles transient errors (409 Conflict, network errors, 5xx) by retrying
5
+ * with exponential backoff. Non-retryable errors (401 Unauthorized) cause
6
+ * immediate exit.
7
+ *
8
+ * @see https://github.com/link-assistant/hive-mind/issues/1240
9
+ * @see https://core.telegram.org/bots/api#getupdates
10
+ */
11
+
12
const MS_PER_SECOND = 1000;
const MS_PER_MINUTE = 60 * MS_PER_SECOND;

/**
 * Baseline retry settings, used for every option the caller leaves unset.
 *
 * - baseDelayMs:       delay before the first retry (1 second)
 * - maxDelayMs:        ceiling applied to the exponential delay (10 minutes)
 * - backoffMultiplier: factor by which the delay grows on each attempt
 * - jitterFraction:    portion of the delay that may be added as random
 *                      jitter (10%), to avoid a thundering herd
 */
export const LAUNCHER_DEFAULTS = {
  baseDelayMs: MS_PER_SECOND,
  maxDelayMs: 10 * MS_PER_MINUTE,
  backoffMultiplier: 2,
  jitterFraction: 0.1,
};
21
+
22
/**
 * Error codes that must NOT be retried (fatal errors).
 * 401 = Invalid bot token -- retrying won't help.
 */
const NON_RETRYABLE_CODES = new Set([401]);

/**
 * Determines whether a failed launch attempt should be retried.
 *
 * Every error is considered retryable unless its code is listed in
 * NON_RETRYABLE_CODES. That covers 409 (Conflict), 429 (Rate limit), 5xx
 * responses, and network/fetch errors that carry either no code at all or a
 * string code such as ECONNRESET / ETIMEDOUT.
 *
 * The code is taken from `error.code`; when that is absent, the
 * `error.response.error_code` field of the attached API payload is used.
 * A missing error value (a promise rejected with null/undefined) is
 * classified as retryable instead of raising a TypeError, so the caller's
 * catch block never masks the original failure.
 *
 * @param {Error|null|undefined} error - The error to classify
 * @returns {boolean} true if the error is retryable
 */
export function isRetryableError(error) {
  const code = error?.code ?? error?.response?.error_code;
  return !NON_RETRYABLE_CODES.has(code);
}
44
+
45
/**
 * Computes how long to wait before the next launch attempt.
 *
 * The delay grows exponentially with the attempt number, is capped at the
 * configured maximum, and then has a random jitter added on top:
 *
 *   min(baseDelay * multiplier^(attempt-1), maxDelay) + random jitter
 *
 * Any option left undefined falls back to the matching LAUNCHER_DEFAULTS entry.
 *
 * @param {number} attempt - Current attempt number (1-based)
 * @param {object} [options] - Configuration options
 * @param {number} [options.baseDelayMs] - Base delay in milliseconds
 * @param {number} [options.maxDelayMs] - Maximum delay cap in milliseconds
 * @param {number} [options.backoffMultiplier] - Exponential growth factor
 * @param {number} [options.jitterFraction] - Fraction of delay to use as jitter (0-1)
 * @returns {number} Delay in milliseconds before next retry, rounded to an integer
 */
export function calculateRetryDelay(attempt, options = {}) {
  const { baseDelayMs, maxDelayMs, backoffMultiplier, jitterFraction } = options;

  // Defaults are consulted only for options that are strictly undefined.
  const base = baseDelayMs === undefined ? LAUNCHER_DEFAULTS.baseDelayMs : baseDelayMs;
  const ceiling = maxDelayMs === undefined ? LAUNCHER_DEFAULTS.maxDelayMs : maxDelayMs;
  const factor = backoffMultiplier === undefined ? LAUNCHER_DEFAULTS.backoffMultiplier : backoffMultiplier;
  const fraction = jitterFraction === undefined ? LAUNCHER_DEFAULTS.jitterFraction : jitterFraction;

  const growth = Math.pow(factor, attempt - 1);
  const limited = Math.min(base * growth, ceiling);

  // Jitter is applied after the cap, so the result may exceed the cap slightly.
  const extra = limited * fraction * Math.random();
  return Math.round(limited + extra);
}
67
+
68
/**
 * Renders a millisecond delay as a short human-readable string.
 *
 * The delay is rounded to whole seconds first. Values under one minute are
 * shown as seconds only; longer values are shown as minutes, with the
 * seconds part omitted when it is zero.
 *
 * @param {number} delayMs - Delay in milliseconds
 * @returns {string} Human-readable delay (e.g., "5s", "2m 30s", "10m")
 */
export function formatDelay(delayMs) {
  const wholeSeconds = Math.round(delayMs / 1000);
  if (wholeSeconds < 60) {
    return `${wholeSeconds}s`;
  }

  const mins = Math.floor(wholeSeconds / 60);
  const secs = wholeSeconds % 60;
  return secs === 0 ? `${mins}m` : `${mins}m ${secs}s`;
}
86
+
87
/**
 * Builds the error thrown when the launch is cancelled through the AbortSignal.
 * Every abort path uses code 'ABORT' so callers can detect cancellation uniformly.
 *
 * @param {string} message - Human-readable description of where the abort happened
 * @returns {Error} Error with `code` set to 'ABORT'
 */
function createAbortError(message) {
  const abortError = new Error(message);
  abortError.code = 'ABORT';
  return abortError;
}

/**
 * Waits for the given delay, ending early with a rejection if the signal aborts.
 *
 * The abort listener is removed when the timer fires naturally, so repeated
 * retries do not pile up listeners on a long-lived signal.
 *
 * @param {number} delayMs - Time to wait in milliseconds
 * @param {AbortSignal} [signal] - Optional signal that interrupts the wait
 * @returns {Promise<void>} Resolves after the delay; rejects with an 'ABORT' error on abort
 */
function waitForRetry(delayMs, signal) {
  return new Promise((resolve, reject) => {
    if (signal?.aborted) {
      reject(createAbortError('Bot launch aborted during retry wait'));
      return;
    }

    let timer;
    const onAbort = () => {
      clearTimeout(timer);
      reject(createAbortError('Bot launch aborted during retry wait'));
    };

    timer = setTimeout(() => {
      // Detach the listener inside the timer callback itself; the callback
      // handed to setTimeout is the one that actually runs.
      signal?.removeEventListener('abort', onAbort);
      resolve();
    }, delayMs);

    signal?.addEventListener('abort', onAbort, { once: true });
  });
}

/**
 * Launches a Telegraf bot with retry logic and exponential backoff.
 *
 * On each attempt:
 * 1. Deletes any existing webhook (to prevent webhook/polling conflict)
 * 2. Calls bot.launch() in polling mode
 *
 * If an attempt fails:
 * - When isRetryableError() accepts the error: waits for the delay returned by
 *   calculateRetryDelay() and tries again (there is no attempt limit)
 * - Otherwise (e.g. 401): logs the error and rethrows it immediately
 *
 * @param {object} bot - Telegraf bot instance
 * @param {object} launchOptions - Options passed to bot.launch()
 * @param {object} [retryOptions] - Retry configuration
 * @param {number} [retryOptions.baseDelayMs] - Initial retry delay (default: 1000)
 * @param {number} [retryOptions.maxDelayMs] - Maximum retry delay (default: 600000)
 * @param {number} [retryOptions.backoffMultiplier] - Growth factor (default: 2)
 * @param {number} [retryOptions.jitterFraction] - Jitter fraction (default: 0.1)
 * @param {boolean} [retryOptions.verbose] - Enable verbose logging
 * @param {Function} [retryOptions.onRetry] - Callback on each retry: (attempt, error, delayMs) => void
 * @param {AbortSignal} [retryOptions.signal] - AbortSignal to cancel retry loop
 * @returns {Promise<void>} Resolves once bot.launch() has resolved
 * @throws {Error} The original error if it is non-retryable, or an error with
 *   code 'ABORT' if the signal is aborted before an attempt or during a wait
 */
export async function launchBotWithRetry(bot, launchOptions, retryOptions = {}) {
  const { verbose = false, onRetry, signal, ...backoffConfig } = retryOptions;
  let attempt = 0;

  while (true) {
    // Check if abort was requested (e.g., during shutdown)
    if (signal?.aborted) {
      throw createAbortError('Bot launch aborted');
    }

    attempt++;

    try {
      // Step 1: Delete webhook to prevent webhook/polling conflict
      if (verbose) console.log(`[VERBOSE] Launch attempt ${attempt}: deleting webhook...`);
      await bot.telegram.deleteWebhook({ drop_pending_updates: true });

      if (verbose) console.log(`[VERBOSE] Launch attempt ${attempt}: starting polling...`);

      // Step 2: Launch bot in polling mode
      await bot.launch(launchOptions);

      // Success -- only announce it when at least one retry was needed
      if (attempt > 1) {
        console.log(`✅ Bot launched successfully after ${attempt} attempts`);
      }
      return;
    } catch (error) {
      // Fatal errors are surfaced to the caller unchanged
      if (!isRetryableError(error)) {
        console.error(`❌ Non-retryable error (${error.code}): ${error.message}`);
        throw error;
      }

      // Calculate delay with exponential backoff
      const delayMs = calculateRetryDelay(attempt, backoffConfig);

      console.warn(`⚠️ Bot launch attempt ${attempt} failed (${error.code || 'unknown'}): ${error.message}. Retrying in ${formatDelay(delayMs)}...`);

      if (verbose) {
        console.warn(`[VERBOSE] Retry delay: ${delayMs}ms, next attempt: ${attempt + 1}`);
        if (error.response) {
          console.warn('[VERBOSE] API response:', JSON.stringify(error.response));
        }
      }

      // Notify retry callback if provided
      if (onRetry) {
        onRetry(attempt, error, delayMs);
      }

      // Wait before retrying (interruptible via AbortSignal)
      await waitForRetry(delayMs, signal);
    }
  }
}
@@ -45,6 +45,8 @@ const { escapeMarkdown, escapeMarkdownV2, cleanNonPrintableChars, makeSpecialCha
45
45
  const { getSolveQueue, createQueueExecuteCallback } = await import('./telegram-solve-queue.lib.mjs');
46
46
  // Import extracted message filter functions for testability (issue #1207)
47
47
  const { isOldMessage: _isOldMessage, isGroupChat: _isGroupChat, isChatAuthorized: _isChatAuthorized, isForwardedOrReply: _isForwardedOrReply, extractCommandFromText } = await import('./telegram-message-filters.lib.mjs');
48
+ // Import bot launcher with exponential backoff retry (issue #1240)
49
+ const { launchBotWithRetry } = await import('./telegram-bot-launcher.lib.mjs');
48
50
 
49
51
  const config = yargs(hideBin(process.argv))
50
52
  .usage('Usage: hive-telegram-bot [options]')
@@ -1395,26 +1397,22 @@ if (VERBOSE) {
1395
1397
  console.log('[VERBOSE] Bot start time (ISO):', new Date(BOT_START_TIME * 1000).toISOString());
1396
1398
  }
1397
1399
 
1398
- // Delete existing webhook (critical: webhooks prevent polling from working)
1399
- if (VERBOSE) console.log('[VERBOSE] Deleting webhook...');
1400
- bot.telegram
1401
- .deleteWebhook({ drop_pending_updates: true })
1402
- .then(result => {
1403
- if (VERBOSE) {
1404
- console.log('[VERBOSE] Webhook deletion result:', result);
1405
- }
1406
- console.log('🔄 Webhook deleted (if existed), starting polling mode...');
1407
- if (VERBOSE) {
1408
- console.log('[VERBOSE] Launching bot with config:', {
1409
- allowedUpdates: ['message'],
1410
- dropPendingUpdates: true,
1411
- });
1412
- }
1413
- return bot.launch({
1414
- allowedUpdates: ['message', 'callback_query'], // Receive messages and callback queries
1415
- dropPendingUpdates: true, // Drop pending updates sent before bot started
1416
- });
1417
- })
1400
+ // Launch bot with retry logic (issue #1240: handle 409 Conflict with exponential backoff)
1401
+ // The launcher handles deleteWebhook + bot.launch() with retry on transient errors.
1402
+ // Non-retryable errors (401 Unauthorized) cause immediate exit.
1403
+ const launchAbortController = new AbortController();
1404
+
1405
+ launchBotWithRetry(
1406
+ bot,
1407
+ {
1408
+ allowedUpdates: ['message', 'callback_query'], // Receive messages and callback queries
1409
+ dropPendingUpdates: true, // Drop pending updates sent before bot started
1410
+ },
1411
+ {
1412
+ verbose: VERBOSE,
1413
+ signal: launchAbortController.signal,
1414
+ }
1415
+ )
1418
1416
  .then(async () => {
1419
1417
  if (isShuttingDown) return; // Skip success messages if shutting down
1420
1418
 
@@ -1483,6 +1481,7 @@ process.once('SIGINT', () => {
1483
1481
  isShuttingDown = true;
1484
1482
  console.log('\n🛑 Received SIGINT (Ctrl+C), stopping bot...');
1485
1483
  if (VERBOSE) console.log(`[VERBOSE] Signal: SIGINT, PID: ${process.pid}, PPID: ${process.ppid}`);
1484
+ launchAbortController.abort(); // Cancel retry loop if still retrying (issue #1240)
1486
1485
  stopSolveQueue();
1487
1486
  bot.stop('SIGINT');
1488
1487
  });
@@ -1491,6 +1490,7 @@ process.once('SIGTERM', () => {
1491
1490
  isShuttingDown = true;
1492
1491
  console.log('\n🛑 Received SIGTERM, stopping bot... (Check system logs: journalctl -u <service> or dmesg)');
1493
1492
  if (VERBOSE) console.log(`[VERBOSE] Signal: SIGTERM, PID: ${process.pid}, PPID: ${process.ppid}`);
1493
+ launchAbortController.abort(); // Cancel retry loop if still retrying (issue #1240)
1494
1494
  stopSolveQueue();
1495
1495
  bot.stop('SIGTERM');
1496
1496
  });