@link-assistant/hive-mind 1.23.5 → 1.23.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +6 -0
- package/package.json +2 -2
- package/src/telegram-bot-launcher.lib.mjs +190 -0
- package/src/telegram-bot.mjs +20 -20
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,11 @@
|
|
|
1
1
|
# @link-assistant/hive-mind
|
|
2
2
|
|
|
3
|
+
## 1.23.6
|
|
4
|
+
|
|
5
|
+
### Patch Changes
|
|
6
|
+
|
|
7
|
+
- 0a7dbcf: Add exponential backoff retry when bot launch fails with 409 Conflict error (e.g., due to restart overlap, stale connections, or network issues). Retry schedule: 1s, 2s, 4s, ... up to 10 minutes max. Non-retryable errors (401 Unauthorized) still cause immediate exit.
|
|
8
|
+
|
|
3
9
|
## 1.23.5
|
|
4
10
|
|
|
5
11
|
### Patch Changes
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@link-assistant/hive-mind",
|
|
3
|
-
"version": "1.23.5",
|
|
3
|
+
"version": "1.23.6",
|
|
4
4
|
"description": "AI-powered issue solver and hive mind for collaborative problem solving",
|
|
5
5
|
"main": "src/hive.mjs",
|
|
6
6
|
"type": "module",
|
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
"hive-telegram-bot": "./src/telegram-bot.mjs"
|
|
14
14
|
},
|
|
15
15
|
"scripts": {
|
|
16
|
-
"test": "node tests/solve-queue.test.mjs && node tests/limits-display.test.mjs && node tests/test-usage-limit.mjs && node tests/test-telegram-message-filters.mjs && node tests/test-solve-queue-command.mjs && node tests/test-queue-display-1267.mjs",
|
|
16
|
+
"test": "node tests/solve-queue.test.mjs && node tests/limits-display.test.mjs && node tests/test-usage-limit.mjs && node tests/test-telegram-message-filters.mjs && node tests/test-solve-queue-command.mjs && node tests/test-queue-display-1267.mjs && node tests/test-telegram-bot-launcher.mjs",
|
|
17
17
|
"test:queue": "node tests/solve-queue.test.mjs",
|
|
18
18
|
"test:limits-display": "node tests/limits-display.test.mjs",
|
|
19
19
|
"test:usage-limit": "node tests/test-usage-limit.mjs",
|
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
/**
 * Bot launcher with exponential backoff retry for Telegraf polling mode.
 *
 * Every launch failure is retried with exponential backoff unless its
 * `code` is listed in NON_RETRYABLE_CODES (currently only 401 Unauthorized),
 * in which case the error is rethrown immediately.
 *
 * @see https://github.com/link-assistant/hive-mind/issues/1240
 * @see https://core.telegram.org/bots/api#getupdates
 */

/**
 * Default configuration for the retry mechanism.
 */
export const LAUNCHER_DEFAULTS = {
  baseDelayMs: 1000, // Initial retry delay: 1 second
  maxDelayMs: 10 * 60 * 1000, // Cap applied before jitter: 10 minutes
  backoffMultiplier: 2, // Exponential growth factor
  jitterFraction: 0.1, // Up to 10% random jitter to prevent thundering herd
};

/**
 * Error codes that should NOT be retried (fatal errors).
 * 401 = Invalid bot token -- retrying won't help.
 */
const NON_RETRYABLE_CODES = new Set([401]);

/**
 * Determines whether a given error is retryable.
 *
 * An error is non-retryable only when its `code` is in NON_RETRYABLE_CODES
 * (401). Everything else -- 409, 429, 5xx, network errors without a code,
 * and even a missing error value -- is treated as retryable.
 *
 * @param {Error} error - The error to classify
 * @returns {boolean} true if the error is retryable
 */
export function isRetryableError(error) {
  // Optional chaining: a promise may reject with null/undefined, which must
  // not turn classification itself into a TypeError.
  return !NON_RETRYABLE_CODES.has(error?.code);
}

/**
 * Calculates the delay before the next retry attempt using exponential backoff
 * with jitter.
 *
 * Formula: min(baseDelay * multiplier^(attempt-1), maxDelay) + random jitter
 *
 * The jitter is added after capping, so the result can exceed maxDelayMs by
 * up to `jitterFraction` of the capped delay.
 *
 * @param {number} attempt - Current attempt number (1-based)
 * @param {object} [options] - Configuration options
 * @param {number} [options.baseDelayMs] - Base delay in milliseconds
 * @param {number} [options.maxDelayMs] - Maximum delay cap in milliseconds
 * @param {number} [options.backoffMultiplier] - Exponential growth factor
 * @param {number} [options.jitterFraction] - Fraction of delay to use as jitter (0-1)
 * @returns {number} Delay in milliseconds before next retry
 */
export function calculateRetryDelay(attempt, options = {}) {
  const {
    baseDelayMs = LAUNCHER_DEFAULTS.baseDelayMs,
    maxDelayMs = LAUNCHER_DEFAULTS.maxDelayMs,
    backoffMultiplier = LAUNCHER_DEFAULTS.backoffMultiplier,
    jitterFraction = LAUNCHER_DEFAULTS.jitterFraction,
  } = options;

  const exponentialDelay = baseDelayMs * Math.pow(backoffMultiplier, attempt - 1);
  const cappedDelay = Math.min(exponentialDelay, maxDelayMs);
  const jitter = cappedDelay * jitterFraction * Math.random();
  return Math.round(cappedDelay + jitter);
}

/**
 * Formats a delay in milliseconds as a human-readable string.
 *
 * @param {number} delayMs - Delay in milliseconds
 * @returns {string} Human-readable delay (e.g., "5s", "2m 30s", "10m")
 */
export function formatDelay(delayMs) {
  const totalSeconds = Math.round(delayMs / 1000);
  if (totalSeconds < 60) {
    return `${totalSeconds}s`;
  }
  const minutes = Math.floor(totalSeconds / 60);
  const seconds = totalSeconds % 60;
  if (seconds === 0) {
    return `${minutes}m`;
  }
  return `${minutes}m ${seconds}s`;
}

/**
 * Builds the error thrown when the launch is cancelled through the AbortSignal.
 * Both abort paths share the `ABORT` code so callers can detect cancellation
 * without matching on the message text.
 *
 * @param {string} message - Error message
 * @returns {Error} Error with `code` set to 'ABORT'
 */
function createAbortError(message) {
  const abortError = new Error(message);
  abortError.code = 'ABORT';
  return abortError;
}

/**
 * Waits for `delayMs`, rejecting early if `signal` is aborted.
 *
 * The abort listener is removed when the timer fires, so a signal that
 * outlives many retry waits does not accumulate stale listeners.
 *
 * @param {number} delayMs - Time to wait in milliseconds
 * @param {AbortSignal} [signal] - Optional signal that cancels the wait
 * @returns {Promise<void>} Resolves after the delay, rejects on abort
 */
function waitForRetryDelay(delayMs, signal) {
  return new Promise((resolve, reject) => {
    if (signal?.aborted) {
      reject(createAbortError('Bot launch aborted during retry wait'));
      return;
    }

    let timer;
    const onAbort = () => {
      clearTimeout(timer);
      reject(createAbortError('Bot launch aborted during retry wait'));
    };

    timer = setTimeout(() => {
      // Detach before resolving: the timer callback is the only place that
      // runs when the wait completes normally.
      signal?.removeEventListener('abort', onAbort);
      resolve();
    }, delayMs);

    signal?.addEventListener('abort', onAbort, { once: true });
  });
}

/**
 * Launches a Telegraf bot with retry logic and exponential backoff.
 *
 * On each attempt:
 * 1. Deletes any existing webhook (to prevent webhook/polling conflict)
 * 2. Calls bot.launch() in polling mode
 *
 * If either step fails:
 * - For retryable errors (anything but 401): waits with exponential backoff
 *   and retries
 * - For non-retryable errors (401): rethrows immediately
 *
 * @param {object} bot - Telegraf bot instance
 * @param {object} launchOptions - Options passed to bot.launch()
 * @param {object} [retryOptions] - Retry configuration
 * @param {number} [retryOptions.baseDelayMs] - Initial retry delay (default: 1000)
 * @param {number} [retryOptions.maxDelayMs] - Maximum retry delay (default: 600000)
 * @param {number} [retryOptions.backoffMultiplier] - Growth factor (default: 2)
 * @param {number} [retryOptions.jitterFraction] - Jitter fraction (default: 0.1)
 * @param {boolean} [retryOptions.verbose] - Enable verbose logging
 * @param {Function} [retryOptions.onRetry] - Callback on each retry: (attempt, error, delayMs) => void
 * @param {AbortSignal} [retryOptions.signal] - AbortSignal to cancel retry loop
 * @returns {Promise<void>} Resolves when bot is successfully launched
 * @throws {Error} If a non-retryable error occurs or signal is aborted
 *   (abort errors carry `code === 'ABORT'`)
 */
export async function launchBotWithRetry(bot, launchOptions, retryOptions = {}) {
  const { verbose = false, onRetry, signal, ...backoffConfig } = retryOptions;
  let attempt = 0;

  while (true) {
    // Check if abort was requested (e.g., during shutdown)
    if (signal?.aborted) {
      throw createAbortError('Bot launch aborted');
    }

    attempt++;

    try {
      // Step 1: Delete webhook to prevent webhook/polling conflict
      if (verbose) console.log(`[VERBOSE] Launch attempt ${attempt}: deleting webhook...`);
      await bot.telegram.deleteWebhook({ drop_pending_updates: true });

      if (verbose) console.log(`[VERBOSE] Launch attempt ${attempt}: starting polling...`);

      // Step 2: Launch bot in polling mode
      await bot.launch(launchOptions);

      // Success -- bot is running
      if (attempt > 1) {
        console.log(`✅ Bot launched successfully after ${attempt} attempts`);
      }
      return;
    } catch (error) {
      // Fatal errors are surfaced to the caller untouched
      if (!isRetryableError(error)) {
        console.error(`❌ Non-retryable error (${error.code}): ${error.message}`);
        throw error;
      }

      // Calculate delay with exponential backoff
      const delayMs = calculateRetryDelay(attempt, backoffConfig);

      console.warn(`⚠️ Bot launch attempt ${attempt} failed (${error?.code || 'unknown'}): ${error?.message}. Retrying in ${formatDelay(delayMs)}...`);

      if (verbose) {
        console.warn(`[VERBOSE] Retry delay: ${delayMs}ms, next attempt: ${attempt + 1}`);
        if (error?.response) {
          console.warn('[VERBOSE] API response:', JSON.stringify(error.response));
        }
      }

      // Notify retry callback if provided
      if (onRetry) {
        onRetry(attempt, error, delayMs);
      }

      // Wait before retrying (interruptible via AbortSignal); an abort
      // rejection propagates out of the loop to the caller.
      await waitForRetryDelay(delayMs, signal);
    }
  }
}
|
package/src/telegram-bot.mjs
CHANGED
|
@@ -45,6 +45,8 @@ const { escapeMarkdown, escapeMarkdownV2, cleanNonPrintableChars, makeSpecialCha
|
|
|
45
45
|
const { getSolveQueue, createQueueExecuteCallback } = await import('./telegram-solve-queue.lib.mjs');
|
|
46
46
|
// Import extracted message filter functions for testability (issue #1207)
|
|
47
47
|
const { isOldMessage: _isOldMessage, isGroupChat: _isGroupChat, isChatAuthorized: _isChatAuthorized, isForwardedOrReply: _isForwardedOrReply, extractCommandFromText } = await import('./telegram-message-filters.lib.mjs');
|
|
48
|
+
// Import bot launcher with exponential backoff retry (issue #1240)
|
|
49
|
+
const { launchBotWithRetry } = await import('./telegram-bot-launcher.lib.mjs');
|
|
48
50
|
|
|
49
51
|
const config = yargs(hideBin(process.argv))
|
|
50
52
|
.usage('Usage: hive-telegram-bot [options]')
|
|
@@ -1395,26 +1397,22 @@ if (VERBOSE) {
|
|
|
1395
1397
|
console.log('[VERBOSE] Bot start time (ISO):', new Date(BOT_START_TIME * 1000).toISOString());
|
|
1396
1398
|
}
|
|
1397
1399
|
|
|
1398
|
-
//
|
|
1399
|
-
|
|
1400
|
-
|
|
1401
|
-
|
|
1402
|
-
|
|
1403
|
-
|
|
1404
|
-
|
|
1405
|
-
|
|
1406
|
-
|
|
1407
|
-
|
|
1408
|
-
|
|
1409
|
-
|
|
1410
|
-
|
|
1411
|
-
|
|
1412
|
-
|
|
1413
|
-
|
|
1414
|
-
allowedUpdates: ['message', 'callback_query'], // Receive messages and callback queries
|
|
1415
|
-
dropPendingUpdates: true, // Drop pending updates sent before bot started
|
|
1416
|
-
});
|
|
1417
|
-
})
|
|
1400
|
+
// Launch bot with retry logic (issue #1240: handle 409 Conflict with exponential backoff)
|
|
1401
|
+
// The launcher handles deleteWebhook + bot.launch() with retry on transient errors.
|
|
1402
|
+
// Non-retryable errors (401 Unauthorized) cause immediate exit.
|
|
1403
|
+
const launchAbortController = new AbortController();
|
|
1404
|
+
|
|
1405
|
+
launchBotWithRetry(
|
|
1406
|
+
bot,
|
|
1407
|
+
{
|
|
1408
|
+
allowedUpdates: ['message', 'callback_query'], // Receive messages and callback queries
|
|
1409
|
+
dropPendingUpdates: true, // Drop pending updates sent before bot started
|
|
1410
|
+
},
|
|
1411
|
+
{
|
|
1412
|
+
verbose: VERBOSE,
|
|
1413
|
+
signal: launchAbortController.signal,
|
|
1414
|
+
}
|
|
1415
|
+
)
|
|
1418
1416
|
.then(async () => {
|
|
1419
1417
|
if (isShuttingDown) return; // Skip success messages if shutting down
|
|
1420
1418
|
|
|
@@ -1483,6 +1481,7 @@ process.once('SIGINT', () => {
|
|
|
1483
1481
|
isShuttingDown = true;
|
|
1484
1482
|
console.log('\n🛑 Received SIGINT (Ctrl+C), stopping bot...');
|
|
1485
1483
|
if (VERBOSE) console.log(`[VERBOSE] Signal: SIGINT, PID: ${process.pid}, PPID: ${process.ppid}`);
|
|
1484
|
+
launchAbortController.abort(); // Cancel retry loop if still retrying (issue #1240)
|
|
1486
1485
|
stopSolveQueue();
|
|
1487
1486
|
bot.stop('SIGINT');
|
|
1488
1487
|
});
|
|
@@ -1491,6 +1490,7 @@ process.once('SIGTERM', () => {
|
|
|
1491
1490
|
isShuttingDown = true;
|
|
1492
1491
|
console.log('\n🛑 Received SIGTERM, stopping bot... (Check system logs: journalctl -u <service> or dmesg)');
|
|
1493
1492
|
if (VERBOSE) console.log(`[VERBOSE] Signal: SIGTERM, PID: ${process.pid}, PPID: ${process.ppid}`);
|
|
1493
|
+
launchAbortController.abort(); // Cancel retry loop if still retrying (issue #1240)
|
|
1494
1494
|
stopSolveQueue();
|
|
1495
1495
|
bot.stop('SIGTERM');
|
|
1496
1496
|
});
|