@link-assistant/hive-mind 2.0.3 → 2.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -328,7 +328,10 @@ const { isOldMessage: _isOldMessage, isGroupChat: _isGroupChat, isChatAuthorized
328
328
  const { installTelegramFormattingFallback, isTelegramFormattingError, isTelegramMessageTooLongError, safeEditMessageText, safeReply, TELEGRAM_TEXT_LIMIT } = await import('./telegram-safe-reply.lib.mjs');
329
329
  const { registerTerminalWatchCommand, startAutoTerminalWatchForSession } = await import('./telegram-terminal-watch-command.lib.mjs');
330
330
  const { launchBotWithRetry } = await import('./telegram-bot-launcher.lib.mjs');
331
- const { trackSession, startSessionMonitoring, hasActiveSessionForUrlAsync, findStoppableSessionByUrl } = await import('./session-monitor.lib.mjs');
331
+ const { trackSession, startSessionMonitoring, hasActiveSessionForUrlAsync, findStoppableSessionByUrl, setSessionStore, setSessionLogger, resumeTrackedSessions, getActiveSessionCount } = await import('./session-monitor.lib.mjs');
332
+ const { createBotLogger } = await import('./bot-logger.lib.mjs');
333
+ const { createSessionStore } = await import('./session-store.lib.mjs');
334
+ const { createHeartbeat, resumeSessionsOnLaunch, createShutdownHandler } = await import('./bot-lifecycle.lib.mjs');
332
335
  const { formatExecutingWorkSessionMessage, formatStartingWorkSessionMessage } = await import('./work-session-formatting.lib.mjs');
333
336
  const { buildTelegramHelpMessage, buildTelegramInfoBlock, buildSolveQueuedMessage } = await import('./telegram-ui-messages.lib.mjs');
334
337
 
@@ -354,6 +357,18 @@ installTelegramFormattingFallback(bot.telegram, { verbose: VERBOSE });
354
357
 
355
358
  // Track bot startup time (Unix seconds to match Telegram's message.date format)
356
359
  const BOT_START_TIME = Math.floor(Date.now() / 1000);
360
+
361
+ // Issue #1927: durable, timestamped bot log + durable session store. The logger
362
+ // preserves the previous run's log under a timestamped backup (never overwriting
363
+ // it) so the moment of a total failure stays discoverable, and every line is
364
+ // timestamped. The session store mirrors the in-memory session registry to disk
365
+ // so a restart can resume monitoring detached sessions that were still running.
366
+ const botLogger = createBotLogger({ verbose: VERBOSE });
367
+ const sessionStore = createSessionStore({ verbose: VERBOSE, logger: botLogger });
368
+ setSessionLogger(botLogger);
369
+ setSessionStore(sessionStore);
370
+ botLogger.event('bot_starting', { pid: process.pid, ppid: process.ppid, botStartTime: BOT_START_TIME, startTimeIso: new Date(BOT_START_TIME * 1000).toISOString(), logFile: botLogger.filePath, sessionSnapshot: sessionStore.snapshotPath });
371
+
357
372
  // Wrapper functions binding filter logic to bot state (actual logic in telegram-message-filters.lib.mjs, issue #1207)
358
373
  function isChatAuthorized(chatId) {
359
374
  return _isChatAuthorized(chatId, allowedChats);
@@ -1341,13 +1356,28 @@ function startSessionMonitoringOnce() {
1341
1356
  sessionMonitoringTimer = startSessionMonitoring(bot, VERBOSE);
1342
1357
  }
1343
1358
 
1359
+ // Issue #1927 (requirements #3/#4): a periodic timestamped heartbeat so the "last
1360
+ // time the bot was alive" is always discoverable from the log. The heartbeat
1361
+ // logic lives in bot-lifecycle.lib.mjs so it can be unit tested.
1362
+ const heartbeat = createHeartbeat({ logger: botLogger, getActiveSessionCount });
1363
+
1344
1364
  async function onBotLaunched() {
1345
1365
  if (isShuttingDown || launchAnnouncementShown) return;
1346
1366
  launchAnnouncementShown = true;
1347
1367
 
1348
1368
  console.log('✅ SwarmMindBot is now running!');
1349
1369
  console.log('Press Ctrl+C to stop');
1370
+ botLogger.event('bot_launched', { pid: process.pid, botStartTime: BOT_START_TIME });
1371
+
1372
+ // Issue #1927 (requirements #2/#4): after a restart, reload sessions that were
1373
+ // still being tracked when the previous process died and re-register them so
1374
+ // the monitor resumes watching — and finally reports any that were killed while
1375
+ // the bot was down. Done before starting the monitor so the first tick already
1376
+ // sees the resumed sessions.
1377
+ await resumeSessionsOnLaunch({ resumeTrackedSessions, botStartTime: BOT_START_TIME, verbose: VERBOSE, logger: botLogger });
1378
+
1350
1379
  startSessionMonitoringOnce();
1380
+ heartbeat.start();
1351
1381
 
1352
1382
  if (VERBOSE) {
1353
1383
  console.log('[VERBOSE] Bot launched successfully');
@@ -1429,22 +1459,33 @@ const stopSolveQueue = () => {
1429
1459
  }
1430
1460
  };
1431
1461
 
1462
+ // Issue #1927: record the shutdown (with a timestamp) so the log shows the bot
1463
+ // stopped cleanly — the ABSENCE of this line before the next startup is how a
1464
+ // later analysis tells an orderly stop apart from a hard kill. The handler lives
1465
+ // in bot-lifecycle.lib.mjs; the timer/flag mutations stay here via the closures
1466
+ // (issue #1240: still abort the retry loop on the way out).
1467
+ const handleShutdownSignal = createShutdownHandler({
1468
+ logger: botLogger,
1469
+ getActiveSessionCount,
1470
+ verbose: VERBOSE,
1471
+ bot,
1472
+ onShutdown: () => {
1473
+ isShuttingDown = true;
1474
+ },
1475
+ cleanup: () => {
1476
+ launchAbortController.abort();
1477
+ if (sessionMonitoringTimer) clearInterval(sessionMonitoringTimer);
1478
+ heartbeat.stop();
1479
+ stopSolveQueue();
1480
+ },
1481
+ });
1482
+
1432
1483
  process.once('SIGINT', () => {
1433
- isShuttingDown = true;
1434
1484
  console.log('\n🛑 Received SIGINT (Ctrl+C), stopping bot...');
1435
- if (VERBOSE) console.log(`[VERBOSE] Signal: SIGINT, PID: ${process.pid}, PPID: ${process.ppid}`);
1436
- launchAbortController.abort(); // Cancel retry loop if still retrying (issue #1240)
1437
- if (sessionMonitoringTimer) clearInterval(sessionMonitoringTimer);
1438
- stopSolveQueue();
1439
- bot.stop('SIGINT');
1485
+ handleShutdownSignal('SIGINT');
1440
1486
  });
1441
1487
 
1442
1488
  process.once('SIGTERM', () => {
1443
- isShuttingDown = true;
1444
1489
  console.log('\n🛑 Received SIGTERM, stopping bot... (Check system logs: journalctl -u <service> or dmesg)');
1445
- if (VERBOSE) console.log(`[VERBOSE] Signal: SIGTERM, PID: ${process.pid}, PPID: ${process.ppid}`);
1446
- launchAbortController.abort(); // Cancel retry loop if still retrying (issue #1240)
1447
- if (sessionMonitoringTimer) clearInterval(sessionMonitoringTimer);
1448
- stopSolveQueue();
1449
- bot.stop('SIGTERM');
1490
+ handleShutdownSignal('SIGTERM');
1450
1491
  });
@@ -109,7 +109,9 @@ export function buildExecuteAndUpdateMessage(deps) {
109
109
  }
110
110
  };
111
111
  const requesterUserId = ctx.from?.id ?? null; // Issue #1688: suppress duplicate /subscribe DM
112
- const baseSessionInfo = { chatId: ctx.chat.id, messageId: msgId, startTime: new Date(), url: args[0], command: commandName, tool, infoBlock, urlContext, requesterUserId, showLimits, limitsAtStart, locale }; // #594: showLimits/limitsAtStart
112
+ // #1927 review follow-up: persist the full args so a killed /solve can be
113
+ // resumed with its exact original invocation + `--resume <lastSessionId>`.
114
+ const baseSessionInfo = { chatId: ctx.chat.id, messageId: msgId, startTime: new Date(), url: args[0], command: commandName, tool, infoBlock, urlContext, requesterUserId, showLimits, limitsAtStart, locale, args: Array.isArray(args) ? [...args] : undefined }; // #594: showLimits/limitsAtStart
113
115
  const iso = await resolveIsolation(perCommandIsolation, ISOLATION_BACKEND, isolationRunner, VERBOSE);
114
116
  let result, session, sessionInfo;
115
117
  if (iso) {
@@ -7,6 +7,8 @@
7
7
 
8
8
  import fs from 'fs/promises';
9
9
  import { extractSessionIdFromText, decideLogDestination, resolveLogPath } from './telegram-log-command.lib.mjs';
10
+ import { parseSessionExitFooter } from './isolation-runner.lib.mjs';
11
+ import { classifyExitStatus, isFailureSessionStatus } from './session-status.lib.mjs';
10
12
 
11
13
  const DEFAULT_WIDTH = 120;
12
14
  const DEFAULT_HEIGHT = 25;
@@ -124,7 +126,11 @@ export function formatTerminalWatchMessage({ sessionId, statusResult = null, log
124
126
  const width = options.width || DEFAULT_WIDTH;
125
127
  const height = options.height || DEFAULT_HEIGHT;
126
128
  const snapshot = sanitizeCodeBlock(tailTextForTerminal(logText, options));
127
- const title = completed ? '✅ Terminal watch complete' : '🔄 Live terminal watch';
129
+ // Issue #1927: a completed-but-failed/killed session must not wear a success
130
+ // ✅ — surface the failure so an OOM/SIGKILL is reported, not mistaken for a
131
+ // clean finish. Both titles keep the "Terminal watch complete" phrase.
132
+ const failed = completed && isFailureSessionStatus(status);
133
+ const title = !completed ? '🔄 Live terminal watch' : failed ? '❌ Terminal watch complete — session failed' : '✅ Terminal watch complete';
128
134
  const lines = [title, `Session: \`${sessionId}\``, `Status: \`${status}\``, `Terminal: \`${width}x${height}\``];
129
135
  if (repoDescription) lines.push(`Repo: \`${repoDescription}\``);
130
136
  if (!completed) lines.push(`Updates: ${updateCount}`);
@@ -183,6 +189,37 @@ function getDisplayedTerminalSnapshot(logText, options) {
183
189
  return sanitizeCodeBlock(tailTextForTerminal(logText, options));
184
190
  }
185
191
 
192
+ /**
193
+ * Issue #1927: decide whether a watched session has actually finished.
194
+ *
195
+ * A non-terminal `$ --status` (e.g. `executing`) is NOT trusted on its own —
196
+ * start-command can keep reporting `executing` after the wrapped command was
197
+ * SIGKILLed/OOM-killed (a lingering shell outlives it). Trusting that status
198
+ * would make this watch poll forever and render a misleading "still running"
199
+ * snapshot — the same silent-hang that left issue #1927's killed `/solve`
200
+ * unreported, here in the watch loop. The execution-log FOOTER ("Exit Code: N")
201
+ * that `start` writes is authoritative: once present the command has terminated,
202
+ * full stop. In that case the displayed status is corrected to the real terminal
203
+ * status (e.g. `killed`) so the kill is surfaced instead of a perpetual
204
+ * `executing`.
205
+ *
206
+ * @returns {{completed: boolean, statusResult: object|null}}
207
+ */
208
+ export function reconcileWatchCompletion(statusResult, logText, isTerminalSessionStatus) {
209
+ if (statusResult?.status && isTerminalSessionStatus(statusResult.status)) {
210
+ return { completed: true, statusResult };
211
+ }
212
+ const footer = parseSessionExitFooter(logText);
213
+ if (footer.finished) {
214
+ const corrected = classifyExitStatus(footer.exitCode) || (footer.exitCode === 0 ? 'executed' : 'failed');
215
+ return {
216
+ completed: true,
217
+ statusResult: { ...(statusResult || {}), status: corrected, exitCode: footer.exitCode, endTime: statusResult?.endTime || footer.endTime || null },
218
+ };
219
+ }
220
+ return { completed: false, statusResult };
221
+ }
222
+
186
223
  export function watchTerminalLogSession({ bot, chatId, messageId, sessionId, logPath, querySessionStatus, isTerminalSessionStatus, options = {}, repoDescription = null, verbose = false, initialStatusResult = null, initialLogText = null, initialMessage = '' }) {
187
224
  const key = `${chatId}:${messageId}:${sessionId}`;
188
225
  activeWatches.get(key)?.stop();
@@ -190,7 +227,8 @@ export function watchTerminalLogSession({ bot, chatId, messageId, sessionId, log
190
227
  let stopped = false;
191
228
  const hasInitialLogText = initialLogText !== null && initialLogText !== undefined;
192
229
  let lastSnapshot = hasInitialLogText ? getDisplayedTerminalSnapshot(initialLogText, options) : null;
193
- let lastMessage = initialMessage || (hasInitialLogText ? formatTerminalWatchMessage({ sessionId, statusResult: initialStatusResult, logText: initialLogText, options, updateCount: 0, completed: !!initialStatusResult?.status && isTerminalSessionStatus(initialStatusResult.status), repoDescription }) : '');
230
+ const initialReconciled = hasInitialLogText ? reconcileWatchCompletion(initialStatusResult, initialLogText, isTerminalSessionStatus) : { completed: false, statusResult: initialStatusResult };
231
+ let lastMessage = initialMessage || (hasInitialLogText ? formatTerminalWatchMessage({ sessionId, statusResult: initialReconciled.statusResult, logText: initialLogText, options, updateCount: 0, completed: initialReconciled.completed, repoDescription }) : '');
194
232
  let updateCount = 0;
195
233
  let timer = null;
196
234
  const intervalMs = options.intervalMs || DEFAULT_INTERVAL_MS;
@@ -198,9 +236,12 @@ export function watchTerminalLogSession({ bot, chatId, messageId, sessionId, log
198
236
  const tick = async () => {
199
237
  if (stopped) return;
200
238
  try {
201
- const statusResult = await querySessionStatus(sessionId, verbose);
202
- const completed = !!statusResult?.status && isTerminalSessionStatus(statusResult.status);
239
+ const rawStatus = await querySessionStatus(sessionId, verbose);
203
240
  const logText = await readLogFile(logPath);
241
+ // Issue #1927: cross-check the authoritative log footer so a session killed
242
+ // while `$ --status` still reports `executing` is detected as finished
243
+ // instead of being polled forever with a misleading "running" snapshot.
244
+ const { completed, statusResult } = reconcileWatchCompletion(rawStatus, logText, isTerminalSessionStatus);
204
245
  const snapshot = getDisplayedTerminalSnapshot(logText, options);
205
246
  const snapshotChanged = snapshot !== lastSnapshot;
206
247
  if (snapshotChanged) updateCount++;
@@ -272,8 +313,8 @@ async function startWatchFromResolvedSession({ bot, ctx, sessionId, statusResult
272
313
  if (!targetChatId) return { started: false, reason: 'Missing target chat id' };
273
314
 
274
315
  const initialLogText = await readLogFile(logPath);
275
- const initialCompleted = !!statusResult?.status && isTerminalSessionStatus(statusResult.status);
276
- const initialText = formatTerminalWatchMessage({ sessionId, statusResult, logText: initialLogText, options: watchOptions, completed: initialCompleted, repoDescription });
316
+ const { completed: initialCompleted, statusResult: reconciledInitialStatus } = reconcileWatchCompletion(statusResult, initialLogText, isTerminalSessionStatus);
317
+ const initialText = formatTerminalWatchMessage({ sessionId, statusResult: reconciledInitialStatus, logText: initialLogText, options: watchOptions, completed: initialCompleted, repoDescription });
277
318
  let replyToMessageId = ctx.message?.message_id || undefined;
278
319
  if (decision.destination === 'dm' && ctx.chat.type !== 'private') {
279
320
  replyToMessageId = await forwardOrCopyToDm(ctx, ctx.message?.reply_to_message || ctx.message);
@@ -1,7 +1,6 @@
1
1
  import { t } from './i18n.lib.mjs';
2
2
  import { escapeMarkdown } from './telegram-markdown.lib.mjs';
3
-
4
- const FAILURE_STATUSES = new Set(['failed', 'cancelled', 'canceled', 'error']);
3
+ import { FAILURE_SESSION_STATUSES, KILLED_SESSION_STATUSES, isKilledSessionStatus, describeExitSignal, normalizeExitCode } from './session-status.lib.mjs';
5
4
 
6
5
  function text(locale, key, fallback, params = {}) {
7
6
  if (!locale) return fallback;
@@ -14,12 +13,6 @@ function parseDateValue(value) {
14
13
  return Number.isNaN(date.getTime()) ? null : date;
15
14
  }
16
15
 
17
- function normalizeExitCode(value) {
18
- if (value === null || value === undefined) return null;
19
- const numeric = Number(value);
20
- return Number.isFinite(numeric) ? numeric : null;
21
- }
22
-
23
16
  export function getSessionCompletionExitCode({ exitCode = null, statusResult = null } = {}) {
24
17
  const explicitExitCode = normalizeExitCode(exitCode);
25
18
  if (explicitExitCode !== null) return explicitExitCode;
@@ -28,11 +21,34 @@ export function getSessionCompletionExitCode({ exitCode = null, statusResult = n
28
21
  if (statusExitCode !== null) return statusExitCode;
29
22
 
30
23
  const status = String(statusResult?.status || '').toLowerCase();
31
- if (FAILURE_STATUSES.has(status)) return 1;
24
+ if (FAILURE_SESSION_STATUSES.has(status)) return 1;
32
25
 
33
26
  return null;
34
27
  }
35
28
 
29
+ /**
30
+ * Decide how a completed session should be presented: success, generic failure,
31
+ * or an explicit kill (OOM/SIGKILL/SIGTERM/…). A session counts as "killed"
32
+ * when its exit code is a signal exit (>128) or its status is one of the kill
33
+ * statuses. This is what stops a SIGKILLed /solve from ever being labelled
34
+ * "finished successfully" (issue #1927, requirement #1).
35
+ *
36
+ * @param {Object} params
37
+ * @param {number|null} params.exitCode - Resolved final exit code
38
+ * @param {string|null} [params.status] - Session status string, if known
39
+ * @returns {{ failed: boolean, killed: boolean, signal: object|null }}
40
+ */
41
+ export function classifySessionOutcome({ exitCode = null, status = null } = {}) {
42
+ const code = normalizeExitCode(exitCode);
43
+ const signal = describeExitSignal(code);
44
+ const killedByStatus = isKilledSessionStatus(status);
45
+ const killed = Boolean(signal) || killedByStatus;
46
+ const failed = killed || (code !== null && code !== 0) || FAILURE_SESSION_STATUSES.has(String(status || '').toLowerCase());
47
+ return { failed, killed, signal };
48
+ }
49
+
50
+ export { KILLED_SESSION_STATUSES };
51
+
36
52
  export function formatSessionDurationSeconds(seconds) {
37
53
  const totalSeconds = Math.max(0, Math.round(Number(seconds) || 0));
38
54
  const days = Math.floor(totalSeconds / 86400);
@@ -104,10 +120,27 @@ export function appendPullRequestLine(infoBlock, pullRequestUrl, { locale = null
104
120
 
105
121
  export function formatSessionCompletionMessage({ sessionName, sessionInfo, statusResult = null, observedEndTime = new Date(), exitCode = null, infoBlock = '', pullRequestUrl = null, extraSections = [], locale = null } = {}) {
106
122
  const finalExitCode = getSessionCompletionExitCode({ exitCode, statusResult });
107
- const failed = finalExitCode !== null && finalExitCode !== 0;
123
+ const outcome = classifySessionOutcome({ exitCode: finalExitCode, status: statusResult?.status || null });
124
+ const { failed, killed, signal } = outcome;
108
125
  const statusEmoji = failed ? '❌' : '✅';
109
126
  const messageLocale = locale || sessionInfo?.locale || null;
110
- const statusText = failed ? text(messageLocale, 'telegram.work_session_failed', `Work session failed (exit code: ${finalExitCode})`, { exitCode: finalExitCode }) : text(messageLocale, 'telegram.work_session_finished', 'Work session finished successfully');
127
+ // Issue #1927: a killed session (OOM/SIGKILL/SIGTERM) must never read as a
128
+ // success, and the signal/reason is surfaced explicitly so an operator can
129
+ // tell an out-of-memory kill apart from an ordinary non-zero exit.
130
+ let statusText;
131
+ if (killed) {
132
+ // A real signal exit is always >128; an exit code of exactly 1 on a
133
+ // status-only kill (process vanished, code unknown) is a synthesized failure
134
+ // sentinel, so suppress the misleading "(exit code: 1)" in that case.
135
+ const showCode = finalExitCode !== null && !(!signal && finalExitCode === 1);
136
+ const exitSuffix = showCode ? ` (exit code: ${finalExitCode})` : '';
137
+ const reason = signal ? signal.reason : 'killed';
138
+ statusText = text(messageLocale, 'telegram.work_session_killed', `Work session ${reason}${exitSuffix}`, { reason, exitCode: finalExitCode ?? '', signal: signal?.signal ?? '' });
139
+ } else if (failed) {
140
+ statusText = text(messageLocale, 'telegram.work_session_failed', `Work session failed (exit code: ${finalExitCode})`, { exitCode: finalExitCode });
141
+ } else {
142
+ statusText = text(messageLocale, 'telegram.work_session_finished', 'Work session finished successfully');
143
+ }
111
144
  const durationLabel = text(messageLocale, 'telegram.duration_label', 'Duration');
112
145
  const sessionLabel = text(messageLocale, 'telegram.session_label', 'Session');
113
146
  const isolationLabel = text(messageLocale, 'telegram.isolation_label', 'Isolation');