npm - @s0nderlabs/anima-plugin-telegram - Versions diffs - 0.19.13 → 0.19.14 - Mend

@s0nderlabs/anima-plugin-telegram 0.19.13 → 0.19.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3) hide show

package/package.json +2 -2
package/src/listener-retry.test.ts +97 -0
package/src/listener.ts +46 -2

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@s0nderlabs/anima-plugin-telegram",
-  "version": "0.19.13",
+  "version": "0.19.14",
   "type": "module",
   "description": "Telegram gateway plugin for anima — long-poll bot, debounced dispatch, reactions, allowlist",
   "license": "MIT",
@@ -28,7 +28,7 @@
     "test": "bun test"
   },
   "dependencies": {
-    "@s0nderlabs/anima-core": "0.19.13",
+    "@s0nderlabs/anima-core": "0.19.14",
     "grammy": "^1.42.0",
     "zod": "^3.23.8"
   }

package/src/listener-retry.test.ts ADDED Viewed

@@ -0,0 +1,97 @@
+import { afterEach, beforeEach, describe, expect, it } from 'bun:test'
+import { existsSync, mkdtempSync, rmSync } from 'node:fs'
+import { tmpdir } from 'node:os'
+import { join } from 'node:path'
+import { TelegramListener } from './listener'
+import { acquireTelegramTokenLock } from './recovery'
+import type { TelegramRuntimeContext } from './types'
+let lockDir: string
+beforeEach(() => {
+  lockDir = mkdtempSync(join(tmpdir(), 'anima-listener-retry-'))
+})
+afterEach(() => {
+  rmSync(lockDir, { recursive: true, force: true })
+})
+const FAKE_TOKEN = '999:does-not-call-network'
+function makeOpts(): TelegramRuntimeContext & { lockRootDir: string; apiRoot: string } {
+  return {
+    botToken: FAKE_TOKEN,
+    allowedUserIds: [42],
+    agentName: 'retry-canary',
+    pairingStore: undefined,
+    dispatchUserMessage: async () => ({ response: 'ok' }),
+    onProcessingStart: async () => {},
+    onProcessingEnd: async () => {},
+    approvalBridge: undefined,
+    lockRootDir: lockDir,
+    // Point grammY at an unreachable host so any accidental network call
+    // would fail fast. We never reach bot.start() in these tests because
+    // the lock path returns first.
+    apiRoot: 'http://127.0.0.1:1',
+  }
+}
+describe('TelegramListener lock-retry', () => {
+  it('does NOT throw when the bot-token lock is held; retains running=false until the lock frees', async () => {
+    // Pre-occupy the lock. From the listener's perspective this is a
+    // zombie/leftover holder it must wait out.
+    const blocker = acquireTelegramTokenLock(FAKE_TOKEN, {
+      agentId: 'retry-canary',
+      rootDir: lockDir,
+    })
+    const listener = new TelegramListener(makeOpts())
+    // Pre-fix this would throw BotTokenLockedError synchronously after the
+    // build-runtime catch and never re-attempt. Now it must swallow,
+    // schedule a retry timer, and remain stoppable.
+    await expect(listener.start()).resolves.toBeUndefined()
+    // stop() should release whatever we held + cancel the retry timer.
+    await listener.stop()
+    blocker.release()
+  })
+  it('stop() cancels a pending retry without leaking timers', async () => {
+    const blocker = acquireTelegramTokenLock(FAKE_TOKEN, {
+      agentId: 'retry-canary',
+      rootDir: lockDir,
+    })
+    const listener = new TelegramListener(makeOpts())
+    await listener.start() // schedules retry because blocker holds the lock
+    // Stop immediately. stop() must clear the pending retry timer; if the
+    // timer were neither cleared nor unref'd, the bun:test process could stay
+    // alive waiting on it for the full 30s retry interval.
+    await listener.stop()
+    blocker.release()
+    // After stop+release, fresh acquisition by an outside caller works
+    // (no orphaned listener still holding the lock).
+    const now = acquireTelegramTokenLock(FAKE_TOKEN, {
+      agentId: 'retry-canary',
+      rootDir: lockDir,
+    })
+    expect(now).toBeDefined()
+    now.release()
+  })
+  it('lock-clear path: when the prior holder releases, the next start succeeds', async () => {
+    const prior = acquireTelegramTokenLock(FAKE_TOKEN, {
+      agentId: 'retry-canary',
+      rootDir: lockDir,
+    })
+    const listener = new TelegramListener(makeOpts())
+    await listener.start() // pending retry; lock not yet acquired
+    prior.release()
+    // The retry is scheduled 30s after the failed start, so it will not have
+    // fired by the time stop() is called below. Rather than wait on a real
+    // timer, we only check that listener.stop() succeeds and leaves no
+    // lockfile behind; a successful re-start is not exercised here.
+    await listener.stop()
+    // After listener.stop() with a retry pending and the prior holder
+    // released, the expected lockfile must not exist (no orphan).
+    expect(existsSync(join(lockDir, 'telegram-bot-token-cbae9eeaf0ee85c6.lock'))).toBe(false)
+  })
+})

package/src/listener.ts CHANGED Viewed

@@ -9,6 +9,7 @@ import { formatPairingMessage } from './pairing-flow'
 import { reactError, reactProcessing, reactSuccess } from './reactions'
 import {
   BotTokenLockedError,
+  TELEGRAM_TOKEN_LOCK_SCOPE,
   type TokenLock,
   acquireTelegramTokenLock,
   classifyStartFailure,
@@ -29,6 +30,12 @@ import type { TelegramDispatchInput, TelegramRuntimeContext } from './types'
  * webhook, then boots grammy in long-poll mode. `stop()` releases the lock
  * and stops the bot. Both are idempotent.
  */
+/** Retry interval and attempt cap used when the Telegram bot-token lock is
+ *  held by a (possibly zombie) prior holder. 12 × 30s = 6 minutes, past the
+ *  5-minute lock TTL, so a stale lock expires before we give up. */
+const RETRY_INTERVAL_MS = 30_000
+const MAX_LOCK_RETRY_ATTEMPTS = 12
 export interface TelegramListenerOpts extends TelegramRuntimeContext {
   /** Optional override of the Telegram Bot API root. Used by the mock-bot test. */
   apiRoot?: string
@@ -49,6 +56,9 @@ export class TelegramListener {
   private running = false
   private tokenLock: TokenLock | null = null
   private refreshTimer: ReturnType<typeof setInterval> | null = null
+  private retryTimer: ReturnType<typeof setTimeout> | null = null
+  private retryAttempts = 0
+  private stopped = false
   private approvalResolver:
     | ((approvalId: string, choice: ApprovalChoice, fromUserId: number) => void)
     | null = null
@@ -110,7 +120,7 @@ export class TelegramListener {
   }
   async start(): Promise<void> {
-    if (this.running) return
+    if (this.running || this.stopped) return
     try {
       this.tokenLock = acquireTelegramTokenLock(this.opts.botToken, {
@@ -118,12 +128,24 @@ export class TelegramListener {
         rootDir: this.opts.lockRootDir,
       })
     } catch (err) {
+      // Lock contention is recoverable: the prior holder may be a zombie or
+      // a stale lockfile from an ungraceful exit (see
+      // feedback-tg-token-lock-zombie-after-upgrade.md). Retry every 30s up
+      // to 12 attempts (6 minutes, past the 5-minute lock TTL) so we
+      // eventually reclaim once the existing entry expires. Without this,
+      // a single failed lock acquisition silenced the bot for the entire
+      // harness lifetime.
       if (err instanceof BotTokenLockedError) {
-        console.warn(`[telegram] cannot start listener: ${err.message}`)
+        console.warn(
+          `[telegram] cannot start listener: ${err.message}; will retry in ${RETRY_INTERVAL_MS / 1000}s`,
+        )
+        this.scheduleStartRetry()
+        return
       }
       throw err
     }
+    this.retryAttempts = 0
     this.running = true
     console.log(`[telegram] listener.start() called for @${this.opts.agentName}`)
@@ -169,6 +191,11 @@ export class TelegramListener {
   }
   async stop(): Promise<void> {
+    this.stopped = true
+    if (this.retryTimer) {
+      clearTimeout(this.retryTimer)
+      this.retryTimer = null
+    }
     if (!this.running) {
       this.releaseLock()
       return
@@ -188,6 +215,23 @@ export class TelegramListener {
     this.releaseLock()
   }
+  /**
+   * Arms a one-shot timer that re-runs `start()` after `RETRY_INTERVAL_MS`.
+   *
+   * Does nothing once `stop()` has been called, and gives up (logging a
+   * manual-recovery hint) once `MAX_LOCK_RETRY_ATTEMPTS` retries have been
+   * scheduled. Any previously armed timer is cleared first so at most one
+   * retry is pending. The timer is unref'd where the runtime supports it.
+   */
+  private scheduleStartRetry(): void {
+    if (this.stopped) return
+    if (this.retryAttempts >= MAX_LOCK_RETRY_ATTEMPTS) {
+      console.error(
+        `[telegram] gave up acquiring bot-token lock after ${this.retryAttempts} attempts; manual intervention required (rm ~/.anima/locks/${TELEGRAM_TOKEN_LOCK_SCOPE}-*.lock)`,
+      )
+      return
+    }
+    if (this.retryTimer) clearTimeout(this.retryTimer)
+    this.retryAttempts += 1
+    this.retryTimer = setTimeout(() => {
+      this.retryTimer = null
+      // start() re-throws anything that is not a BotTokenLockedError. This
+      // call is fire-and-forget, so the rejection has to be handled here or
+      // it surfaces as an unhandled promise rejection.
+      void this.start().catch((err: unknown) => {
+        console.error('[telegram] listener start retry failed:', err)
+      })
+    }, RETRY_INTERVAL_MS)
+    this.retryTimer.unref?.()
+  }
   private releaseLock(): void {
     if (this.tokenLock) {
       try {