kiro-telegram-bot 1.6.0 → 1.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/acp/client.ts CHANGED
@@ -29,7 +29,7 @@ const log = createLogger("acp:client");
29
29
  /** JSON-RPC error codes that usually mean "transient backend hiccup". */
30
30
  const TRANSIENT_CODES = new Set([-32603, -32500, -32000, 500, 502, 503, 504, 429]);
31
31
  const TRANSIENT_RE =
32
- /internal error|high volume|experiencing|overloaded|temporar|unavailable|rate.?limit|too many requests|try again|capacity|\b50[234]\b|\b429\b/i;
32
+ /internal error|high volume|experiencing|overloaded|temporar|unavailable|rate.?limit|too many requests|try again|capacity|dispatch failure|response stream|connection (?:reset|closed|refused|error)|reset by peer|broken pipe|socket hang ?up|econnreset|econnrefused|enotfound|eai_again|etimedout|\b50[234]\b|\b429\b/i;
33
33
 
34
34
  /** Error that preserves the agent's JSON-RPC error code and data payload. */
35
35
  export class AcpError extends Error {
@@ -120,6 +120,12 @@ export class AcpClient extends EventEmitter {
120
120
  private readonly promptMaxMs: number;
121
121
  /** Last time we saw streaming activity for a session (epoch ms). */
122
122
  private readonly lastActivity = new Map<string, number>();
123
+ /** Last time ANY session OR subagent produced output (epoch ms) — a
124
+ * process-wide "the agent is alive" clock. The per-session map misses work
125
+ * done by subagents (they stream on their own session ids), so without this
126
+ * a prompt that delegates to long-running subagents (e.g. parallel
127
+ * translation) trips the idle timeout while its subagents are doing the work. */
128
+ private lastActivityAny = 0;
123
129
  private stopped = false;
124
130
  private restartAttempts = 0;
125
131
  private restartTimer?: NodeJS.Timeout;
@@ -159,24 +165,31 @@ export class AcpClient extends EventEmitter {
159
165
  if (this.opts.agent) args.push("--agent", this.opts.agent);
160
166
 
161
167
  log.info(`spawning: ${this.opts.kiroCliPath} ${args.join(" ")}`);
162
- this.proc = spawn(this.opts.kiroCliPath, args, {
168
+ const proc = spawn(this.opts.kiroCliPath, args, {
163
169
  stdio: ["pipe", "pipe", "pipe"],
164
170
  cwd: this.opts.workspace,
165
171
  env: { ...process.env, KIRO_LOG_LEVEL: process.env.KIRO_LOG_LEVEL || "error" },
166
172
  }) as ChildProcessWithoutNullStreams;
167
-
168
- this.proc.on("exit", (code) => {
173
+ this.proc = proc;
174
+
175
+ proc.on("exit", (code) => {
176
+ // Ignore the exit of a process we've already replaced (a deliberate
177
+ // restart/stop). Its teardown must NOT fail the new process's pending
178
+ // requests nor trigger a competing auto-restart — that race was the cause
179
+ // of "/reauth → agent restart failed: kiro-cli acp exited (code null)".
180
+ if (this.proc !== proc) return;
169
181
  log.warn(`kiro-cli acp exited (code ${code})`);
170
182
  this.failAllPending(new Error(`kiro-cli acp exited (code ${code})`));
171
183
  this.emit("exit", code);
172
184
  this.maybeRestart();
173
185
  });
174
- this.proc.on("error", (err) => {
186
+ proc.on("error", (err) => {
187
+ if (this.proc !== proc) return;
175
188
  log.error("failed to spawn kiro-cli:", err.message);
176
189
  this.failAllPending(err);
177
190
  });
178
191
 
179
- this.transport = new JsonRpcTransport(this.proc);
192
+ this.transport = new JsonRpcTransport(proc);
180
193
  this.transport.on("message", (m: JsonRpcMessage) => this.onMessage(m));
181
194
 
182
195
  const init = (await this.request("initialize", {
@@ -276,15 +289,21 @@ export class AcpClient extends EventEmitter {
276
289
  const start = Date.now();
277
290
  this.lastActivity.set(sessionId, start);
278
291
  const watch = setInterval(() => {
279
- const idle = Date.now() - (this.lastActivity.get(sessionId) ?? start);
292
+ // Count activity from ANY session/subagent (the process-wide clock), so
293
+ // a turn that's delegating to long-running subagents stays alive while
294
+ // they work — only a genuinely silent (stuck) agent trips the timeout.
295
+ const last = Math.max(this.lastActivity.get(sessionId) ?? start, this.lastActivityAny);
296
+ const idle = Date.now() - last;
280
297
  const total = Date.now() - start;
281
298
  if (total > this.promptMaxMs) {
282
299
  this.pending.delete(id);
283
300
  clearInterval(watch);
301
+ void this.cancel(sessionId); // stop the runaway turn so the session is reusable
284
302
  reject(new Error(`Prompt exceeded the ${Math.round(this.promptMaxMs / 60_000)}min cap`));
285
303
  } else if (idle > this.promptIdleMs) {
286
304
  this.pending.delete(id);
287
305
  clearInterval(watch);
306
+ void this.cancel(sessionId); // free the session — otherwise the next prompt collides ("dispatch failure")
288
307
  reject(new Error(`No agent activity for ${Math.round(idle / 1000)}s — giving up`));
289
308
  }
290
309
  }, 15_000);
@@ -329,22 +348,87 @@ export class AcpClient extends EventEmitter {
329
348
 
330
349
  stop(): void {
331
350
  this.stopped = true;
332
- if (this.restartTimer) clearTimeout(this.restartTimer);
333
- this.proc?.kill();
334
- this.proc = undefined;
351
+ if (this.restartTimer) {
352
+ clearTimeout(this.restartTimer);
353
+ this.restartTimer = undefined;
354
+ }
355
+ void this.killCurrent();
335
356
  }
336
357
 
337
- /** Manually restart the agent (used by the /restart command). */
338
- async restart(): Promise<void> {
358
+ /**
359
+ * Stop the agent and WAIT for the process to fully exit, leaving it stopped
360
+ * (no auto-restart until start()/restart()). Used by /reauth to release the
361
+ * held session BEFORE logging out — otherwise the live agent keeps refreshing
362
+ * and re-persisting the old token, silently restoring the previous identity.
363
+ */
364
+ async stopAndWait(): Promise<void> {
339
365
  this.stopped = true;
340
- if (this.restartTimer) clearTimeout(this.restartTimer);
341
- this.proc?.kill();
342
- this.proc = undefined;
366
+ if (this.restartTimer) {
367
+ clearTimeout(this.restartTimer);
368
+ this.restartTimer = undefined;
369
+ }
370
+ await this.killCurrent();
371
+ }
372
+
373
+ /**
374
+ * Manually restart the agent (used by /restart, /reauth and the MCP toggle).
375
+ * The old process is fully torn down BEFORE a fresh one is spawned, so its
376
+ * exit can't fail the new connection's `initialize` — which previously
377
+ * surfaced as "agent restart failed: kiro-cli acp exited (code null)".
378
+ */
379
+ async restart(): Promise<void> {
380
+ if (this.restartTimer) {
381
+ clearTimeout(this.restartTimer);
382
+ this.restartTimer = undefined;
383
+ }
384
+ this.stopped = true; // suppress auto-restart while we swap processes
385
+ this.restartAttempts = 0;
386
+ await this.killCurrent();
343
387
  this.stopped = false;
344
388
  await this.connect();
345
389
  this.emit("restarted");
346
390
  }
347
391
 
392
+ /**
393
+ * Terminate the current process and wait for it to fully exit. Clearing
394
+ * `this.proc` first makes the connect()-registered exit/error handlers
395
+ * short-circuit, so a deliberate teardown is silent (no `exit` event, no
396
+ * auto-restart). In-flight requests are rejected here, because the
397
+ * short-circuited exit handler no longer rejects them. Escalates to SIGKILL if the process is still alive after 4 s, then resolves 0.5 s later regardless, so it never hangs.
398
+ */
399
+ private killCurrent(): Promise<void> {
400
+ const proc = this.proc;
401
+ this.proc = undefined;
402
+ this.transport = undefined;
403
+ this.failAllPending(new Error("kiro-cli acp is restarting"));
404
+ if (!proc || proc.exitCode !== null || proc.signalCode !== null) {
405
+ return Promise.resolve();
406
+ }
407
+ return new Promise<void>((resolve) => {
408
+ let settled = false;
409
+ const done = (): void => {
410
+ if (settled) return;
411
+ settled = true;
412
+ clearTimeout(hard);
413
+ resolve();
414
+ };
415
+ const hard = setTimeout(() => {
416
+ try {
417
+ proc.kill("SIGKILL");
418
+ } catch {
419
+ /* ignore */
420
+ }
421
+ setTimeout(done, 500); // give the OS a beat, then proceed regardless
422
+ }, 4000);
423
+ proc.once("exit", done);
424
+ try {
425
+ proc.kill();
426
+ } catch {
427
+ done();
428
+ }
429
+ });
430
+ }
431
+
348
432
  // ── JSON-RPC plumbing ──────────────────────────────────────────────────────
349
433
 
350
434
  private request(method: string, params: unknown): Promise<unknown> {
@@ -424,6 +508,17 @@ export class AcpClient extends EventEmitter {
424
508
  }
425
509
 
426
510
  private routeNotification(method: string, params: unknown): void {
511
+ // Any agent-work notification — the main stream, a SUBAGENT's stream, model
512
+ // metadata, or a subagent status change — proves the process is alive, so
513
+ // refresh the process-wide clock. This is what keeps a prompt delegating to
514
+ // long-running subagents from tripping the per-session idle timeout.
515
+ if (
516
+ method === "session/update" ||
517
+ method === "_kiro.dev/metadata" ||
518
+ method === "_kiro.dev/subagent/list_update"
519
+ ) {
520
+ this.lastActivityAny = Date.now();
521
+ }
427
522
  if (method === "session/update") {
428
523
  const p = params as SessionNotificationParams;
429
524
  if (p?.sessionId && p.update) {
@@ -0,0 +1,325 @@
1
+ /**
2
+ * Kiro authentication control for /reauth: `kiro-cli logout` then an interactive
3
+ * `kiro-cli login --use-device-flow`. The device flow prints a verification URL
4
+ * + code to stdout (no browser redirect on the bot host), which we stream back
5
+ * to Telegram so the user can complete it on their own device.
6
+ */
7
+ import { execFile, spawn } from "node:child_process";
8
+ import { rm } from "node:fs/promises";
9
+ import { homedir } from "node:os";
10
+ import { join } from "node:path";
11
+ import { promisify } from "node:util";
12
+ import { createLogger } from "../logger.js";
13
+
14
+ const run = promisify(execFile);
15
+ const log = createLogger("auth");
16
+
17
+ // Strip ANSI colour/cursor escapes so the Telegram transcript stays readable.
18
+ // eslint-disable-next-line no-control-regex
19
+ const ANSI_RE = /\x1b\[[0-9;?]*[ -/]*[@-~]/g;
20
+
21
+ export interface LoginResult {
22
+ ok: boolean;
23
+ code: number | null;
24
+ /** True when the login was aborted via the supplied AbortSignal. */
25
+ cancelled?: boolean;
26
+ /** Human-readable failure reason, when known (shown in the chat). */
27
+ error?: string;
28
+ }
29
+
30
+ export interface LoginOptions {
31
+ /** Extra CLI flags (e.g. `--license pro`). `--use-device-flow` is added if absent. */
32
+ extraArgs?: string[];
33
+ /** Receives decoded stdout/stderr chunks as they arrive. */
34
+ onOutput: (text: string) => void;
35
+ /** Overall timeout before the login process is killed (default 5 min). */
36
+ timeoutMs?: number;
37
+ /** Abort to cancel the in-flight login — kills the process (Cancel button). */
38
+ signal?: AbortSignal;
39
+ }
40
+
41
+ export interface IdcLoginOptions {
42
+ /** IAM Identity Center start URL (e.g. https://my-org.awsapps.com/start). */
43
+ startUrl: string;
44
+ /** AWS region of the Identity Center (e.g. us-east-1). */
45
+ region: string;
46
+ /** Receives decoded output chunks as they arrive. */
47
+ onOutput: (text: string) => void;
48
+ /** Overall timeout before the login process is killed (default 5 min). */
49
+ timeoutMs?: number;
50
+ /** Abort to cancel the in-flight login — kills the process (Cancel button). */
51
+ signal?: AbortSignal;
52
+ }
53
+
54
+ /** Minimal shape of the optional `node-pty` module we rely on. */
55
+ interface IPty {
56
+ onData(cb: (data: string) => void): void;
57
+ onExit(cb: (e: { exitCode: number; signal?: number }) => void): void;
58
+ write(data: string): void;
59
+ kill(signal?: string): void;
60
+ }
61
+ interface PtyModule {
62
+ spawn(
63
+ file: string,
64
+ args: string[],
65
+ options: { name?: string; cols?: number; rows?: number; cwd?: string; env?: NodeJS.ProcessEnv },
66
+ ): IPty;
67
+ }
68
+
69
+ export class AuthService {
70
+ constructor(private readonly kiroCliPath: string) {}
71
+
72
+ /** Run `kiro-cli logout` (non-interactive). */
73
+ async logout(): Promise<{ ok: boolean; out: string }> {
74
+ try {
75
+ const { stdout, stderr } = await run(this.kiroCliPath, ["logout"], {
76
+ timeout: 30_000,
77
+ encoding: "utf-8",
78
+ });
79
+ return { ok: true, out: clean(`${stdout}${stderr}`) };
80
+ } catch (e) {
81
+ const err = e as { stdout?: string; stderr?: string; message?: string };
82
+ const out = clean(`${err.stdout ?? ""}${err.stderr ?? ""}`) || err.message || "logout failed";
83
+ return { ok: false, out };
84
+ }
85
+ }
86
+
87
+ /**
88
+ * Best-effort removal of Kiro's cached auth token (`~/.aws/sso/cache/
89
+ * kiro-auth-token.json`) — the file that carries the logged-in identity
90
+ * (accessToken + refreshToken). Removing it after `logout` guarantees the
91
+ * next `login` performs a genuine device-flow authentication instead of
92
+ * silently reusing the previous account's cached/refreshable token.
93
+ *
94
+ * Surgical and safe: it touches ONLY Kiro's own token file, never the shared,
95
+ * account-agnostic OIDC client registrations, and is a no-op if absent.
96
+ */
97
+ async clearTokenCache(): Promise<boolean> {
98
+ const path = join(homedir(), ".aws", "sso", "cache", "kiro-auth-token.json");
99
+ try {
100
+ await rm(path, { force: true });
101
+ log.info(`cleared cached auth token (${path})`);
102
+ return true;
103
+ } catch (e) {
104
+ log.debug("clearTokenCache failed:", (e as Error).message);
105
+ return false;
106
+ }
107
+ }
108
+
109
+ /**
110
+ * Spawn `kiro-cli login` (adding `--use-device-flow` if absent) and stream its stdout/stderr to `onOutput` as it arrives (so the device code/URL reaches the user fast).
111
+ * Resolves when the process exits, the timeout fires, or the signal aborts.
112
+ */
113
+ login(opts: LoginOptions): Promise<LoginResult> {
114
+ const { extraArgs = [], onOutput, timeoutMs = 300_000, signal } = opts;
115
+ return new Promise<LoginResult>((resolve) => {
116
+ if (signal?.aborted) {
117
+ resolve({ ok: false, code: null, cancelled: true });
118
+ return;
119
+ }
120
+ const args = ["login"];
121
+ if (!extraArgs.includes("--use-device-flow")) args.push("--use-device-flow");
122
+ args.push(...extraArgs);
123
+ log.info(`spawning login: ${this.kiroCliPath} ${args.join(" ")}`);
124
+
125
+ let proc;
126
+ try {
127
+ // stdin ignored: any interactive prompt gets EOF rather than hanging.
128
+ proc = spawn(this.kiroCliPath, args, { stdio: ["ignore", "pipe", "pipe"] });
129
+ } catch (e) {
130
+ onOutput(`error: ${(e as Error).message}`);
131
+ resolve({ ok: false, code: null });
132
+ return;
133
+ }
134
+
135
+ let cancelled = false;
136
+ let settled = false;
137
+ let hardKill: NodeJS.Timeout | undefined;
138
+
139
+ const onAbort = (): void => {
140
+ cancelled = true;
141
+ try {
142
+ proc.kill();
143
+ } catch {
144
+ /* ignore */
145
+ }
146
+ // Escalate if the CLI ignores the polite signal.
147
+ hardKill = setTimeout(() => {
148
+ try {
149
+ proc.kill("SIGKILL");
150
+ } catch {
151
+ /* ignore */
152
+ }
153
+ }, 2000);
154
+ };
155
+
156
+ const finish = (r: LoginResult): void => {
157
+ if (settled) return;
158
+ settled = true;
159
+ clearTimeout(timer);
160
+ if (hardKill) clearTimeout(hardKill);
161
+ signal?.removeEventListener("abort", onAbort);
162
+ resolve(r);
163
+ };
164
+
165
+ const feed = (b: Buffer): void => {
166
+ const t = clean(b.toString("utf-8"));
167
+ if (t) onOutput(t);
168
+ };
169
+ proc.stdout.on("data", feed);
170
+ proc.stderr.on("data", feed);
171
+
172
+ const timer = setTimeout(() => {
173
+ onOutput("\n\u23F1\uFE0F Timed out waiting for login to complete.");
174
+ try {
175
+ proc.kill();
176
+ } catch {
177
+ /* ignore */
178
+ }
179
+ }, timeoutMs);
180
+
181
+ signal?.addEventListener("abort", onAbort, { once: true });
182
+
183
+ proc.on("error", (e: Error) => {
184
+ onOutput(`error: ${e.message}`);
185
+ finish({ ok: false, code: null, cancelled });
186
+ });
187
+ proc.on("exit", (code: number | null) => {
188
+ finish({ ok: code === 0 && !cancelled, code, cancelled });
189
+ });
190
+ });
191
+ }
192
+
193
+ /**
194
+ * IAM Identity Center (Pro) login. Unlike the Builder ID device flow, this
195
+ * CLI path is *interactive*: it always prompts ("Enter Start URL", "Enter
196
+ * Region", and — when the account has several — an Identity Center profile
197
+ * picker) and refuses to run without a real terminal. So we drive it inside a
198
+ * pseudo-terminal (the optional `node-pty` module), answering each prompt with
199
+ * the start URL / region the user supplied and accepting the default profile.
200
+ * The device verification URL + code still stream out via `onOutput`.
201
+ */
202
+ loginIdc(opts: IdcLoginOptions): Promise<LoginResult> {
203
+ const { startUrl, region, onOutput, timeoutMs = 300_000, signal } = opts;
204
+ return new Promise<LoginResult>((resolve) => {
205
+ if (signal?.aborted) {
206
+ resolve({ ok: false, code: null, cancelled: true });
207
+ return;
208
+ }
209
+ void (async () => {
210
+ // Load the optional native PTY lazily via a variable specifier so a
211
+ // missing module is a graceful runtime error, not an install/type break.
212
+ let pty: PtyModule;
213
+ try {
214
+ const specifier = "@homebridge/node-pty-prebuilt-multiarch";
215
+ const mod = (await import(specifier)) as unknown as PtyModule & { default?: PtyModule };
216
+ pty = typeof mod.spawn === "function" ? mod : (mod.default as PtyModule);
217
+ if (!pty || typeof pty.spawn !== "function") throw new Error("invalid pty module");
218
+ } catch {
219
+ resolve({
220
+ ok: false,
221
+ code: null,
222
+ error:
223
+ "IAM Identity Center login needs the PTY module. Run `npm install` in the bot folder, then try again.",
224
+ });
225
+ return;
226
+ }
227
+
228
+ // Pass the start URL + region as flags so the interactive prompts come
229
+ // PREFILLED — we then just press Enter to accept them. Typing the values
230
+ // ourselves makes the terminal echo them, which doubles the captured
231
+ // input (e.g. "us-east-1us-east-1" → bad OIDC endpoint). Q_FAKE_IS_REMOTE
232
+ // forces the CLI to PRINT the verification URL instead of opening a
233
+ // browser on the bot host, so it streams to Telegram.
234
+ const args = [
235
+ "login",
236
+ "--license",
237
+ "pro",
238
+ "--identity-provider",
239
+ startUrl,
240
+ "--region",
241
+ region,
242
+ "--use-device-flow",
243
+ ];
244
+ log.info(`spawning IDC login (pty): ${this.kiroCliPath} ${args.join(" ")}`);
245
+
246
+ let term: IPty;
247
+ try {
248
+ term = pty.spawn(this.kiroCliPath, args, {
249
+ name: "xterm-color",
250
+ cols: 120,
251
+ rows: 30,
252
+ env: { ...process.env, Q_FAKE_IS_REMOTE: "1" },
253
+ });
254
+ } catch (e) {
255
+ resolve({ ok: false, code: null, error: (e as Error).message });
256
+ return;
257
+ }
258
+
259
+ let buf = "";
260
+ let sentUrl = false;
261
+ let sentRegion = false;
262
+ let sentProfile = false;
263
+ let cancelled = false;
264
+ let settled = false;
265
+
266
+ const onAbort = (): void => {
267
+ cancelled = true;
268
+ try {
269
+ term.kill();
270
+ } catch {
271
+ /* ignore */
272
+ }
273
+ };
274
+
275
+ const finish = (r: LoginResult): void => {
276
+ if (settled) return;
277
+ settled = true;
278
+ clearTimeout(timer);
279
+ signal?.removeEventListener("abort", onAbort);
280
+ try {
281
+ term.kill();
282
+ } catch {
283
+ /* ignore */
284
+ }
285
+ resolve(r);
286
+ };
287
+
288
+ const timer = setTimeout(() => {
289
+ onOutput("\n\u23F1\uFE0F Timed out waiting for login to complete.");
290
+ try {
291
+ term.kill();
292
+ } catch {
293
+ /* ignore */
294
+ }
295
+ }, timeoutMs);
296
+
297
+ signal?.addEventListener("abort", onAbort, { once: true });
298
+
299
+ term.onData((d: string) => {
300
+ const t = clean(d);
301
+ if (t) onOutput(t);
302
+ buf += t;
303
+ // Each prompt is PREFILLED (from the flags); just press Enter to accept
304
+ // it. We answer each exactly once, in order.
305
+ if (!sentUrl && /start url/i.test(buf)) {
306
+ sentUrl = true;
307
+ term.write("\r");
308
+ } else if (sentUrl && !sentRegion && /enter region/i.test(buf)) {
309
+ sentRegion = true;
310
+ term.write("\r");
311
+ } else if (sentRegion && !sentProfile && /select an iam identity center profile/i.test(buf)) {
312
+ sentProfile = true;
313
+ term.write("\r"); // accept the default (first) profile
314
+ }
315
+ });
316
+
317
+ term.onExit(({ exitCode }) => finish({ ok: exitCode === 0 && !cancelled, code: exitCode, cancelled }));
318
+ })();
319
+ });
320
+ }
321
+ }
322
+
323
+ function clean(s: string): string {
324
+ return s.replace(ANSI_RE, "").replace(/\r/g, "");
325
+ }
@@ -0,0 +1,139 @@
1
+ /**
2
+ * Single-instance guard, keyed per bot token (NOT per folder), so the same bot
3
+ * can't run twice no matter which directory it's started from.
4
+ *
5
+ * Telegram allows only ONE long-polling consumer per token — a second instance
6
+ * triggers 409 Conflict and, worse, a leftover "ghost" process started from an
7
+ * old folder keeps answering with a stale `.env` (e.g. an outdated
8
+ * `ALLOWED_USERS`, so you get "⛔ Not authorized"). On startup we therefore
9
+ * take an exclusive lock: if a still-alive instance holds it, we terminate that
10
+ * process (and its child tree on Windows) so the fresh process — with the
11
+ * current config — becomes the only consumer.
12
+ *
13
+ * The lock lives under the canonical home (`~/.kiro/tg/locks/<tokenHash>.lock`)
14
+ * and stores only a pid + start time + whether the holder is supervised. The
15
+ * token itself is never written to disk (only its hash names the file).
16
+ */
17
+ import { execFileSync } from "node:child_process";
18
+ import { mkdirSync, readFileSync, rmSync, writeFileSync } from "node:fs";
19
+ import { createHash } from "node:crypto";
20
+ import { join } from "node:path";
21
+ import { createLogger } from "../logger.js";
22
+ import { killPid } from "../sessions/process.js";
23
+ import { isPidAlive } from "../sessions/store.js";
24
+
25
+ const log = createLogger("lock");
26
+
27
+ interface LockData {
28
+ pid: number;
29
+ startedAt: number;
30
+ /** True when the holder runs under a supervisor (systemd/launchd/Task). */
31
+ supervised: boolean;
32
+ }
33
+
34
+ const sleep = (ms: number): Promise<void> => new Promise((r) => setTimeout(r, ms));
35
+
36
+ export class InstanceLock {
37
+ private readonly file: string;
38
+ private held = false;
39
+
40
+ constructor(
41
+ token: string,
42
+ locksDir: string,
43
+ private readonly supervised: boolean,
44
+ ) {
45
+ const hash = createHash("sha256").update(token).digest("hex").slice(0, 16);
46
+ this.file = join(locksDir, `${hash}.lock`);
47
+ }
48
+
49
+ /**
50
+ * Become the sole instance for this token. Returns `false` (caller should
51
+ * exit) only when a *supervised* service instance is already running and this
52
+ * process is a plain manual start — we don't fight the background service
53
+ * (that would cause a restart/kill loop). Otherwise we take over: a live
54
+ * holder is terminated and the lock is rewritten with our pid.
55
+ */
56
+ async acquire(): Promise<boolean> {
57
+ const existing = this.read();
58
+ if (existing && existing.pid !== process.pid && isPidAlive(existing.pid)) {
59
+ if (existing.supervised && !this.supervised) {
60
+ log.warn(`a supervised service instance is already running (pid ${existing.pid}); not starting a duplicate`);
61
+ return false;
62
+ }
63
+ if (looksLikeNode(existing.pid)) {
64
+ log.warn(`another bot instance is running (pid ${existing.pid}); terminating it to take over`);
65
+ killPid(existing.pid);
66
+ for (let i = 0; i < 20 && isPidAlive(existing.pid); i++) await sleep(150); // up to ~3s
67
+ if (isPidAlive(existing.pid)) log.warn(`previous instance ${existing.pid} still alive after kill; continuing anyway`);
68
+ } else {
69
+ // The locked pid was recycled to an unrelated process — don't kill it,
70
+ // just reclaim the stale lock.
71
+ log.warn(`lock pid ${existing.pid} is not a node process; reclaiming stale lock`);
72
+ }
73
+ }
74
+ this.write();
75
+ this.held = true;
76
+ return true;
77
+ }
78
+
79
+ /** Release the lock if (and only if) we still own it. */
80
+ release(): void {
81
+ if (!this.held) return;
82
+ this.held = false;
83
+ try {
84
+ const cur = this.read();
85
+ if (cur?.pid === process.pid) rmSync(this.file, { force: true });
86
+ } catch {
87
+ /* best-effort */
88
+ }
89
+ }
90
+
91
+ private write(): void {
92
+ const data: LockData = { pid: process.pid, startedAt: Date.now(), supervised: this.supervised };
93
+ try {
94
+ mkdirSync(join(this.file, ".."), { recursive: true });
95
+ writeFileSync(this.file, JSON.stringify(data), "utf-8");
96
+ } catch (e) {
97
+ log.warn(`could not write lock file ${this.file}: ${(e as Error).message}`);
98
+ }
99
+ }
100
+
101
+ private read(): LockData | undefined {
102
+ try {
103
+ const d = JSON.parse(readFileSync(this.file, "utf-8")) as Partial<LockData>;
104
+ if (typeof d.pid === "number" && d.pid > 0) {
105
+ return { pid: d.pid, startedAt: Number(d.startedAt) || 0, supervised: Boolean(d.supervised) };
106
+ }
107
+ } catch {
108
+ /* no/invalid lock */
109
+ }
110
+ return undefined;
111
+ }
112
+ }
113
+
114
+ /**
115
+ * Best-effort check that `pid` is a node process (our bot), to avoid killing an
116
+ * unrelated process that happened to reuse the pid. If the platform query can't
117
+ * run or be parsed, we assume it's ours (only this bot writes the lock) — better
118
+ * to clear a ghost than to leave one fighting over the token.
119
+ */
120
+ function looksLikeNode(pid: number): boolean {
121
+ try {
122
+ if (process.platform === "win32") {
123
+ const out = execFileSync("tasklist", ["/FI", `PID eq ${pid}`, "/FO", "CSV", "/NH"], {
124
+ encoding: "utf-8",
125
+ stdio: ["ignore", "pipe", "ignore"],
126
+ });
127
+ // No matching task prints an INFO line, not a CSV row — treat as "gone".
128
+ if (!/^\s*"/.test(out)) return false;
129
+ return /node\.exe|tsx/i.test(out);
130
+ }
131
+ const out = execFileSync("ps", ["-p", String(pid), "-o", "comm="], {
132
+ encoding: "utf-8",
133
+ stdio: ["ignore", "pipe", "ignore"],
134
+ });
135
+ return /node|tsx/i.test(out);
136
+ } catch {
137
+ return true;
138
+ }
139
+ }
package/src/bot/auth.ts CHANGED
@@ -14,12 +14,23 @@ export function createAuthMiddleware(cfg: AppConfig) {
14
14
  }
15
15
 
16
16
  return async (ctx: Context, next: NextFunction): Promise<void> => {
17
- const userId = ctx.from?.id ? String(ctx.from.id) : undefined;
18
- if (allowAll || (userId && cfg.allowedUsers.has(userId))) {
17
+ const from = ctx.from;
18
+ // Only a genuine USER action is subject to (and worth replying to) the auth
19
+ // gate. Ignore everything else silently — most importantly the bot's OWN
20
+ // updates: the status panel being pinned/unpinned emits a service message
21
+ // whose `from` is THIS bot (is_bot), and replying "⛔ Not authorized" to
22
+ // that (or to any service/no-`from` update) spammed the chat with false
23
+ // rejections. Real unauthorized users still get one clear reply below.
24
+ if (!from || from.is_bot) return;
25
+ const m = ctx.message ?? ctx.editedMessage;
26
+ if (m && (m.pinned_message || m.new_chat_members || m.left_chat_member)) return;
27
+
28
+ const userId = String(from.id);
29
+ if (allowAll || cfg.allowedUsers.has(userId)) {
19
30
  await next();
20
31
  return;
21
32
  }
22
- log.warn(`blocked unauthorized user ${userId ?? "unknown"}`);
33
+ log.warn(`blocked unauthorized user ${userId}`);
23
34
  if (ctx.chat) {
24
35
  await ctx.reply("\u26D4 Not authorized. Ask the bot owner to add your Telegram ID.");
25
36
  }