alvin-bot 5.1.6 → 5.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,23 @@
2
2
 
3
3
  All notable changes to Alvin Bot are documented here.
4
4
 
5
+ ## [5.1.7] — 2026-05-17
6
+
7
+ ### Scheduled jobs no longer run twice after a restart
8
+
9
+ If two bot instances were briefly alive at the same time — for example
10
+ right after an auto-update or a restart, while the old process was still
11
+ shutting down — a scheduled job could fire twice within the same minute.
12
+ One real case: a weekly report job sent its email, then sent an empty
13
+ duplicate 30 seconds later. The old overlap guard only worked inside a
14
+ single process, so a second instance never saw the first one's claim.
15
+
16
+ Jobs are now claimed with a small cross-process lock before they run, so
17
+ only one instance can execute a given job for a given slot. A crashed
18
+ run can't wedge the lock — it is reclaimed automatically once the owning
19
+ process is gone. Manual `/cron run` honours the same lock. No
20
+ configuration changes; existing jobs just stop double-firing.
21
+
5
22
  ## [5.1.6] — 2026-05-15
6
23
 
7
24
  ### Planned restarts really stop counting as crashes now
@@ -11,7 +11,7 @@
11
11
  */
12
12
  import fs from "fs";
13
13
  import { execSync } from "child_process";
14
- import { dirname } from "path";
14
+ import { resolve, dirname } from "path";
15
15
  import { CRON_FILE, BOT_ROOT } from "../paths.js";
16
16
  import { prepareForExecution, handleStartupCatchup, calculateNextRunFrom, } from "./cron-scheduling.js";
17
17
  import { resolveJobByNameOrId } from "./cron-resolver.js";
@@ -256,6 +256,85 @@ async function executeJob(job) {
256
256
  // ── Scheduler Loop ──────────────────────────────────────
257
257
  let schedulerTimer = null;
258
258
  const runningJobs = new Set(); // Guard against overlapping executions
259
+ // ── Cross-process job lock ──────────────────────────────
260
+ //
261
+ // `runningJobs` only guards overlap WITHIN this process. If two bot
262
+ // instances are briefly alive at once (a launchd/pm2 restart that left
263
+ // the old process running, or startup-catchup racing the normal tick),
264
+ // each has its own in-memory Set and the same job can fire twice —
265
+ // observed in the wild: a weekly job mailed its report, then mailed an
266
+ // empty duplicate 30 s later. This atomic `mkdir` lock makes the claim
267
+ // cross-process: the second instance sees the lock and skips the slot
268
+ // instead of double-firing. Stale locks (owning PID gone, or — when the
269
+ // meta is unreadable — older than the catch-up grace) are reclaimed so a
270
+ // crash can never wedge a job forever. No deps, cross-platform.
271
// Directory holding the per-job lock directories; it is resolved as
// ".cron-locks" inside the directory that contains CRON_FILE.
+ const CRON_LOCK_DIR = resolve(dirname(CRON_FILE), ".cron-locks");
272
// Six hours in milliseconds. Consulted only when a lock's meta file cannot be
// read or parsed: a lock directory whose mtime is older than this is treated
// as stale and may be reclaimed.
+ const CRON_LOCK_MAX_AGE_MS = 6 * 60 * 60 * 1000; // backstop for corrupt meta
273
/**
 * Build the filesystem path of the lock directory that belongs to one job.
 *
 * Every character of the id outside `A-Z`, `a-z`, `0-9`, `_` and `-` is
 * replaced with `_`, so the result is always a single path segment.
 *
 * @param {string} jobId - Identifier of the job.
 * @returns {string} `<sanitised id>.lock`, resolved against CRON_LOCK_DIR.
 */
function cronLockPath(jobId) {
    const safeId = jobId.replace(/[^A-Za-z0-9_-]/g, "_");
    const lockName = `${safeId}.lock`;
    return resolve(CRON_LOCK_DIR, lockName);
}
276
/**
 * Try to claim the cross-process lock for a job.
 *
 * The lock is a directory created with a plain (non-recursive) `mkdir`, which
 * fails with EEXIST when another holder already created it. A best-effort
 * `meta` file inside records the owner's pid and the claim time.
 *
 * An existing lock is reclaimed when it is stale:
 *   - meta is readable and its pid is not a positive integer, or
 *   - meta is readable and the recorded pid no longer exists (ESRCH), or
 *   - meta is missing/corrupt and the lock directory is older than
 *     CRON_LOCK_MAX_AGE_MS.
 *
 * Reclaiming moves the lock aside with an atomic rename and then checks that
 * the moved directory still holds the meta that was judged stale. If another
 * instance re-created the lock in between, it is put back untouched. This
 * narrows (it does not fully eliminate) the window in which two instances
 * could both reclaim the same stale lock.
 *
 * @param {string} jobId - Identifier of the job to lock.
 * @returns {boolean} true when this process now holds the lock; false when
 *   the lock is held elsewhere or could not be created.
 */
function acquireJobLock(jobId) {
    const lock = cronLockPath(jobId);
    const metaPath = resolve(lock, "meta");
    const writeMeta = () => {
        try {
            fs.writeFileSync(metaPath, JSON.stringify({ pid: process.pid, at: Date.now() }));
        }
        catch { /* meta is best-effort */ }
    };
    try {
        fs.mkdirSync(CRON_LOCK_DIR, { recursive: true });
    }
    catch { /* ignore — the mkdir below surfaces the real failure */ }
    try {
        fs.mkdirSync(lock); // atomic: throws EEXIST if another instance holds it
        writeMeta();
        return true;
    }
    catch (err) {
        // Anything other than EEXIST is an environment problem (permissions,
        // missing parent, full disk), not a competing instance. Report it so a
        // broken lock directory is not mistaken for a legitimately held lock.
        if (err.code !== "EEXIST") {
            console.warn(`Cron: could not create lock ${lock}: ${err.message}`);
        }
    }
    // The lock could not be taken; decide whether the existing one is stale.
    let stale = false;
    let seenMeta = null; // raw meta text the staleness verdict was based on
    try {
        seenMeta = fs.readFileSync(metaPath, "utf-8");
        const meta = JSON.parse(seenMeta);
        // Only a positive integer is a real pid. `process.kill(0, 0)` and
        // negative values address process groups and would make a corrupt
        // meta look alive forever; fractional values make process.kill throw
        // a non-ESRCH error with the same effect.
        if (Number.isInteger(meta.pid) && meta.pid > 0) {
            try {
                process.kill(meta.pid, 0); // same-host liveness probe (no signal sent)
            }
            catch (e) {
                if (e.code === "ESRCH")
                    stale = true; // owner gone
            }
        }
        else {
            stale = true; // no usable pid recorded
        }
    }
    catch {
        // meta missing/corrupt → fall back to lock-dir age
        try {
            stale = Date.now() - fs.statSync(lock).mtimeMs > CRON_LOCK_MAX_AGE_MS;
        }
        catch {
            stale = false; // can't stat → treat as held (skip rather than double-fire)
        }
    }
    if (!stale)
        return false;
    // Move the stale lock aside atomically: of several instances racing to
    // reclaim the same directory, only one rename can succeed.
    const tombstone = `${lock}.stale-${process.pid}-${Date.now()}`;
    try {
        fs.renameSync(lock, tombstone);
    }
    catch {
        return false; // someone else moved or removed it first
    }
    let movedMeta = null;
    try {
        movedMeta = fs.readFileSync(resolve(tombstone, "meta"), "utf-8");
    }
    catch {
        movedMeta = null;
    }
    if (movedMeta !== seenMeta) {
        // The directory we moved is not the one we judged stale: another
        // instance reclaimed the lock in between. Give it back and stand down.
        try {
            fs.renameSync(tombstone, lock);
        }
        catch {
            try {
                fs.rmSync(tombstone, { recursive: true, force: true });
            }
            catch { /* leftover tombstone is harmless */ }
        }
        return false;
    }
    try {
        fs.rmSync(tombstone, { recursive: true, force: true });
    }
    catch { /* leftover tombstone is harmless */ }
    try {
        fs.mkdirSync(lock);
        writeMeta();
        return true;
    }
    catch {
        return false;
    }
}
332
/**
 * Drop the cross-process lock of a job by deleting its lock directory.
 *
 * Removal is best-effort: a lock that is already gone, or any error raised
 * while computing the path or deleting it, is ignored.
 *
 * @param {string} jobId - Identifier of the job whose lock is released.
 * @returns {void}
 */
function releaseJobLock(jobId) {
    const removal = { recursive: true, force: true };
    try {
        const lockDir = cronLockPath(jobId);
        fs.rmSync(lockDir, removal);
    }
    catch {
        /* ignore */
    }
}
259
338
  export function startScheduler() {
260
339
  if (schedulerTimer)
261
340
  return;
@@ -301,6 +380,13 @@ export function startScheduler() {
301
380
  // mid-execution, handleStartupCatchup will notice the attempt
302
381
  // without completion and catch up within the grace window.
303
382
  runningJobs.add(job.id);
383
+ // Cross-process claim: if another bot instance already owns this
384
+ // slot, skip instead of double-firing (the duplicate-report bug).
385
+ if (!acquireJobLock(job.id)) {
386
+ runningJobs.delete(job.id);
387
+ console.log(`Cron: job "${job.name}" (${job.id}) already claimed by another instance — skipping to avoid double-fire`);
388
+ continue;
389
+ }
304
390
  const prepared = prepareForExecution(job, now);
305
391
  Object.assign(job, prepared);
306
392
  saveJobs(jobs);
@@ -328,6 +414,7 @@ export function startScheduler() {
328
414
  }
329
415
  finally {
330
416
  runningJobs.delete(job.id);
417
+ releaseJobLock(job.id);
331
418
  }
332
419
  continue; // Skip the outer changed/save since we save inside
333
420
  }
@@ -422,6 +509,11 @@ export async function runJobNow(nameOrId) {
422
509
  return { status: "already-running", job };
423
510
  }
424
511
  runningJobs.add(job.id);
512
+ // Cross-process: another bot instance may already be running this job.
513
+ if (!acquireJobLock(job.id)) {
514
+ runningJobs.delete(job.id);
515
+ return { status: "already-running", job };
516
+ }
425
517
  try {
426
518
  // executeJob catches its own errors and returns { output, error }.
427
519
  // The inner try/catch here is a defensive belt against future
@@ -460,6 +552,7 @@ export async function runJobNow(nameOrId) {
460
552
  }
461
553
  finally {
462
554
  runningJobs.delete(job.id);
555
+ releaseJobLock(job.id);
463
556
  }
464
557
  }
465
558
  /**
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "alvin-bot",
3
- "version": "5.1.6",
3
+ "version": "5.1.7",
4
4
  "description": "Alvin Bot — Your personal AI agent on Telegram, WhatsApp, Discord, Signal, and Web.",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",