npm - alvin-bot - Versions diffs - 4.8.8 → 4.9.0 - Mend

alvin-bot 4.8.8 → 4.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (31) hide show

package/CHANGELOG.md +72 -0
package/dist/handlers/message.js +5 -2
package/dist/index.js +14 -10
package/dist/paths.js +2 -0
package/dist/platforms/whatsapp-auth-helpers.js +53 -0
package/dist/platforms/whatsapp.js +6 -2
package/dist/services/browser-manager.js +470 -95
package/dist/services/browser-webfetch.js +93 -0
package/dist/services/cron-scheduling.js +142 -0
package/dist/services/cron.js +32 -6
package/dist/services/skills.js +15 -11
package/dist/services/subagent-delivery.js +8 -2
package/dist/services/subagents.js +49 -8
package/dist/services/telegram.js +12 -3
package/dist/services/watchdog-brake.js +113 -0
package/dist/services/watchdog.js +56 -42
package/dist/util/console-formatter.js +109 -0
package/dist/util/debounce.js +24 -0
package/dist/util/telegram-error-filter.js +62 -0
package/dist/web/server.js +56 -0
package/package.json +1 -1
package/skills/browse/SKILL.md +123 -98
package/test/browser-webfetch.test.ts +121 -0
package/test/console-timestamps.test.ts +98 -0
package/test/cron-restart-resilience.test.ts +191 -0
package/test/debounce.test.ts +60 -0
package/test/subagent-final-text.test.ts +132 -0
package/test/telegram-error-filter.test.ts +85 -0
package/test/watchdog-brake.test.ts +157 -0
package/test/web-server-shutdown.test.ts +111 -0
package/test/whatsapp-auth-resilience.test.ts +96 -0

package/test/cron-restart-resilience.test.ts ADDED Viewed

@@ -0,0 +1,191 @@
+/**
+ * Fix #3 — Cron scheduler must survive a bot restart during job execution.
+ *
+ * Background: the old scheduler set `nextRunAt = null` immediately before
+ * `await executeJob(job)` and only re-calculated it after completion. A
+ * crash mid-execution (EADDRINUSE, unhandled rejection, launchd restart)
+ * left `nextRunAt = null`, so the next boot called `calculateNextRun()`
+ * from the current time — which for a cron expression always yields a
+ * FUTURE trigger (e.g. tomorrow 08:00). Today's run was lost forever.
+ *
+ * New contract (pure-function pair):
+ *
+ *   prepareForExecution(job, now)
+ *     - updates lastAttemptAt = now
+ *     - updates nextRunAt = <next regular trigger from `now`>
+ *     - returns the updated job as a NEW object (the input job is not mutated)
+ *
+ *   handleStartupCatchup(jobs, now, graceMs)
+ *     - for every enabled job where `lastAttemptAt > lastRunAt`
+ *       (i.e. the last attempt never completed) AND the attempt is
+ *       within `graceMs`, rewinds `nextRunAt` to `now` so the next
+ *       scheduler tick picks it up immediately
+ *     - for every enabled job where `lastAttemptAt > lastRunAt` but
+ *       the attempt is older than `graceMs`, gives up and recalculates
+ *       `nextRunAt` normally
+ *     - never touches disabled jobs
+ *     - returns a NEW array of jobs (pure, no mutation of input)
+ */
+import { describe, it, expect } from "vitest";
+import {
+  prepareForExecution,
+  handleStartupCatchup,
+} from "../src/services/cron-scheduling.js";
+import type { CronJob } from "../src/services/cron.js";
+/**
+ * Fixture factory: a fully-populated daily ("00 08 * * *") ai-query job
+ * with no run history. Any field passed in `overrides` wins over the
+ * default of the same name.
+ */
+function makeJob(overrides: Partial<CronJob> = {}): CronJob {
+  const defaults: CronJob = {
+    id: "job-1",
+    name: "Daily Job Alert",
+    type: "ai-query",
+    schedule: "00 08 * * *",
+    oneShot: false,
+    payload: { prompt: "x" },
+    target: { platform: "telegram", chatId: "1" },
+    enabled: true,
+    createdAt: 1_700_000_000_000,
+    lastRunAt: null,
+    lastResult: null,
+    lastError: null,
+    nextRunAt: null,
+    runCount: 0,
+    createdBy: "test",
+  };
+  return { ...defaults, ...overrides };
+}
+describe("prepareForExecution (Fix #3)", () => {
+  // 2026-04-11 08:00 Berlin — the instant the daily fixture job triggers.
+  const triggerTime = 1_775_887_200_000;
+  it("sets lastAttemptAt to now", () => {
+    const result = prepareForExecution(makeJob(), triggerTime);
+    expect(result.lastAttemptAt).toBe(triggerTime);
+  });
+  it("advances nextRunAt to the NEXT regular trigger, not null", () => {
+    const daily = makeJob({ schedule: "00 08 * * *" });
+    const { nextRunAt } = prepareForExecution(daily, triggerTime);
+    // The slot has to hold a real, strictly-future timestamp.
+    expect(nextRunAt).not.toBeNull();
+    expect(nextRunAt!).toBeGreaterThan(triggerTime);
+  });
+  it("works with interval schedules — base = now, not lastRunAt", () => {
+    const fiveMinutesMs = 5 * 60_000;
+    const everyFiveMin = makeJob({ schedule: "5m", lastRunAt: 1_000_000_000_000 });
+    const result = prepareForExecution(everyFiveMin, triggerTime);
+    expect(result.nextRunAt).toBe(triggerTime + fiveMinutesMs);
+  });
+  it("does not touch lastRunAt", () => {
+    const previousRun = 123;
+    const result = prepareForExecution(makeJob({ lastRunAt: previousRun }), 9999);
+    expect(result.lastRunAt).toBe(previousRun);
+  });
+  it("is pure — returns a new object, leaves the input alone", () => {
+    const input = makeJob();
+    const serializedBefore = JSON.stringify(input);
+    prepareForExecution(input, 42);
+    // Compare serialized forms so any in-place field change is detected.
+    expect(JSON.stringify(input)).toBe(serializedBefore);
+  });
+});
+describe("handleStartupCatchup (Fix #3)", () => {
+  const GRACE = 6 * 60 * 60 * 1000; // catch-up window: 6 h
+  // Shared timeline for the scenarios below.
+  const ATTEMPT_AT = 1_775_887_200_000;   // 08:00 — scheduled trigger fired
+  const RESTART_AT = 1_775_896_200_000;   // 10:30 — bot is back up
+  const NEXT_DAY_RUN = 1_775_973_600_000; // tomorrow 08:00 (set pre-execution)
+  /** Job whose 08:00 attempt started but never recorded a completion. */
+  const interruptedJob = (extra: Partial<CronJob> = {}): CronJob =>
+    makeJob({
+      lastRunAt: null,
+      lastAttemptAt: ATTEMPT_AT,
+      nextRunAt: NEXT_DAY_RUN,
+      ...extra,
+    });
+  it("rewinds nextRunAt to now when a recent attempt never completed", () => {
+    // 08:00 triggered, 08:05 crashed, 10:30 restarted — inside the grace window.
+    const [caughtUp] = handleStartupCatchup([interruptedJob()], RESTART_AT, GRACE);
+    // Rewound to "now" so the next scheduler tick picks it up.
+    expect(caughtUp.nextRunAt).toBe(RESTART_AT);
+  });
+  it("does not rewind when attempt completed (lastRunAt >= lastAttemptAt)", () => {
+    // Started at 08:00 and completed at 10:30.
+    const finished = interruptedJob({ lastRunAt: 1_775_896_200_000 });
+    const [result] = handleStartupCatchup([finished], 1_775_900_000_000, GRACE);
+    expect(result.nextRunAt).toBe(NEXT_DAY_RUN); // left as it was
+  });
+  it("gives up when the attempt is older than the grace window", () => {
+    // The unfinished attempt lies 7 h + 1 min before `now`, beyond the 6 h grace.
+    const staleAttemptAt = ATTEMPT_AT - 60_000;
+    const now = staleAttemptAt + 7 * 60 * 60 * 1000 + 60_000;
+    const stale = interruptedJob({
+      lastAttemptAt: staleAttemptAt,
+      nextRunAt: now + 86_400_000, // placeholder future value
+    });
+    const [result] = handleStartupCatchup([stale], now, GRACE);
+    // Keeping the old value or recomputing are both fine, as long as the
+    // job is not rewound to `now` and stays strictly in the future.
+    expect(result.nextRunAt).not.toBe(now);
+    expect(result.nextRunAt!).toBeGreaterThan(now);
+  });
+  it("ignores disabled jobs", () => {
+    const disabled = interruptedJob({ enabled: false });
+    const [result] = handleStartupCatchup([disabled], RESTART_AT, GRACE);
+    expect(result).toEqual(disabled); // nothing changed
+  });
+  it("handles jobs without any attempt history (no-op)", () => {
+    const neverAttempted = interruptedJob({ lastAttemptAt: null });
+    const [result] = handleStartupCatchup([neverAttempted], RESTART_AT, GRACE);
+    expect(result).toEqual(neverAttempted);
+  });
+  it("is pure — does not mutate the input array", () => {
+    const jobs = [interruptedJob()];
+    const serializedBefore = JSON.stringify(jobs);
+    handleStartupCatchup(jobs, RESTART_AT, GRACE);
+    expect(JSON.stringify(jobs)).toBe(serializedBefore);
+  });
+  it("processes multiple jobs independently", () => {
+    const needsCatchup = interruptedJob({ id: "a" }); // attempt within grace
+    const alreadyDone = interruptedJob({ id: "b", lastRunAt: 1_775_887_500_000 });
+    const results = handleStartupCatchup([needsCatchup, alreadyDone], RESTART_AT, GRACE);
+    expect(results[0].nextRunAt).toBe(RESTART_AT);   // caught up
+    expect(results[1].nextRunAt).toBe(NEXT_DAY_RUN); // untouched
+  });
+});

package/test/debounce.test.ts ADDED Viewed

@@ -0,0 +1,60 @@
+/**
+ * Fix #7 — fs.watch emits duplicates on macOS; we need a simple debounce.
+ *
+ * Contract: `debounce(fn, waitMs)` returns a wrapped function. Calling
+ * the wrapped function schedules `fn()` to run `waitMs` ms after the
+ * last call. Multiple calls inside the window coalesce into one
+ * invocation. Each "quiet period" starts a fresh cycle.
+ */
+import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
+import { debounce } from "../src/util/debounce.js";
+// Each test runs on vitest's fake timers; real timers are restored afterwards.
+beforeEach(() => {
+  vi.useFakeTimers();
+});
+afterEach(() => {
+  vi.useRealTimers();
+});
+describe("debounce (Fix #7)", () => {
+  it("runs the function once after the wait period", () => {
+    const spy = vi.fn();
+    const debounced = debounce(spy, 200);
+    debounced();
+    expect(spy).not.toHaveBeenCalled();
+    // One millisecond short of the wait: still pending.
+    vi.advanceTimersByTime(199);
+    expect(spy).not.toHaveBeenCalled();
+    vi.advanceTimersByTime(1);
+    expect(spy).toHaveBeenCalledTimes(1);
+  });
+  it("coalesces many rapid calls into one invocation", () => {
+    const spy = vi.fn();
+    const debounced = debounce(spy, 300);
+    for (let i = 0; i < 4; i++) {
+      debounced();
+    }
+    vi.advanceTimersByTime(299);
+    debounced(); // a late call restarts the wait
+    vi.advanceTimersByTime(299);
+    expect(spy).not.toHaveBeenCalled();
+    vi.advanceTimersByTime(1);
+    expect(spy).toHaveBeenCalledTimes(1);
+  });
+  it("allows a second invocation after the wait elapses between calls", () => {
+    const spy = vi.fn();
+    const debounced = debounce(spy, 100);
+    // Two separate call-then-wait cycles must each produce one invocation.
+    for (const expectedCalls of [1, 2]) {
+      debounced();
+      vi.advanceTimersByTime(100);
+      expect(spy).toHaveBeenCalledTimes(expectedCalls);
+    }
+  });
+  it("passes through the latest arguments to the final call", () => {
+    const spy = vi.fn();
+    const debounced = debounce(spy, 50);
+    for (const arg of ["first", "second", "third"]) {
+      debounced(arg);
+    }
+    vi.advanceTimersByTime(50);
+    expect(spy).toHaveBeenCalledTimes(1);
+    expect(spy).toHaveBeenCalledWith("third");
+  });
+});

package/test/subagent-final-text.test.ts ADDED Viewed

@@ -0,0 +1,132 @@
+/**
+ * Fix #5 — runSubAgent must preserve the full final text, even when the
+ * stream ends on a tool_use or is aborted mid-stream.
+ *
+ * Regressions this closes:
+ *
+ *   (a) The SDK yields `text` chunks as accumulated strings, then tool
+ *       calls, then more text, then finally a `done` chunk that ALSO
+ *       carries the final accumulated text. The old runSubAgent read
+ *       `text` from text-chunks only and ignored `done.text`. If the
+ *       assistant's very last action was a tool call with no trailing
+ *       text block, `finalText` kept the pre-tool text and the
+ *       cron-jobs.json `lastResult` ended mid-sentence.
+ *
+ *   (b) When queryWithFallback threw mid-stream (provider aborted,
+ *       network error, etc.), the catch block set `output: ""` —
+ *       throwing away whatever text had already streamed in before the
+ *       failure. Users saw an empty "(empty output)" delivery.
+ *
+ * Contract:
+ *   - Output = last non-empty value observed from (text.text | done.text)
+ *   - On error / abort: output = whatever we'd buffered so far (never "")
+ */
+import { describe, it, expect, beforeEach, vi } from "vitest";
+import fs from "fs";
+import os from "os";
+import { resolve } from "path";
+import type { StreamChunk } from "../src/providers/types.js";
+// Scratch data directory under the OS temp dir; pid + timestamp keep it
+// distinct between runs.
+const TEST_DATA_DIR = resolve(os.tmpdir(), `alvin-bot-finaltext-${process.pid}-${Date.now()}`);
+/**
+ * Per-test isolation: start from an empty data directory, point
+ * ALVIN_DATA_DIR at it, clear any MAX_SUBAGENTS override, and reset the
+ * vitest module registry.
+ */
+beforeEach(() => {
+  // `force: true` already makes rmSync succeed when the path is missing,
+  // so no existsSync pre-check (and no check-then-act race) is needed.
+  fs.rmSync(TEST_DATA_DIR, { recursive: true, force: true });
+  fs.mkdirSync(TEST_DATA_DIR, { recursive: true });
+  process.env.ALVIN_DATA_DIR = TEST_DATA_DIR;
+  delete process.env.MAX_SUBAGENTS;
+  vi.resetModules();
+});
+/**
+ * Registers vitest module mocks for the engine and the sub-agent delivery
+ * service.
+ *
+ * @param chunks Either a fixed list of chunks to replay in order, or a
+ *   function that is used directly as the mocked `queryWithFallback`
+ *   (lets a test throw mid-stream).
+ */
+function mockStream(chunks: StreamChunk[] | (() => AsyncIterable<StreamChunk>)) {
+  let queryWithFallback: () => AsyncIterable<StreamChunk>;
+  if (typeof chunks === "function") {
+    queryWithFallback = chunks;
+  } else {
+    // Capture the narrowed array in a const so the generator closure sees it typed.
+    const scripted = chunks;
+    queryWithFallback = async function* () {
+      for (const chunk of scripted) yield chunk;
+    };
+  }
+  vi.doMock("../src/engine.js", () => {
+    const registry = { queryWithFallback };
+    return { getRegistry: () => registry };
+  });
+  // Delivery is stubbed out entirely: every export is a do-nothing function.
+  vi.doMock("../src/services/subagent-delivery.js", () => {
+    return {
+      deliverSubAgentResult: async () => { /* no-op */ },
+      attachBotApi: () => {},
+      __setBotApiForTest: () => {},
+    };
+  });
+}
+/**
+ * Imports the subagents module dynamically, spawns one sub-agent and
+ * resolves with the fields of the `onComplete` result that the tests
+ * assert on.
+ *
+ * @param prompt Prompt passed to the spawned sub-agent (defaults to "test").
+ * @returns The completion's `output`, `status` and `tokensUsed`.
+ * @throws Rejects with the spawn error if `spawnSubAgent` itself rejects
+ *   before `onComplete` has fired. Previously that error was swallowed and
+ *   the returned promise never settled, so the test hung until the
+ *   runner's timeout.
+ */
+async function runAndGetResult(prompt = "test") {
+  const mod = await import("../src/services/subagents.js");
+  return new Promise<{ output: string; status: string; tokensUsed: { input: number; output: number } }>((resolveResult, rejectResult) => {
+    mod.spawnSubAgent({
+      name: "test-agent",
+      prompt,
+      source: "cron",
+      parentChatId: 1,
+      onComplete: (r) => resolveResult({
+        output: r.output,
+        status: r.status,
+        tokensUsed: r.tokensUsed,
+      }),
+      // If onComplete already resolved the promise, this late reject is a no-op.
+    }).catch(rejectResult);
+  });
+}
+describe("runSubAgent finalText (Fix #5)", () => {
+  it("uses done.text as the authoritative final output", async () => {
+    // The stream ends on a tool call with no trailing text chunk; only
+    // the done chunk carries the final text.
+    const script: StreamChunk[] = [
+      { type: "text", text: "Working on it…" },
+      { type: "tool_use", toolName: "Bash" },
+      { type: "text", text: "Intermediate finding: 5 results." },
+      { type: "tool_use", toolName: "Write" },
+      { type: "done", text: "Job complete. Report at /tmp/out.html", inputTokens: 100, outputTokens: 50 },
+    ];
+    mockStream(script);
+    const result = await runAndGetResult();
+    expect(result.status).toBe("completed");
+    expect(result.output).toBe("Job complete. Report at /tmp/out.html");
+    expect(result.tokensUsed).toEqual({ input: 100, output: 50 });
+  });
+  it("falls back to last text chunk when done has no text", async () => {
+    const script: StreamChunk[] = [
+      { type: "text", text: "First sentence." },
+      { type: "text", text: "Second sentence." },
+      { type: "done", inputTokens: 10, outputTokens: 5 },
+    ];
+    mockStream(script);
+    const result = await runAndGetResult();
+    expect(result.output).toBe("Second sentence.");
+  });
+  it("preserves buffered text when stream errors mid-way", async () => {
+    mockStream(async function* () {
+      yield { type: "text", text: "Partial progress so far…" };
+      yield { type: "tool_use", toolName: "Bash" };
+      throw new Error("network: socket hang up");
+    });
+    const result = await runAndGetResult();
+    // Status is deliberately not asserted here ("error" and "cancelled"
+    // are both legitimate). The regression is an empty output.
+    expect(result.output.length).toBeGreaterThan(0);
+    expect(result.output).toContain("Partial progress");
+  });
+  it("preserves buffered text when the provider yields an error chunk", async () => {
+    const script: StreamChunk[] = [
+      { type: "text", text: "Started the task." },
+      { type: "text", text: "Started the task. More detail here." },
+      { type: "error", error: "Provider 'claude-sdk' failed: Request aborted" },
+    ];
+    mockStream(script);
+    const result = await runAndGetResult();
+    expect(result.output).toContain("More detail");
+  });
+  it("returns empty output gracefully when nothing was buffered", async () => {
+    mockStream(async function* () {
+      throw new Error("immediate failure");
+    });
+    const result = await runAndGetResult();
+    // Nothing streamed, so an empty output is fine — but the status
+    // still has to report the failure.
+    const failureStatuses = ["error", "cancelled", "timeout"];
+    expect(result.output).toBe("");
+    expect(failureStatuses).toContain(result.status);
+  });
+});

package/test/telegram-error-filter.test.ts ADDED Viewed

@@ -0,0 +1,85 @@
+/**
+ * Fix #12 — grammy error noise filter.
+ *
+ * Regression: chunks like
+ *   Fehler: Call to 'editMessageText' failed! (400: Bad Request:
+ *   message is not modified: specified new message content and reply
+ *   markup are exactly the same as a current content and reply markup
+ *   of the message)
+ * were being sent to end users 2-3 times per day whenever a live-stream
+ * edit raced against itself. The v4.8.8 `bot.catch()` fix swallowed
+ * these at the middleware layer, but `telegram.ts` finalize() and
+ * `handlers/message.ts` error paths bypass bot.catch completely —
+ * they surface the raw grammy error via `ctx.reply()`.
+ *
+ * Contract: `isHarmlessTelegramError(err)` returns true for:
+ *   - "message is not modified" (any language, any prefix)
+ *   - "Call to 'editMessageText' failed" combined with the above
+ *   - "query is too old" (harmless callback-answer race)
+ *   - "message to edit not found" / "MESSAGE_ID_INVALID" (user deleted the
+ *     message before we edited it)
+ *
+ * Returns false for all other errors — they still need surfacing.
+ */
+import { describe, it, expect } from "vitest";
+import { isHarmlessTelegramError } from "../src/util/telegram-error-filter.js";
+describe("isHarmlessTelegramError (Fix #12)", () => {
+  it("matches the exact production message", () => {
+    const productionMessage = [
+      "Call to 'editMessageText' failed! (400: Bad Request: message is not modified: ",
+      "specified new message content and reply markup are exactly the same as a current ",
+      "content and reply markup of the message)",
+    ].join("");
+    expect(isHarmlessTelegramError(new Error(productionMessage))).toBe(true);
+  });
+  it("matches just the 'message is not modified' substring", () => {
+    const err = new Error("400: message is not modified");
+    expect(isHarmlessTelegramError(err)).toBe(true);
+  });
+  it("matches 'specified new message content ... exactly the same'", () => {
+    const err = new Error("specified new message content and reply markup are exactly the same");
+    expect(isHarmlessTelegramError(err)).toBe(true);
+  });
+  it("matches 'query is too old' (answerCallbackQuery race)", () => {
+    const err = new Error("Bad Request: query is too old and response timeout expired");
+    expect(isHarmlessTelegramError(err)).toBe(true);
+  });
+  it("matches 'message to edit not found' (user deleted)", () => {
+    const err = new Error("Bad Request: message to edit not found");
+    expect(isHarmlessTelegramError(err)).toBe(true);
+  });
+  it("matches MESSAGE_ID_INVALID", () => {
+    const err = new Error("Bad Request: MESSAGE_ID_INVALID");
+    expect(isHarmlessTelegramError(err)).toBe(true);
+  });
+  it("accepts plain strings as well as Error objects", () => {
+    expect(isHarmlessTelegramError("message is not modified")).toBe(true);
+  });
+  it("accepts undefined / null as not harmless (caller decides)", () => {
+    for (const nothing of [undefined, null]) {
+      expect(isHarmlessTelegramError(nothing)).toBe(false);
+    }
+  });
+  it("does NOT swallow real errors", () => {
+    // Genuine failures must still be surfaced.
+    const realFailures = [
+      "Unauthorized",
+      "Too Many Requests: retry after 5",
+      "chat not found",
+      "stream error: provider timeout",
+    ];
+    for (const message of realFailures) {
+      expect(isHarmlessTelegramError(new Error(message))).toBe(false);
+    }
+  });
+  it("handles nested err.description from grammy", () => {
+    // The Error message is irrelevant here; only `description` carries the harmless text.
+    const err = Object.assign(new Error("anything"), {
+      description: "Bad Request: message is not modified",
+    });
+    expect(isHarmlessTelegramError(err)).toBe(true);
+  });
+});

package/test/watchdog-brake.test.ts ADDED Viewed

@@ -0,0 +1,157 @@
+/**
+ * Fix #4 — Watchdog brake must actually engage on chronic crashes.
+ *
+ * Regression: the previous logic reset crashCount after 5 min of clean
+ * uptime. Production logs showed the bot crashing ~5 times per hour, but
+ * each boot lived just long enough (>5 min, <10 min) to reset the counter.
+ * Result: `crashCount` never reached the brake threshold, the bot cycled
+ * for hours, and the daily job-alert silently lost its scheduled runs.
+ *
+ * New contract (pure function pair extracted to watchdog-brake.ts):
+ *
+ *   decideBrakeAction(prevBeacon, now, opts)
+ *     - returns `{ action: "proceed", crashCount, crashWindowStart }`
+ *       on clean start or old previous beacon
+ *     - returns `{ action: "proceed", crashCount: N }` when the last run
+ *       exited recently but we're still under the brake threshold
+ *     - returns `{ action: "brake", reason }` when either
+ *         (a) N+1 crashes in a short window, or
+ *         (b) the daily crash cap (default 20) is reached
+ *
+ *   shouldResetCrashCounter(uptimeMs, opts) → boolean
+ *     - default policy: only reset after 1 h of clean uptime (NOT 5 min)
+ */
+import { describe, it, expect } from "vitest";
+import {
+  decideBrakeAction,
+  shouldResetCrashCounter,
+  DEFAULTS,
+  type BeaconData,
+} from "../src/services/watchdog-brake.js";
+// Millisecond units used to express beacon ages and uptimes below.
+const ONE_MIN = 60_000;
+const ONE_HOUR = 60 * ONE_MIN;
+/**
+ * Fixture factory: a BeaconData with every counter and timestamp zeroed,
+ * pid 1 and version "test". Fields given in `partial` replace the defaults.
+ */
+function beacon(partial: Partial<BeaconData> = {}): BeaconData {
+  const zeroed: BeaconData = {
+    lastBeat: 0,
+    pid: 1,
+    bootTime: 0,
+    crashCount: 0,
+    crashWindowStart: 0,
+    dailyCrashCount: 0,
+    dailyCrashWindowStart: 0,
+    version: "test",
+  };
+  return { ...zeroed, ...partial };
+}
+describe("decideBrakeAction (Fix #4)", () => {
+  // "Current time" shared by every scenario except the first-boot case.
+  const NOW = 1_000_000_000;
+  it("proceeds on first boot (no previous beacon)", () => {
+    const bootTime = 1_000_000;
+    const decision = decideBrakeAction(null, bootTime);
+    expect(decision.action).toBe("proceed");
+    // Guard only narrows the union type; the expect above already enforces it.
+    if (decision.action !== "proceed") return;
+    expect(decision.crashCount).toBe(0);
+    expect(decision.crashWindowStart).toBe(bootTime);
+    expect(decision.dailyCrashCount).toBe(0);
+  });
+  it("proceeds when previous beacon is old (>STALE_MS) — clean exit", () => {
+    const staleBeacon = beacon({ lastBeat: NOW - 10 * ONE_MIN, crashCount: 3 });
+    const decision = decideBrakeAction(staleBeacon, NOW);
+    expect(decision.action).toBe("proceed");
+    if (decision.action !== "proceed") return;
+    // A stale beacon counts as a clean exit: the short-window counter restarts.
+    expect(decision.crashCount).toBe(0);
+  });
+  it("counts a restart after a fresh beacon as a crash", () => {
+    const freshBeacon = beacon({
+      lastBeat: NOW - 15_000, // heartbeat 15 s ago
+      crashCount: 2,
+      crashWindowStart: NOW - 5 * ONE_MIN,
+      dailyCrashCount: 2,
+      dailyCrashWindowStart: NOW - 2 * ONE_HOUR,
+    });
+    const decision = decideBrakeAction(freshBeacon, NOW);
+    expect(decision.action).toBe("proceed");
+    if (decision.action !== "proceed") return;
+    expect(decision.crashCount).toBe(3);
+    expect(decision.dailyCrashCount).toBe(3);
+  });
+  it("engages brake when short-window threshold is crossed", () => {
+    const oneShortOfBrake = beacon({
+      lastBeat: NOW - 10_000,
+      crashCount: DEFAULTS.SHORT_BRAKE_THRESHOLD - 1, // the next crash trips it
+      crashWindowStart: NOW - 2 * ONE_MIN,
+      dailyCrashCount: 5,
+      dailyCrashWindowStart: NOW - ONE_HOUR,
+    });
+    const decision = decideBrakeAction(oneShortOfBrake, NOW);
+    expect(decision.action).toBe("brake");
+    if (decision.action !== "brake") return;
+    expect(decision.reason).toMatch(/short.*window|threshold|crashes/i);
+  });
+  it("engages brake when daily cap is exceeded", () => {
+    const nearDailyCap = beacon({
+      lastBeat: NOW - 10_000,
+      crashCount: 1, // short window is not the trigger here
+      crashWindowStart: NOW - 30 * ONE_MIN,
+      dailyCrashCount: DEFAULTS.DAILY_BRAKE_THRESHOLD - 1,
+      dailyCrashWindowStart: NOW - 12 * ONE_HOUR,
+    });
+    const decision = decideBrakeAction(nearDailyCap, NOW);
+    expect(decision.action).toBe("brake");
+    if (decision.action !== "brake") return;
+    expect(decision.reason).toMatch(/daily|day/i);
+  });
+  it("rolls over daily counter when 24h window expires", () => {
+    const expiredDailyWindow = beacon({
+      lastBeat: NOW - 10_000,
+      crashCount: 1,
+      crashWindowStart: NOW - 30 * ONE_MIN,
+      dailyCrashCount: 18,                        // high count...
+      dailyCrashWindowStart: NOW - 25 * ONE_HOUR, // ...in a window older than 24 h
+    });
+    const decision = decideBrakeAction(expiredDailyWindow, NOW);
+    expect(decision.action).toBe("proceed");
+    if (decision.action !== "proceed") return;
+    expect(decision.dailyCrashCount).toBe(1); // counting restarts in a fresh window
+    expect(decision.dailyCrashWindowStart).toBe(NOW);
+  });
+});
+describe("shouldResetCrashCounter (Fix #4)", () => {
+  it("does NOT reset after 5 min of uptime (old buggy behaviour)", () => {
+    const uptime = 5 * ONE_MIN;
+    expect(shouldResetCrashCounter(uptime)).toBe(false);
+  });
+  it("does NOT reset after 30 min of uptime", () => {
+    const uptime = 30 * ONE_MIN;
+    expect(shouldResetCrashCounter(uptime)).toBe(false);
+  });
+  it("resets after 1 h of clean uptime", () => {
+    // Exactly one hour and just past it both qualify.
+    for (const uptime of [ONE_HOUR, ONE_HOUR + 1]) {
+      expect(shouldResetCrashCounter(uptime)).toBe(true);
+    }
+  });
+  it("can be overridden via opts.resetAfterMs", () => {
+    const customLimit = 10 * ONE_MIN;
+    expect(shouldResetCrashCounter(customLimit, { resetAfterMs: customLimit })).toBe(true);
+    expect(shouldResetCrashCounter(customLimit - 1, { resetAfterMs: customLimit })).toBe(false);
+  });
+});