npm - alvin-bot - Versions diffs - 4.8.9 → 4.9.1 - Mend

alvin-bot 4.8.9 → 4.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32)

package/CHANGELOG.md +61 -0
package/dist/handlers/commands.js +18 -7
package/dist/handlers/message.js +5 -2
package/dist/index.js +14 -10
package/dist/platforms/whatsapp-auth-helpers.js +53 -0
package/dist/platforms/whatsapp.js +6 -2
package/dist/services/browser-manager.js +82 -10
package/dist/services/browser-webfetch.js +93 -0
package/dist/services/cron-resolver.js +58 -0
package/dist/services/cron-scheduling.js +142 -0
package/dist/services/cron.js +70 -10
package/dist/services/skills.js +15 -11
package/dist/services/subagent-delivery.js +8 -2
package/dist/services/subagents.js +49 -8
package/dist/services/telegram.js +12 -3
package/dist/services/watchdog-brake.js +113 -0
package/dist/services/watchdog.js +56 -42
package/dist/util/console-formatter.js +109 -0
package/dist/util/debounce.js +24 -0
package/dist/util/telegram-error-filter.js +62 -0
package/dist/web/server.js +56 -0
package/package.json +1 -1
package/test/browser-webfetch.test.ts +121 -0
package/test/console-timestamps.test.ts +98 -0
package/test/cron-restart-resilience.test.ts +191 -0
package/test/cron-run-resolver.test.ts +133 -0
package/test/debounce.test.ts +60 -0
package/test/subagent-final-text.test.ts +132 -0
package/test/telegram-error-filter.test.ts +85 -0
package/test/watchdog-brake.test.ts +157 -0
package/test/web-server-shutdown.test.ts +111 -0
package/test/whatsapp-auth-resilience.test.ts +96 -0

package/dist/services/cron-scheduling.js ADDED Viewed

@@ -0,0 +1,142 @@
+/**
+ * Pure scheduling helpers for the cron service.
+ *
+ * Extracted from cron.ts so the startup-catchup and pre-execution state
+ * updates can be unit-tested without booting the full scheduler loop.
+ * This module is side-effect-free: it does not touch the filesystem, the
+ * clock, or the sub-agent registry. Give it jobs + a `now` value and it
+ * returns what the next state should look like.
+ *
+ * Background — see test/cron-restart-resilience.test.ts for the exact
+ * contract and the regression it closes.
+ */
+// ── Pure parsers ────────────────────────────────────────────
+//
+// These mirror parseInterval / nextCronRun from cron.ts. We duplicate them
+// intentionally instead of importing — cron.ts is the scheduler-with-side-
+// effects, and importing it from a "pure" helper would reintroduce the
+// circular dependency we just broke. The duplication is small and well
+// covered by tests; keep the two in sync when editing.
+function parseInterval(input) {
+    const match = input.match(/^(\d+(?:\.\d+)?)\s*(s|sec|m|min|h|hr|d|day)s?$/i);
+    if (!match)
+        return null;
+    const value = parseFloat(match[1]);
+    const unit = match[2].toLowerCase();
+    const mult = {
+        s: 1000, sec: 1000, m: 60_000, min: 60_000,
+        h: 3_600_000, hr: 3_600_000, d: 86_400_000, day: 86_400_000,
+    };
+    return value * (mult[unit] || 60_000);
+}
+function nextCronRun(expression, after) {
+    const parts = expression.trim().split(/\s+/);
+    if (parts.length !== 5)
+        return null;
+    const [minExpr, hourExpr, dayExpr, monthExpr, weekdayExpr] = parts;
+    function parseField(expr, min, max) {
+        if (expr === "*")
+            return Array.from({ length: max - min + 1 }, (_, i) => i + min);
+        if (expr.includes("/")) {
+            const [, step] = expr.split("/");
+            const s = parseInt(step);
+            return Array.from({ length: max - min + 1 }, (_, i) => i + min).filter((v) => v % s === 0);
+        }
+        if (expr.includes(","))
+            return expr.split(",").map(Number);
+        if (expr.includes("-")) {
+            const [a, b] = expr.split("-").map(Number);
+            return Array.from({ length: b - a + 1 }, (_, i) => i + a);
+        }
+        return [parseInt(expr)];
+    }
+    const minutes = parseField(minExpr, 0, 59);
+    const hours = parseField(hourExpr, 0, 23);
+    const days = parseField(dayExpr, 1, 31);
+    const months = parseField(monthExpr, 1, 12);
+    const weekdays = parseField(weekdayExpr, 0, 6);
+    const candidate = new Date(after);
+    candidate.setSeconds(0, 0);
+    candidate.setMinutes(candidate.getMinutes() + 1);
+    for (let i = 0; i < 366 * 24 * 60; i++) {
+        const m = candidate.getMinutes();
+        const h = candidate.getHours();
+        const d = candidate.getDate();
+        const mo = candidate.getMonth() + 1;
+        const wd = candidate.getDay();
+        if (minutes.includes(m) &&
+            hours.includes(h) &&
+            days.includes(d) &&
+            months.includes(mo) &&
+            weekdays.includes(wd)) {
+            return candidate;
+        }
+        candidate.setMinutes(candidate.getMinutes() + 1);
+    }
+    return null;
+}
+/** Compute the next run relative to an explicit base timestamp.
+ *  Used by prepareForExecution to make the interval calculation stable
+ *  even when `lastRunAt` is stale or null. */
+export function calculateNextRunFrom(job, base) {
+    if (!job.enabled)
+        return null;
+    const intervalMs = parseInterval(job.schedule);
+    if (intervalMs)
+        return base + intervalMs;
+    const next = nextCronRun(job.schedule, new Date(base));
+    return next ? next.getTime() : null;
+}
+// ── Pre-execution state update ─────────────────────────────
+/**
+ * Mark a job as "being attempted" and advance `nextRunAt` to the next
+ * regular trigger, pure-functionally. Returns a NEW job object.
+ *
+ * Why not set `nextRunAt = null`: if the bot crashes between this call
+ * and the post-execution save, we still know when the next regular run
+ * is — the scheduler simply won't re-trigger. The `lastAttemptAt >
+ * lastRunAt` asymmetry is then the signal for handleStartupCatchup to
+ * nachholen the current attempt on the next boot.
+ */
+export function prepareForExecution(job, now) {
+    return {
+        ...job,
+        lastAttemptAt: now,
+        nextRunAt: calculateNextRunFrom(job, now),
+    };
+}
+// ── Startup catch-up ───────────────────────────────────────
+/** Default grace window for catching up an interrupted attempt on boot. */
+export const DEFAULT_CATCHUP_GRACE_MS = 6 * 60 * 60 * 1000; // 6 h
+/**
+ * Rewind `nextRunAt` to `now` for every enabled job whose most recent
+ * attempt never completed AND is still inside the grace window. This
+ * makes the very next scheduler tick pick the job up again, without
+ * double-firing jobs that actually finished.
+ *
+ * Jobs whose crashed attempt is older than the grace window are NOT
+ * caught up — the assumption is that such a run is too stale to be
+ * meaningful (a "daily" run from yesterday isn't what the user wants
+ * at 2pm today). Those jobs keep their scheduled future nextRunAt.
+ *
+ * PURE: returns a fresh array, never mutates the input.
+ */
+export function handleStartupCatchup(jobs, now, graceMs = DEFAULT_CATCHUP_GRACE_MS) {
+    return jobs.map((job) => {
+        if (!job.enabled)
+            return job;
+        if (!job.lastAttemptAt)
+            return job;
+        const completed = typeof job.lastRunAt === "number" &&
+            job.lastRunAt >= job.lastAttemptAt;
+        if (completed)
+            return job;
+        const ageMs = now - job.lastAttemptAt;
+        if (ageMs <= 0)
+            return job; // clock weirdness — skip
+        if (ageMs > graceMs)
+            return job; // outside grace — give up
+        // Within grace, never completed → catch up on next tick.
+        return { ...job, nextRunAt: now };
+    });
+}

package/dist/services/cron.js CHANGED Viewed

@@ -13,6 +13,8 @@ import fs from "fs";
 import { execSync } from "child_process";
 import { dirname } from "path";
 import { CRON_FILE, BOT_ROOT } from "../paths.js";
+import { prepareForExecution, handleStartupCatchup, calculateNextRunFrom, } from "./cron-scheduling.js";
+import { resolveJobByNameOrId } from "./cron-resolver.js";
 // ── Storage ─────────────────────────────────────────────
 function loadJobs() {
     try {
@@ -240,6 +242,25 @@ const runningJobs = new Set(); // Guard against overlapping executions
 export function startScheduler() {
     if (schedulerTimer)
         return;
+    // Startup catch-up — re-run jobs whose last attempt crashed within
+    // the grace window. Must run BEFORE the first scheduler tick so the
+    // catch-up nextRunAt rewind is visible on the very next pass.
+    try {
+        const bootJobs = loadJobs();
+        const caught = handleStartupCatchup(bootJobs, Date.now());
+        // Only persist if something actually changed to avoid needless writes
+        const mutated = caught.some((j, i) => j.nextRunAt !== bootJobs[i].nextRunAt);
+        if (mutated) {
+            saveJobs(caught);
+            const names = caught
+                .filter((j, i) => j.nextRunAt !== bootJobs[i].nextRunAt)
+                .map((j) => j.name);
+            console.log(`⏰ Cron startup catch-up: rewound ${names.length} job(s): ${names.join(", ")}`);
+        }
+    }
+    catch (err) {
+        console.error("⏰ Cron startup catch-up failed:", err);
+    }
     // Check every 30 seconds for due jobs
     schedulerTimer = setInterval(async () => {
         const jobs = loadJobs();
@@ -248,7 +269,7 @@ export function startScheduler() {
         for (const job of jobs) {
             if (!job.enabled)
                 continue;
-            // Skip if this job is already running
+            // Skip if this job is already running in THIS bot instance
             if (runningJobs.has(job.id))
                 continue;
             // Calculate next run if not set
@@ -258,9 +279,13 @@ export function startScheduler() {
             }
             if (job.nextRunAt && now >= job.nextRunAt) {
                 console.log(`Cron: Running job "${job.name}" (${job.id})`);
-                // Mark as running + clear nextRunAt BEFORE async execution to prevent re-trigger
+                // Pre-execution state update: advance nextRunAt to the NEXT regular
+                // trigger (NOT null) and stamp lastAttemptAt. If the bot crashes
+                // mid-execution, handleStartupCatchup will notice the attempt
+                // without completion and re-run it within the grace window.
                 runningJobs.add(job.id);
-                job.nextRunAt = null;
+                const prepared = prepareForExecution(job, now);
+                Object.assign(job, prepared);
                 saveJobs(jobs);
                 try {
                     const result = await executeJob(job);
@@ -268,8 +293,8 @@ export function startScheduler() {
                     const freshJobs = loadJobs();
                     const freshJob = freshJobs.find(j => j.id === job.id);
                     if (freshJob) {
-                        freshJob.lastRunAt = now;
-                        freshJob.lastResult = result.output.slice(0, 500);
+                        freshJob.lastRunAt = Date.now();
+                        freshJob.lastResult = result.output.slice(0, 4000);
                         freshJob.lastError = result.error || null;
                         freshJob.runCount++;
                         if (freshJob.oneShot) {
@@ -277,7 +302,9 @@ export function startScheduler() {
                             freshJob.nextRunAt = null;
                         }
                         else {
-                            freshJob.nextRunAt = calculateNextRun(freshJob);
+                            // nextRunAt already set pre-execution, but recalculate in case
+                            // the schedule or enabled state changed during execution.
+                            freshJob.nextRunAt = calculateNextRunFrom(freshJob, Date.now());
                         }
                         saveJobs(freshJobs);
                     }
@@ -365,11 +392,44 @@ export function toggleJob(id) {
     saveJobs(jobs);
     return job;
 }
-export function runJobNow(id) {
-    const job = getJob(id);
+/**
+ * Manual /cron run — resolves `nameOrId` against the job list, then
+ * executes the job while honouring the in-memory `runningJobs` guard
+ * so a simultaneous scheduler-trigger can't overlap.
+ */
+export async function runJobNow(nameOrId) {
+    const job = resolveJobByNameOrId(loadJobs(), nameOrId);
     if (!job)
-        return null;
-    return executeJob(job);
+        return { status: "not-found" };
+    if (runningJobs.has(job.id)) {
+        return { status: "already-running", job };
+    }
+    runningJobs.add(job.id);
+    try {
+        const result = await executeJob(job);
+        // Persist the manual run the same way the scheduler does so the
+        // timeline stays honest: lastAttemptAt + lastRunAt + runCount bump.
+        try {
+            const freshJobs = loadJobs();
+            const freshJob = freshJobs.find((j) => j.id === job.id);
+            if (freshJob) {
+                const now = Date.now();
+                freshJob.lastAttemptAt = now;
+                freshJob.lastRunAt = now;
+                freshJob.lastResult = result.output.slice(0, 4000);
+                freshJob.lastError = result.error || null;
+                freshJob.runCount++;
+                saveJobs(freshJobs);
+            }
+        }
+        catch (err) {
+            console.error("[cron] failed to persist manual run state:", err);
+        }
+        return { status: "ran", job, output: result.output, error: result.error };
+    }
+    finally {
+        runningJobs.delete(job.id);
+    }
 }
 /**
  * Convert a cron expression or interval string to a human-readable German description.

package/dist/services/skills.js CHANGED Viewed

@@ -21,6 +21,7 @@ import { resolve } from "path";
 import { SKILLS_DIR } from "../paths.js";
 import { USER_SKILLS_DIR } from "../paths.js";
 import { loadAssetIndex } from "./asset-index.js";
+import { debounce } from "../util/debounce.js";
 // ── Skill Registry ──────────────────────────────────────
 let cachedSkills = [];
 let lastScanAt = 0;
@@ -143,23 +144,26 @@ function reloadAllSkills() {
  */
 export function loadSkills() {
     reloadAllSkills();
-    // Hot-reload watchers
+    // Hot-reload watchers — macOS FSEvents delivers many duplicate events
+    // for a single logical change, so we coalesce bursts into one reload.
+    const bundledReload = debounce(() => {
+        console.log("Skills changed (bundled) \u2014 reloading");
+        reloadAllSkills();
+    }, 300);
+    const userReload = debounce(() => {
+        console.log("Skills changed (user) \u2014 reloading");
+        reloadAllSkills();
+    }, 300);
     try {
-        watch(SKILLS_DIR, { recursive: true }, () => {
-            console.log("Skills changed (bundled) \u2014 reloading");
-            reloadAllSkills();
-        });
+        watch(SKILLS_DIR, { recursive: true }, () => bundledReload());
     }
-    catch { }
+    catch { /* ignore — watcher failures fall back to manual reload */ }
     try {
         if (existsSync(USER_SKILLS_DIR)) {
-            watch(USER_SKILLS_DIR, { recursive: true }, () => {
-                console.log("Skills changed (user) \u2014 reloading");
-                reloadAllSkills();
-            });
+            watch(USER_SKILLS_DIR, { recursive: true }, () => userReload());
         }
     }
-    catch { }
+    catch { /* ignore */ }
     return cachedSkills;
 }
 /**

package/dist/services/subagent-delivery.js CHANGED Viewed

@@ -47,11 +47,17 @@ function statusIcon(status) {
     }
 }
 function buildBanner(info, result) {
-    const icon = statusIcon(result.status);
+    // A "completed" run that produced zero output is almost always a
+    // silent failure — a truncated stream, a tool-only final turn, a
+    // provider that swallowed its response. Call that out explicitly so
+    // the user sees a clear signal instead of a green tick on nothing.
+    const truncated = result.status === "completed" && (!result.output || result.output.trim().length === 0);
+    const icon = truncated ? "⚠️" : statusIcon(result.status);
+    const statusLabel = truncated ? "completed · empty output" : result.status;
     const dur = formatDuration(result.duration);
     const ti = formatTokens(result.tokensUsed.input);
     const to = formatTokens(result.tokensUsed.output);
-    return `${icon} *${info.name}* ${result.status} · ${dur} · ${ti} in / ${to} out`;
+    return `${icon} *${info.name}* ${statusLabel} · ${dur} · ${ti} in / ${to} out`;
 }
 // ── A4 Live-Stream ──────────────────────────────────────────
 /**

package/dist/services/subagents.js CHANGED Viewed

@@ -231,6 +231,13 @@ async function runSubAgent(id, agentConfig, abort, resolvedName) {
             console.error(`[subagent ${id}] live-stream init failed:`, err);
         }
     }
+    // These live OUTSIDE the try block so the catch handler can read
+    // whatever was buffered before the stream failed. Moving them into
+    // the try scope was the cause of the "output: ''" regression.
+    let finalText = "";
+    let inputTokens = 0;
+    let outputTokens = 0;
+    let streamError = null;
     try {
         const { getRegistry } = await import("../engine.js");
         const registry = getRegistry();
@@ -243,9 +250,6 @@ async function runSubAgent(id, agentConfig, abort, resolvedName) {
             ? agentConfig.workingDir || os.homedir()
             : os.homedir();
         const systemPrompt = `You are a sub-agent named "${resolvedName}". Complete the following task autonomously and report your results clearly when done. Working directory: ${effectiveCwd}`;
-        let finalText = "";
-        let inputTokens = 0;
-        let outputTokens = 0;
         for await (const chunk of registry.queryWithFallback({
             prompt: agentConfig.prompt,
             systemPrompt,
@@ -254,16 +258,33 @@ async function runSubAgent(id, agentConfig, abort, resolvedName) {
             abortSignal: abort.signal,
         })) {
             if (chunk.type === "text") {
-                finalText = chunk.text || "";
-                // A4: push text updates into the throttled live-stream
+                // Both SDK providers emit `text` as the accumulated string.
+                // Keep the last non-empty one we've seen so a final tool-only
+                // turn doesn't wipe our buffer.
+                if (chunk.text && chunk.text.length > 0) {
+                    finalText = chunk.text;
+                }
                 if (liveStream && !liveStream.failed) {
                     liveStream.update(finalText);
                 }
             }
             if (chunk.type === "done") {
+                // done.text is the authoritative final accumulated text from
+                // the provider. Prefer it over the buffered value so runs that
+                // end on a tool_use don't leave us with a pre-tool snippet.
+                if (chunk.text && chunk.text.length > 0) {
+                    finalText = chunk.text;
+                }
                 inputTokens = chunk.inputTokens || 0;
                 outputTokens = chunk.outputTokens || 0;
             }
+            if (chunk.type === "error") {
+                // Providers surface mid-stream errors as an `error` chunk
+                // instead of throwing. Capture the reason so the post-loop
+                // status resolution below can distinguish this from a clean
+                // finish, and keep whatever text we already buffered.
+                streamError = chunk.error || "stream error";
+            }
         }
         // If cancelAllSubAgents has already taken over (shutdown path), don't
         // overwrite the cancelled result it synthesised. Also: if the generator
@@ -285,6 +306,21 @@ async function runSubAgent(id, agentConfig, abort, resolvedName) {
             };
             entry.info.status = "cancelled";
         }
+        else if (streamError) {
+            // Provider emitted an error chunk but the generator ended cleanly —
+            // record it as an error, but preserve the text buffered before the
+            // failure so the caller sees useful partial output instead of "".
+            entry.result = {
+                id,
+                name: resolvedName,
+                status: "error",
+                output: finalText,
+                tokensUsed: { input: inputTokens, output: outputTokens },
+                duration: Date.now() - startTime,
+                error: streamError,
+            };
+            entry.info.status = "error";
+        }
         else {
             entry.result = {
                 id,
@@ -312,6 +348,9 @@ async function runSubAgent(id, agentConfig, abort, resolvedName) {
         }
     }
     catch (err) {
+        // If cancelAllSubAgents already set a cancelled result, keep it.
+        if (entry.result && entry.result.status === "cancelled")
+            return;
         const isAbort = err instanceof Error && err.message.includes("abort");
         const isTimeout = abort.signal.aborted;
         const status = isTimeout
@@ -322,11 +361,13 @@ async function runSubAgent(id, agentConfig, abort, resolvedName) {
         entry.result = {
             id,
             name: resolvedName,
-            status,
-            output: "",
-            tokensUsed: { input: 0, output: 0 },
+            // Preserve whatever text was buffered before the failure.
+            // Empty output here used to throw away multi-minute runs.
+            output: finalText,
+            tokensUsed: { input: inputTokens, output: outputTokens },
             duration: Date.now() - startTime,
             error: err instanceof Error ? err.message : String(err),
+            status,
         };
         entry.info.status = status;
     }

package/dist/services/telegram.js CHANGED Viewed

@@ -1,5 +1,6 @@
 import { config } from "../config.js";
 import { sanitizeTelegramMarkdown } from "./markdown.js";
+import { isHarmlessTelegramError } from "../util/telegram-error-filter.js";
 export class TelegramStreamer {
     messageId = null;
     chatId;
@@ -94,9 +95,17 @@ export class TelegramStreamer {
         // If text fits in one message, just update the existing one
         if (safeText.length <= config.telegramMaxLength && this.messageId) {
             if (safeText !== this.lastSentText) {
-                await this.api.editMessageText(this.chatId, this.messageId, safeText, {
-                    parse_mode: "Markdown",
-                }).catch(() => this.api.editMessageText(this.chatId, this.messageId, safeText));
+                try {
+                    await this.api.editMessageText(this.chatId, this.messageId, safeText, {
+                        parse_mode: "Markdown",
+                    }).catch(() => this.api.editMessageText(this.chatId, this.messageId, safeText));
+                }
+                catch (err) {
+                    // Drop "message is not modified" / "message to edit not found"
+                    // races silently — they're harmless and always race-based.
+                    if (!isHarmlessTelegramError(err))
+                        throw err;
+                }
             }
             return;
         }

package/dist/services/watchdog-brake.js ADDED Viewed

@@ -0,0 +1,113 @@
+/**
+ * Pure crash-loop brake logic, extracted from watchdog.ts so it can be
+ * unit-tested without touching the filesystem or launchctl.
+ *
+ * See test/watchdog-brake.test.ts for the regression this closes:
+ * chronic crashes with >5 min of uptime between them used to reset
+ * the counter before it could trip the brake, so the bot cycled
+ * indefinitely. The new policy enforces TWO thresholds — a fast
+ * short-window brake and a hard 24h daily cap — and only resets the
+ * counter after a real 1 h of clean uptime.
+ */
+/**
+ * Default thresholds for the crash-loop brake. Durations are in
+ * milliseconds; thresholds are crash counts. decideBrakeAction and
+ * shouldResetCrashCounter fall back to these values whenever the matching
+ * option is not supplied.
+ */
+export const DEFAULTS = {
+    /** Beacon older than this → previous process exited cleanly (or the
+     *  machine was rebooted); do not count as a crash. */
+    STALE_BEACON_MS: 90_000,
+    /** Short-window crash tracking — N crashes in SHORT_WINDOW_MS. */
+    SHORT_WINDOW_MS: 10 * 60_000,
+    SHORT_BRAKE_THRESHOLD: 10,
+    /** Daily crash cap — hard ceiling regardless of gaps. Tripping this
+     *  means the bot has crashed at least 20 times within one 24 h window,
+     *  which is almost certainly a chronic issue worth freezing and
+     *  alerting. */
+    DAILY_WINDOW_MS: 24 * 60 * 60 * 1000,
+    DAILY_BRAKE_THRESHOLD: 20,
+    /** Uptime required before the short-window counter resets. Was 5 min
+     *  in the buggy version — but 5 min is shorter than the typical
+     *  sub-agent lifetime (the daily job-alert takes 10+ min), so chronic
+     *  crashes with ≥5 min gaps sailed right past the brake. 1 h is safer. */
+    RESET_AFTER_MS: 60 * 60_000,
+};
+/**
+ * Given the previous beacon (or null on first boot) and the current time,
+ * decide whether the bot should proceed with boot or engage the crash-loop
+ * brake.
+ *
+ * PURE: no fs, no launchctl, no clock — `now` is an explicit parameter.
+ */
+export function decideBrakeAction(previous, now, opts = {}) {
+    const staleMs = opts.staleBeaconMs ?? DEFAULTS.STALE_BEACON_MS;
+    const shortWindow = opts.shortWindowMs ?? DEFAULTS.SHORT_WINDOW_MS;
+    const shortBrake = opts.shortBrakeThreshold ?? DEFAULTS.SHORT_BRAKE_THRESHOLD;
+    const dailyWindow = opts.dailyWindowMs ?? DEFAULTS.DAILY_WINDOW_MS;
+    const dailyBrake = opts.dailyBrakeThreshold ?? DEFAULTS.DAILY_BRAKE_THRESHOLD;
+    // First boot or no beacon file → clean start
+    if (!previous) {
+        return {
+            action: "proceed",
+            crashCount: 0,
+            crashWindowStart: now,
+            dailyCrashCount: 0,
+            dailyCrashWindowStart: now,
+        };
+    }
+    // Daily window roll-over first — it's independent of short window.
+    let dailyCount = previous.dailyCrashCount;
+    let dailyStart = previous.dailyCrashWindowStart;
+    if (now - dailyStart >= dailyWindow) {
+        dailyCount = 0;
+        dailyStart = now;
+    }
+    const timeSinceLastBeat = now - previous.lastBeat;
+    const previousExitedRecently = timeSinceLastBeat < staleMs;
+    if (!previousExitedRecently) {
+        // Clean exit (or machine reboot between runs) → short-window counter
+        // resets, but the daily counter keeps going unless its own window
+        // already expired above.
+        return {
+            action: "proceed",
+            crashCount: 0,
+            crashWindowStart: now,
+            dailyCrashCount: dailyCount,
+            dailyCrashWindowStart: dailyStart,
+        };
+    }
+    // Short-window logic
+    const shortWindowExpired = now - previous.crashWindowStart >= shortWindow;
+    let crashCount;
+    let crashWindowStart;
+    if (shortWindowExpired) {
+        crashCount = 1;
+        crashWindowStart = now;
+    }
+    else {
+        crashCount = previous.crashCount + 1;
+        crashWindowStart = previous.crashWindowStart;
+    }
+    // Increment daily count since we treat this as a crash
+    dailyCount += 1;
+    if (crashCount >= shortBrake) {
+        return {
+            action: "brake",
+            reason: `${crashCount} crashes within short window (${Math.round(shortWindow / 60_000)}min) — threshold is ${shortBrake}`,
+        };
+    }
+    if (dailyCount >= dailyBrake) {
+        return {
+            action: "brake",
+            reason: `${dailyCount} crashes within daily window (${Math.round(dailyWindow / 3_600_000)}h) — threshold is ${dailyBrake}`,
+        };
+    }
+    return {
+        action: "proceed",
+        crashCount,
+        crashWindowStart,
+        dailyCrashCount: dailyCount,
+        dailyCrashWindowStart: dailyStart,
+    };
+}
+/** Whether the short-window crash counter should be reset after this
+ *  much clean uptime. Default: 1 h. */
+export function shouldResetCrashCounter(uptimeMs, opts = {}) {
+    const threshold = opts.resetAfterMs ?? DEFAULTS.RESET_AFTER_MS;
+    return uptimeMs >= threshold;
+}