npm - oxtail - Version diff - 0.15.0 → 0.16.0 - Mend

oxtail 0.15.0 → 0.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

package/dist/claims.js +27 -7
package/dist/detect/birthTimeMatchStrategy.js +8 -2
package/dist/locks.js +26 -7
package/dist/mailbox.js +44 -3
package/dist/received.js +26 -5
package/dist/registry.js +38 -1
package/dist/server.js +33 -3
package/dist/wake-debounce.js +8 -1
package/package.json +1 -1

package/dist/claims.js CHANGED Viewed

@@ -155,9 +155,6 @@ function compareClaimScores(a, b) {
     }
     return a.claimed_at - b.claimed_at;
 }
-function scoresTie(a, b) {
-    return compareClaimScores(a, b) === 0;
-}
 function claimKey(clientType, cwd, sessionId) {
     return createHash("sha256")
         .update(`${clientType} ${cwd} ${sessionId}`)
@@ -172,7 +169,14 @@ function claimPath(key) {
 // Atomic temp+rename so a concurrent reader never sees a torn write.
 export function writeClaim(input) {
     ensureClaimsDir();
-    gcStaleClaims();
+    // Age-only sweep on this hot path. writeClaim can run concurrently with another
+    // agent's writeClaim (dual-scope, or two sessions in one project); the
+    // transcript-existence check is racy — a transcript that momentarily fails to
+    // stat would unlink a sibling's just-written claim (M6). Age is monotonic and
+    // race-free, so reclaim only by age here. recoverClaim already skips records
+    // whose transcript is gone, and the full (transcript-aware) sweep remains
+    // available via a direct gcStaleClaims() call.
+    gcStaleClaims(Date.now(), { ageOnly: true });
     const rec = {
         schema_version: 1,
         client_type: input.client_type,
@@ -245,14 +249,30 @@ export function recoverClaim(clientType, cwd, ancestors, deps = {}) {
     matches.sort((a, b) => compareClaimScores(b.score, a.score));
     const best = matches[0];
     const second = matches[1];
-    if (second && scoresTie(best.score, second.score))
+    // Abstain on cross-session ambiguity. Two DISTINCT sessions that overlap the
+    // live chain equally (same overlap_count) AND at the same live-chain depth
+    // (same nearest_overlap_current) share liveness only at a common ancestor —
+    // the shared terminal/login-shell. The remaining tiebreakers (record-side
+    // depth, recency) do NOT correlate with which child actually restarted, so
+    // adopting either would risk cross-session misrouting (H1) — the very
+    // split-identity class this store exists to prevent. Return null so the caller
+    // falls back to the explicit claim_session next_step. (This strictly subsumes
+    // the old exact-tie check, which had equal overlap_count and nearest_current
+    // by definition.) A same-session second-best routes to the same identity and
+    // so can never split-route — defensive only, since the per-session claim key
+    // means two records can't share a session_id.
+    if (second &&
+        best.rec.session_id !== second.rec.session_id &&
+        best.score.overlap_count === second.score.overlap_count &&
+        best.score.nearest_overlap_current === second.score.nearest_overlap_current) {
         return null;
+    }
     return best.rec;
 }
 // Drop records that are clearly dead: transcript gone, or older than the max
 // age. Best-effort; never throws. A dead process pid alone is NOT grounds for
 // removal — that's exactly the restart case recovery exists to serve.
-export function gcStaleClaims(nowMs = Date.now()) {
+export function gcStaleClaims(nowMs = Date.now(), opts = {}) {
     const dir = claimsDir();
     if (!existsSync(dir))
         return;
@@ -274,7 +294,7 @@ export function gcStaleClaims(nowMs = Date.now()) {
         catch {
             continue;
         }
-        const transcriptGone = !rec.transcript_path || !existsSync(rec.transcript_path);
+        const transcriptGone = !opts.ageOnly && (!rec.transcript_path || !existsSync(rec.transcript_path));
         const tooOld = nowMs - rec.claimed_at * 1000 > CLAIM_MAX_AGE_MS;
         if (transcriptGone || tooOld) {
             try {

package/dist/detect/birthTimeMatchStrategy.js CHANGED Viewed

@@ -2,6 +2,12 @@ import { closeSync, existsSync, openSync, readSync, readdirSync, statSync } from
 import { homedir } from "node:os";
 import { join } from "node:path";
 const FIVE_MIN_MS = 5 * 60 * 1000;
+// started_at is whole-second granularity (Math.floor(Date.now()/1000)*1000)
+// while a transcript's birth_ms is real-millisecond, so a transcript
+// legitimately created in the same second can land slightly BEFORE started_at
+// (delta in [-1000, 0]). Allow one second of grace below zero so the unique
+// candidate isn't dropped on pure rounding (M7).
+const ONE_SECOND_MS = 1000;
 const UUID_RE = /([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})/;
 // Returns the unique post-start candidate inside the window, or null if there
 // are zero or multiple. Multiple positive-delta candidates means another
@@ -10,7 +16,7 @@ const UUID_RE = /([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})/
 export function pickByDelta(candidates, startedAtMs, windowMs = FIVE_MIN_MS) {
     const ranked = candidates
         .map((c) => ({ ...c, delta: c.birth_ms - startedAtMs }))
-        .filter((c) => c.delta > 0 && c.delta <= windowMs);
+        .filter((c) => c.delta > -ONE_SECOND_MS && c.delta <= windowMs);
     if (ranked.length !== 1)
         return null;
     return { session_id: ranked[0].session_id, birth_ms: ranked[0].birth_ms };
@@ -141,7 +147,7 @@ function abstainReason(type, candidates, startedAtMs) {
     }
     const ranked = candidates
         .map((c) => ({ ...c, delta: c.birth_ms - startedAtMs }))
-        .filter((c) => c.delta > 0 && c.delta <= FIVE_MIN_MS);
+        .filter((c) => c.delta > -ONE_SECOND_MS && c.delta <= FIVE_MIN_MS);
     if (ranked.length === 0) {
         return {
             abstain: true,

package/dist/locks.js CHANGED Viewed

@@ -41,8 +41,13 @@ import { trace } from "./trace.js";
 //     ALWAYS the single-winner `mkdir(lock)`, so even redundant clears can never
 //     produce two owners — the worst they do is race to recreate the lock, which
 //     exactly one wins.
-const LOCK_RETRY_LIMIT = 50;
 const LOCK_RETRY_DELAY_MS = 10;
+// Total acquire budget is wall-clock, NOT a fixed retry count: a successful
+// stale-clear retries mkdir immediately (no sleep) so it must not consume the
+// budget without time passing — a count-based budget threw "could not acquire
+// lock" spuriously under contention (H2). 2s is ample for the tiny mailbox/
+// ledger critical sections and well under any caller-level timeout.
+const LOCK_ACQUIRE_TIMEOUT_MS = 2_000;
 function sleepSync(ms) {
     Atomics.wait(new Int32Array(new SharedArrayBuffer(4)), 0, 0, ms);
 }
@@ -166,7 +171,8 @@ export function clearStaleLock(lock, staleMs, traceEvent, traceCtx) {
 // releaseDirLock. The caller is responsible for creating the parent directory.
 export function acquireDirLock(lock, staleMs, traceEvent, traceCtx) {
     const token = mintToken();
-    for (let i = 0; i < LOCK_RETRY_LIMIT; i++) {
+    const deadline = Date.now() + LOCK_ACQUIRE_TIMEOUT_MS;
+    for (;;) {
         try {
             mkdirSync(lock, { mode: 0o700 });
             writeOwner(lock, token);
@@ -176,12 +182,19 @@ export function acquireDirLock(lock, staleMs, traceEvent, traceCtx) {
             const err = e;
             if (err.code !== "EEXIST")
                 throw err;
-            if (clearStaleLock(lock, staleMs, traceEvent, traceCtx))
-                continue;
-            sleepSync(LOCK_RETRY_DELAY_MS);
+            // A successful stale-clear means the lock is gone: loop straight back to
+            // mkdir WITHOUT sleeping, to grab it before another contender (this retry
+            // must not consume the budget without time passing). Otherwise — a fresh
+            // holder or a lost steal — back off before retrying.
+            if (!clearStaleLock(lock, staleMs, traceEvent, traceCtx)) {
+                sleepSync(LOCK_RETRY_DELAY_MS);
+            }
+        }
+        // Wall-clock budget so the no-sleep stale-clear path cannot spin forever.
+        if (Date.now() >= deadline) {
+            throw new Error(`could not acquire lock at ${lock}`);
         }
     }
-    throw new Error(`could not acquire lock at ${lock}`);
 }
 // Release the lock — but only if we PROVABLY still own it (owner === our token).
 // A holder that stalled past the stale window and was stolen from sees a
@@ -192,7 +205,13 @@ export function acquireDirLock(lock, staleMs, traceEvent, traceCtx) {
 // stale lock and is reclaimed by clearStaleLock, strictly safer than a stomp.
 export function releaseDirLock(lock, token) {
     if (!token) {
-        removeLock(lock); // no token to verify (defensive/legacy) — best-effort
+        // No token to prove ownership. An empty token reaches here only from a
+        // lockTokens Map miss (an acquire that threw, or a future same-key nested
+        // release), so removing would stomp whatever lock currently exists —
+        // possibly a DIFFERENT owner's fresh one. Leave it: a genuinely leaked lock
+        // ages into a stale lock and is reclaimed by clearStaleLock, strictly safer
+        // than a stomp (H3).
+        trace("lock_release_skipped_no_token", { lock });
         return;
     }
     const owner = readOwner(lock);

package/dist/mailbox.js CHANGED Viewed

@@ -188,6 +188,13 @@ export function requeueMany(target_pid, msgs) {
 // two unioned sibling mailboxes. Both copies are drained (so neither lingers) but
 // the message is returned ONCE. message_id is a unique per-message nonce, so this
 // only ever collapses true duplicates, never two distinct messages.
+// Union-drain a session's mailboxes (one per server_pid it has used), deduping
+// by message_id so a migrate crash-window duplicate (same id in two sibling
+// mailboxes) is delivered once. INVARIANT: every unioned pid is drained (and so
+// truncated) before returning — do NOT add a budget/early-exit short-circuit
+// here. The dedup is per-call only, so a duplicate left in an un-drained sibling
+// would re-surface on a later call with no cross-call dedup (M5). Budgeting
+// belongs in the caller, applied to the already-fully-drained result.
 export function drainMany(pids) {
     const out = [];
     const seenPids = new Set();
@@ -265,8 +272,42 @@ export function migrateMailbox(fromPid, toPid) {
         }
         if (!raw || !raw.trim())
             return 0;
-        const block = raw.endsWith("\n") ? raw : raw + "\n";
-        const count = raw.split("\n").filter((l) => l.trim().length > 0).length;
+        // Migrate only VALID records, reserialized canonically. A crash mid-append
+        // into the source can leave a torn final line; copying raw bytes would glue
+        // a synthesized newline onto that fragment, promoting garbage into a
+        // standalone (unparseable) line in the dest AND over-counting it (H4). Parse
+        // each line with the same guard drain uses, drop torn/invalid ones, and
+        // rebuild a clean block so the count reflects real, deliverable messages.
+        const valid = [];
+        for (const line of raw.split("\n")) {
+            if (!line.trim())
+                continue;
+            let parsed;
+            try {
+                parsed = JSON.parse(line);
+            }
+            catch {
+                trace("mailbox_migrate_skip_invalid", { fromPid, toPid, line });
+                continue;
+            }
+            if (parsed &&
+                typeof parsed === "object" &&
+                parsed.schema_version === 1 &&
+                typeof parsed.id === "string" &&
+                typeof parsed.body === "string") {
+                valid.push(parsed);
+            }
+            else {
+                trace("mailbox_migrate_skip_invalid", { fromPid, toPid, line });
+            }
+        }
+        if (valid.length === 0) {
+            // Only torn/garbage lines — clear the source and report nothing migrated.
+            truncateSync(src, 0);
+            return 0;
+        }
+        // serializeMailboxLine already terminates each line with "\n", so join("").
+        const block = valid.map((m) => serializeMailboxLine(m)).join("");
         acquireLock(toPid);
         try {
             appendLines(mailboxPath(toPid), block);
@@ -276,7 +317,7 @@ export function migrateMailbox(fromPid, toPid) {
         }
         // Append succeeded → clear the source (still under the source lock).
         truncateSync(src, 0);
-        return count;
+        return valid.length;
     }
     finally {
         releaseLock(fromPid);

package/dist/received.js CHANGED Viewed

@@ -88,24 +88,43 @@ function readLines(sessionId) {
         throw err;
     }
 }
+// The message_id of a serialized ledger line, or null if unparseable. Used to
+// keep recordReceived idempotent without fully deserializing every envelope.
+function lineMessageId(line) {
+    try {
+        const parsed = JSON.parse(line);
+        return typeof parsed.id === "string" ? parsed.id : null;
+    }
+    catch {
+        return null;
+    }
+}
 // Append an inbound envelope to the receiver's ledger and prune to receivedMax()
 // (oldest dropped first). Called by delivery.ts BEFORE the mailbox append.
+// Idempotent by message_id: re-recording an id replaces its prior line.
 export function recordReceived(receiverSessionId, msg) {
     if (!receiverSessionId)
         return;
     acquireLock(receiverSessionId);
     try {
         const lines = readLines(receiverSessionId);
-        lines.push(JSON.stringify(msg));
+        // Idempotent by message_id: a re-record (ask_peer abort recovery, chained
+        // re-delivery) must not append a duplicate ledger line. Duplicates waste the
+        // receivedMax prune budget and can evict still-needed handles early,
+        // surfacing as spurious reply_to_message "message-not-found" (M4). Drop any
+        // prior line for this id, then append the latest. lookupReceived already
+        // returns first-match newest-first, so behavior is unchanged for callers.
+        const deduped = msg.id ? lines.filter((l) => lineMessageId(l) !== msg.id) : lines;
+        deduped.push(JSON.stringify(msg));
         const max = receivedMax();
-        let pruned = lines;
-        if (lines.length > max) {
-            pruned = lines.slice(lines.length - max);
+        let pruned = deduped;
+        if (deduped.length > max) {
+            pruned = deduped.slice(deduped.length - max);
             // No silent caps: a dropped handle becomes reply_to_message
             // "message-not-found", so surface that the bound bit.
             trace("received_ledger_pruned", {
                 session_id: receiverSessionId,
-                dropped: lines.length - max,
+                dropped: deduped.length - max,
                 kept: max,
             });
         }
@@ -136,6 +155,8 @@ export function lookupReceived(receiverSessionId, messageId) {
             }
             if (parsed &&
                 typeof parsed === "object" &&
+                parsed.schema_version === 1 &&
+                typeof parsed.body === "string" &&
                 parsed.id === messageId) {
                 return parsed;
             }

package/dist/registry.js CHANGED Viewed

@@ -68,9 +68,21 @@ export function isValidTmuxSession(s) {
 //     target, so we refuse rather than fall back to the self-written cached value.
 //   - the resolved pane isn't a well-formed pane id (tmux output anomaly).
 // resolvePane is injected in tests; production uses currentPaneForServerPid.
-export function chooseVerifiedWakePane(peer, resolvePane = currentPaneForServerPid) {
+export function chooseVerifiedWakePane(peer, resolvePane = currentPaneForServerPid, resolveSig = processStartSig) {
     if (!peer.tmux_pane)
         return null;
+    // PID-reuse guard: if the entry recorded the server process's start-time
+    // signature, confirm the live pid is STILL that process before resolving and
+    // waking its pane. Otherwise an OS-recycled pid — now an unrelated process
+    // that happens to sit under a different tmux pane — would resolve to, and get
+    // our wake keystrokes typed into, a stranger's pane (M3). Only refuse on a
+    // positively-different signature; an empty reading (transient ps failure)
+    // falls through to pane resolution, which fails closed for a truly dead pid.
+    if (peer.proc_sig) {
+        const liveSig = resolveSig(peer.server_pid);
+        if (liveSig && liveSig !== peer.proc_sig)
+            return null;
+    }
     const live = resolvePane(peer.server_pid);
     if (!live || !isValidTmuxPane(live))
         return null;
@@ -203,11 +215,36 @@ export function resolveTmuxPane(env = process.env, pid = process.pid) {
 export function currentPaneForServerPid(serverPid) {
     return findTmuxPaneByAncestry(serverPid, listTmuxPanePids(), listAllPpids());
 }
+// The OS start-time signature (lstart) of a process, or "" if it can't be read
+// (dead pid, or ps unavailable). Same provenance signal claims.ts uses on
+// ancestor pids: an OS-recycled pid yields a DIFFERENT start time, so comparing
+// a live pid's signature against one captured at register time detects pid reuse
+// — distinguishing "our process is still alive" from "the pid now belongs to an
+// unrelated process."
+export function processStartSig(pid) {
+    try {
+        return execFileSync("ps", ["-o", "lstart=", "-p", String(pid)], {
+            encoding: "utf8",
+            stdio: ["ignore", "pipe", "pipe"],
+        }).trim();
+    }
+    catch {
+        return "";
+    }
+}
+// A process's start time never changes, so capture our own once and reuse it.
+let cachedSelfProcSig;
+function selfProcSig() {
+    if (cachedSelfProcSig === undefined)
+        cachedSelfProcSig = processStartSig(process.pid);
+    return cachedSelfProcSig;
+}
 export function buildEntry(client, env = process.env) {
     const tmux_pane = resolveTmuxPane(env);
     return {
         server_pid: process.pid,
         started_at: Math.floor(Date.now() / 1000),
+        proc_sig: selfProcSig(),
         client,
         tmux_pane,
         tmux_session: resolveTmuxSessionFromPane(tmux_pane),

package/dist/server.js CHANGED Viewed

@@ -10,7 +10,7 @@ import { dirname, join, sep } from "node:path";
 import { clientFromHandshake, detectClient, enrichWithDiagnosis, transcriptPathFor, } from "./clients.js";
 import { isAbstain } from "./detect/index.js";
 import { trace } from "./trace.js";
-import { buildEntry, chooseVerifiedWakePane, findByTmuxSession, readAll, refreshTmuxBinding, register, sessionPidsForId, unregister, } from "./registry.js";
+import { buildEntry, chooseVerifiedWakePane, findByTmuxSession, processStartSig, readAll, refreshTmuxBinding, register, sessionPidsForId, unregister, } from "./registry.js";
 import * as mailbox from "./mailbox.js";
 import * as received from "./received.js";
 import { deliverExistingToPeer, deliverToPeer } from "./delivery.js";
@@ -638,6 +638,11 @@ function refineFromHandshake(trigger) {
     return diagnosis;
 }
 server.server.oninitialized = () => {
+    // Sweep pending-ask records orphaned by a prior session (an ask that timed out,
+    // was never answered, and whose owner went away). gcPendingAsk otherwise only
+    // runs on a later ask_peer timeout, so this startup sweep keeps the dir from
+    // accumulating stale records. Best-effort; never throws.
+    gcPendingAsk(defaultPendingAskDir(), Date.now());
     const diagnosis = refineFromHandshake("oninitialized");
     // After type is known via handshake, schedule retries to catch transcript files
     // that don't exist yet at handshake time. No-op if session_id is already set.
@@ -855,12 +860,16 @@ server.registerTool("get_my_session", {
         // strategy mirrors session_id_source so callers can still see whether
         // env / birth-time / self-register resolved this entry.
         const source = entry.client.session_id_source ?? "self-register";
+        // Report confidence honestly per source: env and explicit self-register
+        // (claim_session) are authoritative ("high"); inferred sources (birth-time,
+        // sticky-claim) are "medium" — matching what the detect strategies return.
+        const confidence = source === "env" || source === "self-register" ? "high" : "medium";
         diagnosis = {
             per_strategy: {},
             winning: {
                 session_id: entry.client.session_id,
                 source,
-                confidence: "high",
+                confidence,
                 strategy: source,
             },
             next_step: null,
@@ -973,7 +982,20 @@ function resolveTarget(target, caller) {
         const fresh = reReadRegistryEntry(e.server_pid);
         if (!fresh)
             return false;
-        return fresh.started_at === e.started_at;
+        if (fresh.started_at !== e.started_at)
+            return false;
+        // PID-reuse: started_at is the original registration time and lives on the
+        // stale on-disk entry, so a recycled pid (alive, file untouched) passes the
+        // check above. If the entry recorded the process start-time signature,
+        // confirm the live pid is still that same process; a recycled pid reads a
+        // different signature and is rejected (M3). Empty reading → indeterminate,
+        // leave it to downstream (the pane wake gate re-verifies before keystrokes).
+        if (fresh.proc_sig) {
+            const liveSig = processStartSig(e.server_pid);
+            if (liveSig && liveSig !== fresh.proc_sig)
+                return false;
+        }
+        return true;
     });
     if (candidates.length === 0)
         return { ok: false, error: "target-not-found" };
@@ -1474,6 +1496,14 @@ async function wakePeer(peer) {
     // No session-name fallback: a self-written tmux_session could target another
     // session, and the verified pane already handles pane-id churn. Pass null.
     const ok = await askPeerWakeImpl(verifiedPane, null, fire);
+    if (!ok && sid) {
+        // The fire failed (e.g. the pane vanished between verification and the
+        // send-keys), so no keystroke landed. Clear the debounce stamp set pre-fire
+        // above — otherwise a genuine retry within WAKE_DEBOUNCE_MS is suppressed as
+        // "debounced" even though the peer was never actually woken (M1). The
+        // pre-stamp only needs to survive a SUCCESSFUL fire's async paste gap.
+        wakeDebounce.delete(sid);
+    }
     return ok ? "fired" : "skipped_no_target";
 }
 // --- send_message wake:auto gating -------------------------------------------

package/dist/wake-debounce.js CHANGED Viewed

@@ -30,7 +30,14 @@ export function newWakeDebounceStore() {
 // True if a wake fired for this key within the window — i.e. skip this one.
 export function recentlyWoke(store, key, nowMs, windowMs = WAKE_DEBOUNCE_MS) {
     const last = store.get(key);
-    return last !== undefined && nowMs - last < windowMs;
+    if (last === undefined)
+        return false;
+    const delta = nowMs - last;
+    // A backwards clock step (NTP correction, laptop resume) makes delta negative
+    // and < windowMs, which would wrongly suppress every wake to this peer until
+    // the clock catches back up. Treat a negative delta as "not recent" (mirrors
+    // the ageMs >= 0 guard in isFreshIdle).
+    return delta >= 0 && delta < windowMs;
 }
 // Record that a wake fired for this key. Opportunistically evicts stale entries
 // so the map can't grow unbounded across many short-lived peers.

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "oxtail",
-  "version": "0.15.0",
+  "version": "0.16.0",
   "private": false,
   "type": "module",
   "description": "Coordination layer for parallel AI coding agent sessions, exposed over MCP.",