npm - claude-overnight - Versions diffs - 1.50.0 → 1.50.3 - Mend

claude-overnight 1.50.0 → 1.50.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12)

package/dist/core/_version.d.ts +1 -1
package/dist/core/_version.js +1 -1
package/dist/core/types.d.ts +9 -0
package/dist/run/run.js +15 -11
package/dist/run/wave-loop.d.ts +10 -1
package/dist/run/wave-loop.js +3 -13
package/dist/state/run-state.d.ts +43 -0
package/dist/state/run-state.js +30 -0
package/dist/state/state.js +13 -0
package/dist/swarm/agent-run.js +49 -6
package/package.json +1 -1
package/plugins/claude-overnight/.claude-plugin/plugin.json +1 -1

package/dist/core/_version.d.ts CHANGED Viewed

@@ -1 +1 @@
-export declare const VERSION = "1.50.0";
+export declare const VERSION = "1.50.3";

package/dist/core/_version.js CHANGED Viewed

@@ -1,2 +1,2 @@
 // Auto-generated by build — do not edit manually.
-export const VERSION = "1.50.0";
+export const VERSION = "1.50.3";

package/dist/core/types.d.ts CHANGED Viewed

@@ -12,6 +12,15 @@ export interface Task {
     noWorktree?: boolean;
     /** SDK session ID to resume from (set when task was paused mid-turn). */
     resumeSessionId?: string;
+    /**
+     * Discriminator for the (provider, model, cwd) that produced `resumeSessionId`.
+     * The SDK keys sessions by project path locally and by account/model on the
+     * backend; if any of those differ at resume time the saved id points at a
+     * conversation neither side can find. Compared against the live key on resume;
+     * mismatch drops `resumeSessionId` before the SDK errors with
+     * "No conversation found with session ID".
+     */
+    resumeContextKey?: string;
     /** Working directory preserved from a previous run (worktree dir for paused-and-resumed tasks). */
     agentCwd?: string;
     /** The kind of work: "execute" modifies files, others are read-only/analysis. Defaults to "execute". */

package/dist/run/run.js CHANGED Viewed

@@ -10,6 +10,7 @@ import { buildEnvResolver, isCursorProxyProvider } from "../providers/index.js";
 import { RunDisplay } from "../ui/ui.js";
 import { renderSummary } from "../ui/summary.js";
 import { readRunMemory, writeStatus, writeGoalUpdate, saveRunState, saveWaveSession, loadWaveHistory, recordBranches, archiveMilestone, writeSteerInbox, consumeSteerInbox, countSteerInbox, appendOvernightLogStart, updateOvernightLogEnd, } from "../state/state.js";
+import { composeRunState } from "../state/run-state.js";
 import { runPostRunReview } from "./review.js";
 import { printFinalSummary } from "./summary.js";
 import { runWaveLoop } from "./wave-loop.js";
@@ -237,21 +238,23 @@ export async function executeRun(cfg) {
         }
         catch { }
     }
-    const buildRunState = (varying) => ({
-        id: `run-${new Date().toISOString().slice(0, 19)}`, objective: objective ?? "", budget: cfg.budget,
-        remaining, workerModel, plannerModel, fastModel,
-        workerProviderId: cfg.workerProvider?.id, plannerProviderId: cfg.plannerProvider?.id,
+    const runStateBase = {
+        cwd,
+        id: `run-${new Date(cfg.runStartedAt).toISOString().slice(0, 19)}`,
+        startedAt: new Date(cfg.runStartedAt).toISOString(),
+        objective: objective ?? "",
+        budget: cfg.budget,
+        workerProviderId: cfg.workerProvider?.id,
+        plannerProviderId: cfg.plannerProvider?.id,
         fastProviderId: cfg.fastProvider?.id,
-        concurrency,
-        usageCap, allowExtraUsage: cfg.allowExtraUsage, extraUsageBudget: cfg.extraUsageBudget,
-        flex, useWorktrees, mergeStrategy, waveNum,
-        currentTasks: varying.currentTasks,
-        accCost, accCompleted, accFailed, accIn, accOut, accTools,
-        branches, phase: varying.phase, startedAt: new Date(cfg.runStartedAt).toISOString(), cwd,
+        allowExtraUsage: cfg.allowExtraUsage ?? false,
+        extraUsageBudget: cfg.extraUsageBudget,
+        flex, useWorktrees, mergeStrategy,
         repoFingerprint,
         coachedObjective: cfg.coachedObjective,
         coachedAt: cfg.coachedAt,
-    });
+    };
+    const buildRunState = (varying) => composeRunState({ ...runStateBase, workerModel, plannerModel, fastModel, concurrency, usageCap }, { remaining: varying.remaining, waveNum, accCost, accCompleted, accFailed, accIn, accOut, accTools, branches }, { phase: varying.phase, currentTasks: varying.currentTasks });
     const gracefulStop = () => {
         if (stopping) {
             currentSwarm?.cleanup();
@@ -509,6 +512,7 @@ export async function executeRun(cfg) {
         rlGetter,
         isStopping: () => stopping,
         syncRunInfo,
+        buildRunState,
         renderSummary,
         runDebrief,
         recordBranches: (agents, mergeResults, currentWave) => {

package/dist/run/wave-loop.d.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import type { Task, MergeStrategy, BranchRecord, WaveSummary, RLGetter } from "../core/types.js";
+import type { Task, MergeStrategy, BranchRecord, WaveSummary, RLGetter, RunState } from "../core/types.js";
 import { Swarm } from "../swarm/swarm.js";
 import { RunDisplay } from "../ui/ui.js";
 import type { LiveConfig, SteeringContext } from "../ui/ui.js";
@@ -70,6 +70,15 @@ export interface WaveLoopCtx {
         ok: boolean;
     }[], currentWave?: number) => void;
     onLibrarianResult?: (promoted: number, patched: number, quarantined: number, rejected: number) => void;
+    /** Builds a full RunState snapshot. Provided by run.ts so cwd, budget, branches,
+     * provider ids, etc. are preserved — the wave loop used to rebuild a truncated
+     * state that omitted cwd, which made saved runs invisible to `findIncompleteRuns`
+     * (the cwd-equality filter dropped them). */
+    buildRunState: (varying: {
+        remaining: number;
+        phase: RunState["phase"];
+        currentTasks: Task[];
+    }) => RunState;
 }
 export interface WaveLoopResult {
     runAnotherRound: boolean;

package/dist/run/wave-loop.js CHANGED Viewed

@@ -59,7 +59,7 @@ export async function runWaveLoop(host, ctx) {
             if (host.currentTasks.length > host.remaining)
                 host.currentTasks = host.currentTasks.slice(0, host.remaining);
             ctx.syncRunInfo();
-            saveRunState(ctx.runDir, buildRunState(host, "steering", host.currentTasks));
+            saveRunState(ctx.runDir, ctx.buildRunState({ remaining: host.remaining, phase: "steering", currentTasks: host.currentTasks }));
             // ── Pre-wave rate limit gate ──
             await throttleBeforeWave(ctx.rlGetter, (text) => ctx.display.appendSteeringEvent(text), ctx.isStopping);
             if (ctx.isStopping())
@@ -181,7 +181,7 @@ export async function runWaveLoop(host, ctx) {
             // On user-initiated quit mid-wave, "never started" tasks are real leftover
             // work the user expects to see on resume — save them under "stopped".
             const midWavePhase = (ctx.isStopping() || swarm.aborted) ? "stopped" : "steering";
-            saveRunState(ctx.runDir, buildRunState(host, midWavePhase, neverStarted));
+            saveRunState(ctx.runDir, ctx.buildRunState({ remaining: host.remaining, phase: midWavePhase, currentTasks: neverStarted }));
             // Preserve the leftover tasks on the host so resume / verifier see the
             // real pending queue (not the full original batch) after each wave.
             host.currentTasks = neverStarted;
@@ -235,7 +235,7 @@ export async function runWaveLoop(host, ctx) {
             if (circuitHalt) {
                 ctx.display.appendSteeringEvent(`Circuit breaker: 2 consecutive waves produced no merged changes — halting to prevent budget drain`);
                 ctx.display.stop();
-                saveRunState(ctx.runDir, buildRunState(host, "stopped", []));
+                saveRunState(ctx.runDir, ctx.buildRunState({ remaining: host.remaining, phase: "stopped", currentTasks: [] }));
                 ctx.display.stop();
                 console.log(chalk.red(`\n  Circuit breaker: 2 consecutive waves produced no merged changes.`));
                 console.log(chalk.red(`  Halting to prevent budget drain. Run preserved at ${ctx.runDir}.`));
@@ -521,16 +521,6 @@ function handleZeroWorkRetry(swarm, host, ctx) {
     swarm.totalOutputTokens += retrySwarm.totalOutputTokens;
     host.liveConfig.remaining = host.remaining;
 }
-function buildRunState(host, phase, currentTasks) {
-    return {
-        remaining: host.remaining, phase, currentTasks,
-        workerModel: host.workerModel, plannerModel: host.plannerModel, fastModel: host.fastModel,
-        concurrency: host.concurrency,
-        usageCap: host.usageCap, flex: true, waveNum: host.waveNum,
-        accCost: host.accCost, accCompleted: host.accCompleted, accFailed: host.accFailed,
-        accIn: host.accIn, accOut: host.accOut, accTools: host.accTools,
-    };
-}
 function captureAbOutcome(swarm, assignment, host, ctx) {
     const treatmentAgents = swarm.agents.filter(a => assignment.treatmentTaskIds.includes(a.task.id));
     const controlAgents = swarm.agents.filter(a => assignment.controlTaskIds.includes(a.task.id));

package/dist/state/run-state.d.ts ADDED Viewed

@@ -0,0 +1,43 @@
+import type { RunState, Task, BranchRecord } from "../core/types.js";
+/** Static inputs that don't change between RunState snapshots within a single run. */
+export interface RunStateBase {
+    cwd: string;
+    id: string;
+    startedAt: string;
+    objective: string;
+    budget: number;
+    workerModel: string;
+    plannerModel: string;
+    fastModel: string | undefined;
+    workerProviderId?: string;
+    plannerProviderId?: string;
+    fastProviderId?: string;
+    concurrency: number;
+    usageCap: number | undefined;
+    allowExtraUsage: boolean;
+    extraUsageBudget?: number;
+    flex: boolean;
+    useWorktrees: boolean;
+    mergeStrategy: RunState["mergeStrategy"];
+    repoFingerprint: string;
+    coachedObjective?: string;
+    coachedAt?: number;
+}
+/** Live counters captured at snapshot time. */
+export interface RunStateLive {
+    remaining: number;
+    waveNum: number;
+    accCost: number;
+    accCompleted: number;
+    accFailed: number;
+    accIn: number;
+    accOut: number;
+    accTools: number;
+    branches: BranchRecord[];
+}
+/** Variable-per-snapshot inputs: phase and the task slice for resume. */
+export interface RunStateVarying {
+    phase: RunState["phase"];
+    currentTasks: Task[];
+}
+export declare function composeRunState(base: RunStateBase, live: RunStateLive, varying: RunStateVarying): RunState;

package/dist/state/run-state.js ADDED Viewed

@@ -0,0 +1,30 @@
+// Single source of truth for constructing a RunState snapshot for persistence.
+//
+// Two writers used to exist (run.ts and wave-loop.ts) and one drifted —
+// silently omitting cwd, which made saved runs invisible to findIncompleteRuns.
+// Now both call this. Adding a field to RunState forces an edit here.
+//
+// `saveRunState` enforces required fields at the write boundary; this module
+// enforces them at the call boundary.
+export function composeRunState(base, live, varying) {
+    return {
+        id: base.id, objective: base.objective, budget: base.budget,
+        remaining: live.remaining,
+        workerModel: base.workerModel, plannerModel: base.plannerModel, fastModel: base.fastModel,
+        workerProviderId: base.workerProviderId, plannerProviderId: base.plannerProviderId,
+        fastProviderId: base.fastProviderId,
+        concurrency: base.concurrency,
+        usageCap: base.usageCap, allowExtraUsage: base.allowExtraUsage, extraUsageBudget: base.extraUsageBudget,
+        flex: base.flex, useWorktrees: base.useWorktrees, mergeStrategy: base.mergeStrategy,
+        waveNum: live.waveNum,
+        currentTasks: varying.currentTasks,
+        accCost: live.accCost, accCompleted: live.accCompleted, accFailed: live.accFailed,
+        accIn: live.accIn, accOut: live.accOut, accTools: live.accTools,
+        branches: live.branches,
+        phase: varying.phase,
+        startedAt: base.startedAt, cwd: base.cwd,
+        repoFingerprint: base.repoFingerprint,
+        coachedObjective: base.coachedObjective,
+        coachedAt: base.coachedAt,
+    };
+}

package/dist/state/state.js CHANGED Viewed

@@ -180,7 +180,20 @@ export function updateOvernightLogEnd(cwd, runId, meta) {
     }
 }
 // ── Run state persistence ──
+/**
+ * Required fields on every persisted RunState. The type already marks these as
+ * non-optional, but callers that build state dynamically (or upcast through
+ * `any`) can still slip a truncated snapshot past the compiler. A truncated
+ * snapshot is silently excluded by `findIncompleteRuns` (cwd-equality filter),
+ * so the run becomes unresumable without any visible error. Guard at the write
+ * boundary so the bug surfaces where it's introduced, not weeks later.
+ */
+const REQUIRED_RUN_STATE_FIELDS = ["cwd", "id", "phase", "startedAt"];
 export function saveRunState(runDir, state) {
+    const missing = REQUIRED_RUN_STATE_FIELDS.filter(k => !state[k]);
+    if (missing.length) {
+        throw new Error(`saveRunState: refusing to persist truncated state, missing fields: ${missing.join(", ")}`);
+    }
     mkdirSync(runDir, { recursive: true });
     writeFileSync(join(runDir, "run.json"), JSON.stringify(state, null, 2), "utf-8");
 }

package/dist/swarm/agent-run.js CHANGED Viewed

@@ -9,6 +9,7 @@ import { withCursorWorkspaceHeader, getAgentTimeout } from "./config.js";
 import { renderPrompt } from "../prompts/load.js";
 import { AgentTimeoutError, StreamStalledError, isRateLimitError, isStreamStalledError, isTransientError, sleep } from "./errors.js";
 import { handleMsg, checkStreamHealth, NO_CONTENT_TIMEOUT_MS } from "./message-handler.js";
+import { getModelCapability } from "../core/models.js";
 import { sdkQueryRateLimiter, acquireSdkQueryRateLimit } from "../core/rate-limiter.js";
 import { StreamSink } from "../core/transcripts.js";
 import { StallGuard, StallMonitor, runWithStallRotation } from "../core/stall-guard.js";
@@ -82,6 +83,16 @@ export async function runAgent(host, task) {
             host.log(id, `Worktree failed after retry  -- running without isolation`);
         }
     }
+    const effectiveModelInit = task.model || host.model;
+    const contextKey = sessionContextKey(effectiveModelInit, agentCwd, host.config.envForModel?.(effectiveModelInit));
+    // Drop a saved sessionId whose (provider, model, cwd) no longer matches the
+    // live one. Otherwise the first resume call fails with "No conversation found
+    // with session ID" and burns an attempt before any tool use. See
+    // Task.resumeContextKey for the why.
+    if (task.resumeSessionId && task.resumeContextKey && task.resumeContextKey !== contextKey) {
+        host.log(id, `Dropping stale resume id (context changed: ${task.resumeContextKey} → ${contextKey})`);
+        task = { ...task, resumeSessionId: undefined, resumeContextKey: undefined };
+    }
     const isResumed = !!task.resumeSessionId;
     host.log(id, isResumed ? `Resuming: ${task.prompt.slice(0, 60)}` : `Starting: ${task.prompt.slice(0, 60)}`);
     const maxRetries = host.config.maxRetries ?? 2;
@@ -90,6 +101,21 @@ export async function runAgent(host, task) {
     // Hoisted so the catch block can read the session captured during the turn
     // when routing a pause-interrupt through the requeue path.
     let resumeSessionId = task.resumeSessionId;
+    // Carry the resume session forward only if the prior turn isn't already
+    // close to filling its context window. A saturated session would resume
+    // with little room to do real work and would auto-compact (or hit the
+    // window) almost immediately — cheaper to start the next attempt fresh.
+    const carrySession = () => {
+        if (!resumeSessionId)
+            return false;
+        const safe = getModelCapability(effectiveModelInit ?? "").safeContext;
+        const used = agent.peakContextTokens ?? agent.contextTokens ?? 0;
+        if (safe > 0 && used >= safe * 0.85) {
+            host.log(id, `Discarding resume id (context ${used}/${safe} tokens, near saturation)`);
+            return false;
+        }
+        return true;
+    };
     for (let attempt = 0; attempt <= maxRetries; attempt++) {
         if (attempt > 0) {
             const backoffMs = Math.min(30000, 1000 * 2 ** (attempt - 1)) * (0.5 + Math.random());
@@ -210,9 +236,10 @@ export async function runAgent(host, task) {
                 if (!host.paused || agent.status !== "running")
                     return false;
                 agent.status = "paused";
-                host.log(id, resumeSessionId ? "Paused mid-task (will resume)" : "Paused before first turn (will restart)");
-                host.queue.unshift(resumeSessionId
-                    ? { ...task, resumeSessionId, agentCwd }
+                const carry = carrySession();
+                host.log(id, carry ? "Paused mid-task (will resume)" : "Paused before first turn (will restart)");
+                host.queue.unshift(carry
+                    ? { ...task, resumeSessionId, resumeContextKey: contextKey, agentCwd }
                     : { ...task });
                 return true;
             };
@@ -299,9 +326,12 @@ export async function runAgent(host, task) {
             if (host.paused) {
                 agent.status = "paused";
                 host.log(id, "Paused mid-task (interrupt thrown)");
-                // Reuse resume info when we already have a sessionId; otherwise restart fresh.
-                const reuseSession = (typeof resumeSessionId === "string") && resumeSessionId.length > 0;
-                host.queue.unshift(reuseSession ? { ...task, resumeSessionId, agentCwd } : { ...task });
+                // Reuse resume info when we have a sessionId AND the prior context isn't
+                // already saturated; otherwise restart fresh.
+                const reuseSession = carrySession();
+                host.queue.unshift(reuseSession
+                    ? { ...task, resumeSessionId, resumeContextKey: contextKey, agentCwd }
+                    : { ...task });
                 return;
             }
             // Stream stall: the server went silent mid-response. If we captured a
@@ -423,6 +453,19 @@ function installLspFirstHookInto(worktreeDir) {
     // settings.local.json is gitignored by Claude Code convention — won't pollute the agent's commit.
     writeFileSync(join(dir, "settings.local.json"), JSON.stringify(settings, null, 2), "utf-8");
 }
+/**
+ * Stable per-(provider, model, cwd) tag for scoping `resume` session ids.
+ * Provider matters because Cursor proxy and Anthropic direct keep separate
+ * backend session stores; cwd matters because the SDK keys its on-disk session
+ * cache by project path, so a recreated worktree under a new path can't find
+ * the prior conversation. Model is included for completeness.
+ */
+function sessionContextKey(model, cwd, env) {
+    const isCursor = !!(env?.CURSOR_API_KEY || env?.CURSOR_AUTH_TOKEN || env?.CURSOR_BRIDGE_MODE);
+    const baseUrl = env?.ANTHROPIC_BASE_URL?.trim();
+    const provider = isCursor ? "cursor" : baseUrl ? `url:${baseUrl}` : "anthropic";
+    return `${provider}|${model ?? "default"}|${cwd}`;
+}
 /** Extract a ### SKILL CANDIDATE block from agent text. Returns undefined if not found. */
 function extractSkillProposal(text) {
     const m = text.match(/###\s*SKILL CANDIDATE\s*\n([\s\S]+?)$/);

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "claude-overnight",
-  "version": "1.50.0",
+  "version": "1.50.3",
   "description": "Parallel Claude agents in git worktrees with a usage cap that reserves headroom for your interactive Claude Code. Crash-safe resume. Provider-agnostic model catalog (Anthropic, Cursor, OpenAI, Gemini, DeepSeek, Llama, Qwen) with capability-based task scoping.",
   "type": "module",
   "bin": {

package/plugins/claude-overnight/.claude-plugin/plugin.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "claude-overnight",
-  "version": "1.50.0",
+  "version": "1.50.3",
   "description": "Claude Code skill for understanding, installing, and inspecting claude-overnight runs  -- parallel Claude agents in git worktrees with thinking waves, multi-wave steering, and crash-safe resume. Supports Cursor API Proxy, Qwen, OpenRouter.",
   "author": {
     "name": "Francesco Fornace"