claude-overnight 1.50.0 → 1.50.3

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
@@ -1 +1 @@
1
- export declare const VERSION = "1.50.0";
1
+ export declare const VERSION = "1.50.3";
@@ -1,2 +1,2 @@
1
1
  // Auto-generated by build — do not edit manually.
2
- export const VERSION = "1.50.0";
2
+ export const VERSION = "1.50.3";
@@ -12,6 +12,15 @@ export interface Task {
12
12
  noWorktree?: boolean;
13
13
  /** SDK session ID to resume from (set when task was paused mid-turn). */
14
14
  resumeSessionId?: string;
15
+ /**
16
+ * Discriminator for the (provider, model, cwd) that produced `resumeSessionId`.
17
+ * The SDK keys sessions by project path locally and by account/model on the
18
+ * backend; if any of those differ at resume time the saved id points at a
19
+ * conversation neither side can find. Compared against the live key on resume;
20
+ * mismatch drops `resumeSessionId` before the SDK errors with
21
+ * "No conversation found with session ID".
22
+ */
23
+ resumeContextKey?: string;
15
24
  /** Working directory preserved from a previous run (worktree dir for paused-and-resumed tasks). */
16
25
  agentCwd?: string;
17
26
  /** The kind of work: "execute" modifies files, others are read-only/analysis. Defaults to "execute". */
package/dist/run/run.js CHANGED
@@ -10,6 +10,7 @@ import { buildEnvResolver, isCursorProxyProvider } from "../providers/index.js";
10
10
  import { RunDisplay } from "../ui/ui.js";
11
11
  import { renderSummary } from "../ui/summary.js";
12
12
  import { readRunMemory, writeStatus, writeGoalUpdate, saveRunState, saveWaveSession, loadWaveHistory, recordBranches, archiveMilestone, writeSteerInbox, consumeSteerInbox, countSteerInbox, appendOvernightLogStart, updateOvernightLogEnd, } from "../state/state.js";
13
+ import { composeRunState } from "../state/run-state.js";
13
14
  import { runPostRunReview } from "./review.js";
14
15
  import { printFinalSummary } from "./summary.js";
15
16
  import { runWaveLoop } from "./wave-loop.js";
@@ -237,21 +238,23 @@ export async function executeRun(cfg) {
237
238
  }
238
239
  catch { }
239
240
  }
240
- const buildRunState = (varying) => ({
241
- id: `run-${new Date().toISOString().slice(0, 19)}`, objective: objective ?? "", budget: cfg.budget,
242
- remaining, workerModel, plannerModel, fastModel,
243
- workerProviderId: cfg.workerProvider?.id, plannerProviderId: cfg.plannerProvider?.id,
241
+ const runStateBase = {
242
+ cwd,
243
+ id: `run-${new Date(cfg.runStartedAt).toISOString().slice(0, 19)}`,
244
+ startedAt: new Date(cfg.runStartedAt).toISOString(),
245
+ objective: objective ?? "",
246
+ budget: cfg.budget,
247
+ workerProviderId: cfg.workerProvider?.id,
248
+ plannerProviderId: cfg.plannerProvider?.id,
244
249
  fastProviderId: cfg.fastProvider?.id,
245
- concurrency,
246
- usageCap, allowExtraUsage: cfg.allowExtraUsage, extraUsageBudget: cfg.extraUsageBudget,
247
- flex, useWorktrees, mergeStrategy, waveNum,
248
- currentTasks: varying.currentTasks,
249
- accCost, accCompleted, accFailed, accIn, accOut, accTools,
250
- branches, phase: varying.phase, startedAt: new Date(cfg.runStartedAt).toISOString(), cwd,
250
+ allowExtraUsage: cfg.allowExtraUsage ?? false,
251
+ extraUsageBudget: cfg.extraUsageBudget,
252
+ flex, useWorktrees, mergeStrategy,
251
253
  repoFingerprint,
252
254
  coachedObjective: cfg.coachedObjective,
253
255
  coachedAt: cfg.coachedAt,
254
- });
256
+ };
257
+ const buildRunState = (varying) => composeRunState({ ...runStateBase, workerModel, plannerModel, fastModel, concurrency, usageCap }, { remaining: varying.remaining, waveNum, accCost, accCompleted, accFailed, accIn, accOut, accTools, branches }, { phase: varying.phase, currentTasks: varying.currentTasks });
255
258
  const gracefulStop = () => {
256
259
  if (stopping) {
257
260
  currentSwarm?.cleanup();
@@ -509,6 +512,7 @@ export async function executeRun(cfg) {
509
512
  rlGetter,
510
513
  isStopping: () => stopping,
511
514
  syncRunInfo,
515
+ buildRunState,
512
516
  renderSummary,
513
517
  runDebrief,
514
518
  recordBranches: (agents, mergeResults, currentWave) => {
@@ -1,4 +1,4 @@
1
- import type { Task, MergeStrategy, BranchRecord, WaveSummary, RLGetter } from "../core/types.js";
1
+ import type { Task, MergeStrategy, BranchRecord, WaveSummary, RLGetter, RunState } from "../core/types.js";
2
2
  import { Swarm } from "../swarm/swarm.js";
3
3
  import { RunDisplay } from "../ui/ui.js";
4
4
  import type { LiveConfig, SteeringContext } from "../ui/ui.js";
@@ -70,6 +70,15 @@ export interface WaveLoopCtx {
70
70
  ok: boolean;
71
71
  }[], currentWave?: number) => void;
72
72
  onLibrarianResult?: (promoted: number, patched: number, quarantined: number, rejected: number) => void;
73
+ /** Builds a full RunState snapshot. Provided by run.ts so cwd, budget, branches,
74
+ * provider ids, etc. are preserved — the wave loop used to rebuild a truncated
75
+ * state that omitted cwd, which made saved runs invisible to `findIncompleteRuns`
76
+ * (the cwd-equality filter dropped them). */
77
+ buildRunState: (varying: {
78
+ remaining: number;
79
+ phase: RunState["phase"];
80
+ currentTasks: Task[];
81
+ }) => RunState;
73
82
  }
74
83
  export interface WaveLoopResult {
75
84
  runAnotherRound: boolean;
@@ -59,7 +59,7 @@ export async function runWaveLoop(host, ctx) {
59
59
  if (host.currentTasks.length > host.remaining)
60
60
  host.currentTasks = host.currentTasks.slice(0, host.remaining);
61
61
  ctx.syncRunInfo();
62
- saveRunState(ctx.runDir, buildRunState(host, "steering", host.currentTasks));
62
+ saveRunState(ctx.runDir, ctx.buildRunState({ remaining: host.remaining, phase: "steering", currentTasks: host.currentTasks }));
63
63
  // ── Pre-wave rate limit gate ──
64
64
  await throttleBeforeWave(ctx.rlGetter, (text) => ctx.display.appendSteeringEvent(text), ctx.isStopping);
65
65
  if (ctx.isStopping())
@@ -181,7 +181,7 @@ export async function runWaveLoop(host, ctx) {
181
181
  // On user-initiated quit mid-wave, "never started" tasks are real leftover
182
182
  // work the user expects to see on resume — save them under "stopped".
183
183
  const midWavePhase = (ctx.isStopping() || swarm.aborted) ? "stopped" : "steering";
184
- saveRunState(ctx.runDir, buildRunState(host, midWavePhase, neverStarted));
184
+ saveRunState(ctx.runDir, ctx.buildRunState({ remaining: host.remaining, phase: midWavePhase, currentTasks: neverStarted }));
185
185
  // Preserve the leftover tasks on the host so resume / verifier see the
186
186
  // real pending queue (not the full original batch) after each wave.
187
187
  host.currentTasks = neverStarted;
@@ -235,7 +235,7 @@ export async function runWaveLoop(host, ctx) {
235
235
  if (circuitHalt) {
236
236
  ctx.display.appendSteeringEvent(`Circuit breaker: 2 consecutive waves produced no merged changes — halting to prevent budget drain`);
237
237
  ctx.display.stop();
238
- saveRunState(ctx.runDir, buildRunState(host, "stopped", []));
238
+ saveRunState(ctx.runDir, ctx.buildRunState({ remaining: host.remaining, phase: "stopped", currentTasks: [] }));
239
239
  ctx.display.stop();
240
240
  console.log(chalk.red(`\n Circuit breaker: 2 consecutive waves produced no merged changes.`));
241
241
  console.log(chalk.red(` Halting to prevent budget drain. Run preserved at ${ctx.runDir}.`));
@@ -521,16 +521,6 @@ function handleZeroWorkRetry(swarm, host, ctx) {
521
521
  swarm.totalOutputTokens += retrySwarm.totalOutputTokens;
522
522
  host.liveConfig.remaining = host.remaining;
523
523
  }
524
- function buildRunState(host, phase, currentTasks) {
525
- return {
526
- remaining: host.remaining, phase, currentTasks,
527
- workerModel: host.workerModel, plannerModel: host.plannerModel, fastModel: host.fastModel,
528
- concurrency: host.concurrency,
529
- usageCap: host.usageCap, flex: true, waveNum: host.waveNum,
530
- accCost: host.accCost, accCompleted: host.accCompleted, accFailed: host.accFailed,
531
- accIn: host.accIn, accOut: host.accOut, accTools: host.accTools,
532
- };
533
- }
534
524
  function captureAbOutcome(swarm, assignment, host, ctx) {
535
525
  const treatmentAgents = swarm.agents.filter(a => assignment.treatmentTaskIds.includes(a.task.id));
536
526
  const controlAgents = swarm.agents.filter(a => assignment.controlTaskIds.includes(a.task.id));
@@ -0,0 +1,43 @@
1
+ import type { RunState, Task, BranchRecord } from "../core/types.js";
2
+ /** Static inputs that don't change between RunState snapshots within a single run. */
3
+ export interface RunStateBase {
4
+ cwd: string;
5
+ id: string;
6
+ startedAt: string;
7
+ objective: string;
8
+ budget: number;
9
+ workerModel: string;
10
+ plannerModel: string;
11
+ fastModel: string | undefined;
12
+ workerProviderId?: string;
13
+ plannerProviderId?: string;
14
+ fastProviderId?: string;
15
+ concurrency: number;
16
+ usageCap: number | undefined;
17
+ allowExtraUsage: boolean;
18
+ extraUsageBudget?: number;
19
+ flex: boolean;
20
+ useWorktrees: boolean;
21
+ mergeStrategy: RunState["mergeStrategy"];
22
+ repoFingerprint: string;
23
+ coachedObjective?: string;
24
+ coachedAt?: number;
25
+ }
26
+ /** Live counters captured at snapshot time. */
27
+ export interface RunStateLive {
28
+ remaining: number;
29
+ waveNum: number;
30
+ accCost: number;
31
+ accCompleted: number;
32
+ accFailed: number;
33
+ accIn: number;
34
+ accOut: number;
35
+ accTools: number;
36
+ branches: BranchRecord[];
37
+ }
38
+ /** Variable-per-snapshot inputs: phase and the task slice for resume. */
39
+ export interface RunStateVarying {
40
+ phase: RunState["phase"];
41
+ currentTasks: Task[];
42
+ }
43
+ export declare function composeRunState(base: RunStateBase, live: RunStateLive, varying: RunStateVarying): RunState;
@@ -0,0 +1,30 @@
1
+ // Single source of truth for constructing a RunState snapshot for persistence.
2
+ //
3
+ // Two writers used to exist (run.ts and wave-loop.ts) and one drifted —
4
+ // silently omitting cwd, which made saved runs invisible to findIncompleteRuns.
5
+ // Now both call this. Adding a field to RunState forces an edit here.
6
+ //
7
+ // `saveRunState` enforces required fields at the write boundary; this module
8
+ // enforces them at the call boundary.
9
+ export function composeRunState(base, live, varying) {
10
+ return {
11
+ id: base.id, objective: base.objective, budget: base.budget,
12
+ remaining: live.remaining,
13
+ workerModel: base.workerModel, plannerModel: base.plannerModel, fastModel: base.fastModel,
14
+ workerProviderId: base.workerProviderId, plannerProviderId: base.plannerProviderId,
15
+ fastProviderId: base.fastProviderId,
16
+ concurrency: base.concurrency,
17
+ usageCap: base.usageCap, allowExtraUsage: base.allowExtraUsage, extraUsageBudget: base.extraUsageBudget,
18
+ flex: base.flex, useWorktrees: base.useWorktrees, mergeStrategy: base.mergeStrategy,
19
+ waveNum: live.waveNum,
20
+ currentTasks: varying.currentTasks,
21
+ accCost: live.accCost, accCompleted: live.accCompleted, accFailed: live.accFailed,
22
+ accIn: live.accIn, accOut: live.accOut, accTools: live.accTools,
23
+ branches: live.branches,
24
+ phase: varying.phase,
25
+ startedAt: base.startedAt, cwd: base.cwd,
26
+ repoFingerprint: base.repoFingerprint,
27
+ coachedObjective: base.coachedObjective,
28
+ coachedAt: base.coachedAt,
29
+ };
30
+ }
@@ -180,7 +180,20 @@ export function updateOvernightLogEnd(cwd, runId, meta) {
180
180
  }
181
181
  }
182
182
  // ── Run state persistence ──
183
+ /**
184
+ * Required fields on every persisted RunState. The type already marks these as
185
+ * non-optional, but callers that build state dynamically (or upcast through
186
+ * `any`) can still slip a truncated snapshot past the compiler. A truncated
187
+ * snapshot is silently excluded by `findIncompleteRuns` (cwd-equality filter),
188
+ * so the run becomes unresumable without any visible error. Guard at the write
189
+ * boundary so the bug surfaces where it's introduced, not weeks later.
190
+ */
191
+ const REQUIRED_RUN_STATE_FIELDS = ["cwd", "id", "phase", "startedAt"];
183
192
  export function saveRunState(runDir, state) {
193
+ const missing = REQUIRED_RUN_STATE_FIELDS.filter(k => !state[k]);
194
+ if (missing.length) {
195
+ throw new Error(`saveRunState: refusing to persist truncated state, missing fields: ${missing.join(", ")}`);
196
+ }
184
197
  mkdirSync(runDir, { recursive: true });
185
198
  writeFileSync(join(runDir, "run.json"), JSON.stringify(state, null, 2), "utf-8");
186
199
  }
@@ -9,6 +9,7 @@ import { withCursorWorkspaceHeader, getAgentTimeout } from "./config.js";
9
9
  import { renderPrompt } from "../prompts/load.js";
10
10
  import { AgentTimeoutError, StreamStalledError, isRateLimitError, isStreamStalledError, isTransientError, sleep } from "./errors.js";
11
11
  import { handleMsg, checkStreamHealth, NO_CONTENT_TIMEOUT_MS } from "./message-handler.js";
12
+ import { getModelCapability } from "../core/models.js";
12
13
  import { sdkQueryRateLimiter, acquireSdkQueryRateLimit } from "../core/rate-limiter.js";
13
14
  import { StreamSink } from "../core/transcripts.js";
14
15
  import { StallGuard, StallMonitor, runWithStallRotation } from "../core/stall-guard.js";
@@ -82,6 +83,16 @@ export async function runAgent(host, task) {
82
83
  host.log(id, `Worktree failed after retry -- running without isolation`);
83
84
  }
84
85
  }
86
+ const effectiveModelInit = task.model || host.model;
87
+ const contextKey = sessionContextKey(effectiveModelInit, agentCwd, host.config.envForModel?.(effectiveModelInit));
88
+ // Drop a saved sessionId whose (provider, model, cwd) no longer matches the
89
+ // live one. Otherwise the first resume call fails with "No conversation found
90
+ // with session ID" and burns an attempt before any tool use. See
91
+ // Task.resumeContextKey for the why.
92
+ if (task.resumeSessionId && task.resumeContextKey && task.resumeContextKey !== contextKey) {
93
+ host.log(id, `Dropping stale resume id (context changed: ${task.resumeContextKey} → ${contextKey})`);
94
+ task = { ...task, resumeSessionId: undefined, resumeContextKey: undefined };
95
+ }
85
96
  const isResumed = !!task.resumeSessionId;
86
97
  host.log(id, isResumed ? `Resuming: ${task.prompt.slice(0, 60)}` : `Starting: ${task.prompt.slice(0, 60)}`);
87
98
  const maxRetries = host.config.maxRetries ?? 2;
@@ -90,6 +101,21 @@ export async function runAgent(host, task) {
90
101
  // Hoisted so the catch block can read the session captured during the turn
91
102
  // when routing a pause-interrupt through the requeue path.
92
103
  let resumeSessionId = task.resumeSessionId;
104
+ // Carry the resume session forward only if the prior turn isn't already
105
+ // close to filling its context window. A saturated session would resume
106
+ // with little room to do real work and would auto-compact (or hit the
107
+ // window) almost immediately — cheaper to start the next attempt fresh.
108
+ const carrySession = () => {
109
+ if (!resumeSessionId)
110
+ return false;
111
+ const safe = getModelCapability(effectiveModelInit ?? "").safeContext;
112
+ const used = agent.peakContextTokens ?? agent.contextTokens ?? 0;
113
+ if (safe > 0 && used >= safe * 0.85) {
114
+ host.log(id, `Discarding resume id (context ${used}/${safe} tokens, near saturation)`);
115
+ return false;
116
+ }
117
+ return true;
118
+ };
93
119
  for (let attempt = 0; attempt <= maxRetries; attempt++) {
94
120
  if (attempt > 0) {
95
121
  const backoffMs = Math.min(30000, 1000 * 2 ** (attempt - 1)) * (0.5 + Math.random());
@@ -210,9 +236,10 @@ export async function runAgent(host, task) {
210
236
  if (!host.paused || agent.status !== "running")
211
237
  return false;
212
238
  agent.status = "paused";
213
- host.log(id, resumeSessionId ? "Paused mid-task (will resume)" : "Paused before first turn (will restart)");
214
- host.queue.unshift(resumeSessionId
215
- ? { ...task, resumeSessionId, agentCwd }
239
+ const carry = carrySession();
240
+ host.log(id, carry ? "Paused mid-task (will resume)" : "Paused before first turn (will restart)");
241
+ host.queue.unshift(carry
242
+ ? { ...task, resumeSessionId, resumeContextKey: contextKey, agentCwd }
216
243
  : { ...task });
217
244
  return true;
218
245
  };
@@ -299,9 +326,12 @@ export async function runAgent(host, task) {
299
326
  if (host.paused) {
300
327
  agent.status = "paused";
301
328
  host.log(id, "Paused mid-task (interrupt thrown)");
302
- // Reuse resume info when we already have a sessionId; otherwise restart fresh.
303
- const reuseSession = (typeof resumeSessionId === "string") && resumeSessionId.length > 0;
304
- host.queue.unshift(reuseSession ? { ...task, resumeSessionId, agentCwd } : { ...task });
329
+ // Reuse resume info when we have a sessionId AND the prior context isn't
330
+ // already saturated; otherwise restart fresh.
331
+ const reuseSession = carrySession();
332
+ host.queue.unshift(reuseSession
333
+ ? { ...task, resumeSessionId, resumeContextKey: contextKey, agentCwd }
334
+ : { ...task });
305
335
  return;
306
336
  }
307
337
  // Stream stall: the server went silent mid-response. If we captured a
@@ -423,6 +453,19 @@ function installLspFirstHookInto(worktreeDir) {
423
453
  // settings.local.json is gitignored by Claude Code convention — won't pollute the agent's commit.
424
454
  writeFileSync(join(dir, "settings.local.json"), JSON.stringify(settings, null, 2), "utf-8");
425
455
  }
456
+ /**
457
+ * Stable per-(provider, model, cwd) tag for scoping `resume` session ids.
458
+ * Provider matters because Cursor proxy and Anthropic direct keep separate
459
+ * backend session stores; cwd matters because the SDK keys its on-disk session
460
+ * cache by project path, so a recreated worktree under a new path can't find
461
+ * the prior conversation. Model is included for completeness.
462
+ */
463
+ function sessionContextKey(model, cwd, env) {
464
+ const isCursor = !!(env?.CURSOR_API_KEY || env?.CURSOR_AUTH_TOKEN || env?.CURSOR_BRIDGE_MODE);
465
+ const baseUrl = env?.ANTHROPIC_BASE_URL?.trim();
466
+ const provider = isCursor ? "cursor" : baseUrl ? `url:${baseUrl}` : "anthropic";
467
+ return `${provider}|${model ?? "default"}|${cwd}`;
468
+ }
426
469
  /** Extract a ### SKILL CANDIDATE block from agent text. Returns undefined if not found. */
427
470
  function extractSkillProposal(text) {
428
471
  const m = text.match(/###\s*SKILL CANDIDATE\s*\n([\s\S]+?)$/);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-overnight",
3
- "version": "1.50.0",
3
+ "version": "1.50.3",
4
4
  "description": "Parallel Claude agents in git worktrees with a usage cap that reserves headroom for your interactive Claude Code. Crash-safe resume. Provider-agnostic model catalog (Anthropic, Cursor, OpenAI, Gemini, DeepSeek, Llama, Qwen) with capability-based task scoping.",
5
5
  "type": "module",
6
6
  "bin": {
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-overnight",
3
- "version": "1.50.0",
3
+ "version": "1.50.3",
4
4
  "description": "Claude Code skill for understanding, installing, and inspecting claude-overnight runs -- parallel Claude agents in git worktrees with thinking waves, multi-wave steering, and crash-safe resume. Supports Cursor API Proxy, Qwen, OpenRouter.",
5
5
  "author": {
6
6
  "name": "Francesco Fornace"