gsd-pi 2.78.1-dev.d8826a445 → 2.78.1-dev.eccf86e27
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -7
- package/dist/help-text.js +1 -1
- package/dist/resource-loader.js +6 -1
- package/dist/resources/.managed-resources-content-hash +1 -1
- package/dist/resources/extensions/gsd/auto/detect-stuck.js +41 -5
- package/dist/resources/extensions/gsd/auto/loop.js +235 -36
- package/dist/resources/extensions/gsd/auto/phases.js +7 -5
- package/dist/resources/extensions/gsd/auto/session.js +33 -0
- package/dist/resources/extensions/gsd/auto-dispatch.js +46 -2
- package/dist/resources/extensions/gsd/auto-post-unit.js +19 -11
- package/dist/resources/extensions/gsd/auto-worktree.js +26 -187
- package/dist/resources/extensions/gsd/auto.js +79 -50
- package/dist/resources/extensions/gsd/bootstrap/register-hooks.js +9 -4
- package/dist/resources/extensions/gsd/crash-recovery.js +160 -47
- package/dist/resources/extensions/gsd/db/auto-workers.js +227 -0
- package/dist/resources/extensions/gsd/db/command-queue.js +105 -0
- package/dist/resources/extensions/gsd/db/milestone-leases.js +210 -0
- package/dist/resources/extensions/gsd/db/runtime-kv.js +91 -0
- package/dist/resources/extensions/gsd/db/unit-dispatches.js +322 -0
- package/dist/resources/extensions/gsd/docs/COORDINATION.md +42 -0
- package/dist/resources/extensions/gsd/doctor-proactive.js +4 -0
- package/dist/resources/extensions/gsd/doctor-runtime-checks.js +22 -6
- package/dist/resources/extensions/gsd/doctor.js +12 -2
- package/dist/resources/extensions/gsd/gsd-db.js +161 -3
- package/dist/resources/extensions/gsd/guided-flow.js +6 -2
- package/dist/resources/extensions/gsd/interrupted-session.js +18 -15
- package/dist/resources/extensions/gsd/state.js +21 -6
- package/dist/resources/extensions/gsd/worktree-resolver.js +64 -0
- package/dist/tsconfig.extensions.tsbuildinfo +1 -1
- package/dist/web/standalone/.next/BUILD_ID +1 -1
- package/dist/web/standalone/.next/app-path-routes-manifest.json +12 -12
- package/dist/web/standalone/.next/build-manifest.json +2 -2
- package/dist/web/standalone/.next/prerender-manifest.json +3 -3
- package/dist/web/standalone/.next/server/app/_global-error.html +1 -1
- package/dist/web/standalone/.next/server/app/_global-error.rsc +1 -1
- package/dist/web/standalone/.next/server/app/_global-error.segments/_full.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/_global-error.segments/_global-error/__PAGE__.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/_global-error.segments/_global-error.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/_global-error.segments/_head.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/_global-error.segments/_index.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/_global-error.segments/_tree.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/_not-found.html +1 -1
- package/dist/web/standalone/.next/server/app/_not-found.rsc +1 -1
- package/dist/web/standalone/.next/server/app/_not-found.segments/_full.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/_not-found.segments/_head.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/_not-found.segments/_index.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/_not-found.segments/_not-found/__PAGE__.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/_not-found.segments/_not-found.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/_not-found.segments/_tree.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/index.html +1 -1
- package/dist/web/standalone/.next/server/app/index.rsc +1 -1
- package/dist/web/standalone/.next/server/app/index.segments/__PAGE__.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/index.segments/_full.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/index.segments/_head.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/index.segments/_index.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app/index.segments/_tree.segment.rsc +1 -1
- package/dist/web/standalone/.next/server/app-paths-manifest.json +12 -12
- package/dist/web/standalone/.next/server/middleware-build-manifest.js +1 -1
- package/dist/web/standalone/.next/server/pages/404.html +1 -1
- package/dist/web/standalone/.next/server/pages/500.html +1 -1
- package/dist/web/standalone/.next/server/server-reference-manifest.json +1 -1
- package/package.json +1 -1
- package/src/resources/extensions/gsd/auto/detect-stuck.ts +37 -5
- package/src/resources/extensions/gsd/auto/loop.ts +263 -41
- package/src/resources/extensions/gsd/auto/phases.ts +7 -5
- package/src/resources/extensions/gsd/auto/session.ts +36 -0
- package/src/resources/extensions/gsd/auto-dispatch.ts +53 -2
- package/src/resources/extensions/gsd/auto-post-unit.ts +19 -11
- package/src/resources/extensions/gsd/auto-worktree.ts +26 -211
- package/src/resources/extensions/gsd/auto.ts +89 -44
- package/src/resources/extensions/gsd/bootstrap/register-hooks.ts +9 -4
- package/src/resources/extensions/gsd/crash-recovery.ts +177 -43
- package/src/resources/extensions/gsd/db/auto-workers.ts +273 -0
- package/src/resources/extensions/gsd/db/command-queue.ts +149 -0
- package/src/resources/extensions/gsd/db/milestone-leases.ts +274 -0
- package/src/resources/extensions/gsd/db/runtime-kv.ts +127 -0
- package/src/resources/extensions/gsd/db/unit-dispatches.ts +446 -0
- package/src/resources/extensions/gsd/docs/COORDINATION.md +42 -0
- package/src/resources/extensions/gsd/doctor-proactive.ts +4 -0
- package/src/resources/extensions/gsd/doctor-runtime-checks.ts +24 -6
- package/src/resources/extensions/gsd/doctor.ts +10 -2
- package/src/resources/extensions/gsd/gsd-db.ts +170 -3
- package/src/resources/extensions/gsd/guided-flow.ts +6 -2
- package/src/resources/extensions/gsd/interrupted-session.ts +19 -12
- package/src/resources/extensions/gsd/state.ts +44 -6
- package/src/resources/extensions/gsd/tests/auto-loop-no-copy-artifacts.test.ts +72 -0
- package/src/resources/extensions/gsd/tests/auto-loop-symlink-worktree.test.ts +190 -0
- package/src/resources/extensions/gsd/tests/auto-workers.test.ts +105 -0
- package/src/resources/extensions/gsd/tests/command-queue.test.ts +141 -0
- package/src/resources/extensions/gsd/tests/crash-recovery-via-db.test.ts +203 -0
- package/src/resources/extensions/gsd/tests/crash-recovery.test.ts +169 -59
- package/src/resources/extensions/gsd/tests/detect-stuck-respects-retry.test.ts +173 -0
- package/src/resources/extensions/gsd/tests/integration/auto-worktree.test.ts +22 -12
- package/src/resources/extensions/gsd/tests/integration/doctor-proactive.test.ts +24 -10
- package/src/resources/extensions/gsd/tests/integration/doctor-runtime.test.ts +35 -23
- package/src/resources/extensions/gsd/tests/integration/workspace-collapse-integration.test.ts +3 -5
- package/src/resources/extensions/gsd/tests/interrupted-session-auto.test.ts +72 -25
- package/src/resources/extensions/gsd/tests/interrupted-session-ui.test.ts +72 -25
- package/src/resources/extensions/gsd/tests/memory-pressure-stuck-state.test.ts +9 -6
- package/src/resources/extensions/gsd/tests/milestone-leases.test.ts +152 -0
- package/src/resources/extensions/gsd/tests/parallel-milestone-isolation.test.ts +106 -0
- package/src/resources/extensions/gsd/tests/paused-session-via-db.test.ts +119 -0
- package/src/resources/extensions/gsd/tests/pipeline-variant-dispatch.test.ts +58 -0
- package/src/resources/extensions/gsd/tests/preferences-worktree-sync.test.ts +3 -17
- package/src/resources/extensions/gsd/tests/register-hooks-depth-verification.test.ts +110 -0
- package/src/resources/extensions/gsd/tests/runtime-kv.test.ts +120 -0
- package/src/resources/extensions/gsd/tests/skipped-validation-completion.test.ts +133 -28
- package/src/resources/extensions/gsd/tests/skipped-validation-db-atomicity.test.ts +17 -0
- package/src/resources/extensions/gsd/tests/stuck-state-via-db.test.ts +134 -0
- package/src/resources/extensions/gsd/tests/sync-layer-scope.test.ts +7 -26
- package/src/resources/extensions/gsd/tests/teardown-cleanup-parity.test.ts +4 -8
- package/src/resources/extensions/gsd/tests/unit-dispatches.test.ts +247 -0
- package/src/resources/extensions/gsd/tests/validate-milestone.test.ts +41 -1
- package/src/resources/extensions/gsd/tests/workspace.test.ts +15 -9
- package/src/resources/extensions/gsd/tests/write-gate.test.ts +31 -23
- package/src/resources/extensions/gsd/worktree-resolver.ts +62 -0
- package/src/resources/extensions/gsd/tests/auto-lock-creation.test.ts +0 -213
- package/src/resources/extensions/gsd/tests/auto-stale-lock-self-kill.test.ts +0 -87
- package/src/resources/extensions/gsd/tests/stop-auto-remote.test.ts +0 -159
- /package/dist/web/standalone/.next/static/{AT5qi39nKXkdmQIOIoh0f → Y5UeGFkXTYM9WIQOWHkot}/_buildManifest.js +0 -0
- /package/dist/web/standalone/.next/static/{AT5qi39nKXkdmQIOIoh0f → Y5UeGFkXTYM9WIQOWHkot}/_ssgManifest.js +0 -0
package/README.md
CHANGED
|
@@ -322,7 +322,7 @@ The database is authoritative for milestones, slices, tasks, requirements, decis
|
|
|
322
322
|
|
|
323
323
|
3. **Git isolation** — When `git.isolation` is set to `worktree` or `branch`, each milestone runs on its own `milestone/<MID>` branch (in a worktree or in-place). All slice work commits sequentially — no branch switching, no merge conflicts. When the milestone completes, it's squash-merged to main as one clean commit. The default is `none` (work on the current branch), configurable via preferences. If `worktree` is configured in a repo with no committed `HEAD`, GSD temporarily behaves as `none` until the first commit exists because git worktrees need a committed start point.
|
|
324
324
|
|
|
325
|
-
4. **Crash recovery** —
|
|
325
|
+
4. **Crash recovery** — Auto mode persists worker state, unit-dispatch state, and paused-session metadata in the project-root SQLite database. If the session dies, the next `/gsd auto` reconstructs the interrupted unit from DB-backed runtime state, reads the surviving session file, synthesizes a recovery briefing from every tool call that made it to disk, and resumes with full context. Parallel orchestrator IPC still lives under `.gsd/parallel/`, so multi-worker sessions survive crashes too. In headless mode, crashes trigger automatic restart with exponential backoff (default 3 attempts).
|
|
326
326
|
|
|
327
327
|
5. **Provider error recovery** — Transient provider errors (rate limits, 500/503 server errors, overloaded) auto-resume after a delay. Permanent errors (auth, billing) pause for manual review. The model fallback chain retries transient network errors before switching models.
|
|
328
328
|
|
|
@@ -414,7 +414,7 @@ gsd
|
|
|
414
414
|
/gsd queue # queue the next milestone
|
|
415
415
|
```
|
|
416
416
|
|
|
417
|
-
Both terminals
|
|
417
|
+
Both terminals coordinate through the same project-root GSD runtime on local disk. The SQLite database is authoritative, `.gsd/` markdown is refreshed from it, and your decisions in terminal 2 are picked up at the next phase boundary without stopping auto mode.
|
|
418
418
|
|
|
419
419
|
### Headless mode — CI and scripts
|
|
420
420
|
|
|
@@ -439,7 +439,7 @@ gsd headless dispatch plan
|
|
|
439
439
|
|
|
440
440
|
Headless auto-responds to interactive prompts, detects completion, and exits with structured codes: `0` complete, `1` error/timeout, `2` blocked. Auto-restarts on crash with exponential backoff. Use `gsd headless query` for instant, machine-readable state inspection — returns phase, next dispatch preview, and parallel worker costs as a single JSON object without spawning an LLM session. Pair with [remote questions](./docs/user-docs/remote-questions.md) to route decisions to Slack or Discord when human input is needed.
|
|
441
441
|
|
|
442
|
-
**Multi-session orchestration** — headless mode supports
|
|
442
|
+
**Multi-session orchestration** — headless mode supports DB-backed coordination across multiple GSD workers on the same machine. Worker registration, milestone leases, unit dispatch tracking, and command delivery live in `.gsd/gsd.db`, while `.gsd/parallel/` remains a local runtime area for per-milestone locks and isolation artifacts.
|
|
443
443
|
|
|
444
444
|
### First launch
|
|
445
445
|
|
|
@@ -705,8 +705,6 @@ The best practice for working in teams is to ensure unique milestone names acros
|
|
|
705
705
|
|
|
706
706
|
```bash
|
|
707
707
|
# ── GSD: Runtime / Ephemeral (per-developer, per-session) ──────────────────
|
|
708
|
-
# Crash detection sentinel — PID lock, written per auto-mode session
|
|
709
|
-
.gsd/auto.lock
|
|
710
708
|
# Auto-mode dispatch tracker — prevents re-running completed units (includes archived per-milestone files)
|
|
711
709
|
.gsd/completed-units*.json
|
|
712
710
|
# State manifest — workflow state for recovery
|
|
@@ -717,11 +715,11 @@ The best practice for working in teams is to ensure unique milestone names acros
|
|
|
717
715
|
.gsd/metrics.json
|
|
718
716
|
# Raw JSONL session dumps — crash recovery forensics, auto-pruned
|
|
719
717
|
.gsd/activity/
|
|
720
|
-
# Unit execution records — dispatch phase, timeouts, recovery tracking
|
|
718
|
+
# Unit execution records — dispatch phase, timeouts, and recovery tracking
|
|
721
719
|
.gsd/runtime/
|
|
722
720
|
# Git worktree working copies
|
|
723
721
|
.gsd/worktrees/
|
|
724
|
-
# Parallel
|
|
722
|
+
# Parallel runtime locks and per-milestone isolation artifacts
|
|
725
723
|
.gsd/parallel/
|
|
726
724
|
# SQLite database and WAL sidecars — authoritative runtime state, local only
|
|
727
725
|
.gsd/gsd.db*
|
package/dist/help-text.js
CHANGED
|
@@ -156,7 +156,7 @@ const SUBCOMMAND_HELP = {
|
|
|
156
156
|
' gsd headless --answers answers.json auto With pre-supplied answers',
|
|
157
157
|
' gsd headless --events agent_end,extension_ui_request auto Filtered event stream',
|
|
158
158
|
' gsd headless query Instant JSON state snapshot',
|
|
159
|
-
' gsd headless recover
|
|
159
|
+
' gsd headless recover Reset hierarchy + validation/gates, then rebuild from markdown',
|
|
160
160
|
'',
|
|
161
161
|
'Exit codes: 0 = success, 1 = error/timeout, 10 = blocked, 11 = cancelled',
|
|
162
162
|
].join('\n'),
|
package/dist/resource-loader.js
CHANGED
|
@@ -29,6 +29,9 @@ export function getExtensionKey(entryPath, extensionsDir) {
|
|
|
29
29
|
const relPath = relative(extensionsDir, entryPath);
|
|
30
30
|
return relPath.split(/[\\/]/)[0].replace(/\.(?:ts|js)$/, '');
|
|
31
31
|
}
|
|
32
|
+
/**
 * Reduce a version string to its bare release core for comparison.
 *
 * Trims whitespace, drops one leading "v", and cuts the string at the first
 * "+" or "-". Note that this removes pre-release tags (e.g. "-dev.<sha>") as
 * well as build metadata, so "2.78.1-dev.abc" and "2.78.1" compare as equal.
 *
 * @param version Version string such as "v2.78.1-dev.abc" or "1.2.3+build".
 * @returns The core version, or "0.0.0" when the input is not a string or
 *   nothing remains after stripping.
 */
function stripSemverBuildMetadata(version) {
    // Nullish / non-string input gets the same fallback as an empty string
    // instead of throwing a TypeError on `.trim()`.
    if (typeof version !== 'string') {
        return '0.0.0';
    }
    return version.trim().replace(/^v/, '').split(/[+-]/, 1)[0] || '0.0.0';
}
|
|
32
35
|
function getManagedResourceManifestPath(agentDir) {
|
|
33
36
|
return join(agentDir, resourceVersionManifestName);
|
|
34
37
|
}
|
|
@@ -166,7 +169,9 @@ export function getNewerManagedResourceVersion(agentDir, currentVersion) {
|
|
|
166
169
|
if (!managedVersion) {
|
|
167
170
|
return null;
|
|
168
171
|
}
|
|
169
|
-
|
|
172
|
+
// Managed resources stamped from the same release line should remain usable
|
|
173
|
+
// against local dev binaries like 2.78.1-dev.<sha>.
|
|
174
|
+
return compareSemver(stripSemverBuildMetadata(managedVersion), stripSemverBuildMetadata(currentVersion)) > 0 ? managedVersion : null;
|
|
170
175
|
}
|
|
171
176
|
/**
|
|
172
177
|
* Recursively makes all files and directories under dirPath owner-writable.
|
|
@@ -1 +1 @@
|
|
|
1
|
-
|
|
1
|
+
36cc9805e706289c
|
|
@@ -4,19 +4,53 @@
|
|
|
4
4
|
* Leaf node in the import DAG.
|
|
5
5
|
*/
|
|
6
6
|
import { summarizeLogs } from "../workflow-logger.js";
|
|
7
|
+
import { getLatestForUnit } from "../db/unit-dispatches.js";
|
|
7
8
|
/**
|
|
8
9
|
* Pattern matching ENOENT errors with a file path.
|
|
9
10
|
* Matches: "ENOENT: no such file or directory, access '/path/to/file'"
|
|
10
11
|
* and similar Node.js filesystem error messages.
|
|
11
12
|
*/
|
|
12
13
|
const ENOENT_PATH_RE = /ENOENT[^']*'([^']+)'/;
|
|
14
|
+
/**
 * Retry coupling for stuck detection (Phase B / codex review MEDIUM B3).
 *
 * Looks up the latest unit_dispatches row for `unitKey`. When that row still
 * has retry budget left (attempt_n has not reached max_attempts) and its
 * next_run_at timestamp lies in the future, the loop is waiting on its own
 * backoff, so the stuck verdict should be held back.
 *
 * @param unitKey Unit key to look up in the dispatch ledger.
 * @returns true when the stuck signal should be suppressed; false when there
 *   is no row, the budget is spent, next_run_at is missing or unparseable,
 *   the backoff has already elapsed, or the lookup throws.
 */
function retryBudgetSuppresses(unitKey) {
    try {
        const row = getLatestForUnit(unitKey);
        if (!row || row.attempt_n >= row.max_attempts || !row.next_run_at) {
            return false;
        }
        const resumeAt = Date.parse(row.next_run_at);
        // Unparseable timestamps yield NaN, which must never suppress.
        return Number.isFinite(resumeAt) && resumeAt > Date.now();
    }
    catch {
        // Ledger unavailable: no opinion, so do not suppress.
        return false;
    }
}
|
|
13
45
|
/**
|
|
14
46
|
* Analyze a sliding window of recent unit dispatches for stuck patterns.
|
|
15
47
|
* Returns a signal with reason if stuck, null otherwise.
|
|
16
48
|
*
|
|
17
49
|
* Rule 1: Same error string twice in a row → stuck immediately.
|
|
18
50
|
* Rule 2: Same unit key 3+ consecutive times → stuck (preserves prior behavior).
|
|
19
|
-
* Rule 2b: Same unit key appears 3+ times anywhere in the active window → stuck
|
|
51
|
+
* Rule 2b: Same unit key appears 3+ times anywhere in the active window → stuck,
|
|
52
|
+
* UNLESS unit_dispatches says we're inside the retry-backoff window
|
|
53
|
+
* (codex review MEDIUM B3 — Phase B retry coupling).
|
|
20
54
|
* Rule 3: Oscillation A→B→A→B in last 4 entries → stuck.
|
|
21
55
|
* Rule 4: Same ENOENT path in any 2 entries within the window → stuck (#3575).
|
|
22
56
|
* Missing files don't self-heal between retries — retrying wastes budget.
|
|
@@ -39,19 +73,21 @@ export function detectStuck(window) {
|
|
|
39
73
|
reason: `Same error repeated: ${last.error.slice(0, 200)}${suffix}`,
|
|
40
74
|
};
|
|
41
75
|
}
|
|
42
|
-
// Rule 2: Same unit 3+ consecutive times
|
|
76
|
+
// Rule 2: Same unit 3+ consecutive times — suppressed if unit_dispatches
|
|
77
|
+
// says we're inside the retry-backoff window (codex MEDIUM B3).
|
|
43
78
|
if (window.length >= 3) {
|
|
44
79
|
const lastThree = window.slice(-3);
|
|
45
|
-
if (lastThree.every((u) => u.key === last.key)) {
|
|
80
|
+
if (lastThree.every((u) => u.key === last.key) && !retryBudgetSuppresses(last.key)) {
|
|
46
81
|
return {
|
|
47
82
|
stuck: true,
|
|
48
83
|
reason: `${last.key} derived 3 consecutive times without progress${suffix}`,
|
|
49
84
|
};
|
|
50
85
|
}
|
|
51
86
|
}
|
|
52
|
-
// Rule 2b: Same unit key 3+ times anywhere in the active window
|
|
87
|
+
// Rule 2b: Same unit key 3+ times anywhere in the active window — same
|
|
88
|
+
// retry-budget suppression as Rule 2.
|
|
53
89
|
const countInWindow = window.filter((entry) => entry.key === last.key).length;
|
|
54
|
-
if (countInWindow >= 3) {
|
|
90
|
+
if (countInWindow >= 3 && !retryBudgetSuppresses(last.key)) {
|
|
55
91
|
return {
|
|
56
92
|
stuck: true,
|
|
57
93
|
reason: `${last.key} derived ${countInWindow} times in last ${window.length} attempts without progress${suffix}`,
|
|
@@ -16,49 +16,55 @@ import { ModelPolicyDispatchBlockedError } from "../auto-model-selection.js";
|
|
|
16
16
|
import { resolveEngine } from "../engine-resolver.js";
|
|
17
17
|
import { logWarning } from "../workflow-logger.js";
|
|
18
18
|
import { gsdRoot } from "../paths.js";
|
|
19
|
+
import { heartbeatAutoWorker } from "../db/auto-workers.js";
|
|
20
|
+
import { recordDispatchClaim, markRunning as markDispatchRunning, markCompleted as markDispatchCompleted, markFailed as markDispatchFailed, getRecentForUnit as getRecentDispatchesForUnit, getRecentUnitKeysForProjectRoot, } from "../db/unit-dispatches.js";
|
|
21
|
+
import { refreshMilestoneLease } from "../db/milestone-leases.js";
|
|
22
|
+
import { getRuntimeKv, setRuntimeKv } from "../db/runtime-kv.js";
|
|
19
23
|
import { atomicWriteSync } from "../atomic-write.js";
|
|
20
24
|
import { resolveUokFlags } from "../uok/flags.js";
|
|
21
25
|
import { scheduleSidecarQueue } from "../uok/execution-graph.js";
|
|
22
26
|
import { ExecutionGraphScheduler } from "../uok/execution-graph.js";
|
|
23
|
-
import { readFileSync,
|
|
27
|
+
import { readFileSync, mkdirSync, unlinkSync } from "node:fs";
|
|
24
28
|
import { join } from "node:path";
|
|
29
|
+
import { normalizeRealPath } from "../paths.js";
|
|
25
30
|
// ── Stuck detection persistence (#3704) ──────────────────────────────────
|
|
26
|
-
//
|
|
27
|
-
//
|
|
28
|
-
//
|
|
29
|
-
|
|
30
|
-
|
|
31
|
+
// Phase C migration: stuck-state.json deleted in favor of DB-backed
|
|
32
|
+
// equivalents. recentUnits is rebuilt from unit_dispatches (Phase B
|
|
33
|
+
// ledger) on session start; stuckRecoveryAttempts persists in runtime_kv
|
|
34
|
+
// under a stable project scope (soft state per the runtime_kv invariant). Single-host
|
|
35
|
+
// SQLite WAL only — multi-host would need a real coordinator.
|
|
36
|
+
//
|
|
37
|
+
// When no worker is registered (DB unavailable, fresh project), both
|
|
38
|
+
// helpers degrade to the empty-state fallback that #3704 already
|
|
39
|
+
// tolerates — same behavior as a fresh session.
|
|
40
|
+
/** runtime_kv key that holds the stuck-recovery attempt counter. */
const STUCK_RECOVERY_ATTEMPTS_KEY = "stuck_recovery_attempts";
/** How many recent unit keys to request when rebuilding the stuck window. */
const RECENT_UNIT_KEYS_LIMIT = 20;
/**
 * Resolve the scope id used to key persisted stuck state.
 *
 * Uses the workspace project root from `s.scope` when a scope is present;
 * otherwise falls back to `s.originalBasePath`, then `s.basePath`. The chosen
 * path is passed through `normalizeRealPath`.
 *
 * @param s Auto-mode session.
 * @returns The normalized path returned by `normalizeRealPath`.
 */
function stableStuckStateScopeId(s) {
    const scopedRoot = s.scope?.workspace.projectRoot;
    const rootPath = scopedRoot ?? (s.originalBasePath || s.basePath);
    return normalizeRealPath(rootPath);
}
|
|
32
|
-
function loadStuckState(
|
|
45
|
+
/**
 * Load persisted stuck-detection state for the session's project scope.
 *
 * `recentUnits` comes from `getRecentUnitKeysForProjectRoot` (capped at
 * RECENT_UNIT_KEYS_LIMIT) and `stuckRecoveryAttempts` from the runtime_kv
 * entry STUCK_RECOVERY_ATTEMPTS_KEY in the "global" namespace.
 *
 * Both values are validated before use: a non-array unit list becomes `[]`
 * and a missing or non-numeric counter becomes `0`, so malformed persisted
 * data cannot leak into the loop's counters.
 *
 * @param s Auto-mode session used to derive the scope id.
 * @returns `{ recentUnits, stuckRecoveryAttempts }`; the empty state
 *   (`[]`, `0`) when no scope id resolves or any lookup throws.
 */
function loadStuckState(s) {
    const scopeId = stableStuckStateScopeId(s);
    if (!scopeId)
        return { recentUnits: [], stuckRecoveryAttempts: 0 };
    try {
        const recentUnits = getRecentUnitKeysForProjectRoot(scopeId, RECENT_UNIT_KEYS_LIMIT);
        const storedAttempts = getRuntimeKv("global", scopeId, STUCK_RECOVERY_ATTEMPTS_KEY);
        const attemptsAreValid = typeof storedAttempts === "number" && Number.isFinite(storedAttempts);
        return {
            recentUnits: Array.isArray(recentUnits) ? recentUnits : [],
            stuckRecoveryAttempts: attemptsAreValid ? storedAttempts : 0,
        };
    }
    catch (err) {
        // Best-effort: a failed load behaves like a fresh session.
        debugLog("autoLoop", { phase: "load-stuck-state-failed", error: err instanceof Error ? err.message : String(err) });
        return { recentUnits: [], stuckRecoveryAttempts: 0 };
    }
}
|
|
52
|
-
function saveStuckState(
|
|
59
|
+
function saveStuckState(s, state) {
|
|
60
|
+
const scopeId = stableStuckStateScopeId(s);
|
|
61
|
+
if (!scopeId)
|
|
62
|
+
return;
|
|
63
|
+
// recentUnits is automatically derived from unit_dispatches by the
|
|
64
|
+
// dispatch ledger writes in openDispatchClaim — no separate persistence
|
|
65
|
+
// needed. Only the soft retry counter needs a runtime_kv row.
|
|
53
66
|
try {
|
|
54
|
-
|
|
55
|
-
mkdirSync(join(gsdRoot(basePath), "runtime"), { recursive: true });
|
|
56
|
-
writeFileSync(filePath, JSON.stringify({
|
|
57
|
-
pid: process.pid,
|
|
58
|
-
recentUnits: state.recentUnits.slice(-20), // keep last 20 entries
|
|
59
|
-
stuckRecoveryAttempts: state.stuckRecoveryAttempts,
|
|
60
|
-
updatedAt: new Date().toISOString(),
|
|
61
|
-
}) + "\n");
|
|
67
|
+
setRuntimeKv("global", scopeId, STUCK_RECOVERY_ATTEMPTS_KEY, state.stuckRecoveryAttempts);
|
|
62
68
|
}
|
|
63
69
|
catch (err) {
|
|
64
70
|
debugLog("autoLoop", { phase: "save-stuck-state-failed", error: err instanceof Error ? err.message : String(err) });
|
|
@@ -115,6 +121,57 @@ function saveCustomVerifyRetryCounts(s) {
|
|
|
115
121
|
}
|
|
116
122
|
}
|
|
117
123
|
}
|
|
124
|
+
/**
 * Try to open a dispatch-ledger claim for the unit about to run.
 *
 * The attempt number is one more than the `attempt_n` of the most recent
 * ledger row for the unit (or 1 when there is none). All ledger access,
 * including that lookup, happens inside the try block so that any ledger
 * failure degrades instead of throwing into the caller.
 *
 * @param s Auto-mode session; reads `workerId` and `milestoneLeaseToken`.
 * @param flowId Per-iteration flow id, recorded as the claim's `traceId`.
 * @param turnId Turn identifier recorded on the claim.
 * @param iterData Iteration data; reads `mid`, `unitId`, `unitType`, and
 *   `state.activeSlice` / `state.activeTask`.
 * @returns One of:
 *   - `{ kind: "opened", dispatchId }` when the claim was recorded and marked running;
 *   - `{ kind: "skip", reason: "already-active", existingId, existingWorker }`
 *     when the claim was rejected with error "already_active";
 *   - `{ kind: "skip", reason: "stale-lease" }` for any other rejection;
 *   - `{ kind: "degraded" }` when there is no worker id, the lease token is
 *     null, there is no milestone id, or a ledger call threw.
 */
function openDispatchClaim(s, flowId, turnId, iterData) {
    if (!s.workerId || s.milestoneLeaseToken === null)
        return { kind: "degraded" };
    const mid = iterData.mid;
    if (!mid)
        return { kind: "degraded" };
    try {
        // Read the previous attempt inside the try: a ledger read failure must
        // degrade exactly like a ledger write failure.
        const recent = getRecentDispatchesForUnit(iterData.unitId, 1);
        const attemptN = (recent[0]?.attempt_n ?? 0) + 1;
        const claim = recordDispatchClaim({
            traceId: flowId,
            turnId,
            workerId: s.workerId,
            milestoneLeaseToken: s.milestoneLeaseToken,
            milestoneId: mid,
            sliceId: iterData.state.activeSlice?.id ?? null,
            taskId: iterData.state.activeTask?.id ?? null,
            unitType: iterData.unitType,
            unitId: iterData.unitId,
            attemptN,
        });
        if (!claim.ok) {
            debugLog("autoLoop", {
                phase: "dispatch-claim-rejected",
                unitId: iterData.unitId,
                reason: claim.error,
                existingId: "existingId" in claim ? claim.existingId : undefined,
                existingWorker: "existingWorker" in claim ? claim.existingWorker : undefined,
            });
            if (claim.error === "already_active") {
                return {
                    kind: "skip",
                    reason: "already-active",
                    existingId: claim.existingId,
                    existingWorker: claim.existingWorker,
                };
            }
            // Every other rejection is reported to the caller as a stale lease.
            return { kind: "skip", reason: "stale-lease" };
        }
        markDispatchRunning(claim.dispatchId);
        return { kind: "opened", dispatchId: claim.dispatchId };
    }
    catch (err) {
        debugLog("autoLoop", {
            phase: "dispatch-claim-failed",
            error: err instanceof Error ? err.message : String(err),
        });
        return { kind: "degraded" };
    }
}
|
|
118
175
|
// ── Memory pressure monitoring (#3331) ──────────────────────────────────
|
|
119
176
|
// Check heap usage every N iterations and trigger graceful shutdown before
|
|
120
177
|
// the OS OOM killer sends SIGKILL. The threshold is 90% of the V8 heap
|
|
@@ -220,7 +277,7 @@ export async function autoLoop(ctx, pi, s, deps, options) {
|
|
|
220
277
|
let iteration = 0;
|
|
221
278
|
const dispatchContract = options?.dispatchContract ?? "legacy-direct";
|
|
222
279
|
// Load persisted stuck state so counters survive session restarts (#3704)
|
|
223
|
-
const persisted = loadStuckState(s
|
|
280
|
+
const persisted = loadStuckState(s);
|
|
224
281
|
const loopState = {
|
|
225
282
|
recentUnits: persisted.recentUnits,
|
|
226
283
|
stuckRecoveryAttempts: persisted.stuckRecoveryAttempts,
|
|
@@ -232,6 +289,23 @@ export async function autoLoop(ctx, pi, s, deps, options) {
|
|
|
232
289
|
while (s.active) {
|
|
233
290
|
iteration++;
|
|
234
291
|
debugLog("autoLoop", { phase: "loop-top", iteration });
|
|
292
|
+
// Phase B: heartbeat the worker registry + active milestone lease so
|
|
293
|
+
// janitors and concurrent workers see a live process. Best-effort —
|
|
294
|
+
// DB unavailability or stale state must not stop the loop.
|
|
295
|
+
if (s.workerId) {
|
|
296
|
+
try {
|
|
297
|
+
heartbeatAutoWorker(s.workerId);
|
|
298
|
+
if (s.currentMilestoneId && s.milestoneLeaseToken) {
|
|
299
|
+
refreshMilestoneLease(s.workerId, s.currentMilestoneId, s.milestoneLeaseToken);
|
|
300
|
+
}
|
|
301
|
+
}
|
|
302
|
+
catch (err) {
|
|
303
|
+
debugLog("autoLoop", {
|
|
304
|
+
phase: "heartbeat-failed",
|
|
305
|
+
error: err instanceof Error ? err.message : String(err),
|
|
306
|
+
});
|
|
307
|
+
}
|
|
308
|
+
}
|
|
235
309
|
// ── Journal: per-iteration flow grouping ──
|
|
236
310
|
const flowId = randomUUID();
|
|
237
311
|
let seqCounter = 0;
|
|
@@ -299,6 +373,8 @@ export async function autoLoop(ctx, pi, s, deps, options) {
|
|
|
299
373
|
finishTurn("stopped", "manual-attention", "missing-command-context");
|
|
300
374
|
break;
|
|
301
375
|
}
|
|
376
|
+
let dispatchId = null;
|
|
377
|
+
let dispatchSettled = false;
|
|
302
378
|
try {
|
|
303
379
|
// ── Blanket try/catch: one bad iteration must not kill the session
|
|
304
380
|
const prefs = deps.loadEffectiveGSDPreferences()?.preferences;
|
|
@@ -359,7 +435,17 @@ export async function autoLoop(ctx, pi, s, deps, options) {
|
|
|
359
435
|
activeEngineId: s.activeEngineId,
|
|
360
436
|
activeRunDir: s.activeRunDir,
|
|
361
437
|
});
|
|
362
|
-
const engineState = await engine.deriveState(s.
|
|
438
|
+
const engineState = await engine.deriveState(s.canonicalProjectRoot);
|
|
439
|
+
debugLog("autoLoop", {
|
|
440
|
+
phase: "post-derive",
|
|
441
|
+
site: "custom-engine-derive",
|
|
442
|
+
basePath: s.basePath,
|
|
443
|
+
originalBasePath: s.originalBasePath,
|
|
444
|
+
scopeProjectRoot: s.scope?.workspace.projectRoot,
|
|
445
|
+
canonicalProjectRoot: s.canonicalProjectRoot,
|
|
446
|
+
derivedPhase: engineState.phase,
|
|
447
|
+
isComplete: engineState.isComplete,
|
|
448
|
+
});
|
|
363
449
|
if (engineState.isComplete) {
|
|
364
450
|
await deps.stopAuto(ctx, pi, "Workflow complete");
|
|
365
451
|
break;
|
|
@@ -375,7 +461,15 @@ export async function autoLoop(ctx, pi, s, deps, options) {
|
|
|
375
461
|
}
|
|
376
462
|
// dispatch.action === "dispatch"
|
|
377
463
|
const step = dispatch.step;
|
|
378
|
-
const gsdState = await deps.deriveState(s.
|
|
464
|
+
const gsdState = await deps.deriveState(s.canonicalProjectRoot);
|
|
465
|
+
debugLog("autoLoop", {
|
|
466
|
+
phase: "post-derive",
|
|
467
|
+
site: "custom-engine-gsd-state",
|
|
468
|
+
basePath: s.basePath,
|
|
469
|
+
canonicalProjectRoot: s.canonicalProjectRoot,
|
|
470
|
+
derivedPhase: gsdState.phase,
|
|
471
|
+
activeUnit: gsdState.activeTask?.id ?? gsdState.activeSlice?.id ?? gsdState.activeMilestone?.id,
|
|
472
|
+
});
|
|
379
473
|
iterData = {
|
|
380
474
|
unitType: step.unitType,
|
|
381
475
|
unitId: step.unitId,
|
|
@@ -478,7 +572,7 @@ export async function autoLoop(ctx, pi, s, deps, options) {
|
|
|
478
572
|
consecutiveCooldowns = 0;
|
|
479
573
|
recentErrorMessages.length = 0;
|
|
480
574
|
deps.emitJournalEvent({ ts: new Date().toISOString(), flowId, seq: nextSeq(), eventType: "iteration-end", data: { iteration } });
|
|
481
|
-
saveStuckState(s
|
|
575
|
+
saveStuckState(s, loopState); // persist across session restarts (#3704)
|
|
482
576
|
debugLog("autoLoop", { phase: "iteration-complete", iteration });
|
|
483
577
|
if (reconcileResult.outcome === "milestone-complete") {
|
|
484
578
|
await deps.stopAuto(ctx, pi, "Workflow complete");
|
|
@@ -552,7 +646,15 @@ export async function autoLoop(ctx, pi, s, deps, options) {
|
|
|
552
646
|
}
|
|
553
647
|
else {
|
|
554
648
|
// ── Sidecar path: use values from the sidecar item directly ──
|
|
555
|
-
const sidecarState = await deps.deriveState(s.
|
|
649
|
+
const sidecarState = await deps.deriveState(s.canonicalProjectRoot);
|
|
650
|
+
debugLog("autoLoop", {
|
|
651
|
+
phase: "post-derive",
|
|
652
|
+
site: "sidecar",
|
|
653
|
+
basePath: s.basePath,
|
|
654
|
+
canonicalProjectRoot: s.canonicalProjectRoot,
|
|
655
|
+
derivedPhase: sidecarState.phase,
|
|
656
|
+
activeUnit: sidecarState.activeTask?.id ?? sidecarState.activeSlice?.id ?? sidecarState.activeMilestone?.id,
|
|
657
|
+
});
|
|
556
658
|
iterData = {
|
|
557
659
|
unitType: sidecarItem.unitType,
|
|
558
660
|
unitId: sidecarItem.unitId,
|
|
@@ -573,7 +675,39 @@ export async function autoLoop(ctx, pi, s, deps, options) {
|
|
|
573
675
|
});
|
|
574
676
|
}
|
|
575
677
|
await enforceMinRequestInterval(s, prefs);
|
|
576
|
-
|
|
678
|
+
// Phase B: claim a unit_dispatches row before invoking the unit. The
|
|
679
|
+
// partial unique index idx_unit_dispatches_active_per_unit prevents
|
|
680
|
+
// a second worker from claiming the same unit concurrently. Returns
|
|
681
|
+
// null when DB unavailable, no worker registered, or no active lease
|
|
682
|
+
// — those degraded paths fall through to the existing single-worker
|
|
683
|
+
// semantics with no ledger entry, preserving back-compat.
|
|
684
|
+
const dispatchClaim = openDispatchClaim(s, flowId, turnId, iterData);
|
|
685
|
+
if (dispatchClaim.kind === "skip") {
|
|
686
|
+
finishTurn("skipped", "execution", dispatchClaim.reason);
|
|
687
|
+
continue;
|
|
688
|
+
}
|
|
689
|
+
dispatchId = dispatchClaim.kind === "opened" ? dispatchClaim.dispatchId : null;
|
|
690
|
+
let unitPhaseResult;
|
|
691
|
+
try {
|
|
692
|
+
unitPhaseResult = await runUnitPhaseViaContract(dispatchContract, ic, iterData, loopState, sidecarItem);
|
|
693
|
+
}
|
|
694
|
+
catch (err) {
|
|
695
|
+
if (err instanceof ModelPolicyDispatchBlockedError) {
|
|
696
|
+
throw err;
|
|
697
|
+
}
|
|
698
|
+
if (dispatchId !== null) {
|
|
699
|
+
try {
|
|
700
|
+
markDispatchFailed(dispatchId, {
|
|
701
|
+
errorSummary: `exception:${err instanceof Error ? err.message : String(err)}`,
|
|
702
|
+
});
|
|
703
|
+
dispatchSettled = true;
|
|
704
|
+
}
|
|
705
|
+
catch (ledgerErr) {
|
|
706
|
+
debugLog("autoLoop", { phase: "dispatch-ledger-write-failed", error: ledgerErr instanceof Error ? ledgerErr.message : String(ledgerErr) });
|
|
707
|
+
}
|
|
708
|
+
}
|
|
709
|
+
throw err;
|
|
710
|
+
}
|
|
577
711
|
if (unitPhaseResult.action === "next") {
|
|
578
712
|
const requestTimestamp = unitPhaseResult.data.requestDispatchedAt ?? unitPhaseResult.data.unitStartedAt;
|
|
579
713
|
if (typeof requestTimestamp === "number")
|
|
@@ -584,11 +718,37 @@ export async function autoLoop(ctx, pi, s, deps, options) {
|
|
|
584
718
|
unitId: iterData.unitId,
|
|
585
719
|
});
|
|
586
720
|
if (unitPhaseResult.action === "break") {
|
|
721
|
+
if (dispatchId !== null) {
|
|
722
|
+
try {
|
|
723
|
+
markDispatchFailed(dispatchId, { errorSummary: "unit-break" });
|
|
724
|
+
dispatchSettled = true;
|
|
725
|
+
}
|
|
726
|
+
catch (err) {
|
|
727
|
+
debugLog("autoLoop", { phase: "dispatch-ledger-write-failed", error: err instanceof Error ? err.message : String(err) });
|
|
728
|
+
}
|
|
729
|
+
}
|
|
587
730
|
finishTurn("stopped", "execution", "unit-break");
|
|
588
731
|
break;
|
|
589
732
|
}
|
|
590
733
|
// ── Phase 5: Finalize ───────────────────────────────────────────────
|
|
591
|
-
|
|
734
|
+
let finalizeResult;
|
|
735
|
+
try {
|
|
736
|
+
finalizeResult = await runFinalize(ic, iterData, loopState, sidecarItem);
|
|
737
|
+
}
|
|
738
|
+
catch (err) {
|
|
739
|
+
if (dispatchId !== null) {
|
|
740
|
+
try {
|
|
741
|
+
markDispatchFailed(dispatchId, {
|
|
742
|
+
errorSummary: `exception:${err instanceof Error ? err.message : String(err)}`,
|
|
743
|
+
});
|
|
744
|
+
dispatchSettled = true;
|
|
745
|
+
}
|
|
746
|
+
catch (ledgerErr) {
|
|
747
|
+
debugLog("autoLoop", { phase: "dispatch-ledger-write-failed", error: ledgerErr instanceof Error ? ledgerErr.message : String(ledgerErr) });
|
|
748
|
+
}
|
|
749
|
+
}
|
|
750
|
+
throw err;
|
|
751
|
+
}
|
|
592
752
|
deps.uokObserver?.onPhaseResult("finalize", finalizeResult.action, {
|
|
593
753
|
unitType: iterData.unitType,
|
|
594
754
|
unitId: iterData.unitId,
|
|
@@ -597,24 +757,63 @@ export async function autoLoop(ctx, pi, s, deps, options) {
|
|
|
597
757
|
const finalizeFailureClass = finalizeResult.reason === "git-closeout-failure"
|
|
598
758
|
? "git"
|
|
599
759
|
: "closeout";
|
|
760
|
+
if (dispatchId !== null) {
|
|
761
|
+
try {
|
|
762
|
+
markDispatchFailed(dispatchId, { errorSummary: `finalize-break:${finalizeResult.reason ?? "unknown"}` });
|
|
763
|
+
dispatchSettled = true;
|
|
764
|
+
}
|
|
765
|
+
catch (err) {
|
|
766
|
+
debugLog("autoLoop", { phase: "dispatch-ledger-write-failed", error: err instanceof Error ? err.message : String(err) });
|
|
767
|
+
}
|
|
768
|
+
}
|
|
600
769
|
finishTurn("stopped", finalizeFailureClass, "finalize-break");
|
|
601
770
|
break;
|
|
602
771
|
}
|
|
603
772
|
if (finalizeResult.action === "continue") {
|
|
773
|
+
if (dispatchId !== null) {
|
|
774
|
+
try {
|
|
775
|
+
markDispatchFailed(dispatchId, { errorSummary: "finalize-retry" });
|
|
776
|
+
dispatchSettled = true;
|
|
777
|
+
}
|
|
778
|
+
catch (err) {
|
|
779
|
+
debugLog("autoLoop", { phase: "dispatch-ledger-write-failed", error: err instanceof Error ? err.message : String(err) });
|
|
780
|
+
}
|
|
781
|
+
}
|
|
604
782
|
finishTurn("retry");
|
|
605
783
|
continue;
|
|
606
784
|
}
|
|
785
|
+
if (dispatchId !== null) {
|
|
786
|
+
try {
|
|
787
|
+
markDispatchCompleted(dispatchId);
|
|
788
|
+
dispatchSettled = true;
|
|
789
|
+
}
|
|
790
|
+
catch (err) {
|
|
791
|
+
debugLog("autoLoop", { phase: "dispatch-ledger-write-failed", error: err instanceof Error ? err.message : String(err) });
|
|
792
|
+
}
|
|
793
|
+
}
|
|
607
794
|
consecutiveErrors = 0; // Iteration completed successfully
|
|
608
795
|
consecutiveCooldowns = 0;
|
|
609
796
|
recentErrorMessages.length = 0;
|
|
610
797
|
deps.emitJournalEvent({ ts: new Date().toISOString(), flowId, seq: nextSeq(), eventType: "iteration-end", data: { iteration } });
|
|
611
|
-
saveStuckState(s
|
|
798
|
+
saveStuckState(s, loopState); // persist across session restarts (#4382)
|
|
612
799
|
debugLog("autoLoop", { phase: "iteration-complete", iteration });
|
|
613
800
|
finishTurn("completed");
|
|
614
801
|
}
|
|
615
802
|
catch (loopErr) {
|
|
616
803
|
// ── Blanket catch: absorb unexpected exceptions, apply graduated recovery ──
|
|
617
804
|
const msg = loopErr instanceof Error ? loopErr.message : String(loopErr);
|
|
805
|
+
if (dispatchId !== null && !dispatchSettled && !(loopErr instanceof ModelPolicyDispatchBlockedError)) {
|
|
806
|
+
try {
|
|
807
|
+
markDispatchFailed(dispatchId, { errorSummary: `unhandled-error:${msg.slice(0, 200)}` });
|
|
808
|
+
dispatchSettled = true;
|
|
809
|
+
}
|
|
810
|
+
catch (err) {
|
|
811
|
+
debugLog("autoLoop", {
|
|
812
|
+
phase: "dispatch-ledger-write-failed",
|
|
813
|
+
error: err instanceof Error ? err.message : String(err),
|
|
814
|
+
});
|
|
815
|
+
}
|
|
816
|
+
}
|
|
618
817
|
// Always emit iteration-end on error so the journal records iteration
|
|
619
818
|
// completion even on failure (#2344). Without this, errors in
|
|
620
819
|
// runFinalize leave the journal incomplete, making diagnosis harder.
|
|
@@ -289,8 +289,10 @@ export async function runPreDispatch(ic, loopState) {
|
|
|
289
289
|
s.currentMilestoneId) {
|
|
290
290
|
deps.syncProjectRootToWorktree(s.originalBasePath, s.basePath, s.currentMilestoneId);
|
|
291
291
|
}
|
|
292
|
-
// Derive state
|
|
293
|
-
|
|
292
|
+
// Derive state — use canonical project root so the cache key is stable
|
|
293
|
+
// across worktree↔project-root path-form alternation. See PR #5236
|
|
294
|
+
// (workspace handle infrastructure) and the Phase A pt 2 plan.
|
|
295
|
+
let state = await deps.deriveState(s.canonicalProjectRoot);
|
|
294
296
|
const { getDeepStageGate } = await import("../auto-dispatch.js");
|
|
295
297
|
const deepStageGate = getDeepStageGate(prefs, s.basePath);
|
|
296
298
|
const canRunDeepSetupGate = state.phase === "pre-planning" ||
|
|
@@ -324,7 +326,7 @@ export async function runPreDispatch(ic, loopState) {
|
|
|
324
326
|
let compiled = ensurePlanV2Graph(s.basePath, state);
|
|
325
327
|
if (isEmptyPlanV2GraphResult(compiled)) {
|
|
326
328
|
deps.invalidateAllCaches();
|
|
327
|
-
state = await deps.deriveState(s.
|
|
329
|
+
state = await deps.deriveState(s.canonicalProjectRoot);
|
|
328
330
|
compiled = shouldRunPlanV2Gate(state.phase)
|
|
329
331
|
? ensurePlanV2Graph(s.basePath, state)
|
|
330
332
|
: {
|
|
@@ -477,7 +479,7 @@ export async function runPreDispatch(ic, loopState) {
|
|
|
477
479
|
}
|
|
478
480
|
// PR creation (auto_pr) is handled inside mergeMilestoneToMain (#2302)
|
|
479
481
|
deps.invalidateAllCaches();
|
|
480
|
-
state = await deps.deriveState(s.
|
|
482
|
+
state = await deps.deriveState(s.canonicalProjectRoot);
|
|
481
483
|
mid = state.activeMilestone?.id;
|
|
482
484
|
midTitle = state.activeMilestone?.title;
|
|
483
485
|
if (mid) {
|
|
@@ -596,7 +598,7 @@ export async function runPreDispatch(ic, loopState) {
|
|
|
596
598
|
}
|
|
597
599
|
if (mergeReconcileResult === "reconciled") {
|
|
598
600
|
deps.invalidateAllCaches();
|
|
599
|
-
state = await deps.deriveState(s.
|
|
601
|
+
state = await deps.deriveState(s.canonicalProjectRoot);
|
|
600
602
|
mid = state.activeMilestone?.id;
|
|
601
603
|
midTitle = state.activeMilestone?.title;
|
|
602
604
|
}
|
|
@@ -16,6 +16,7 @@
|
|
|
16
16
|
* `let` or `var` declarations.
|
|
17
17
|
*/
|
|
18
18
|
import { resolveWorktreeProjectRoot } from "../worktree-root.js";
|
|
19
|
+
import { normalizeRealPath } from "../paths.js";
|
|
19
20
|
// ─── Constants ───────────────────────────────────────────────────────────────
|
|
20
21
|
export const STUB_RECOVERY_THRESHOLD = 2;
|
|
21
22
|
export const NEW_SESSION_TIMEOUT_MS = 120_000;
|
|
@@ -34,6 +35,20 @@ export class AutoSession {
|
|
|
34
35
|
originalBasePath = "";
|
|
35
36
|
// TODO(C8): remove basePath/originalBasePath once all readers use s.scope
|
|
36
37
|
scope = null;
|
|
38
|
+
// ── Coordination identity (Phase B — DB-backed coordination) ────────────
|
|
39
|
+
/**
|
|
40
|
+
* Worker registry ID set by registerAutoWorker() at session start. Used by
|
|
41
|
+
* heartbeatAutoWorker() each loop iteration and by recordDispatchClaim()
|
|
42
|
+
* to fence dispatch ledger writes against stale workers.
|
|
43
|
+
*/
|
|
44
|
+
workerId = null;
|
|
45
|
+
/**
|
|
46
|
+
* Active milestone lease fencing token, set by claimMilestoneLease() inside
|
|
47
|
+
* worktree-resolver.enterMilestone(). Threaded into recordDispatchClaim()
|
|
48
|
+
* as milestone_lease_token so out-of-band dispatches by a stale worker
|
|
49
|
+
* are detectable.
|
|
50
|
+
*/
|
|
51
|
+
milestoneLeaseToken = null;
|
|
37
52
|
previousProjectRootEnv = null;
|
|
38
53
|
hadProjectRootEnv = false;
|
|
39
54
|
projectRootEnvCaptured = false;
|
|
@@ -162,6 +177,22 @@ export class AutoSession {
|
|
|
162
177
|
get lockBasePath() {
|
|
163
178
|
return resolveWorktreeProjectRoot(this.basePath, this.originalBasePath);
|
|
164
179
|
}
|
|
180
|
+
/**
|
|
181
|
+
* Canonical project root for state-derivation reads AND writer paths.
|
|
182
|
+
*
|
|
183
|
+
* Prefers the realpath-normalized projectRoot from the MilestoneScope
|
|
184
|
+
* (introduced by PR #5236), falling back to resolveWorktreeProjectRoot
|
|
185
|
+
* during early lifecycle / engine-bypass paths where scope may be null.
|
|
186
|
+
*
|
|
187
|
+
* Always realpath-normalized so cache keys (e.g. deriveState's _stateCache)
|
|
188
|
+
* cannot drift across worktree↔project-root path-string variants for the
|
|
189
|
+
* same filesystem location.
|
|
190
|
+
*/
|
|
191
|
+
get canonicalProjectRoot() {
|
|
192
|
+
const root = this.scope?.workspace.projectRoot
|
|
193
|
+
?? resolveWorktreeProjectRoot(this.basePath, this.originalBasePath);
|
|
194
|
+
return normalizeRealPath(root);
|
|
195
|
+
}
|
|
165
196
|
reset() {
|
|
166
197
|
this.clearTimers();
|
|
167
198
|
// Lifecycle
|
|
@@ -176,6 +207,8 @@ export class AutoSession {
|
|
|
176
207
|
this.basePath = "";
|
|
177
208
|
this.originalBasePath = "";
|
|
178
209
|
this.scope = null;
|
|
210
|
+
this.workerId = null;
|
|
211
|
+
this.milestoneLeaseToken = null;
|
|
179
212
|
this.previousProjectRootEnv = null;
|
|
180
213
|
this.hadProjectRootEnv = false;
|
|
181
214
|
this.projectRootEnvCaptured = false;
|