@ikunin/sprintpilot 2.0.6 → 2.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +131 -185
- package/_Sprintpilot/manifest.yaml +1 -1
- package/_Sprintpilot/scripts/dispatch-layer.js +121 -16
- package/_Sprintpilot/scripts/log-timing.js +31 -13
- package/_Sprintpilot/scripts/merge-shards.js +252 -52
- package/_Sprintpilot/scripts/resolve-dag.js +55 -9
- package/_Sprintpilot/scripts/state-shard.js +26 -2
- package/_Sprintpilot/scripts/summarize-timings.js +84 -6
- package/_Sprintpilot/skills/sprint-autopilot-on/workflow.md +110 -142
- package/lib/core/bmad-config.js +11 -6
- package/package.json +1 -1
|
@@ -64,8 +64,25 @@ function parseLayer(raw) {
|
|
|
64
64
|
}
|
|
65
65
|
|
|
66
66
|
function planLayer({ keys, maxParallel, projectRoot, branchPrefix, baseBranch }) {
|
|
67
|
-
|
|
68
|
-
|
|
67
|
+
// Dedupe story keys — a duplicated key in --layer would otherwise
|
|
68
|
+
// produce two entries pointing at the same worktree path and same
|
|
69
|
+
// branch name, racing on `git worktree add`.
|
|
70
|
+
const seen = new Set();
|
|
71
|
+
const dedupedKeys = [];
|
|
72
|
+
for (const k of keys) {
|
|
73
|
+
if (seen.has(k)) continue;
|
|
74
|
+
seen.add(k);
|
|
75
|
+
dedupedKeys.push(k);
|
|
76
|
+
}
|
|
77
|
+
const effectiveParallel = Math.max(1, Math.min(maxParallel | 0, dedupedKeys.length));
|
|
78
|
+
// CAP: only dispatch the first `effectiveParallel` stories. The
|
|
79
|
+
// remaining keys are deferred — the autopilot loop will pick them up
|
|
80
|
+
// in the next iteration after this batch completes. Pre-2.0.8 the
|
|
81
|
+
// script created worktrees for ALL keys regardless of the cap, then
|
|
82
|
+
// the workflow spawned N agents anyway, fully ignoring --max-parallel.
|
|
83
|
+
const dispatchedKeys = dedupedKeys.slice(0, effectiveParallel);
|
|
84
|
+
const deferredKeys = dedupedKeys.slice(effectiveParallel);
|
|
85
|
+
const worktrees = dispatchedKeys.map((key) => ({
|
|
69
86
|
story: key,
|
|
70
87
|
worktree: path.join(projectRoot, '.worktrees', key),
|
|
71
88
|
branch: `${branchPrefix}${key}`,
|
|
@@ -77,6 +94,7 @@ function planLayer({ keys, maxParallel, projectRoot, branchPrefix, baseBranch })
|
|
|
77
94
|
effective_parallel: effectiveParallel,
|
|
78
95
|
max_parallel: maxParallel,
|
|
79
96
|
stories: worktrees,
|
|
97
|
+
deferred: deferredKeys,
|
|
80
98
|
};
|
|
81
99
|
}
|
|
82
100
|
|
|
@@ -90,8 +108,14 @@ function writePlan(projectRoot, plan) {
|
|
|
90
108
|
return file;
|
|
91
109
|
}
|
|
92
110
|
|
|
111
|
+
// Match git's "branch already exists" diagnostic. We retry without -b
|
|
112
|
+
// only when the FIRST attempt failed for this specific reason —
|
|
113
|
+
// pre-2.0.8 the bare retry fired on ANY first-attempt failure and
|
|
114
|
+
// silently checked out whatever stale branch happened to exist at the
|
|
115
|
+
// requested name (e.g. last week's commits from an abandoned story).
|
|
116
|
+
const BRANCH_EXISTS_RE = /a branch named .* already exists/i;
|
|
117
|
+
|
|
93
118
|
function createWorktree({ projectRoot, worktree, branch, baseBranch }) {
|
|
94
|
-
// Try -b first, fall back to checkout-existing-branch if already present.
|
|
95
119
|
const args = ['worktree', 'add', worktree, '-b', branch];
|
|
96
120
|
if (baseBranch) args.push(baseBranch);
|
|
97
121
|
const first = spawnSync('git', ['-C', projectRoot, ...args], {
|
|
@@ -99,7 +123,16 @@ function createWorktree({ projectRoot, worktree, branch, baseBranch }) {
|
|
|
99
123
|
stdio: ['ignore', 'pipe', 'pipe'],
|
|
100
124
|
});
|
|
101
125
|
if (first.status === 0) return { created: true, retried: false, stderr: first.stderr || '' };
|
|
102
|
-
//
|
|
126
|
+
// Only retry without -b if git specifically reported the branch
|
|
127
|
+
// already exists. Any other error (path collision, missing base
|
|
128
|
+
// branch, dirty index, etc.) is propagated rather than masked.
|
|
129
|
+
if (!BRANCH_EXISTS_RE.test(first.stderr || '')) {
|
|
130
|
+
return {
|
|
131
|
+
created: false,
|
|
132
|
+
retried: false,
|
|
133
|
+
stderr: first.stderr || '',
|
|
134
|
+
};
|
|
135
|
+
}
|
|
103
136
|
const second = spawnSync(
|
|
104
137
|
'git',
|
|
105
138
|
['-C', projectRoot, 'worktree', 'add', worktree, branch],
|
|
@@ -112,26 +145,45 @@ function createWorktree({ projectRoot, worktree, branch, baseBranch }) {
|
|
|
112
145
|
};
|
|
113
146
|
}
|
|
114
147
|
|
|
148
|
+
// After a worktree is created, disable gc.auto on it. The sequential
|
|
149
|
+
// path in workflow.md does this at line 738; pre-2.0.8 the parallel
|
|
150
|
+
// path skipped it, so concurrent sub-agents in heavy repos could
|
|
151
|
+
// trigger gc on each worktree mid-dispatch. Best-effort — never block
|
|
152
|
+
// dispatch on a config write.
|
|
153
|
+
function disableGcAutoOnWorktree(worktree) {
|
|
154
|
+
spawnSync('git', ['-C', worktree, 'config', '--local', 'gc.auto', '0'], {
|
|
155
|
+
encoding: 'utf8',
|
|
156
|
+
stdio: ['ignore', 'pipe', 'pipe'],
|
|
157
|
+
});
|
|
158
|
+
}
|
|
159
|
+
|
|
160
|
+
// Roll back successful worktrees when a later create fails — leaves
|
|
161
|
+
// no orphaned worktrees on disk, no `.layer-plan.json` describing
|
|
162
|
+
// state that doesn't exist. Best-effort; rollback failures are logged
|
|
163
|
+
// but don't change the overall non-zero exit.
|
|
164
|
+
function rollbackWorktrees(projectRoot, created) {
|
|
165
|
+
for (const entry of created) {
|
|
166
|
+
const r = spawnSync(
|
|
167
|
+
'git',
|
|
168
|
+
['-C', projectRoot, 'worktree', 'remove', '--force', entry.worktree],
|
|
169
|
+
{ encoding: 'utf8', stdio: ['ignore', 'pipe', 'pipe'] },
|
|
170
|
+
);
|
|
171
|
+
if (r.status !== 0) {
|
|
172
|
+
log.warn(`failed to roll back worktree ${entry.worktree}: ${r.stderr || 'unknown'}`);
|
|
173
|
+
}
|
|
174
|
+
}
|
|
175
|
+
}
|
|
176
|
+
|
|
115
177
|
function dispatch({ keys, maxParallel, projectRoot, branchPrefix, baseBranch, dryRun }) {
|
|
116
178
|
const plan = planLayer({ keys, maxParallel, projectRoot, branchPrefix, baseBranch });
|
|
117
179
|
const results = {
|
|
118
180
|
plan_file: null,
|
|
119
181
|
effective_parallel: plan.effective_parallel,
|
|
120
182
|
stories: [],
|
|
183
|
+
deferred: plan.deferred,
|
|
121
184
|
dry_run: !!dryRun,
|
|
122
185
|
};
|
|
123
|
-
if (
|
|
124
|
-
for (const entry of plan.stories) {
|
|
125
|
-
const out = createWorktree({
|
|
126
|
-
projectRoot,
|
|
127
|
-
worktree: entry.worktree,
|
|
128
|
-
branch: entry.branch,
|
|
129
|
-
baseBranch: entry.base_branch,
|
|
130
|
-
});
|
|
131
|
-
results.stories.push({ story: entry.story, worktree: entry.worktree, branch: entry.branch, ...out });
|
|
132
|
-
}
|
|
133
|
-
results.plan_file = writePlan(projectRoot, plan);
|
|
134
|
-
} else {
|
|
186
|
+
if (dryRun) {
|
|
135
187
|
results.stories = plan.stories.map((e) => ({
|
|
136
188
|
story: e.story,
|
|
137
189
|
worktree: e.worktree,
|
|
@@ -140,7 +192,60 @@ function dispatch({ keys, maxParallel, projectRoot, branchPrefix, baseBranch, dr
|
|
|
140
192
|
retried: false,
|
|
141
193
|
stderr: '(dry-run)',
|
|
142
194
|
}));
|
|
195
|
+
return results;
|
|
196
|
+
}
|
|
197
|
+
// Real dispatch. Track successful creates so we can roll them back if
|
|
198
|
+
// a later create fails — leaving an orphan worktree + a plan file
|
|
199
|
+
// claiming it succeeded was the v2.0.7 partial-failure bug.
|
|
200
|
+
const succeeded = [];
|
|
201
|
+
let failureIndex = -1;
|
|
202
|
+
for (let i = 0; i < plan.stories.length; i++) {
|
|
203
|
+
const entry = plan.stories[i];
|
|
204
|
+
const out = createWorktree({
|
|
205
|
+
projectRoot,
|
|
206
|
+
worktree: entry.worktree,
|
|
207
|
+
branch: entry.branch,
|
|
208
|
+
baseBranch: entry.base_branch,
|
|
209
|
+
});
|
|
210
|
+
results.stories.push({
|
|
211
|
+
story: entry.story,
|
|
212
|
+
worktree: entry.worktree,
|
|
213
|
+
branch: entry.branch,
|
|
214
|
+
...out,
|
|
215
|
+
});
|
|
216
|
+
if (out.created) {
|
|
217
|
+
disableGcAutoOnWorktree(entry.worktree);
|
|
218
|
+
succeeded.push(entry);
|
|
219
|
+
} else {
|
|
220
|
+
failureIndex = i;
|
|
221
|
+
break; // stop creating; remaining keys are not attempted
|
|
222
|
+
}
|
|
223
|
+
}
|
|
224
|
+
if (failureIndex !== -1) {
|
|
225
|
+
rollbackWorktrees(projectRoot, succeeded);
|
|
226
|
+
// Mark the previously-succeeded entries as rolled back so the
|
|
227
|
+
// workflow doesn't think their worktrees still exist on disk.
|
|
228
|
+
for (let i = 0; i < failureIndex; i++) {
|
|
229
|
+
results.stories[i].rolled_back = true;
|
|
230
|
+
results.stories[i].created = false;
|
|
231
|
+
}
|
|
232
|
+
// Mark untried-after-failure stories (the keys past failureIndex
|
|
233
|
+
// that we never attempted) so the workflow can see what's missing.
|
|
234
|
+
for (let i = failureIndex + 1; i < plan.stories.length; i++) {
|
|
235
|
+
results.stories.push({
|
|
236
|
+
story: plan.stories[i].story,
|
|
237
|
+
worktree: plan.stories[i].worktree,
|
|
238
|
+
branch: plan.stories[i].branch,
|
|
239
|
+
created: false,
|
|
240
|
+
retried: false,
|
|
241
|
+
stderr: '(skipped — earlier dispatch failed)',
|
|
242
|
+
});
|
|
243
|
+
}
|
|
244
|
+
// Do NOT write the plan file on partial failure — workflow.md
|
|
245
|
+
// should never read a plan describing worktrees that don't exist.
|
|
246
|
+
return results;
|
|
143
247
|
}
|
|
248
|
+
results.plan_file = writePlan(projectRoot, plan);
|
|
144
249
|
return results;
|
|
145
250
|
}
|
|
146
251
|
|
|
@@ -52,12 +52,17 @@ const VALID_ACTIONS = ['start', 'end', 'once', 'mark'];
|
|
|
52
52
|
// rename target). The constant is gone; runtime always uses per-story
|
|
53
53
|
// paths.
|
|
54
54
|
//
|
|
55
|
-
// Sanity ceiling for a single duration record.
|
|
56
|
-
//
|
|
57
|
-
//
|
|
58
|
-
//
|
|
59
|
-
//
|
|
60
|
-
|
|
55
|
+
// Sanity ceiling for a single duration record. Phase durations longer
|
|
56
|
+
// than this are treated as overflow (likely a stale marker from an
|
|
57
|
+
// abandoned session) and clamped to 0 with `over_threshold: true`
|
|
58
|
+
// stamped. 7 days chosen so legitimate weekend-spanning sprint-level
|
|
59
|
+
// phases (sprint, dispatch.layer-X) are preserved; only genuinely
|
|
60
|
+
// stale markers get clamped.
|
|
61
|
+
//
|
|
62
|
+
// Negative deltas (wall-clock backsteps) are an orthogonal anomaly,
|
|
63
|
+
// flagged with `clock_skew: true` instead. The two flags are mutually
|
|
64
|
+
// exclusive — see the JSDoc on `markPhase`.
|
|
65
|
+
const MAX_PLAUSIBLE_DURATION_MS = 7 * 24 * 60 * 60 * 1000;
|
|
61
66
|
|
|
62
67
|
function help() {
|
|
63
68
|
log.out(
|
|
@@ -314,10 +319,16 @@ function clearMarker(projectRoot, story) {
|
|
|
314
319
|
* duration record but the next mark will read the new marker (not the
|
|
315
320
|
* stale prev) and won't double-count.
|
|
316
321
|
*
|
|
317
|
-
*
|
|
318
|
-
*
|
|
319
|
-
*
|
|
320
|
-
*
|
|
322
|
+
* Two anomaly classes are flagged separately so consumers can treat
|
|
323
|
+
* them differently. Both clamp `duration_ms` to 0:
|
|
324
|
+
* - `clock_skew: true` — wall-clock went backwards (NTP backstep,
|
|
325
|
+
* DST transition, manual clock change). The flag is reliable as a
|
|
326
|
+
* "the clock did something weird" signal.
|
|
327
|
+
* - `over_threshold: true` — elapsed time exceeded
|
|
328
|
+
* `MAX_PLAUSIBLE_DURATION_MS` (7 days). Almost always a stale
|
|
329
|
+
* marker from an abandoned session, not a real measurement.
|
|
330
|
+
* The flags are mutually exclusive (a single rawDelta can be either
|
|
331
|
+
* negative OR exceed the ceiling, never both).
|
|
321
332
|
*
|
|
322
333
|
* Returns { duration_ms, prev_phase } so callers can log/inspect.
|
|
323
334
|
*/
|
|
@@ -336,8 +347,14 @@ function markPhase(projectRoot, story, phase, meta) {
|
|
|
336
347
|
const prevTs = Date.parse(prev.ts);
|
|
337
348
|
if (!Number.isNaN(prevTs)) {
|
|
338
349
|
const rawDelta = now.getTime() - prevTs;
|
|
339
|
-
|
|
340
|
-
|
|
350
|
+
// Two distinct anomalies — flagged separately so consumers can
|
|
351
|
+
// treat them differently. clock_skew = wall-clock went backwards
|
|
352
|
+
// (NTP backstep, DST, manual change). over_threshold = elapsed
|
|
353
|
+
// time exceeds the sanity ceiling (likely a stale marker, not
|
|
354
|
+
// genuine clock skew). Both clamp duration_ms to 0.
|
|
355
|
+
const clockSkew = rawDelta < 0;
|
|
356
|
+
const overThreshold = rawDelta > MAX_PLAUSIBLE_DURATION_MS;
|
|
357
|
+
durationMs = clockSkew || overThreshold ? 0 : rawDelta;
|
|
341
358
|
prevPhase = prev.phase;
|
|
342
359
|
durationEntry = {
|
|
343
360
|
event: 'duration',
|
|
@@ -347,7 +364,8 @@ function markPhase(projectRoot, story, phase, meta) {
|
|
|
347
364
|
ended: now.toISOString(),
|
|
348
365
|
duration_ms: durationMs,
|
|
349
366
|
};
|
|
350
|
-
if (
|
|
367
|
+
if (clockSkew) durationEntry.clock_skew = true;
|
|
368
|
+
if (overThreshold) durationEntry.over_threshold = true;
|
|
351
369
|
if (prev.meta !== undefined) durationEntry.meta = prev.meta;
|
|
352
370
|
}
|
|
353
371
|
}
|
|
@@ -51,8 +51,100 @@ function help() {
|
|
|
51
51
|
);
|
|
52
52
|
}
|
|
53
53
|
|
|
54
|
+
// Read BMad's `output_folder` from _bmad/bmm/config.yaml if present, so
|
|
55
|
+
// projects that have configured a non-default output dir don't desync
|
|
56
|
+
// from sibling scripts (mark-done-stories-tasks.js etc.).
|
|
57
|
+
function readOutputFolder(projectRoot) {
|
|
58
|
+
const cfg = path.join(projectRoot, '_bmad', 'bmm', 'config.yaml');
|
|
59
|
+
if (!fs.existsSync(cfg)) return null;
|
|
60
|
+
try {
|
|
61
|
+
const body = fs.readFileSync(cfg, 'utf8');
|
|
62
|
+
const m = body.match(/^output_folder\s*:\s*(\S+)/m);
|
|
63
|
+
if (!m) return null;
|
|
64
|
+
return m[1].replace(/^["']|["']$/g, '').trim();
|
|
65
|
+
} catch {
|
|
66
|
+
return null;
|
|
67
|
+
}
|
|
68
|
+
}
|
|
69
|
+
|
|
54
70
|
function implArtifactsDir(projectRoot) {
|
|
55
|
-
|
|
71
|
+
const folder = readOutputFolder(projectRoot) || '_bmad-output';
|
|
72
|
+
return path.join(projectRoot, folder, 'implementation-artifacts');
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
// ──────────────────────────────────────────────────────────────────
|
|
76
|
+
// Cross-process merge lock
|
|
77
|
+
// ──────────────────────────────────────────────────────────────────
|
|
78
|
+
//
|
|
79
|
+
// Pre-2.0.8 two concurrent merge invocations would each compute the
|
|
80
|
+
// merge in-memory then both rename their tmp file over autopilot-state
|
|
81
|
+
// .yaml. Tmp filenames are unique, so the renames never collided on
|
|
82
|
+
// the source — but the LAST rename wins on the destination, and the
|
|
83
|
+
// earlier merge (potentially with newer shard data) was clobbered.
|
|
84
|
+
// Combined with the archive race below, the loser's archived shards
|
|
85
|
+
// were also gone — silent state rewind.
|
|
86
|
+
//
|
|
87
|
+
// The fix: a sibling lock file. If another invocation holds the lock,
|
|
88
|
+
// either wait briefly + retry, or fail with a clear message naming
|
|
89
|
+
// the holder's pid and start time so the operator can diagnose.
|
|
90
|
+
|
|
91
|
+
const MERGE_LOCK_FILE = '.merge-shards.lock';
|
|
92
|
+
const STALE_LOCK_AGE_MS = 5 * 60 * 1000; // 5 minutes — merges are fast
|
|
93
|
+
|
|
94
|
+
function lockPath(projectRoot) {
|
|
95
|
+
return path.join(implArtifactsDir(projectRoot), MERGE_LOCK_FILE);
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
function acquireMergeLock(projectRoot) {
|
|
99
|
+
const file = lockPath(projectRoot);
|
|
100
|
+
fs.mkdirSync(path.dirname(file), { recursive: true });
|
|
101
|
+
for (let attempt = 0; attempt < 2; attempt++) {
|
|
102
|
+
try {
|
|
103
|
+
const fd = fs.openSync(file, 'wx');
|
|
104
|
+
const payload = JSON.stringify({
|
|
105
|
+
pid: process.pid,
|
|
106
|
+
ts: new Date().toISOString(),
|
|
107
|
+
});
|
|
108
|
+
fs.writeSync(fd, payload);
|
|
109
|
+
fs.closeSync(fd);
|
|
110
|
+
return file;
|
|
111
|
+
} catch (e) {
|
|
112
|
+
if (e.code !== 'EEXIST') throw e;
|
|
113
|
+
// Try stale-recovery once.
|
|
114
|
+
if (attempt === 0) {
|
|
115
|
+
try {
|
|
116
|
+
const st = fs.statSync(file);
|
|
117
|
+
if (Date.now() - st.mtimeMs > STALE_LOCK_AGE_MS) {
|
|
118
|
+
log.warn(`merge-shards: removing stale lock ${file} (older than ${STALE_LOCK_AGE_MS}ms)`);
|
|
119
|
+
fs.unlinkSync(file);
|
|
120
|
+
continue;
|
|
121
|
+
}
|
|
122
|
+
} catch {
|
|
123
|
+
/* lock vanished between EEXIST and stat — retry */
|
|
124
|
+
continue;
|
|
125
|
+
}
|
|
126
|
+
}
|
|
127
|
+
let holder = '';
|
|
128
|
+
try {
|
|
129
|
+
holder = fs.readFileSync(file, 'utf8');
|
|
130
|
+
} catch {
|
|
131
|
+
/* ignore */
|
|
132
|
+
}
|
|
133
|
+
throw new Error(
|
|
134
|
+
`merge-shards: another invocation holds ${file} (${holder}); ` +
|
|
135
|
+
'wait for it to finish or remove the lock manually if known stale',
|
|
136
|
+
);
|
|
137
|
+
}
|
|
138
|
+
}
|
|
139
|
+
throw new Error(`merge-shards: failed to acquire lock at ${file}`);
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
function releaseMergeLock(file) {
|
|
143
|
+
try {
|
|
144
|
+
fs.unlinkSync(file);
|
|
145
|
+
} catch {
|
|
146
|
+
/* idempotent */
|
|
147
|
+
}
|
|
56
148
|
}
|
|
57
149
|
|
|
58
150
|
function readShardFile(file) {
|
|
@@ -91,16 +183,49 @@ function compareStamps(a, b) {
|
|
|
91
183
|
return 0;
|
|
92
184
|
}
|
|
93
185
|
|
|
186
|
+
// Snapshot file stat at read time so we can verify it's unchanged
|
|
187
|
+
// before archiving. Pre-2.0.8: a worker writing a fresh shard between
|
|
188
|
+
// merge-read and archive-rename would have its shard moved into
|
|
189
|
+
// .archive/ without ever being folded into the merged YAML — silent
|
|
190
|
+
// data loss under parallel dispatch.
|
|
191
|
+
function snapshotShard(file) {
|
|
192
|
+
try {
|
|
193
|
+
const st = fs.statSync(file);
|
|
194
|
+
return { mtime: st.mtimeMs, size: st.size, ino: st.ino };
|
|
195
|
+
} catch {
|
|
196
|
+
return null;
|
|
197
|
+
}
|
|
198
|
+
}
|
|
199
|
+
|
|
200
|
+
function shardUnchanged(file, snapshot) {
|
|
201
|
+
if (!snapshot) return false;
|
|
202
|
+
try {
|
|
203
|
+
const st = fs.statSync(file);
|
|
204
|
+
return (
|
|
205
|
+
st.mtimeMs === snapshot.mtime &&
|
|
206
|
+
st.size === snapshot.size &&
|
|
207
|
+
st.ino === snapshot.ino
|
|
208
|
+
);
|
|
209
|
+
} catch {
|
|
210
|
+
return false;
|
|
211
|
+
}
|
|
212
|
+
}
|
|
213
|
+
|
|
94
214
|
function mergeStateShards(projectRoot) {
|
|
95
|
-
// Returns { byStory: { [storyKey]: shard },
|
|
215
|
+
// Returns { byStory: { [storyKey]: shard }, snapshots: { [storyKey]: stat },
|
|
216
|
+
// corrupt: [...], invalid: [...] }
|
|
96
217
|
const dir = path.join(implArtifactsDir(projectRoot), KIND_DIR.state);
|
|
97
|
-
if (!fs.existsSync(dir)) return { byStory: {}, corrupt: [], invalid: [] };
|
|
218
|
+
if (!fs.existsSync(dir)) return { byStory: {}, snapshots: {}, corrupt: [], invalid: [] };
|
|
98
219
|
const stories = listShardStories(projectRoot, 'state');
|
|
99
220
|
const byStory = {};
|
|
221
|
+
const snapshots = {};
|
|
100
222
|
const corrupt = [];
|
|
101
223
|
const invalid = [];
|
|
102
224
|
for (const story of stories) {
|
|
103
225
|
const file = path.join(dir, `${story}.yaml`);
|
|
226
|
+
// Snapshot BEFORE reading so a writer that touches the file during
|
|
227
|
+
// read still produces a stat mismatch later.
|
|
228
|
+
const snap = snapshotShard(file);
|
|
104
229
|
let shard;
|
|
105
230
|
try {
|
|
106
231
|
shard = readShardFile(file);
|
|
@@ -113,19 +238,33 @@ function mergeStateShards(projectRoot) {
|
|
|
113
238
|
continue;
|
|
114
239
|
}
|
|
115
240
|
byStory[story] = shard;
|
|
241
|
+
snapshots[story] = snap;
|
|
116
242
|
}
|
|
117
|
-
return { byStory, corrupt, invalid };
|
|
243
|
+
return { byStory, snapshots, corrupt, invalid };
|
|
244
|
+
}
|
|
245
|
+
|
|
246
|
+
// Parse a timestamp string defensively: malformed `ts` returns 0
|
|
247
|
+
// rather than NaN (which Array.sort treats unpredictably). Pre-2.0.8
|
|
248
|
+
// `Date.parse('not-a-date')` returned NaN, NaN comparisons returned
|
|
249
|
+
// 0, and entries clustered in undefined order — the documented
|
|
250
|
+
// "sort by ts ascending" claim was silently violated.
|
|
251
|
+
function tsToMs(ts) {
|
|
252
|
+
if (!ts) return 0;
|
|
253
|
+
const v = Date.parse(ts);
|
|
254
|
+
return Number.isFinite(v) ? v : 0;
|
|
118
255
|
}
|
|
119
256
|
|
|
120
257
|
function mergeDecisionShards(projectRoot) {
|
|
121
258
|
const dir = path.join(implArtifactsDir(projectRoot), KIND_DIR['decision-log']);
|
|
122
|
-
if (!fs.existsSync(dir)) return { entries: [], corrupt: [], invalid: [] };
|
|
123
|
-
const stories = listShardStories(projectRoot, 'decision-log');
|
|
259
|
+
if (!fs.existsSync(dir)) return { entries: [], snapshots: {}, corrupt: [], invalid: [] };
|
|
260
|
+
const stories = listShardStories(projectRoot, 'decision-log').sort();
|
|
124
261
|
const entries = [];
|
|
262
|
+
const snapshots = {};
|
|
125
263
|
const corrupt = [];
|
|
126
264
|
const invalid = [];
|
|
127
265
|
for (const story of stories) {
|
|
128
266
|
const file = path.join(dir, `${story}.yaml`);
|
|
267
|
+
const snap = snapshotShard(file);
|
|
129
268
|
let shard;
|
|
130
269
|
try {
|
|
131
270
|
shard = readShardFile(file);
|
|
@@ -142,29 +281,50 @@ function mergeDecisionShards(projectRoot) {
|
|
|
142
281
|
if (!item || typeof item !== 'object') continue;
|
|
143
282
|
entries.push({ ...item, _story: story });
|
|
144
283
|
}
|
|
284
|
+
snapshots[story] = snap;
|
|
145
285
|
}
|
|
146
|
-
//
|
|
286
|
+
// Deterministic dedup: sort by (id asc, ts DESC) first, then keep the
|
|
287
|
+
// first entry for each id — that's the latest-by-ts. Pre-2.0.8 the
|
|
288
|
+
// dedup was iteration-order-dependent (filesystem readdir order is
|
|
289
|
+
// unspecified), so identical inputs produced different outputs across
|
|
290
|
+
// OSes. Idempotency claim was filesystem-dependent.
|
|
291
|
+
entries.sort((a, b) => {
|
|
292
|
+
const ai = a.id !== undefined && a.id !== null ? String(a.id) : '';
|
|
293
|
+
const bi = b.id !== undefined && b.id !== null ? String(b.id) : '';
|
|
294
|
+
if (ai !== bi) return ai < bi ? -1 : 1;
|
|
295
|
+
// Within same id: latest ts wins (desc).
|
|
296
|
+
const aw = tsToMs(a.ts);
|
|
297
|
+
const bw = tsToMs(b.ts);
|
|
298
|
+
return bw - aw;
|
|
299
|
+
});
|
|
147
300
|
const seen = new Set();
|
|
148
301
|
const deduped = [];
|
|
149
302
|
for (const e of entries) {
|
|
150
|
-
if (e.id !== undefined && e.id !== null
|
|
151
|
-
|
|
303
|
+
if (e.id !== undefined && e.id !== null) {
|
|
304
|
+
if (seen.has(String(e.id))) continue;
|
|
305
|
+
seen.add(String(e.id));
|
|
306
|
+
}
|
|
152
307
|
deduped.push(e);
|
|
153
308
|
}
|
|
309
|
+
// Final sort for output: ts ascending, with deterministic tiebreaks.
|
|
154
310
|
deduped.sort((a, b) => {
|
|
155
|
-
const aw =
|
|
156
|
-
const bw =
|
|
311
|
+
const aw = tsToMs(a.ts);
|
|
312
|
+
const bw = tsToMs(b.ts);
|
|
157
313
|
if (aw !== bw) return aw - bw;
|
|
158
|
-
// Tiebreak alphabetically by id then story for determinism.
|
|
159
314
|
const ai = a.id !== undefined ? String(a.id) : '';
|
|
160
315
|
const bi = b.id !== undefined ? String(b.id) : '';
|
|
161
316
|
if (ai !== bi) return ai < bi ? -1 : 1;
|
|
162
317
|
return (a._story || '').localeCompare(b._story || '');
|
|
163
318
|
});
|
|
164
|
-
return {
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
319
|
+
return {
|
|
320
|
+
entries: deduped.map((e) => {
|
|
321
|
+
const { _story, ...rest } = e;
|
|
322
|
+
return rest;
|
|
323
|
+
}),
|
|
324
|
+
snapshots,
|
|
325
|
+
corrupt,
|
|
326
|
+
invalid,
|
|
327
|
+
};
|
|
168
328
|
}
|
|
169
329
|
|
|
170
330
|
function archiveCorrupt(projectRoot, kind, story, file, reason) {
|
|
@@ -186,18 +346,38 @@ function archiveCorrupt(projectRoot, kind, story, file, reason) {
|
|
|
186
346
|
return { archived: dest, reason };
|
|
187
347
|
}
|
|
188
348
|
|
|
189
|
-
function archiveShardsToLayer(projectRoot, layerId,
|
|
190
|
-
|
|
349
|
+
function archiveShardsToLayer(projectRoot, layerId, snapshotsByKind) {
|
|
350
|
+
// Default layerId includes pid + hrtime to avoid collision when two
|
|
351
|
+
// archive operations land in the same millisecond on fast CI. Pre-
|
|
352
|
+
// 2.0.8 the bare ISO timestamp could collide and the second archive
|
|
353
|
+
// would race-clobber the first.
|
|
354
|
+
const ts =
|
|
355
|
+
layerId ||
|
|
356
|
+
`${new Date().toISOString().replace(/[:.]/g, '-')}-${process.pid}-${process.hrtime.bigint().toString(36)}`;
|
|
191
357
|
const base = path.join(implArtifactsDir(projectRoot), '.archive', `layer-${ts}`);
|
|
192
358
|
fs.mkdirSync(base, { recursive: true });
|
|
359
|
+
// Snapshot-verify each shard before moving — if a writer touched the
|
|
360
|
+
// file after merge-read but before archive, the stat won't match and
|
|
361
|
+
// we must NOT move it (otherwise the fresh shard's contents are lost
|
|
362
|
+
// without ever being folded into the merged YAML). Skip + log so the
|
|
363
|
+
// shard stays on disk for the next merge to pick up.
|
|
364
|
+
const skipped = [];
|
|
193
365
|
for (const kind of ['state', 'decision-log']) {
|
|
194
366
|
const src = path.join(implArtifactsDir(projectRoot), KIND_DIR[kind]);
|
|
195
367
|
if (!fs.existsSync(src)) continue;
|
|
196
368
|
const destDir = path.join(base, KIND_DIR[kind]);
|
|
197
369
|
fs.mkdirSync(destDir, { recursive: true });
|
|
198
|
-
|
|
370
|
+
const snapshots = (snapshotsByKind && snapshotsByKind[kind]) || {};
|
|
371
|
+
for (const story of Object.keys(snapshots)) {
|
|
199
372
|
const file = path.join(src, `${story}.yaml`);
|
|
200
373
|
if (!fs.existsSync(file)) continue;
|
|
374
|
+
if (!shardUnchanged(file, snapshots[story])) {
|
|
375
|
+
log.warn(
|
|
376
|
+
`merge-shards: shard ${file} changed during merge; not archiving (will be folded into next merge)`,
|
|
377
|
+
);
|
|
378
|
+
skipped.push({ kind, story, file, reason: 'changed during merge' });
|
|
379
|
+
continue;
|
|
380
|
+
}
|
|
201
381
|
const dest = path.join(destDir, `${story}.yaml`);
|
|
202
382
|
try {
|
|
203
383
|
fs.renameSync(file, dest);
|
|
@@ -207,7 +387,7 @@ function archiveShardsToLayer(projectRoot, layerId, storyKeys) {
|
|
|
207
387
|
}
|
|
208
388
|
}
|
|
209
389
|
}
|
|
210
|
-
return base;
|
|
390
|
+
return { dir: base, skipped };
|
|
211
391
|
}
|
|
212
392
|
|
|
213
393
|
function writeAuthoritative(projectRoot, filename, body, { dryRun } = {}) {
|
|
@@ -263,43 +443,63 @@ function composeDecisionYaml(decisionMerge) {
|
|
|
263
443
|
}
|
|
264
444
|
|
|
265
445
|
function merge(projectRoot, { layerId, archive, dryRun } = {}) {
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
446
|
+
// Acquire cross-process lock. Even dry-run takes the lock so a real
|
|
447
|
+
// merge in progress doesn't have its shard reads disturbed by a
|
|
448
|
+
// concurrent dry-run that might (e.g.) tail the same files.
|
|
449
|
+
const lockFile = acquireMergeLock(projectRoot);
|
|
450
|
+
try {
|
|
451
|
+
const state = mergeStateShards(projectRoot);
|
|
452
|
+
const decisions = mergeDecisionShards(projectRoot);
|
|
453
|
+
|
|
454
|
+
// Archive corrupt shards before writing merged files so subsequent
|
|
455
|
+
// merges don't re-surface the same errors.
|
|
456
|
+
const archivedCorrupt = [];
|
|
457
|
+
if (!dryRun) {
|
|
458
|
+
for (const c of state.corrupt.concat(state.invalid)) {
|
|
459
|
+
const arch = archiveCorrupt(projectRoot, 'state', c.story, c.file, c.error || c.reason);
|
|
460
|
+
archivedCorrupt.push({ kind: 'state', story: c.story, ...arch });
|
|
461
|
+
}
|
|
462
|
+
for (const c of decisions.corrupt.concat(decisions.invalid)) {
|
|
463
|
+
const arch = archiveCorrupt(projectRoot, 'decision-log', c.story, c.file, c.error || c.reason);
|
|
464
|
+
archivedCorrupt.push({ kind: 'decision-log', story: c.story, ...arch });
|
|
465
|
+
}
|
|
280
466
|
}
|
|
281
|
-
}
|
|
282
467
|
|
|
283
|
-
|
|
284
|
-
|
|
468
|
+
const stateBody = composeStateYaml(state);
|
|
469
|
+
const decisionBody = composeDecisionYaml(decisions);
|
|
285
470
|
|
|
286
|
-
|
|
287
|
-
|
|
471
|
+
const stateWrite = writeAuthoritative(projectRoot, 'autopilot-state.yaml', stateBody, { dryRun });
|
|
472
|
+
const decisionWrite = writeAuthoritative(projectRoot, 'decision-log.yaml', decisionBody, { dryRun });
|
|
288
473
|
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
474
|
+
let archiveDir = null;
|
|
475
|
+
let archiveSkipped = [];
|
|
476
|
+
if (archive && !dryRun) {
|
|
477
|
+
const archResult = archiveShardsToLayer(projectRoot, layerId, {
|
|
478
|
+
state: state.snapshots,
|
|
479
|
+
'decision-log': decisions.snapshots,
|
|
480
|
+
});
|
|
481
|
+
archiveDir = archResult.dir;
|
|
482
|
+
archiveSkipped = archResult.skipped;
|
|
483
|
+
}
|
|
294
484
|
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
485
|
+
return {
|
|
486
|
+
state: {
|
|
487
|
+
stories: Object.keys(state.byStory).length,
|
|
488
|
+
problems: state.corrupt.length + state.invalid.length,
|
|
489
|
+
},
|
|
490
|
+
decisions: {
|
|
491
|
+
entries: decisions.entries.length,
|
|
492
|
+
problems: decisions.corrupt.length + decisions.invalid.length,
|
|
493
|
+
},
|
|
494
|
+
files: { state: stateWrite.file, decisions: decisionWrite.file },
|
|
495
|
+
archived_corrupt: archivedCorrupt,
|
|
496
|
+
archive_dir: archiveDir,
|
|
497
|
+
archive_skipped: archiveSkipped,
|
|
498
|
+
dry_run: !!dryRun,
|
|
499
|
+
};
|
|
500
|
+
} finally {
|
|
501
|
+
releaseMergeLock(lockFile);
|
|
502
|
+
}
|
|
303
503
|
}
|
|
304
504
|
|
|
305
505
|
function main() {
|