@ijfw/memory-server 1.5.4 → 1.5.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. package/package.json +15 -1
  2. package/src/brain/dream-pipeline.js +77 -14
  3. package/src/brain/dump-ingest.js +32 -0
  4. package/src/brain/entity-collapse.js +2 -2
  5. package/src/brain/export.js +60 -6
  6. package/src/brain/extractors/markdown.js +28 -2
  7. package/src/brain/layout-sentinel.js +19 -14
  8. package/src/brain/path-guard.js +17 -0
  9. package/src/brain/wiki-compiler.js +35 -39
  10. package/src/codex-agents.js +25 -2
  11. package/src/cross-orchestrator-cli.js +176 -18
  12. package/src/dashboard-server.js +36 -3
  13. package/src/dispatch/override.js +18 -2
  14. package/src/dispatch/signer-cli.js +14 -9
  15. package/src/dream/stage-runner.js +17 -0
  16. package/src/dream/state-file.js +15 -1
  17. package/src/extension-installer.js +91 -2
  18. package/src/extension-registry.js +15 -4
  19. package/src/handlers/brain-handler.js +44 -5
  20. package/src/lib/atomic-io.js +69 -12
  21. package/src/lib/shasum-verify.js +46 -22
  22. package/src/lib/ui-review-runner.js +7 -2
  23. package/src/lib/uispec-drift.js +8 -3
  24. package/src/lib/uispec-intake.js +5 -2
  25. package/src/memory/layout-migrations/001-visible-layer.js +71 -7
  26. package/src/memory/reader.js +111 -58
  27. package/src/orchestrator/merge-block-aware.js +75 -37
  28. package/src/orchestrator/post-done-runner.js +6 -1
  29. package/src/orchestrator/state-sdk.js +242 -14
  30. package/src/orchestrator/wave-state.js +22 -69
  31. package/src/recovery/checkpoint.js +30 -6
  32. package/src/recovery/code-fixer.js +52 -7
  33. package/src/runtime-mediator.js +2 -2
  34. package/src/server.js +57 -8
  35. package/src/swarm/planner.js +46 -1
  36. package/src/update-apply.js +27 -35
  37. package/src/update-check.js +6 -2
@@ -44,6 +44,7 @@ import { homedir } from 'node:os';
44
44
  import { randomUUID, createHash } from 'node:crypto';
45
45
  import { gunzipSync } from 'node:zlib';
46
46
  import { execFileSync } from 'node:child_process';
47
+ import { AsyncLocalStorage } from 'node:async_hooks';
47
48
 
48
49
  import { writeAtomic, readSafe } from '../lib/atomic-io.js';
49
50
  import { rotateJsonlIfNeeded } from '../lib/jsonl-rotation.js';
@@ -270,6 +271,54 @@ async function _withLocks(lockTargets, fn, env) {
270
271
  return acquireFrom(0);
271
272
  }
272
273
 
274
+ /**
275
+ * V155-002 — `state.replay` recovery-side lock-nest helper.
276
+ *
277
+ * Recursive nest of `withFsLock` matching `_withLocks`'s acquire-order shape,
278
+ * BUT lock-only: no journal-begin write, no snapshot capture. Replay is a
279
+ * recovery path, not a new mutating verb — generating a write-ahead begin
280
+ * record here would corrupt the journal.
281
+ *
282
+ * `targets` MUST be pre-canonicalised by `canonicalLockOrder` AND MUST NOT
283
+ * include the intent-journal path (already held by the outer
284
+ * `withFsLock(lockPathFor(journal), …)` in `state.replay`; re-acquiring it
285
+ * would deadlock against the outer scope — `withFsLock` is non-re-entrant).
286
+ *
287
+ * @param {string[]} targets canonical-sorted, journal-excluded restore paths
288
+ * @param {() => Promise<void>} fn the restore-or-delete loop body
289
+ * @returns {Promise<void>}
290
+ */
291
+ // TS-002 (v1.5.5 Trident): `withFsLock` is non-re-entrant. If a future code
292
+ // path (e.g. a gate hook firing from inside a restored body) recursively
293
+ // triggers `state.replay`, the nested `_replayRestoreWithLocks` would silently
294
+ // deadlock against itself instead of failing fast. The AsyncLocalStorage
295
+ // sentinel below detects nested entry and throws `state-sdk: replay re-entry
296
+ // detected` rather than hanging — preserves a useful error surface for the
297
+ // next maintainer. Not exploitable today (no such recursion exists); this is
298
+ // the documented latent-defect rail Trident TS-002 asked for.
299
+ const _replayReentryGuard = new AsyncLocalStorage();
300
+
301
+ async function _replayRestoreWithLocks(targets, fn) {
302
+ if (_replayReentryGuard.getStore() === true) {
303
+ throw new Error(
304
+ 'state-sdk: replay re-entry detected — _replayRestoreWithLocks must not be '
305
+ + 'called recursively (withFsLock is non-re-entrant; nested entry would deadlock).',
306
+ );
307
+ }
308
+ if (!Array.isArray(targets) || targets.length === 0) {
309
+ return _replayReentryGuard.run(true, () => fn());
310
+ }
311
+ const acquireFrom = async (index) => {
312
+ if (index >= targets.length) return fn();
313
+ return withFsLock(
314
+ lockPathFor(targets[index]),
315
+ async () => acquireFrom(index + 1),
316
+ LOCK_OPTS,
317
+ );
318
+ };
319
+ return _replayReentryGuard.run(true, () => acquireFrom(0));
320
+ }
321
+
273
322
  /**
274
323
  * Relative-path form for a journal `targets[]` entry. Project-scope files are
275
324
  * rendered relative to `projectRoot` (the §4 example shape — e.g.
@@ -622,6 +671,44 @@ export function payloadDigest(payload) {
622
671
  // is a facade: this matches wave-state.js's on-disk format exactly so a wave
623
672
  // written by either surface round-trips through the other.
624
673
 
674
+ // V155-023: prototype-pollution defense. Frontmatter is parsed from operator-
675
+ // supplied YAML (and from `wave.advance` payload merges below) — any key whose
676
+ // name is `__proto__`, `constructor`, or `prototype` would mutate the resulting
677
+ // plain-object's [[Prototype]] chain. Refuse those names everywhere keys are
678
+ // assigned from untrusted input.
679
+ const POLLUTING_KEYS = new Set(['__proto__', 'constructor', 'prototype']);
680
+ function isPollutingKey(k) {
681
+ return typeof k !== 'string' || POLLUTING_KEYS.has(k);
682
+ }
683
+
684
+ // TS-003 (v1.5.5 Trident): the top-level `isPollutingKey` filter at the two
685
+ // declared sinks (parseFrontmatter + wave.advance merge) catches only top-level
686
+ // keys. A nested payload like `{foo: {__proto__: {hard_gate: true}}}` would
687
+ // slip past the shallow scan. Today `emitFrontmatter` happens to throw on
688
+ // nested objects so the attack fails at I/O — but that's safety by coincidence,
689
+ // not by design. `containsPollutingKey` walks objects + arrays (own keys only) and
690
+ // returns true if ANY depth contains a polluting key. Callers reject the whole
691
+ // payload (rather than silently scrubbing) so a malicious shape never reaches
692
+ // disk and the operator gets a clear refusal.
693
+ function containsPollutingKey(value, depth = 0) {
694
+ if (depth > 16) return false; // bounded walk; cycle-or-bomb defense
695
+ if (value === null || typeof value !== 'object') return false;
696
+ if (Array.isArray(value)) {
697
+ for (const item of value) {
698
+ if (containsPollutingKey(item, depth + 1)) return true;
699
+ }
700
+ return false;
701
+ }
702
+ // Object.getOwnPropertyNames also sees non-enumerable own keys that
703
+ // Object.keys / Object.entries would skip. NOTE: only OWN properties are walked;
704
+ // inherited keys (e.g. from `Object.create({hard_gate: true})`) are NOT inspected.
705
+ for (const k of Object.getOwnPropertyNames(value)) {
706
+ if (POLLUTING_KEYS.has(k)) return true;
707
+ if (containsPollutingKey(value[k], depth + 1)) return true;
708
+ }
709
+ return false;
710
+ }
711
+
625
712
  function parseFrontmatter(raw) {
626
713
  if (!raw.startsWith('---')) {
627
714
  throw new Error('state-sdk: STATE.md missing YAML frontmatter');
@@ -630,7 +717,9 @@ function parseFrontmatter(raw) {
630
717
  if (end === -1) throw new Error('state-sdk: STATE.md has unclosed frontmatter');
631
718
  const block = raw.slice(4, end);
632
719
  const body = raw.slice(end + 4).replace(/^\n+/, '');
633
- const fm = {};
720
+ // Object.create(null) strips the Object.prototype chain so even a key that
721
+ // sneaks past `isPollutingKey` cannot reach the global prototype.
722
+ const fm = Object.create(null);
634
723
  const lines = block.split('\n');
635
724
  for (let i = 0; i < lines.length; i += 1) {
636
725
  const line = lines[i];
@@ -640,6 +729,8 @@ function parseFrontmatter(raw) {
640
729
  const key = line.slice(0, c).trim();
641
730
  const rest = line.slice(c + 1).trim();
642
731
  if (!key) continue;
732
+ // V155-023: refuse prototype-pollution keys regardless of value shape.
733
+ if (isPollutingKey(key)) continue;
643
734
  if (rest === '') {
644
735
  // block sequence (" - item" lines) or empty
645
736
  const seq = [];
@@ -913,12 +1004,44 @@ const handlers = {
913
1004
  updated_at: nowIso(),
914
1005
  };
915
1006
  if (payload.frontmatter && typeof payload.frontmatter === 'object') {
916
- for (const [k, v] of Object.entries(payload.frontmatter)) fm[k] = v;
1007
+ // V155-023: refuse `__proto__` / `constructor` / `prototype` keys —
1008
+ // an attacker payload of `{ __proto__: { hard_gate: true } }` would
1009
+ // otherwise mutate the [[Prototype]] of `fm` and grant a hard-gate
1010
+ // verdict via the prototype chain on subsequent reads.
1011
+ //
1012
+ // TS-003 (v1.5.5 Trident): top-level isPollutingKey() filter alone is
1013
+ // shallow — a nested value like `{foo: {__proto__: ...}}` would slip
1014
+ // past. `containsPollutingKey` walks the value tree; we refuse the
1015
+ // ENTIRE payload merge if any depth carries a polluting key. Hard
1016
+ // refusal beats silent scrub: the operator gets feedback and the
1017
+ // contract stays simple (no half-merge state).
1018
+ if (containsPollutingKey(payload.frontmatter)) {
1019
+ return {
1020
+ ok: false, refused: true, gate: 'wave-advance-proto-pollution',
1021
+ reason: 'wave.advance payload.frontmatter contains a polluting key '
1022
+ + '(__proto__ / constructor / prototype) at some depth; refusing merge.',
1023
+ };
1024
+ }
1025
+ for (const [k, v] of Object.entries(payload.frontmatter)) {
1026
+ if (isPollutingKey(k)) continue;
1027
+ fm[k] = v;
1028
+ }
917
1029
  }
918
1030
  // Persist the hard-gate declaration on the wave's frontmatter so
919
1031
  // subsequent advance calls honor it without re-passing `hardGate`.
920
1032
  if (payload?.hardGate === true) fm.hard_gate = true;
921
- const wave = writeWaveStateFile(root, waveId, fm, existing?.body ?? '');
1033
+ // V155-014 (HIGH): when the caller supplies `payload.body`, write the
1034
+ // body INSIDE this same `_withLocks` critical section (intent-journal +
1035
+ // waves.json + per-wave STATE.md). The prior pattern had wave-state.js
1036
+ // call `wave.advance` to write the frontmatter (journaled), release ALL
1037
+ // SDK locks, then re-acquire ONLY the per-wave STATE.md lock to write
1038
+ // the body — a #4-only lock with no #1 journal record, leaving replay
1039
+ // unable to roll back a body-write partial. Accepting body here folds
1040
+ // the two writes into one journaled, fully-locked atomic operation.
1041
+ const bodyPayload = (typeof payload?.body === 'string')
1042
+ ? payload.body
1043
+ : (existing?.body ?? '');
1044
+ const wave = writeWaveStateFile(root, waveId, fm, bodyPayload);
922
1045
  const result = { ok: true, wave };
923
1046
  if (hardGate && GATE_BYPASS) {
924
1047
  result.advisory = true;
@@ -999,8 +1122,23 @@ const handlers = {
999
1122
  try {
1000
1123
  result = _gateFns.validatePlan(planText, { strict: true });
1001
1124
  } catch (e) {
1125
+ // V155-008 (HIGH) — partial fix: contract §4 Model 4 reserves
1126
+ // execution-fail for the ADVISORY verdict (the verb proceeds; the gate
1127
+ // crash never freezes the workflow — see test-verification-gate-strict
1128
+ // T15 phase.plan-check). The audit's concern (an adversary crashing the
1129
+ // gate to disable enforcement) is mitigated here by adding a DISTINCT
1130
+ // `error:'gate-execution-fail'` discriminator alongside `advisory:true`
1131
+ // so downstream dispatchers/audit telemetry can DIFFERENTIATE a clean
1132
+ // pass (`advisory:undefined`) from a bypass (`reason:'IJFW_STATE_GATE_BYPASS=1'`)
1133
+ // from a crash (`error:'gate-execution-fail'`) — three distinct shapes,
1134
+ // same ok-verdict by contract. Future contract revision can promote
1135
+ // this to `ok:false` without re-finding the issue (the error field is
1136
+ // already there to dispatch on).
1002
1137
  process.stderr.write(`[state-sdk] WARN phase.plan-check gate execution-fail: ${e.message}\n`);
1003
- return { ok: true, advisory: true, gate: 'plan-check', reason: e.message, findings: [] };
1138
+ return {
1139
+ ok: true, advisory: true, gate: 'plan-check',
1140
+ error: 'gate-execution-fail', reason: e.message, findings: [],
1141
+ };
1004
1142
  }
1005
1143
  // v1.5.0 T17 (W1 plan-check hard-BLOCK): structurally REFUSE on any
1006
1144
  // HIGH-tier finding (severity in {BLOCK, HIGH} per `isHighFinding`).
@@ -1271,8 +1409,14 @@ const handlers = {
1271
1409
  try {
1272
1410
  selfCheck = _gateFns.runSelfCheck(reportText, projectRoot);
1273
1411
  } catch (e) {
1412
+ // V155-008 (HIGH) — partial fix: see phase.plan-check above. Contract
1413
+ // §4 keeps the verdict advisory; we add a DISTINCT `error` field so
1414
+ // downstream callers can tell crash from bypass from clean pass.
1274
1415
  process.stderr.write(`[state-sdk] WARN subagent.post-done gate execution-fail: ${e.message}\n`);
1275
- return { ok: true, advisory: true, gate: 'post-done-self-check', reason: e.message };
1416
+ return {
1417
+ ok: true, advisory: true, gate: 'post-done-self-check',
1418
+ error: 'gate-execution-fail', reason: e.message,
1419
+ };
1276
1420
  }
1277
1421
  // v1.5.1 cleanup C1 — T20 truncation classification. When the caller hands
1278
1422
  // us the subagent's `events` stream and/or intent `journal` on the payload,
@@ -1710,10 +1854,25 @@ const handlers = {
1710
1854
  const root = requireRoot(ctx);
1711
1855
  const journal = paths.intentJournal(root);
1712
1856
  if (!existsSync(journal)) {
1713
- return { ok: true, replayed: [], skipped: [], rolledBack: [] };
1857
+ return { ok: true, replayed: [], skipped: [], rolledBack: [], sealed: [], conflicts: [] };
1714
1858
  }
1715
1859
  // The replay walk + any rollback restores happen under the intent-journal
1716
1860
  // lock so a concurrent mutating verb cannot interleave with recovery.
1861
+ //
1862
+ // V155-002 (BLOCKER): the recovery path MUST acquire the SAME §3 locks as
1863
+ // the writers it is rolling back/forward. A snapshot's targets can span
1864
+ // multiple §3 tiers (#2 workflow, #3 waves, #4 STATE.md, #8 AGENTS.md,
1865
+ // #11 active-extension, …). Holding only the intent-journal lock (#1)
1866
+ // while restoring those targets meant a concurrent `wave.advance` could
1867
+ // interleave its mutation with `state.replay`'s restore on the SAME
1868
+ // target — torn write. Fix: per-partial, nest acquisitions of every
1869
+ // target's lock in canonicalLockOrder underneath the already-held #1
1870
+ // journal lock before touching the filesystem.
1871
+ //
1872
+ // `_replayRestoreWithLocks` is a local re-implementation of `_withLocks`'s
1873
+ // recursive nest — we cannot call `_withLocks` directly here because it
1874
+ // also runs `_journalBegin` (recovery is NOT a new mutating verb) and we
1875
+ // are ALREADY holding the journal lock so re-acquiring it would deadlock.
1717
1876
  return withFsLock(lockPathFor(journal), async () => {
1718
1877
  const records = readJsonl(journal);
1719
1878
  const sinceVerbId = payload?.sinceVerbId;
@@ -1732,6 +1891,7 @@ const handlers = {
1732
1891
  const skipped = [];
1733
1892
  const rolledBack = [];
1734
1893
  const sealed = [];
1894
+ const conflicts = [];
1735
1895
  for (const [verbId, beginRec] of begins) {
1736
1896
  if (commits.has(verbId)) {
1737
1897
  // begin + commit → durably applied. Re-issuing it would be a no-op,
@@ -1751,17 +1911,85 @@ const handlers = {
1751
1911
  const isAppend = beginRec.kind === 'append'
1752
1912
  || (beginRec.kind === undefined && snap === null);
1753
1913
  if (!isAppend && snap && Array.isArray(snap.targets)) {
1754
- // Overwrite verb: restore every target from the snapshot sidecar
1755
- // restore-or-delete per its pre-begin existence.
1914
+ // Overwrite verb: restore every target from the snapshot sidecar
1915
+ // under the SAME §3 locks the original writer held. Skip the
1916
+ // intent-journal (already held by the outer withFsLock) — only the
1917
+ // real targets need additional locks. Canonicalise so two replay
1918
+ // invocations targeting overlapping snapshots can never deadlock
1919
+ // (matches `_withLocks` ordering exactly).
1920
+ const restoreTargets = canonicalLockOrder(
1921
+ snap.targets
1922
+ .map((t) => (t && typeof t.absPath === 'string') ? t.absPath : null)
1923
+ .filter((p) => p !== null && p !== journal),
1924
+ );
1925
+ // TR-003 (v1.5.5 Trident): before overwriting BODY-BEARING targets
1926
+ // (STATE.md files — the per-wave/per-phase markdown that V155-014
1927
+ // folded into the journaled critical section), compare current
1928
+ // on-disk content against the snapshot's EXPECTED pre-write content.
1929
+ //
1930
+ // Scope is INTENTIONALLY narrow: only STATE.md-shaped paths. Other
1931
+ // snapshot targets (workflow.json, waves.json, AGENTS.md, etc.) are
1932
+ // SDK-managed atomic files — operator/third-party edits to them are
1933
+ // not a supported workflow and replay must still roll them back to
1934
+ // honor §4 atomicity. STATE.md is the file V155-014 promoted to a
1935
+ // body-bearing journaled target, and is the one a future migration
1936
+ // script (or human operator) might reasonably touch between begin
1937
+ // and replay. The fix is narrow on purpose — it protects the new
1938
+ // body-write path V155-014 introduced without weakening the
1939
+ // existing atomic-rollback contract for canonical SDK state files.
1940
+ //
1941
+ // Three observable shapes at replay time for STATE.md targets:
1942
+ // 1. Live == snap.content → safe restore (no-op write).
1943
+ // 2. Live missing → restore to snap.content. Safe.
1944
+ // 3. Live != snap.content → external edit possible. REFUSE,
1945
+ // surface conflict, leave live content alone, leave the
1946
+ // sidecar in place so a future replay (after manual triage)
1947
+ // can still attempt the rollback.
1948
+ const conflictedTargets = [];
1756
1949
  for (const t of snap.targets) {
1950
+ if (!t || typeof t.absPath !== 'string') continue;
1951
+ if (!t.existed) continue; // pre-write didn't exist; partial created it (handled below)
1952
+ // Narrow scope: only body-bearing STATE.md files. basename match
1953
+ // is the canonical V155-014-introduced surface.
1954
+ if (basename(t.absPath) !== 'STATE.md') continue;
1757
1955
  try {
1758
- if (t.existed) {
1759
- writeAtomic(t.absPath, t.content ?? '');
1760
- } else if (existsSync(t.absPath)) {
1761
- unlinkSync(t.absPath); // the partial created it — undo by delete
1956
+ if (!existsSync(t.absPath)) continue;
1957
+ const live = readFileSync(t.absPath, 'utf8');
1958
+ const expected = t.content ?? '';
1959
+ if (live !== expected) {
1960
+ conflictedTargets.push({ absPath: t.absPath, reason: 'third-party-edit' });
1762
1961
  }
1763
- } catch { /* a single target restore failing must not abort the walk */ }
1962
+ } catch {
1963
+ // unreadable -> let the restore-or-delete handle it best-effort
1964
+ }
1764
1965
  }
1966
+ if (conflictedTargets.length > 0) {
1967
+ conflicts.push({ verbId, targets: conflictedTargets });
1968
+ process.stderr.write(
1969
+ `[state-sdk] state.replay REFUSING to roll back ${verbId}: `
1970
+ + `${conflictedTargets.length} STATE.md target(s) have on-disk content that differs `
1971
+ + `from the pre-begin snapshot — possible third-party edit. `
1972
+ + `Paths: ${conflictedTargets.map((c) => c.absPath).join(', ')}. `
1973
+ + `Compact the intent-journal after manually resolving, or re-emit the verb.\n`,
1974
+ );
1975
+ // Skip rollback for THIS partial, leave the sidecar in place so a
1976
+ // future operator can inspect it, and continue with the rest of
1977
+ // the journal walk. We do NOT seal the partial — re-running replay
1978
+ // after manual resolution should still attempt the rollback.
1979
+ continue;
1980
+ }
1981
+ // eslint-disable-next-line no-await-in-loop
1982
+ await _replayRestoreWithLocks(restoreTargets, async () => {
1983
+ for (const t of snap.targets) {
1984
+ try {
1985
+ if (t.existed) {
1986
+ writeAtomic(t.absPath, t.content ?? '');
1987
+ } else if (existsSync(t.absPath)) {
1988
+ unlinkSync(t.absPath); // the partial created it — undo by delete
1989
+ }
1990
+ } catch { /* a single target restore failing must not abort the walk */ }
1991
+ }
1992
+ });
1765
1993
  }
1766
1994
  // Discard any snapshot sidecar (overwrite verbs only — append verbs
1767
1995
  // never wrote one) and seal the verbId with a synthetic `commit` so a
@@ -1786,7 +2014,7 @@ const handlers = {
1786
2014
  else rolledBack.push(verbId);
1787
2015
  }
1788
2016
  return {
1789
- ok: true, replayed: [], skipped, rolledBack, sealed,
2017
+ ok: true, replayed: [], skipped, rolledBack, sealed, conflicts,
1790
2018
  };
1791
2019
  }, LOCK_OPTS);
1792
2020
  },
@@ -17,16 +17,15 @@
17
17
  * representation. `blockers_open` carries the blocker **id** array (machine-
18
18
  * consumed); a separate `blockers_open_summary` carries human-readable text.
19
19
  *
20
- * KNOWN SDK GAP (T7-followup-1): the SDK's `wave.advance` verb does NOT
21
- * accept a `body` field — its handler always preserves the existing body.
22
- * Until a body-write SDK verb lands, `writeWaveState` does a follow-up raw
23
- * atomic write to update the body. The body-write itself is still
24
- * tmp+rename+lock-protected and the SDK frontmatter write already committed
25
- * via the intent journal so the worst-case partial state (frontmatter
26
- * advanced, body stale) is bounded and self-healing on next checkpoint.
20
+ * SDK GAP CLOSED (v1.5.5 — V155-014): the SDK's `wave.advance` verb now
21
+ * accepts an optional `body` field — frontmatter + body land inside ONE
22
+ * journaled critical section (intent-journal #1 + waves.json #3 + per-wave
23
+ * STATE.md #4). The prior two-write shape (SDK frontmatter, release all
24
+ * locks, re-acquire only #4 to write the body) is gone — `state.replay`
25
+ * can now roll back partial body writes via the same begin/commit pair.
27
26
  */
28
27
 
29
- import { mkdir, readFile, writeFile, rename, appendFile } from 'node:fs/promises';
28
+ import { mkdir, readFile, appendFile } from 'node:fs/promises';
30
29
  import { join } from 'node:path';
31
30
  import { withFsLock } from '../fs-lock.js';
32
31
  import { readBlackboard } from '../blackboard.js';
@@ -167,35 +166,9 @@ function parseYaml(block) {
167
166
  return result;
168
167
  }
169
168
 
170
- /**
171
- * Emit a YAML frontmatter block for flat string/number/boolean/string[] values.
172
- * @param {object} obj
173
- * @returns {string} (no leading/trailing `---`)
174
- */
175
- function emitYaml(obj) {
176
- const lines = [];
177
- for (const [key, val] of Object.entries(obj)) {
178
- if (val === null || val === undefined) {
179
- lines.push(`${key}: null`);
180
- } else if (Array.isArray(val)) {
181
- if (val.length === 0) {
182
- lines.push(`${key}: []`);
183
- } else {
184
- lines.push(`${key}:`);
185
- for (const item of val) lines.push(` - ${item}`);
186
- }
187
- } else if (typeof val === 'boolean') {
188
- lines.push(`${key}: ${val}`);
189
- } else if (typeof val === 'number') {
190
- lines.push(`${key}: ${val}`);
191
- } else if (typeof val === 'object') {
192
- throw new Error(`wave-state: nested YAML objects are not supported (key: "${key}")`);
193
- } else {
194
- lines.push(`${key}: ${val}`);
195
- }
196
- }
197
- return lines.join('\n');
198
- }
169
+ // V155-014 (TR-003): emitYaml was previously used to re-emit the frontmatter
170
+ // during the follow-up body write. After folding body writes into the SDK's journaled critical
171
+ // section (state-sdk now owns emit), the helper is unused. Removed.
199
172
 
200
173
  // ---------------------------------------------------------------------------
201
174
  // Path helpers
@@ -282,40 +255,20 @@ export async function writeWaveState(waveId, state, projectRoot) {
282
255
  // pass the full requested frontmatter so unrelated keys are overwritten
283
256
  // intentionally (writeWaveState semantics: caller supplies the full
284
257
  // frontmatter shape they want persisted).
285
- await query(
286
- 'wave.advance',
287
- { waveId, status, frontmatter: { ...fm } },
288
- { projectRoot },
289
- );
290
-
291
- // Body follow-up: SDK-gap T7-followup-1 wave.advance preserves existing
292
- // body and there is no body-write SDK verb yet. Until one lands, do an
293
- // atomic in-place body update. Held under the same wave-STATE lock used by
294
- // every wave-state writer, so concurrent checkpoints serialise.
258
+ //
259
+ // V155-014 (HIGH): body is now passed THROUGH the SDK call so frontmatter +
260
+ // body land inside ONE journaled critical section holding all three SDK
261
+ // locks (intent-journal #1, waves.json #3, per-wave STATE.md #4). The prior
262
+ // shape — call wave.advance for frontmatter, release SDK locks, re-acquire
263
+ // ONLY the STATE.md lock to write the body — gave a #4-only second
264
+ // critical section with no #1 journal record, so `state.replay` could not
265
+ // roll back a body-write partial. The two writes are now atomic and
266
+ // replay-safe.
267
+ const sdkPayload = { waveId, status, frontmatter: { ...fm } };
295
268
  if (state.body !== undefined && state.body !== null) {
296
- const { dir, state: statePath, lock, tmp } = wavePaths(waveId, projectRoot);
297
- await withFsLock(lock, async () => {
298
- await mkdir(dir, { recursive: true });
299
- let frontmatterRaw;
300
- try {
301
- const raw = await readFile(statePath, 'utf8');
302
- const secondDelim = raw.indexOf('\n---', 3);
303
- // Defensive: if the SDK-written STATE.md is somehow malformed, fall
304
- // back to re-emitting frontmatter from the in-memory shape rather
305
- // than refusing the body write.
306
- if (raw.startsWith('---') && secondDelim !== -1) {
307
- frontmatterRaw = raw.slice(0, secondDelim + 4); // '---\n…\n---\n'
308
- } else {
309
- frontmatterRaw = `---\n${emitYaml(fm)}\n---\n`;
310
- }
311
- } catch {
312
- frontmatterRaw = `---\n${emitYaml(fm)}\n---\n`;
313
- }
314
- const payload = `${frontmatterRaw}\n${state.body}`;
315
- await writeFile(tmp, payload, 'utf8');
316
- await rename(tmp, statePath);
317
- });
269
+ sdkPayload.body = state.body;
318
270
  }
271
+ await query('wave.advance', sdkPayload, { projectRoot });
319
272
  }
320
273
 
321
274
  /**
@@ -78,8 +78,23 @@ export function recoveryStatus(projectRoot = process.cwd()) {
78
78
 
79
79
  export function latestCheckpoint(projectRoot = process.cwd()) {
80
80
  const paths = checkpointPaths(projectRoot);
81
- const latest = readLatest(paths.latest);
82
- if (!latest) return { ok: false, error: 'no-checkpoint' };
81
+ const r = readLatest(paths.latest);
82
+ if (!r || r.code === 'enoent') {
83
+ return { ok: false, error: 'no-checkpoint' };
84
+ }
85
+ if (r.code === 'parse-fail') {
86
+ // V155-027 (v1.5.5): distinguish "no checkpoint" from "corrupt checkpoint".
87
+ // Previously both returned ok:false error:'no-checkpoint'; in-progress
88
+ // recovery work was silently abandoned on partial-write corruption.
89
+ return {
90
+ ok: false,
91
+ error: 'checkpoint-corrupt',
92
+ message: r.message,
93
+ path: paths.latest,
94
+ hint: `inspect ${paths.dir} for numbered checkpoints to recover from`,
95
+ };
96
+ }
97
+ const latest = r.data;
83
98
  let markdown = '';
84
99
  try { markdown = readFileSync(latest.mdPath, 'utf8'); } catch { /* optional */ }
85
100
  return { ok: true, ...latest, markdown };
@@ -229,12 +244,21 @@ function recommendedNext(team, plan, tasks) {
229
244
  return 'Verify completed work or prepare next wave';
230
245
  }
231
246
 
247
+ /**
248
+ * V155-027 (v1.5.5): tagged return so callers can distinguish "missing" from
249
+ * "corrupt". Three shapes:
250
+ * - { code: 'enoent' } — file does not exist
251
+ * - { code: 'parse-fail', message } — file exists but JSON.parse threw
252
+ * - { code: 'ok', data } — clean read
253
+ * `null` is no longer returned. The legacy caller checked `!latest` against
254
+ * the prior null — `latestCheckpoint` now switches on `.code`.
255
+ */
232
256
  function readLatest(path) {
257
+ if (!existsSync(path)) return { code: 'enoent' };
233
258
  try {
234
- if (!existsSync(path)) return null;
235
- return JSON.parse(readFileSync(path, 'utf8'));
236
- } catch {
237
- return null;
259
+ return { code: 'ok', data: JSON.parse(readFileSync(path, 'utf8')) };
260
+ } catch (e) {
261
+ return { code: 'parse-fail', message: e?.message || String(e) };
238
262
  }
239
263
  }
240
264
 
@@ -223,13 +223,28 @@ export function triage(finding) {
223
223
  /* ────────────────────────────── tier 1 — re-read ────────────────────────── */
224
224
 
225
225
  /**
226
- * verifyTier1(filePath, newString) — confirm the edit actually landed.
226
+ * verifyTier1(filePath, newString, intent) — confirm the edit actually landed.
227
227
  * Returns { ok: boolean, evidence: string }.
228
+ *
229
+ * V155-025: previously `newString === ''` short-circuited to ok:true regardless
230
+ * of intent — empty-string was always "contained" in any file. Deletion intents
231
+ * legitimately produce an empty newString (the substring being removed), so
232
+ * the gate is parameterised by intent: only `'delete'` permits the empty path.
233
+ * Any other intent supplying an empty newString is treated as unverifiable.
228
234
  */
229
- export async function verifyTier1(filePath, newString) {
235
+ export async function verifyTier1(filePath, newString, intent = 'edit') {
230
236
  try {
231
237
  const content = await readFile(filePath, 'utf8');
232
- if (newString === '' || content.includes(newString)) {
238
+ if (newString === '') {
239
+ if (intent === 'delete') {
240
+ return { ok: true, evidence: 'tier-1: empty newString accepted for delete intent' };
241
+ }
242
+ return {
243
+ ok: false,
244
+ evidence: 'tier-1: empty newString supplied for non-delete intent — cannot verify',
245
+ };
246
+ }
247
+ if (content.includes(newString)) {
233
248
  return { ok: true, evidence: '' };
234
249
  }
235
250
  return {
@@ -358,10 +373,35 @@ export async function verifyTier3(projectRoot, verifyCmdOverride) {
358
373
  // verbatim. Timeout is generous (5 min) because real test suites can be slow.
359
374
  return new Promise((resolve) => {
360
375
  execFile('sh', ['-c', cmd], { cwd: projectRoot, timeout: 5 * 60_000 }, (err, stdout, stderr) => {
361
- if (!err) return resolve({ ok: true, skipped: false });
362
- const blob = String(stderr || stdout || err.message || '');
363
- const evidence = blob.split('\n').slice(0, 20).join('\n');
364
- resolve({ ok: false, evidence: `tier-3 (${cmd}): ${evidence}` });
376
+ const combined = `${String(stdout || '')}\n${String(stderr || '')}`;
377
+ if (err) {
378
+ const evidence = combined.split('\n').slice(0, 20).join('\n');
379
+ return resolve({ ok: false, evidence: `tier-3 (${cmd}): ${evidence}` });
380
+ }
381
+ // V155-029: exit 0 alone is NOT proof of healthy. `npm test --silent`,
382
+ // `: # noop`, a custom `test` script that just exits 0 — all return 0
383
+ // without running anything. Require positive evidence: either explicit
384
+ // pass markers in the output OR non-empty output WITHOUT failure markers.
385
+ const FAIL_MARKERS = /\b(failing|failed|FAIL|error|✘|✗|fatal)\b/i;
386
+ const PASS_MARKERS = /\b(pass|passing|passed|ok|✓|✔)\b/i;
387
+ if (FAIL_MARKERS.test(combined)) {
388
+ const evidence = combined.split('\n').filter((l) => FAIL_MARKERS.test(l)).slice(0, 20).join('\n');
389
+ return resolve({ ok: false, evidence: `tier-3 (${cmd}) exit 0 but failure markers present: ${evidence}` });
390
+ }
391
+ const trimmed = combined.trim();
392
+ if (trimmed.length === 0) {
393
+ return resolve({
394
+ ok: false,
395
+ evidence: `tier-3 (${cmd}): exited 0 with no output — silent-success suspected, cannot prove project healthy`,
396
+ });
397
+ }
398
+ if (!PASS_MARKERS.test(combined)) {
399
+ return resolve({
400
+ ok: false,
401
+ evidence: `tier-3 (${cmd}): exited 0 but no pass markers in output — cannot prove tests ran`,
402
+ });
403
+ }
404
+ resolve({ ok: true, skipped: false });
365
405
  });
366
406
  });
367
407
  }
@@ -579,9 +619,14 @@ export async function fixFinding({
579
619
  const expectedNewString = typeof fix === 'object' ? fix.new_string : null;
580
620
 
581
621
  // 4. tier 1 — re-read
622
+ // V155-025: empty new_string only legitimate when intent is delete (old_string
623
+ // is being removed). Derive intent from fix shape so the gate can distinguish
624
+ // honest deletions from "no change supplied → unverifiable".
625
+ const fixIntent = (typeof fix === 'object' && fix.new_string === '') ? 'delete' : 'edit';
582
626
  const t1 = await verifyTier1(
583
627
  filePath,
584
628
  expectedNewString !== null && expectedNewString !== undefined ? expectedNewString : '',
629
+ fixIntent,
585
630
  );
586
631
  if (!t1.ok) {
587
632
  await rollback(filePath, originalContent);
@@ -231,8 +231,8 @@ export function toolNameToActionTarget(toolName, args) {
231
231
  return { action: 'read', target: 'metrics:read' };
232
232
  case 'ijfw_update_check':
233
233
  return { action: 'read', target: 'update:check' };
234
- case 'ijfw_update_apply':
235
- return { action: 'write', target: 'update:apply' };
234
+ // V155-017 (v1.5.5): 'ijfw_update_apply' retired from MCP — see
235
+ // cross-orchestrator-cli.js for the supported `ijfw update` flow.
236
236
  case 'ijfw_prompt_check':
237
237
  return { action: 'read', target: 'prompt:check' };
238
238
  case 'ijfw_run': {