@pilotspace/add 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/tooling/add.py CHANGED
@@ -13,6 +13,7 @@ from __future__ import annotations
13
13
 
14
14
  import argparse
15
15
  import getpass
16
+ import hashlib
16
17
  import json
17
18
  import os
18
19
  import re
@@ -34,8 +35,14 @@ STAGES = ("prototype", "poc", "mvp", "production")
34
35
  # v22 stage-graduation: the read-only cue `status` shows when the MVP is covered.
35
36
  # Worded as the ACTION (never a file) so it stands before graduate.md exists.
36
37
  GRADUATION_CUE = "MVP covered → propose graduation"
37
- PHASES = ("specify", "scenarios", "contract", "tests", "build", "verify", "observe", "done")
38
+ PHASES = ("ground", "specify", "scenarios", "contract", "tests", "build", "verify", "observe", "done")
38
39
  GATES = ("none", "PASS", "RISK-ACCEPTED", "HARD-STOP")
40
+ # heal-then-escalate (verify-integrity): the bounded self-heal loop cap. A CONFIRMED cheat
41
+ # (mechanical tripwire divergence, or an agent-reported semantic refute-read finding) returns
42
+ # the task to BUILD for an honest redo; after HEAL_CAP such attempts the next confirmed cheat
43
+ # forces a HARD-STOP escalation to the human. MONOTONIC — attempts never auto-resets (a gamed
44
+ # green is never auto-passed; the loop is never unbounded).
45
+ HEAL_CAP = 3
39
46
 
40
47
 
41
48
  def _phase_index(name: str) -> int:
@@ -45,6 +52,8 @@ def _phase_index(name: str) -> int:
45
52
  # `add.py guide` copy: per-phase (concrete next action, book chapter to read).
46
53
  # Keep the action wording aligned with each phase's EXIT line in the TASK template.
47
54
  PHASE_GUIDE = {
55
+ "ground": ("gather the real codebase the task touches — files, symbols, signatures, conventions, and the anchor points the contract will cite; defer to PROJECT.md/CONVENTIONS.md and gather only the task delta",
56
+ "02-the-flow.md"),
48
57
  "specify": ("state every rule — Must / Reject (+ named code) / After; rank assumptions lowest-confidence first and flag the biggest risk",
49
58
  "03-step-1-specify.md"),
50
59
  "scenarios": ("write one Given/When/Then per Must AND per Reject; every result observable",
@@ -67,10 +76,11 @@ PHASE_GUIDE = {
67
76
  # follows the book's who-does-what table (Verify is "human only"); `tests`/`build`/`observe`
68
77
  # are AI-led. A phase missing here is `unmapped_phase` (fail closed) — never defaulted.
69
78
  PHASE_OWNER = {
79
+ "ground": "ai",
70
80
  "specify": "human", "scenarios": "human", "contract": "seam",
71
81
  "tests": "ai", "build": "ai", "verify": "human", "observe": "ai", "done": "human",
72
82
  }
73
- SETUP_FILES = ("PROJECT.md", "CONVENTIONS.md", "GLOSSARY.md", "MODEL_REGISTRY.md", "dependencies.allowlist")
83
+ SETUP_FILES = ("PROJECT.md", "CONVENTIONS.md", "GLOSSARY.md", "MODEL_REGISTRY.md", "dependencies.allowlist", "DESIGN.md")
74
84
 
75
85
  # Guideline-injection targets + version-stable markers. NEVER change these marker
76
86
  # strings: a re-run finds the old block by exact match, so changing them would
@@ -84,7 +94,13 @@ _GUIDE_END = "<!-- ADD:END -->"
84
94
  _FALLBACK_TASK = """# TASK: {title}
85
95
 
86
96
  slug: {slug} · created: {date} · stage: {stage}
87
- phase: specify
97
+ autonomy: auto
98
+ phase: ground
99
+
100
+ ## 0 · GROUND
101
+ Touches (files · symbols · signatures):
102
+ Honors (patterns / conventions):
103
+ Anchors the contract cites:
88
104
 
89
105
  ## 1 · SPECIFY
90
106
  Feature:
@@ -431,12 +447,20 @@ def cmd_new_task(args: argparse.Namespace) -> None:
431
447
  (tdir / "tests").mkdir(parents=True, exist_ok=True)
432
448
  (tdir / "src").mkdir(parents=True, exist_ok=True)
433
449
  title = args.title or slug.replace("-", " ").replace("_", " ").title()
450
+ # inherit the project's DECLARED autonomy default (task init-auto-default) — fail-SAFE:
451
+ # absent -> auto, garbled -> conservative; the posture is project-scoped, not hardcoded.
452
+ autonomy = _project_autonomy(root)
434
453
  _atomic_write(task_md, _render_template(
435
- "TASK.md", title=title, slug=slug, date=date.today().isoformat(), stage=state["stage"]))
454
+ "TASK.md", title=title, slug=slug, date=date.today().isoformat(),
455
+ stage=state["stage"], autonomy=autonomy))
456
+ if _project_autonomy_token(root) == "?":
457
+ print("warning: garbled_project_autonomy — PROJECT.md declares an unrecognized "
458
+ f"autonomy token; new task seeded fail-safe '{autonomy}' "
459
+ "(set autonomy: manual|conservative|auto in PROJECT.md)", file=sys.stderr)
436
460
 
437
461
  state["tasks"][slug] = {
438
462
  "title": title,
439
- "phase": "specify",
463
+ "phase": "ground",
440
464
  "gate": "none",
441
465
  "milestone": milestone,
442
466
  "depends_on": depends_on,
@@ -454,7 +478,8 @@ def cmd_new_task(args: argparse.Namespace) -> None:
454
478
  # intake -> milestone flow. Speaks of STRUCTURE (not attached), never the act.
455
479
  print(f"note: '{slug}' is not attached to a milestone — size it via /add (intake), "
456
480
  "or pass --milestone <id>")
457
- print("active task set. phase: specify. Fill section 1 (SPECIFY), then: add.py advance")
481
+ print("active task set. phase: ground. Gather the real codebase (section 0 GROUND).")
482
+ print(_next_footer(root, state)) # converges the old "then: add.py advance" hint
458
483
 
459
484
 
460
485
  def _parse_deps(raw: str | None) -> list[str]:
@@ -507,6 +532,7 @@ def cmd_phase(args: argparse.Namespace) -> None:
507
532
  _sync_task_marker(root, slug, args.phase)
508
533
  save_state(root, state)
509
534
  print(f"task '{slug}' phase -> {args.phase}")
535
+ print(_next_footer(root, state))
510
536
 
511
537
 
512
538
  def cmd_advance(args: argparse.Namespace) -> None:
@@ -536,21 +562,80 @@ def cmd_advance(args: argparse.Namespace) -> None:
536
562
  "+ substantive content; bare 'none' only as 'none material — "
537
563
  "biggest risk: X') before crossing into build")
538
564
  state["tasks"][slug]["flag_verified"] = True
565
+ # tamper tripwire (verify-integrity): snapshot the red test files + the frozen
566
+ # §3 md5s so the verify gate can prove the green was EARNED, not edited into
567
+ # place. UNCONDITIONAL overwrite — a legit change-request that re-crosses
568
+ # tests->build re-snapshots cleanly. Co-witnessed by flag_verified (above).
569
+ state["tasks"][slug]["tripwire"] = _tripwire_snapshot(root, slug, raw3)
570
+ # §5 scope gate (build-scope-lock): when the task declares its Scope, freeze
571
+ # the project tree into a sidecar (payload) + a state.json anchor (md5 of the
572
+ # sidecar bytes). Same UNCONDITIONAL-overwrite semantics as the tripwire.
573
+ # UNDECLARED (no Scope line) takes no snapshot — grandfathered, never retro-red
574
+ # — and CLEANS UP a previous declaration's leftovers (v3): a declared->
575
+ # undeclared re-cross pops the stale anchor + unlinks the stale sidecar, so
576
+ # "UNDECLARED is never refused" holds on every path.
577
+ declared = _declared_scope(root, slug)
578
+ side = root / "tasks" / slug / "scope-snapshot.json"
579
+ if declared is not None:
580
+ payload = json.dumps({"version": 1,
581
+ "files": _scope_walk(root.parent.resolve())},
582
+ sort_keys=True)
583
+ side.write_text(payload, encoding="utf-8")
584
+ state["tasks"][slug]["scope"] = {"declared": declared,
585
+ "snapshot_md5": _md5_text(payload)}
586
+ else:
587
+ state["tasks"][slug].pop("scope", None)
588
+ try:
589
+ side.unlink()
590
+ except OSError:
591
+ pass
539
592
  state["tasks"][slug]["phase"] = nxt
540
593
  state["tasks"][slug]["updated"] = _now()
541
594
  _sync_task_marker(root, slug, nxt)
542
595
  save_state(root, state)
543
596
  print(f"task '{slug}' phase {cur} -> {nxt}")
597
+ print(_next_footer(root, state))
598
+
599
+
600
+ # The mechanized high-risk guard (run.md, v14; widened by explicit-autonomy-dial):
601
+ # judging WHAT is high-risk stays human — a scope declares `risk: high` in its TASK.md
602
+ # header at the freeze. The engine then enforces the pure token contradiction: risk: high
603
+ # WITHOUT a lowered autonomy rung (manual or conservative) is unguarded, and completion is
604
+ # refused. Tokens are read from the header region (text before the first section heading)
605
+ # with HTML comments stripped — a documentation comment is never a declaration. A token
606
+ # counts ONLY at a DECLARATION position — line-start (optionally indented) or just after the
607
+ # `·` slug-line separator — so a freeform H1 title or quoted prose that happens to contain
608
+ # "risk: high" / "autonomy: <x>" is never mistaken for a declaration (a title substring must
609
+ # not be able to fool the guard either way).
610
+ _RISK_HIGH_RE = re.compile(r"(?:^|·)[ \t]*risk:[ \t]*high\b", re.MULTILINE)
611
+
612
+ # the explicit 3-mode autonomy dial (task explicit-autonomy-dial): an ordered ladder
613
+ # manual < conservative < auto, declared as a per-task `autonomy:` header token.
614
+ _AUTONOMY_LEVELS = ("manual", "conservative", "auto")
615
+ # anchored to a DECLARATION position — line-start `autonomy:` OR the inline slug-line form
616
+ # `… · autonomy: conservative` (the `·`-preceded shape) — never a title/prose substring; the
617
+ # value stops at space/`<`/`#`/`|` so an unfilled `<manual | … >` placeholder captures nothing
618
+ # and reads as UNSET.
619
+ _AUTONOMY_LINE_RE = re.compile(r"(?:^|·)[ \t]*autonomy:[ \t]*([^\s<#|]+)", re.MULTILINE)
620
+
621
+
622
+ def _autonomy_level(hdr: str):
623
+ """The declared autonomy rung from a TASK.md header region (HTML comments
624
+ already stripped by _task_header). Returns a member of _AUTONOMY_LEVELS, or
625
+ None when no `autonomy:` line is present (UNSET — an unfilled `<…>` placeholder,
626
+ whose value the regex declines, counts as unset), or "?" when a REAL token outside
627
+ the set was written (unknown). PURE."""
628
+ m = _AUTONOMY_LINE_RE.search(hdr)
629
+ if not m:
630
+ return None
631
+ tok = m.group(1).strip().lower()
632
+ return tok if tok in _AUTONOMY_LEVELS else "?"
544
633
 
545
634
 
546
- # The mechanized high-risk guard (run.md, v14): judging WHAT is high-risk stays
547
- # human — a scope declares `risk: high` in its TASK.md header at the freeze. The
548
- # engine then enforces the pure token contradiction: risk: high WITHOUT
549
- # autonomy: conservative is unguarded, and completion is refused. Tokens are
550
- # read from the header region (text before the first section heading) with HTML
551
- # comments stripped — a documentation comment is never a declaration.
552
- _RISK_HIGH_RE = re.compile(r"\brisk:\s*high\b")
553
- _AUTONOMY_CONSERVATIVE_RE = re.compile(r"\bautonomy:\s*conservative\b")
635
+ def _autonomy_lowered(hdr: str) -> bool:
636
+ """True iff the declared rung is high-risk-safe (manual or conservative). A
637
+ high-risk scope must be lowered to one of these; `auto` and UNSET are not."""
638
+ return _autonomy_level(hdr) in ("manual", "conservative")
554
639
 
555
640
 
556
641
  def _task_header(root: Path, slug: str) -> str:
@@ -563,6 +648,37 @@ def _task_header(root: Path, slug: str) -> str:
563
648
  return re.sub(r"<!--.*?-->", "", text.split("\n## ", 1)[0], flags=re.S)
564
649
 
565
650
 
651
+ def _effective_autonomy(root: Path, state: dict, slug: str) -> str:
652
+ """The autonomy rung that governs `slug` right now: the task's own declared rung,
653
+ falling back to the project default when the task line is UNSET (None) or an
654
+ unrecognized token ("?") — the same fail-safe chain cmd_new_task seeds from
655
+ (_project_autonomy: absent -> auto, garbled -> conservative). PURE. `state` is unused
656
+ today; it is kept in the signature beside _driver_stop for symmetry."""
657
+ lvl = _autonomy_level(_task_header(root, slug))
658
+ return lvl if lvl in _AUTONOMY_LEVELS else _project_autonomy(root)
659
+
660
+
661
+ def _driver_stop(root: Path, state: dict, slug: str, phase: str) -> bool:
662
+ """True iff a HUMAN owns the next step for `phase` under the effective autonomy — the
663
+ SINGLE source the footer marker and the guide TEXT marker both render (task
664
+ gate-owner-marker). Refines _phase_owner with the autonomy level at exactly ONE phase,
665
+ verify:
666
+ verify -> the human gates UNLESS the run may auto-gate (effective autonomy == auto)
667
+ else -> the structural owner stops (owner != "ai"), independent of the level
668
+ The frozen machine-state-json JSON `stop` keeps its own structural value (Option F);
669
+ this resolver feeds ONLY the human-facing footer + guide TEXT. _phase_owner still
670
+ _die("unmapped_phase") on a bad phase — the marker invents no default."""
671
+ if phase == "verify":
672
+ return _effective_autonomy(root, state, slug) != "auto"
673
+ return _phase_owner(phase) != "ai"
674
+
675
+
676
+ def _driver_marker(stop: bool) -> str:
677
+ """Render _driver_stop as the reserved-slot word (one leading space each) — the exact
678
+ strings next-footer-engine reserved: ` [human gate]` (a human owns it) / ` [you drive]`."""
679
+ return " [human gate]" if stop else " [you drive]"
680
+
681
+
566
682
  def cmd_gate(args: argparse.Namespace) -> None:
567
683
  root = _require_root()
568
684
  state = load_state(root)
@@ -588,10 +704,18 @@ def cmd_gate(args: argparse.Namespace) -> None:
588
704
  # COMPLETION (PASS / RISK-ACCEPTED) until the dial is lowered and a human
589
705
  # owns the gate. HARD-STOP is never blocked — stopping is always allowed.
590
706
  hdr = _task_header(root, slug)
591
- if _RISK_HIGH_RE.search(hdr) and not _AUTONOMY_CONSERVATIVE_RE.search(hdr):
707
+ if _RISK_HIGH_RE.search(hdr) and not _autonomy_lowered(hdr):
592
708
  _die(f"unguarded_high_risk_auto: task '{slug}' declares risk: high "
593
- "without autonomy: conservativelower the autonomy level in the TASK.md "
594
- "header; a human must own a high-risk gate (run.md guard)")
709
+ "without a lowered autonomy levelset autonomy: manual or conservative in "
710
+ "the TASK.md header; a human must own a high-risk gate (run.md guard)")
711
+ # tamper tripwire (verify-integrity): the method's first mechanical cheat
712
+ # block. A completing outcome is refused if the red suite or the frozen §3
713
+ # changed since the tests->build snapshot. Placed BEFORE the waiver write so
714
+ # a tamper finding is never launderable through RISK-ACCEPTED.
715
+ _tamper_guard(root, state, slug)
716
+ # §5 scope gate (build-scope-lock): touched ⊆ declared, or a named refusal —
717
+ # same placement discipline as the tripwire (before the waiver, never on HARD-STOP).
718
+ _scope_guard(root, state, slug)
595
719
  if args.outcome == "RISK-ACCEPTED":
596
720
  # A waiver must be SIGNED: owner, ticket, expiry (glossary). Stored in state
597
721
  # so a later `check` can read/expire it. Refuse a partial waiver outright.
@@ -609,8 +733,9 @@ def cmd_gate(args: argparse.Namespace) -> None:
609
733
  state["tasks"][slug]["updated"] = _now()
610
734
  save_state(root, state)
611
735
  print(f"task '{slug}' gate -> {args.outcome}")
612
- if args.outcome == "HARD-STOP":
613
- print("HARD-STOP recorded: return to BUILD; nothing ships on a failing/security gate.")
736
+ # the engine-sourced next step (next-footer-engine): a completing gate hands off to the
737
+ # state arm; HARD-STOP routes to "resolve HARD-STOP …" — converging the old bespoke line.
738
+ print(_next_footer(root, state))
614
739
 
615
740
 
616
741
  def cmd_reopen(args: argparse.Namespace) -> None:
@@ -636,8 +761,8 @@ def cmd_reopen(args: argparse.Namespace) -> None:
636
761
  if not reason:
637
762
  _die("reopen_reason_required: reopen records WHY — supply a non-empty --reason")
638
763
  target = args.to
639
- if target not in PHASES[:7]: # specify..observe; never "done", never an unknown name
640
- _die(f"reopen_target_invalid: --to must be one of {', '.join(PHASES[:7])} (got {target!r})")
764
+ if target not in PHASES[:-1]: # ground..observe; never "done", never an unknown name
765
+ _die(f"reopen_target_invalid: --to must be one of {', '.join(PHASES[:-1])} (got {target!r})")
641
766
  now = _now()
642
767
  entry = {"from": "done", "to": target, "reason": reason, "at": now,
643
768
  "prior_gate": t.get("gate", "none")}
@@ -650,6 +775,32 @@ def cmd_reopen(args: argparse.Namespace) -> None:
650
775
  _sync_task_marker(root, slug, target)
651
776
  save_state(root, state)
652
777
  print(f"task '{slug}' reopened: done -> {target} (reason recorded); gate reset to none")
778
+ print(_next_footer(root, state))
779
+
780
+
781
+ def cmd_heal(args: argparse.Namespace) -> None:
782
+ """Report a CONFIRMED semantic cheat — an earned-green failure the adversarial refute-read
783
+ found — and enter the bounded self-heal loop (heal-then-escalate). The judgment rubric (the
784
+ specific cheats and how to spot them) lives in 6-verify.md, never the engine.
785
+
786
+ The engine cannot SEE a judgment cheat — this is the agent's honest report (honor-system,
787
+ necessary-not-sufficient; the human verify gate stays the real backstop, and the engine
788
+ never spawns the refute-read). It routes through the SAME _heal_or_escalate as the
789
+ mechanical tripwire: return-to-build for an honest redo (≤HEAL_CAP), then a HARD-STOP
790
+ escalation. The refute-read is a verify-gate activity, so the task must be at verify."""
791
+ root = _require_root()
792
+ state = load_state(root)
793
+ slug = _resolve_task(state, args.slug)
794
+ reason = (args.reason or "").strip()
795
+ if not reason:
796
+ _die("heal_reason_required: heal records the refute-read finding — supply a "
797
+ "non-empty --reason (never a silent loop)")
798
+ phase = state["tasks"][slug].get("phase")
799
+ if phase != "verify":
800
+ _die(f"heal_not_at_verify: task '{slug}' is at '{phase}', not verify — the "
801
+ "adversarial refute-read is a verify-gate activity; build then advance to "
802
+ "verify before reporting a cheat")
803
+ _heal_or_escalate(root, state, slug, reason="refute-read:" + reason, source="refute-read")
653
804
 
654
805
 
655
806
  def cmd_lock(args: argparse.Namespace) -> None:
@@ -680,6 +831,7 @@ def cmd_lock(args: argparse.Namespace) -> None:
680
831
  separators=(",", ":")))
681
832
  else:
682
833
  print(f"locked setup ({','.join(layers)}) by {who} @ {when}")
834
+ print(_next_footer(root, state))
683
835
 
684
836
 
685
837
  def _has_production_roadmap(state: dict) -> bool:
@@ -713,6 +865,7 @@ def cmd_stage(args: argparse.Namespace) -> None:
713
865
  print(f"project stage -> {args.stage}")
714
866
  if bypassing:
715
867
  print("(--force: bypassed roadmap check — no production milestone drafted)")
868
+ print(_next_footer(root, state))
716
869
 
717
870
 
718
871
  def cmd_status(args: argparse.Namespace) -> None:
@@ -744,6 +897,9 @@ def cmd_status(args: argparse.Namespace) -> None:
744
897
  # Reuses the canonical helper — do NOT write a parallel predicate.
745
898
  unlocked = not _setup_locked(state)
746
899
  print(f"project : {state.get('project', '(unknown)')}")
900
+ # project autonomy default (task init-auto-default): the posture new tasks INHERIT,
901
+ # read LIVE from PROJECT.md so the human sees the project-wide throttle every session.
902
+ print(f"project autonomy: {_project_autonomy(root)} (default — new tasks inherit)")
747
903
  print(f"stage : {state.get('stage', '(unknown)')}")
748
904
  # project GOAL + active-milestone goal (v20) — the loop's orientation anchor, read
749
905
  # LIVE from PROJECT.md / MILESTONE.md (never state.json). Additive: every existing
@@ -752,6 +908,13 @@ def cmd_status(args: argparse.Namespace) -> None:
752
908
  _active_ms = state.get("active_milestone")
753
909
  if _active_ms:
754
910
  print(f"m-goal : {_milestone_doc(root, _active_ms)[1]} (← {_active_ms})")
911
+ # goal-ready (task goal-auto-ready-gate): is the active milestone's goal AUTO-READY
912
+ # — every exit criterion citing a verifier `(verify: …)` so the engine can self-verify
913
+ # the result against it? Read LIVE from MILESTONE.md; surfaced every session so the
914
+ # human sees the goal-clarity gap. Additive — human-readable only, never the JSON surface.
915
+ _gr_cited, _gr_total = _exit_criteria_cited(root, _active_ms)
916
+ _gr_state = "auto-ready ✓" if _goal_auto_ready(root, _active_ms) else "NOT auto-ready"
917
+ print(f"goal-ready: {_gr_state} ({_gr_cited}/{_gr_total} exit criteria cite a verifier)")
755
918
  # foundation pointer — read the cross-milestone context first (anti-rot)
756
919
  if (root / "PROJECT.md").exists():
757
920
  print("context : .add/PROJECT.md (foundation: domain · spec · UI/UX — read first)")
@@ -791,6 +954,18 @@ def cmd_status(args: argparse.Namespace) -> None:
791
954
  f"({m_tasks} task{'s' if m_tasks != 1 else ''})")
792
955
 
793
956
  print(f"active : {active or '(none)'}")
957
+ # surface the active task's autonomy level (task explicit-autonomy-dial) so the human
958
+ # reads the throttle every session; "unset" when no explicit `autonomy:` line is present.
959
+ if active and active in tasks:
960
+ print(f"autonomy: {_autonomy_level(_task_header(root, active)) or 'unset'}")
961
+ # grounded (task ground-bundle-wiring): does the active task's §0 GROUND map cite the
962
+ # anchors §3 names? measure-not-block, human-readable only (never the JSON surface). A
963
+ # pre-ground / legacy task (no §0) -> _task_grounded None -> NO line, so the surface is
964
+ # purely additive: an existing task's status output is byte-unchanged.
965
+ _g = _task_grounded(root, active)
966
+ if _g is not None:
967
+ print("grounded: " + ("grounded ✓ — §0 cites the anchors §3 names" if _g
968
+ else "not yet — fill the §0 GROUND anchors (add.py guide)"))
794
969
  if not tasks:
795
970
  # First-run panel: a brand-new project's status is the moment a user is most
796
971
  # lost. When the setup is unlocked, the only correct next move is review+lock —
@@ -840,6 +1015,7 @@ def cmd_status(args: argparse.Namespace) -> None:
840
1015
  # routed there through the CLI alone. Never a dead pointer: the path is printed
841
1016
  # only if the file exists; a missing tree gets an install hint instead.
842
1017
  _PHASE_GUIDE_FILES = {
1018
+ "ground": "0-ground.md",
843
1019
  "specify": "1-specify.md", "scenarios": "2-scenarios.md",
844
1020
  "contract": "3-contract.md", "tests": "4-tests.md",
845
1021
  "build": "5-build.md", "verify": "6-verify.md", "observe": "7-observe.md",
@@ -897,9 +1073,13 @@ def cmd_guide(args: argparse.Namespace) -> None:
897
1073
  if entry is None: # corrupted/hand-edited state.json — fail clean, not KeyError
898
1074
  _die(f"task '{slug}' has unknown phase '{phase}' (state.json corrupted?)")
899
1075
  action, chapter = entry
1076
+ # the guide names the driver too (task gate-owner-marker) — the SAME _driver_stop the
1077
+ # footer renders, on the next-step line. Computed AFTER the unknown-phase guard above,
1078
+ # so a bad phase fails clean and never reaches the marker (it invents no default).
1079
+ marker = _driver_marker(_driver_stop(root, state, slug, phase))
900
1080
  print(f"active : {slug} (phase: {phase})")
901
1081
  print(f"goal : {_project_goal(root)}") # v20 — the next-step surface still shows what the work is FOR
902
- print(f"next : {action}")
1082
+ print(f"next : {action}{marker}")
903
1083
  print(f"read : .add/docs/{chapter}")
904
1084
  gp = _phase_guide_path(root.parent, phase)
905
1085
  if gp is not None:
@@ -926,6 +1106,404 @@ def _read_task_phase(root: Path, slug: str) -> str | None:
926
1106
  return None
927
1107
 
928
1108
 
1109
+ # --- UDD token-layer validator (udd-token-schema) -----------------------------
1110
+ # A pure, stdlib checker for the compact-DTCG 3-layer token dialect. Returns a
1111
+ # list of (code, path, detail) violations — [] means valid. NOT wired into
1112
+ # cmd_check here: udd-check-lint surfaces these as named reds + adds the catalog/
1113
+ # tree rules (the Fork-A boundary frozen in udd-token-schema §3). The dialect and
1114
+ # its NAMED divergences from DTCG 2025.10 live in templates/udd-tokens.md.
1115
+ _TOKEN_LAYERS = ("primitive", "semantic", "component")
1116
+ _TOKEN_LAYER_CITES = {"semantic": "primitive", "component": "semantic"}
1117
+ _TOKEN_TYPES = ("color", "dimension", "number", "fontFamily", "fontWeight", "duration")
1118
+ _TOKEN_HEX_RE = re.compile(r"^#(?:[0-9A-Fa-f]{6}|[0-9A-Fa-f]{8})$")
1119
+ _TOKEN_DIM_RE = re.compile(r"^-?\d+(?:\.\d+)?(?:px|rem|em|%|vh|vw)$")
1120
+ _TOKEN_DUR_RE = re.compile(r"^\d+(?:\.\d+)?(?:ms|s)$")
1121
+
1122
+
1123
+ def _token_value_form_ok(ttype: str, value: object) -> bool:
1124
+ """True if a LITERAL value matches the compact form for its $type."""
1125
+ if ttype == "color":
1126
+ return isinstance(value, str) and bool(_TOKEN_HEX_RE.match(value))
1127
+ if ttype == "dimension":
1128
+ return isinstance(value, str) and bool(_TOKEN_DIM_RE.match(value))
1129
+ if ttype == "number":
1130
+ return isinstance(value, (int, float)) and not isinstance(value, bool)
1131
+ if ttype == "fontWeight":
1132
+ return isinstance(value, str) or (
1133
+ isinstance(value, int) and not isinstance(value, bool) and 100 <= value <= 900)
1134
+ if ttype == "duration":
1135
+ return isinstance(value, str) and bool(_TOKEN_DUR_RE.match(value))
1136
+ if ttype == "fontFamily":
1137
+ return isinstance(value, str) or (
1138
+ isinstance(value, list) and bool(value) and all(isinstance(x, str) for x in value))
1139
+ return False
1140
+
1141
+
1142
+ def _token_layer_violations(tokens: dict) -> list[tuple[str, str, str]]:
1143
+ """Validate a compact-DTCG token dict against the 3-layer citation rules.
1144
+
1145
+ Pure (never mutates `tokens`), stdlib-only, deterministic document order.
1146
+ Returns [] when valid, else one (code, path, detail) per violation. The six
1147
+ codes are the token-layer named reds udd-check-lint surfaces. A token's LAYER
1148
+ is its top-level group name; value forms diverge from DTCG 2025.10 to compact
1149
+ scalars (color "#hex", dimension "<n><unit>") — see templates/udd-tokens.md.
1150
+ """
1151
+ if not isinstance(tokens, dict):
1152
+ return [("malformed_value", "", "root is not a JSON object")]
1153
+
1154
+ # index every token (object bearing $value) by dotted path — for alias resolution
1155
+ index: dict[str, dict] = {}
1156
+
1157
+ def _index(node: object, path: list[str]) -> None:
1158
+ if not isinstance(node, dict):
1159
+ return
1160
+ if "$value" in node:
1161
+ index[".".join(path)] = node
1162
+ for key, child in node.items(): # descend even past a token — never skip a subtree
1163
+ if not key.startswith("$"):
1164
+ _index(child, path + [key])
1165
+
1166
+ for top, node in tokens.items():
1167
+ if top in _TOKEN_LAYERS:
1168
+ _index(node, [top])
1169
+
1170
+ out: list[tuple[str, str, str]] = []
1171
+
1172
+ def _walk(node: object, path: list[str], layer: str, inherited: "str | None") -> None:
1173
+ if not isinstance(node, dict):
1174
+ return
1175
+ if "$value" in node: # a token
1176
+ pathstr = ".".join(path)
1177
+ ttype = node.get("$type", inherited)
1178
+ value = node.get("$value")
1179
+ if ttype not in _TOKEN_TYPES:
1180
+ out.append(("unknown_type", pathstr, f"$type {ttype!r} not in {list(_TOKEN_TYPES)}"))
1181
+ elif isinstance(value, str) and value.startswith("{") and value.endswith("}"):
1182
+ target = value[1:-1] # an alias
1183
+ if layer == "primitive":
1184
+ out.append(("primitive_has_alias", pathstr,
1185
+ f"a primitive token must hold a literal, not alias {value}"))
1186
+ elif target not in index:
1187
+ out.append(("unresolved_alias", pathstr, f"{value} resolves to no token"))
1188
+ else:
1189
+ target_layer = target.split(".", 1)[0]
1190
+ if target_layer != _TOKEN_LAYER_CITES[layer]:
1191
+ out.append(("cross_layer_citation", pathstr,
1192
+ f"{layer} may alias only {_TOKEN_LAYER_CITES[layer]}, not {target_layer}"))
1193
+ elif not _token_value_form_ok(ttype, value): # a literal
1194
+ out.append(("malformed_value", pathstr, f"{value!r} is not a valid {ttype}"))
1195
+ # a token should be a leaf; if it carries non-$ children, validate them too rather
1196
+ # than letting them pass silently (fail-closed — never skip a subtree).
1197
+ for key, child in node.items():
1198
+ if not key.startswith("$"):
1199
+ _walk(child, path + [key], layer, ttype)
1200
+ return
1201
+ gtype = node.get("$type", inherited) # a group
1202
+ for key, child in node.items():
1203
+ if not key.startswith("$"):
1204
+ _walk(child, path + [key], layer, gtype)
1205
+
1206
+ for top, node in tokens.items():
1207
+ if top not in _TOKEN_LAYERS:
1208
+ out.append(("unknown_layer", top, f"top-level group {top!r} is not a layer"))
1209
+ continue
1210
+ _walk(node, [top], top, None)
1211
+
1212
+ return out
1213
+
1214
+
1215
+ # ---- udd-catalog-content-schema (task 2/4): component catalog + content-tree validator ----
1216
+ _PROPSPEC_LITERALS = ("string", "number", "boolean")
1217
+
1218
+
1219
+ def _propspec_malformed(spec: object) -> "str | None":
1220
+ """Return a reason if a catalog PropSpec is malformed, else None.
1221
+
1222
+ A PropSpec is exactly one of: {type: string|number|boolean} ·
1223
+ {type: enum, values: [str,…]} · {type: token, token: <$type>} (a task-1 $type).
1224
+ """
1225
+ if not isinstance(spec, dict):
1226
+ return "PropSpec is not an object"
1227
+ ptype = spec.get("type")
1228
+ if ptype in _PROPSPEC_LITERALS:
1229
+ return None
1230
+ if ptype == "enum":
1231
+ values = spec.get("values")
1232
+ if not isinstance(values, list) or not values or not all(isinstance(x, str) for x in values):
1233
+ return "enum PropSpec needs a non-empty list of string values"
1234
+ return None
1235
+ if ptype == "token":
1236
+ ttype = spec.get("token")
1237
+ if ttype not in _TOKEN_TYPES:
1238
+ return f"token PropSpec names unknown $type {ttype!r}"
1239
+ return None
1240
+ return f"unknown PropSpec type {ptype!r}"
1241
+
1242
+
1243
+ def _prop_value_code(spec: dict, value: object) -> "str | None":
1244
+ """Return a violation CODE if a tree prop value mismatches its well-formed PropSpec, else None.
1245
+
1246
+ token props are LAYER-only here (frozen §3 @ v2): the value must be a
1247
+ `{semantic.*}` alias. A non-alias literal → prop_type_mismatch; a wrong-layer
1248
+ alias → non_semantic_prop_token. Target existence + $type-match defer to
1249
+ udd-check-lint (the composer that holds tokens.json).
1250
+ """
1251
+ ptype = spec.get("type")
1252
+ if ptype == "string":
1253
+ return None if isinstance(value, str) else "prop_type_mismatch"
1254
+ if ptype == "number":
1255
+ ok = isinstance(value, (int, float)) and not isinstance(value, bool)
1256
+ return None if ok else "prop_type_mismatch"
1257
+ if ptype == "boolean":
1258
+ return None if isinstance(value, bool) else "prop_type_mismatch"
1259
+ if ptype == "enum":
1260
+ return None if value in spec.get("values", []) else "prop_type_mismatch"
1261
+ if ptype == "token":
1262
+ if not (isinstance(value, str) and value.startswith("{") and value.endswith("}")):
1263
+ return "prop_type_mismatch" # a token prop must be an alias, not a literal
1264
+ if value[1:-1].split(".", 1)[0] != "semantic":
1265
+ return "non_semantic_prop_token" # v2: the alias must target the semantic layer
1266
+ return None
1267
+ return None # unreachable for well-formed specs
1268
+
1269
+
1270
+ def _catalog_tree_violations(catalog: dict, tree: dict) -> list[tuple[str, str, str]]:
1271
+ """Validate a json-render content TREE against OUR component CATALOG.
1272
+
1273
+ Pure (never mutates `catalog`/`tree`), stdlib-only, deterministic order. Returns
1274
+ [] when valid, else one (code, path, detail) per violation. The eight named reds:
1275
+ tree_cites_uncataloged_component · unknown_prop · prop_type_mismatch ·
1276
+ non_semantic_prop_token · dangling_child · children_not_allowed · missing_root ·
1277
+ malformed_catalog. SEPARATE from _token_layer_violations; udd-check-lint composes
1278
+ both. non_semantic_prop_token is LAYER-only (§3 @ v2) — token existence/$type-match
1279
+ are udd-check-lint's job (it holds tokens.json). See templates/udd-catalog.md.
1280
+ """
1281
+ out: list[tuple[str, str, str]] = []
1282
+
1283
+ # 1. catalog PropSpecs (malformed_catalog) — and collect the well-formed specs
1284
+ components = catalog.get("components") if isinstance(catalog, dict) else None
1285
+ if not isinstance(components, dict):
1286
+ out.append(("malformed_catalog", "components", "catalog has no 'components' object"))
1287
+ components = {}
1288
+ specs: dict[str, dict[str, dict]] = {} # component -> {prop: well-formed spec}
1289
+ declared_names: dict[str, set] = {} # component -> all declared prop names
1290
+ for cname, comp in components.items():
1291
+ if not isinstance(comp, dict): # v3: a component entry must be an object
1292
+ out.append(("malformed_catalog", f"components.{cname}", "component entry is not an object"))
1293
+ declared_names[cname] = set()
1294
+ specs[cname] = {}
1295
+ continue
1296
+ cprops = comp.get("props", {})
1297
+ cprops = cprops if isinstance(cprops, dict) else {}
1298
+ declared_names[cname] = set(cprops.keys())
1299
+ ok: dict[str, dict] = {}
1300
+ for pname, spec in cprops.items():
1301
+ reason = _propspec_malformed(spec)
1302
+ if reason is not None:
1303
+ out.append(("malformed_catalog", f"components.{cname}.props.{pname}", reason))
1304
+ else:
1305
+ ok[pname] = spec
1306
+ specs[cname] = ok
1307
+
1308
+ # 2. root (missing_root) — checked before the elements walk
1309
+ elements = tree.get("elements") if isinstance(tree, dict) else None
1310
+ elements = elements if isinstance(elements, dict) else {}
1311
+ root = tree.get("root") if isinstance(tree, dict) else None
1312
+ if not isinstance(root, str) or root not in elements:
1313
+ out.append(("missing_root", "root", f"root {root!r} is absent from elements"))
1314
+
1315
+ # 3. elements (document key order)
1316
+ for eid, el in elements.items():
1317
+ if not isinstance(el, dict): # v3: an element must be an object
1318
+ out.append(("malformed_element", f"elements.{eid}", "element is not an object"))
1319
+ continue
1320
+ etype = el.get("type")
1321
+ cataloged = isinstance(etype, str) and etype in components
1322
+ if not cataloged:
1323
+ out.append(("tree_cites_uncataloged_component", f"elements.{eid}.type",
1324
+ f"type {etype!r} not in catalog"))
1325
+
1326
+ props = el.get("props")
1327
+ if "props" in el and not isinstance(props, dict): # v3: props must be an object
1328
+ out.append(("malformed_element", f"elements.{eid}.props", "props is not an object"))
1329
+ elif cataloged and isinstance(props, dict):
1330
+ for pname, value in props.items():
1331
+ if pname not in declared_names.get(etype, set()):
1332
+ out.append(("unknown_prop", f"elements.{eid}.props.{pname}",
1333
+ f"{pname!r} not declared on {etype}"))
1334
+ elif pname in specs.get(etype, {}): # declared + well-formed spec → value-check
1335
+ code = _prop_value_code(specs[etype][pname], value)
1336
+ if code is not None:
1337
+ out.append((code, f"elements.{eid}.props.{pname}",
1338
+ f"{value!r} does not satisfy {specs[etype][pname]}"))
1339
+ # declared-but-malformed-spec prop: the catalog error is already logged; skip value-check
1340
+
1341
+ children = el.get("children")
1342
+ if "children" in el and not isinstance(children, list): # v3: children must be an array
1343
+ out.append(("malformed_element", f"elements.{eid}.children", "children is not an array"))
1344
+ elif isinstance(children, list) and children: # empty list == absent (no violation)
1345
+ comp_entry = components.get(etype)
1346
+ has_children = (bool(comp_entry.get("hasChildren", False))
1347
+ if cataloged and isinstance(comp_entry, dict) else False)
1348
+ if cataloged and not has_children:
1349
+ out.append(("children_not_allowed", f"elements.{eid}.children",
1350
+ f"{etype} does not declare hasChildren"))
1351
+ else:
1352
+ for cid in children:
1353
+ if cid not in elements:
1354
+ out.append(("dangling_child", f"elements.{eid}.children.{cid}",
1355
+ f"child id {cid!r} absent from elements"))
1356
+
1357
+ return out
1358
+
1359
+
1360
+ # ---- udd-check-lint (task 4/4): the composer + cross-file token resolution ----
1361
+ # The single holder of tokens + catalog + tree. _catalog_tree_violations checks a
1362
+ # token-prop alias LAYER-only (it must target `semantic`); here we close the deferral
1363
+ # task 2 left — resolve that alias against tokens.json for EXISTENCE + $type-match.
1364
+
1365
+ def _semantic_token_index(tokens: dict) -> dict[str, "str | None"]:
1366
+ """Map each semantic token's dotted path -> its effective $type.
1367
+
1368
+ A token is a node bearing $value; its $type is the nearest $type on its path
1369
+ (DTCG group inheritance — $type sits on the GROUP, the leaf carries only $value).
1370
+ Keys carry the layer prefix ("semantic.color.accent"), matching the alias body.
1371
+ """
1372
+ out: dict[str, "str | None"] = {}
1373
+ sem = tokens.get("semantic") if isinstance(tokens, dict) else None
1374
+ if not isinstance(sem, dict):
1375
+ return out
1376
+
1377
+ def _walk(node: object, path: list[str], inherited: "str | None") -> None:
1378
+ if not isinstance(node, dict):
1379
+ return
1380
+ ttype = node.get("$type", inherited)
1381
+ if "$value" in node: # a token (a leaf bearing $value)
1382
+ out[".".join(path)] = ttype
1383
+ for key, child in node.items(): # descend even past a token — never skip a subtree
1384
+ if not key.startswith("$"):
1385
+ _walk(child, path + [key], ttype)
1386
+
1387
+ _walk(sem, ["semantic"], None)
1388
+ return out
1389
+
1390
+
1391
def _prop_token_resolution_violations(tokens: dict, catalog: dict, tree: dict) -> list[tuple[str, str, str]]:
    """Resolve a tree's semantic token-prop aliases against tokens.json.

    Pure + TOTAL (never mutates inputs; stdlib only; never raises on dict inputs).
    Deterministic document order; [] == every token-prop alias resolves to an
    existing semantic token of the right $type. Acts ONLY on a prop that is BOTH a
    catalog PropSpec {type:token, token:<$type>} AND a tree {semantic.*} alias (the
    props _catalog_tree_violations passed LAYER-only); everything else is task 1/2's.
    Two codes: unresolved_prop_token · prop_token_type_mismatch.

    Returns a list of (code, path, detail) tuples.
    """
    out: list[tuple[str, str, str]] = []
    sem_index = _semantic_token_index(tokens)
    components = catalog.get("components") if isinstance(catalog, dict) else None
    components = components if isinstance(components, dict) else {}
    elements = tree.get("elements") if isinstance(tree, dict) else None
    elements = elements if isinstance(elements, dict) else {}

    for eid, el in elements.items():
        if not isinstance(el, dict):
            continue  # malformed_element — _catalog_tree_violations' job
        etype = el.get("type")
        comp = components.get(etype) if isinstance(etype, str) else None
        if not isinstance(comp, dict):
            continue  # uncataloged / malformed — already flagged there
        cprops = comp.get("props")
        cprops = cprops if isinstance(cprops, dict) else {}
        props = el.get("props")
        if not isinstance(props, dict):
            continue
        for pname, value in props.items():
            spec = cprops.get(pname)
            if not isinstance(spec, dict) or spec.get("type") != "token":
                continue  # only catalog token-props
            if not (isinstance(value, str) and value.startswith("{") and value.endswith("}")):
                continue  # non-alias literal → task-2's prop_type_mismatch
            target = value[1:-1]
            if target.split(".", 1)[0] != "semantic":
                continue  # non-semantic alias → task-2's non_semantic_prop_token
            want = spec.get("token")  # the declared $type
            # The str guard keeps this TOTAL: a JSON array/object here is unhashable,
            # and a membership test would raise TypeError if _TOKEN_TYPES is a
            # hash-based container (its definition is outside this block).
            if not isinstance(want, str) or want not in _TOKEN_TYPES:
                continue  # malformed token PropSpec → task-2's malformed_catalog owns it
            path = f"elements.{eid}.props.{pname}"
            if target not in sem_index:
                out.append(("unresolved_prop_token", path, f"{value} resolves to no semantic token"))
                continue
            got = sem_index[target]  # the resolved token's inherited $type
            if not isinstance(got, str) or got not in _TOKEN_TYPES:
                continue  # resolved token's $type malformed → task-1's unknown_type owns it
            if got != want:
                out.append(("prop_token_type_mismatch", path,
                            f"{value} is {got!r}, but prop wants {want!r}"))
    return out
1443
+
1444
+
1445
+ def _udd_named_set_checks(root: Path) -> list[tuple[bool, str, str]]:
1446
+ """Lint a project's UDD named set under `.add/design/` (silent when absent).
1447
+
1448
+ Composes _token_layer_violations + _catalog_tree_violations +
1449
+ _prop_token_resolution_violations into cmd_check's (ok, desc, reason) checks.
1450
+ READ-ONLY; FAIL-CLOSED on malformed JSON (a named code, never a crash). Returns
1451
+ [] when no named set exists — so a clean / non-UI project stays untouched.
1452
+ """
1453
+ design = root / "design"
1454
+ tok_path, cat_path = design / "tokens.json", design / "catalog.json"
1455
+ proto_dir = design / "prototypes"
1456
+ trees = sorted(p for p in proto_dir.glob("*.json") if p.is_file()) if proto_dir.is_dir() else []
1457
+ if not (tok_path.exists() or cat_path.exists() or trees):
1458
+ return [] # silent-when-absent
1459
+
1460
+ def _load(p: Path) -> "tuple[object, str | None]":
1461
+ try:
1462
+ return json.loads(p.read_text(encoding="utf-8")), None
1463
+ except (json.JSONDecodeError, OSError) as e:
1464
+ return None, str(e)
1465
+
1466
+ out: list[tuple[bool, str, str]] = []
1467
+
1468
+ tokens = None
1469
+ if tok_path.exists():
1470
+ tokens, err = _load(tok_path)
1471
+ if err is not None:
1472
+ out.append((False, "tokens.json parses", f"malformed_tokens_json: {err}"))
1473
+ tokens = None
1474
+ else:
1475
+ v = _token_layer_violations(tokens)
1476
+ if not v:
1477
+ out.append((True, "tokens.json layer-valid", ""))
1478
+ else:
1479
+ out += [(False, "tokens.json layer-valid", f"{c}: {p} — {d}") for c, p, d in v]
1480
+
1481
+ catalog = None
1482
+ if cat_path.exists():
1483
+ catalog, err = _load(cat_path)
1484
+ if err is not None:
1485
+ out.append((False, "catalog.json parses", f"malformed_catalog_json: {err}"))
1486
+ catalog = None
1487
+
1488
+ for tp in trees:
1489
+ name = tp.stem
1490
+ tree, err = _load(tp)
1491
+ if err is not None:
1492
+ out.append((False, f"prototype '{name}' parses", f"malformed_prototype_json: {err}"))
1493
+ continue
1494
+ if catalog is None:
1495
+ continue # no catalog to validate a tree against — skip quietly
1496
+ v = list(_catalog_tree_violations(catalog, tree))
1497
+ if tokens is not None:
1498
+ v += _prop_token_resolution_violations(tokens, catalog, tree)
1499
+ if not v:
1500
+ out.append((True, f"prototype '{name}' valid", ""))
1501
+ else:
1502
+ out += [(False, f"prototype '{name}' valid", f"{c}: {p} — {d}") for c, p, d in v]
1503
+
1504
+ return out
1505
+
1506
+
929
1507
  def cmd_check(args: argparse.Namespace) -> None:
930
1508
  """Read-only integrity check of the .add project. Exit 1 if anything fails."""
931
1509
  as_json = getattr(args, "json", False)
@@ -964,6 +1542,16 @@ def cmd_check(args: argparse.Namespace) -> None:
964
1542
  # the intake flow — NOT a failure. Names structure, never the act of intake.
965
1543
  warnings.append((f"task '{slug}'", "is outside a milestone — size it via the /add "
966
1544
  "intake flow (or attach with --milestone)"))
1545
+ # autonomy level (task explicit-autonomy-dial): a REAL out-of-set token is a hard
1546
+ # unknown_autonomy_level; a LIVE task (phase before done/observe) with no `autonomy:`
1547
+ # line is implicit_autonomy — a WARN, never red. Done/observe predecessors are SKIPPED
1548
+ # (a fresh live-only predicate, NOT the audit open-front skip) so the board never floods.
1549
+ _alvl = _autonomy_level(_task_header(root, slug))
1550
+ checks.append((_alvl != "?", f"task '{slug}' autonomy level recognized",
1551
+ "unknown_autonomy_level (token outside manual|conservative|auto)"))
1552
+ if _alvl is None and t.get("phase") not in ("done", "observe"):
1553
+ warnings.append((f"task '{slug}'", "has no explicit autonomy level (implicit_autonomy) "
1554
+ "— set `autonomy: manual|conservative|auto` in the header"))
967
1555
  for dep in t.get("depends_on") or []:
968
1556
  checks.append((dep in tasks or dep in archived_slugs,
969
1557
  f"task '{slug}' dep '{dep}' resolves", "unknown task"))
@@ -985,6 +1573,31 @@ def cmd_check(args: argparse.Namespace) -> None:
985
1573
  if lint_result is not None:
986
1574
  ok, reason = lint_result
987
1575
  checks.append((ok, f"task '{slug}' deltas well-formed", reason))
1576
+ # tamper tripwire standing monitor (verify-integrity): a non-done task whose
1577
+ # snapshot has diverged is surfaced EARLY — WARN, never red (the verify GATE
1578
+ # is where it bites, HARD-STOP). Fail-closed via _tripwire_divergence.
1579
+ if not _task_done(t):
1580
+ _tw = t.get("tripwire")
1581
+ if _tw and _tripwire_divergence(root, slug, _tw):
1582
+ warnings.append((f"task '{slug}'", "tampered since its tests->build "
1583
+ "snapshot (build_tampered) — a tracked test or the "
1584
+ "frozen §3 changed; the verify gate will HARD-STOP it"))
1585
+ # §5 scope standing monitor (build-scope-lock): a pending out-of-scope
1586
+ # touch (or a tampered baseline) surfaces EARLY — WARN, never red; the
1587
+ # verify gate is where it bites.
1588
+ _sc = t.get("scope")
1589
+ if isinstance(_sc, dict):
1590
+ _tamper, _out = _scope_findings(root, slug, _sc)
1591
+ if _tamper:
1592
+ warnings.append((f"task '{slug}'", "scope-snapshot.json is "
1593
+ f"{_tamper} against its anchor "
1594
+ "(scope_snapshot_tampered pending) — the verify "
1595
+ "gate will refuse it"))
1596
+ elif _out:
1597
+ warnings.append((f"task '{slug}'", "touched outside its declared "
1598
+ f"§5 Scope: {' · '.join(_out[:3])} "
1599
+ "(scope_violation pending) — the verify gate "
1600
+ "will refuse it"))
988
1601
 
989
1602
  # drift: a done milestone must have no unfinished tasks
990
1603
  for mslug, m in milestones.items():
@@ -994,11 +1607,69 @@ def cmd_check(args: argparse.Namespace) -> None:
994
1607
  checks.append((not unfinished, f"done milestone '{mslug}' fully complete",
995
1608
  f"unfinished: {unfinished}"))
996
1609
 
1610
+ # goal-auto-ready (task goal-auto-ready-gate): nudge the ACTIVE milestone toward a
1611
+ # machine-checkable goal — every exit criterion citing a verifier `(verify: …)` so the
1612
+ # engine can self-verify the result against it. WARN, NEVER red (measurement, not a gate);
1613
+ # fired IFF the goal HAS criteria but not all cite (total >= 1 AND cited < total) — a
1614
+ # zero-criteria milestone is shaping's nudge, not this one's. LIVE-ONLY: the OPEN active
1615
+ # milestone only — a done-but-not-yet-archived one (still the active pointer until
1616
+ # archive clears it) and closed/archived predecessors are never retro-flagged (Must #4).
1617
+ _active_ms = state.get("active_milestone")
1618
+ if _active_ms in milestones and milestones[_active_ms].get("status") != "done":
1619
+ _cited, _total = _exit_criteria_cited(root, _active_ms)
1620
+ if _total >= 1 and _cited < _total:
1621
+ warnings.append(("goal_not_auto_ready",
1622
+ f"milestone '{_active_ms}' goal not auto-ready "
1623
+ f"({_cited}/{_total} exit criteria cite a verifier) — add "
1624
+ "(verify: <test|command|metric>) to each bare criterion"))
1625
+
1626
+ # grounded (task ground-bundle-wiring): the freeze review checklist asks the human to
1627
+ # confirm the contract is grounded; this is the standing monitor for the gap. WARN, NEVER
1628
+ # red (measure-not-block, mirrors goal_not_auto_ready) — fires IFF the ACTIVE task's §3 is
1629
+ # FROZEN AND its §0 GROUND map is ungrounded (the precise "froze without grounding" gap, so
1630
+ # no nag during pre-freeze drafting). A pre-ground / legacy task (no §0 -> _grounded_state
1631
+ # None) is EXEMPT, never retro-flagged. Rides the existing `warnings` array — no new key.
1632
+ _at = state.get("active_task")
1633
+ if _at in tasks:
1634
+ _raw = _raw_phase_bodies(root, _at)
1635
+ if _contract_frozen(_raw.get(3, "")) and _grounded_state(_raw) is False:
1636
+ warnings.append(("task_not_grounded",
1637
+ f"task '{_at}' froze its contract without grounding — fill the "
1638
+ "§0 GROUND anchors the contract cites (add.py guide)"))
1639
+
1640
+ # wave-ledger fork-base (engine-merge-base-enforcement): the engine EXECUTES the
1641
+ # streams.md rule — every roster echo must match `base:`. A FILLED mismatch is red at
1642
+ # ANY status; a pending row is red at `status: merging` (merge-time strictness) but only
1643
+ # a WARN at `status: live` (measure-not-block: step-0 echoes land mid-wave). An
1644
+ # unparseable ledger is fail-closed (`wave_ledger_malformed`) — never a silent skip.
1645
+ for _wp in _wave_ledgers(root):
1646
+ _wm = _wp.parent.name
1647
+ _w = _parse_wave_ledger(_wp)
1648
+ if _w.get("error"):
1649
+ checks.append((False, f"wave '{_wm}' ledger parses",
1650
+ f"wave_ledger_malformed: {_w['error']}"))
1651
+ continue
1652
+ _bad = [r["task"] for r in _w["rows"] if r["filled"] and not r["matched"]]
1653
+ _pending = [r["task"] for r in _w["rows"] if not r["filled"]]
1654
+ if _w["status"] == "merging":
1655
+ _bad += _pending # merge-time strictness: pending == unverified
1656
+ _pending = []
1657
+ checks.append((not _bad, f"wave '{_wm}' fork-base echoes match base",
1658
+ "unverified_fork_base: " + ", ".join(_bad)))
1659
+ for _t in _pending:
1660
+ warnings.append(("fork_base_pending",
1661
+ f"wave '{_wm}' roster row '{_t}' awaits its step-0 echo"))
1662
+
997
1663
  # dependency graph must be acyclic
998
1664
  cycle = _find_cycle(tasks)
999
1665
  checks.append((cycle is None, "task dependencies are acyclic",
1000
1666
  f"cycle: {' -> '.join(cycle)}" if cycle else ""))
1001
1667
 
1668
+ # UDD foundation (udd-check-lint): lint a project's named set under .add/design/ —
1669
+ # composes the token + catalog/tree validators + the cross-file prop-token resolution.
1670
+ # Silent when absent; read-only; fail-closed on malformed JSON.
1671
+ checks.extend(_udd_named_set_checks(root))
1672
+
1002
1673
  passed = sum(1 for ok, _, _ in checks if ok)
1003
1674
  failed = len(checks) - passed
1004
1675
  if as_json:
@@ -1022,6 +1693,144 @@ def cmd_check(args: argparse.Namespace) -> None:
1022
1693
  raise SystemExit(1)
1023
1694
 
1024
1695
 
1696
+ # ---------------------------------------------------------------------------
1697
+ # wave-ledger fork-base enforcement (engine-merge-base-enforcement)
1698
+ #
1699
+ # streams.md states the rule; these helpers EXECUTE it (words-exist != method-works).
1700
+ # The ledger is the hand-written `.add/milestones/<m>/WAVE.md` per the streams.md
1701
+ # template: a `base: <sha>` line, a `status: live|merging` field on the header line,
1702
+ # and a `### Roster` table whose 3rd column holds the PASTED `rev-parse HEAD` echo.
1703
+ # Parsing is FAIL-CLOSED: anything off-grammar names the unparseable piece rather
1704
+ # than silently passing — a silent skip would un-guard the trust layer.
1705
+
1706
# A candidate sha token: a whole word (`\b`-delimited) of 7-40 LOWERCASE hex chars.
# Uppercase hex and runs longer than 40 characters do not match.
_WAVE_SHA_RE = re.compile(r"\b[0-9a-f]{7,40}\b")
1707
+
1708
+
1709
+ def _sha_match(a: str, b: str) -> bool:
1710
+ """Exact or prefix match, both tokens >=7 hex chars (git short-sha tolerant)."""
1711
+ if len(a) < 7 or len(b) < 7:
1712
+ return False
1713
+ return a == b or a.startswith(b) or b.startswith(a)
1714
+
1715
+
1716
+ def _wave_ledgers(root: Path) -> list:
1717
+ """Every live wave ledger, stable order (the same glob as the status hint)."""
1718
+ return sorted(p for p in (root / "milestones").glob("*/WAVE.md") if p.is_file())
1719
+
1720
+
1721
+ def _parse_wave_ledger(path: Path) -> dict:
1722
+ """Parse a WAVE.md against the streams.md template grammar. Fail-closed: a dict
1723
+ with an "error" key names exactly the piece that did not parse."""
1724
+ try:
1725
+ text = path.read_text(encoding="utf-8")
1726
+ except OSError as e:
1727
+ return {"error": f"unreadable ({e.__class__.__name__})"}
1728
+ # status is read ONLY from the FIRST `wave:` line — the header. Body text must
1729
+ # never rescue a malformed/invalid header: not free prose (heal-1 FG-2, an
1730
+ # unanchored search) and not a later wave:-prefixed line either (heal-2 FG-3 —
1731
+ # `(?m)^wave:.*?status:` happily skipped a status-less header to a body line).
1732
+ m_header = re.search(r"(?m)^wave:.*$", text)
1733
+ if not m_header:
1734
+ return {"error": "no 'wave:' header line"}
1735
+ # the status value is the EXACT token after `status:`, terminated only by
1736
+ # whitespace, the `·` separator, or end-of-line (v3): `\b` is not a token
1737
+ # terminator on hand-written input — it fires at `|` and `-`, so the unfilled
1738
+ # template placeholder `live|merging` (and drift like `live-ish`) parsed as
1739
+ # its valid prefix and greened an unfilled ledger (5th refute pass). The
1740
+ # `status:` label must itself START a field — start-of-line, whitespace, or
1741
+ # `·` before it (v4): an embedded `substatus:` is not a status field
1742
+ # (6th refute pass, N12).
1743
+ m_status = re.search(r"(?:^|[\s·])status:[ \t]*([^\s·]*)", m_header.group(0))
1744
+ if not m_status:
1745
+ return {"error": "no 'status: live|merging' on the wave: header line"}
1746
+ if m_status.group(1) not in ("live", "merging"):
1747
+ return {"error": "status token "
1748
+ f"{m_status.group(1)!r} is not exactly live or merging"}
1749
+ # base is read ONLY from the FIRST `base:` line, token on THAT line (heal-3 Pex:
1750
+ # `(?m)^base:\s*(\S+)` let \s cross the newline, so an EMPTY base: line parsed
1751
+ # as filled with whatever token the next line started with).
1752
+ m_base_line = re.search(r"(?m)^base:.*$", text)
1753
+ base = ""
1754
+ if m_base_line:
1755
+ m_tok = re.search(r"base:[ \t]*(\S+)", m_base_line.group(0))
1756
+ base = m_tok.group(1) if m_tok else ""
1757
+ if not re.fullmatch(r"[0-9a-f]{7,40}", base):
1758
+ return {"error": "no parseable 'base:' sha (7-40 hex)"}
1759
+ rows, in_roster, echo_col = [], False, None
1760
+ for line in text.splitlines():
1761
+ if line.startswith("### "):
1762
+ in_roster = line.lower().startswith("### roster")
1763
+ echo_col = None
1764
+ continue
1765
+ if not in_roster or not line.lstrip().startswith("|"):
1766
+ continue
1767
+ cells = [c.strip() for c in line.strip().strip("|").split("|")]
1768
+ if echo_col is None:
1769
+ # the column-header row MUST name the fork-base column, and the echo is
1770
+ # read from WHEREVER that label sits (heal-3: a hardcoded cells[2] let an
1771
+ # extra leading column hide the echo, and a headerless roster silently
1772
+ # swallowed its first DATA row as the header — a silent skip, refused).
1773
+ # EXACTLY one label may match (v2 ambiguity refusal): first-wins on a
1774
+ # hand-written artifact is fail-open — a second matching label such as
1775
+ # "fork-base-prev" would steal the echo and green a mismatched roster
1776
+ # (4th refute pass, N1/N10).
1777
+ matches = [i for i, c in enumerate(cells) if "fork-base" in c.lower()]
1778
+ if not matches:
1779
+ return {"error": "roster column-header row names no 'fork-base' column"}
1780
+ if len(matches) > 1:
1781
+ labels = ", ".join(cells[i] for i in matches)
1782
+ return {"error": f"ambiguous fork-base columns: {labels}"}
1783
+ echo_col = matches[0]
1784
+ continue
1785
+ if all(set(c) <= set("-: ") for c in cells):
1786
+ continue # the |---| separator row
1787
+ if len(cells) <= echo_col:
1788
+ return {"error": f"roster row with no fork-base cell: {line.strip()!r}"}
1789
+ shas = _WAVE_SHA_RE.findall(cells[echo_col])
1790
+ # fail-closed cell semantics (heal-1 FG-1): the cell must BE the pasted echo,
1791
+ # so EVERY sha token in it must match base — `any()` would green a drift note
1792
+ # ("<alien-sha> synced-to <base-prefix>") that documents the very mismatch
1793
+ # this gate exists to refuse. One alien token -> the row is NOT verified.
1794
+ rows.append({"task": cells[0], "filled": bool(shas),
1795
+ "matched": bool(shas) and all(_sha_match(s, base) for s in shas)})
1796
+ if not rows:
1797
+ return {"error": "no roster row"}
1798
+ return {"status": m_status.group(1), "base": base, "rows": rows}
1799
+
1800
+
1801
def cmd_wave_verify(args: argparse.Namespace) -> None:
    """Explicit merge-time gate over one wave ledger — read-only and judgment-free.

    Strict regardless of the ledger's status: a roster row passes only when its echo
    is filled AND matches `base:`. Prints one verdict line per row, then either dies
    with `unverified_fork_base` or prints the verified message. Nothing is written:
    not the ledger, not its status field, not state.json.

    The ledger is the named milestone's WAVE.md when `args.milestone` is given;
    otherwise the single WAVE.md found under milestones/ (none or several is an
    error).
    """
    root = _require_root()

    if not args.milestone:
        candidates = _wave_ledgers(root)
        if not candidates:
            _die("wave_not_found: no WAVE.md under .add/milestones/ — nothing to verify")
        if len(candidates) > 1:
            names = ", ".join(p.parent.name for p in candidates)
            _die("wave_ambiguous: " + names
                 + " — name one: add.py wave-verify <milestone>")
        target = candidates[0]
    else:
        target = root / "milestones" / args.milestone / "WAVE.md"
        if not target.is_file():
            _die(f"wave_not_found: no WAVE.md for milestone '{args.milestone}'")

    ledger = _parse_wave_ledger(target)
    if ledger.get("error"):
        _die(f"wave_ledger_malformed: {ledger['error']} ({target.parent.name}/WAVE.md)")

    unverified = []
    for row in ledger["rows"]:
        if row["matched"]:
            verdict = "ok"
        elif row["filled"]:
            verdict = "MISMATCH"
        else:
            verdict = "PENDING"
        print(f" {row['task']}: {verdict}")
        if not row["matched"]:
            unverified.append(row["task"])

    if unverified:
        _die("unverified_fork_base: " + ", ".join(unverified)
             + f" — every roster echo must match base {ledger['base'][:12]} before merge-back")
    print(f"wave '{target.parent.name}' verified — every fork-base echo matches base "
          f"{ledger['base'][:12]}; merge-back may proceed (the ledger is untouched).")
1832
+
1833
+
1025
1834
  def cmd_new_milestone(args: argparse.Namespace) -> None:
1026
1835
  root = _require_root()
1027
1836
  state = load_state(root)
@@ -1045,7 +1854,8 @@ def cmd_new_milestone(args: argparse.Namespace) -> None:
1045
1854
  state["active_milestone"] = slug
1046
1855
  save_state(root, state)
1047
1856
  print(f"created milestone '{slug}' -> {mfile}")
1048
- print(f"active milestone set. Decompose it into tasks: add.py new-task <slug> --depends-on ...")
1857
+ print("active milestone set.")
1858
+ print(_next_footer(root, state)) # converges the old "Decompose it into tasks: …" hint
1049
1859
 
1050
1860
 
1051
1861
  def cmd_ready(args: argparse.Namespace) -> None:
@@ -1134,13 +1944,14 @@ def cmd_milestone_done(args: argparse.Namespace) -> None:
1134
1944
  tail = f" ({len(waived)} via a signed RISK-ACCEPTED waiver)" if waived else ""
1135
1945
  print(f"milestone '{slug}' -> done ({len(members)} tasks complete{tail}).")
1136
1946
  print(f"wrote {retro_path.relative_to(root.parent)} (milestone exit report)")
1137
- print("Confirm the MILESTONE.md exit criteria are checked, then archive/start the next.")
1138
1947
  # fold-pressure nudge: milestone close is the natural fold point for open deltas (v11)
1139
1948
  open_deltas = sum(len(v) for v in _collect_open_deltas(root).values())
1140
1949
  if open_deltas:
1141
1950
  noun = "delta" if open_deltas == 1 else "deltas"
1142
1951
  print(f"note: {open_deltas} open {noun} to consolidate into the foundation "
1143
1952
  f"— review with: add.py deltas")
1953
+ # the engine-sourced next step (converges the old "Confirm … archive/start the next" hint)
1954
+ print(_next_footer(root, state))
1144
1955
 
1145
1956
 
1146
1957
  def cmd_archive_milestone(args: argparse.Namespace) -> None:
@@ -1193,6 +2004,7 @@ def cmd_archive_milestone(args: argparse.Namespace) -> None:
1193
2004
  save_state(root, state)
1194
2005
  print(f"archived milestone '{slug}' ({len(members)} tasks) — removed from active state.")
1195
2006
  print("files on disk are untouched; see `add.py status` for the archived rollup.")
2007
+ print(_next_footer(root, state))
1196
2008
 
1197
2009
 
1198
2010
  def cmd_compact(args: argparse.Namespace) -> None:
@@ -1257,6 +2069,7 @@ def cmd_compact(args: argparse.Namespace) -> None:
1257
2069
  for path, n in moved:
1258
2070
  print(f" moved {path} ({n} files)")
1259
2071
  print("recovery: reverse the moves (mv the bundle's parts back) — state needs no edit.")
2072
+ print(_next_footer(root, state))
1260
2073
 
1261
2074
 
1262
2075
  def cmd_set_milestone(args: argparse.Namespace) -> None:
@@ -1275,6 +2088,7 @@ def cmd_set_milestone(args: argparse.Namespace) -> None:
1275
2088
  state["tasks"][task]["updated"] = _now()
1276
2089
  save_state(root, state)
1277
2090
  print(f"task '{task}' -> milestone '{new}'" if new else f"task '{task}' -> milestone (none)")
2091
+ print(_next_footer(root, state))
1278
2092
 
1279
2093
 
1280
2094
  def cmd_use(args: argparse.Namespace) -> None:
@@ -1289,6 +2103,7 @@ def cmd_use(args: argparse.Namespace) -> None:
1289
2103
  state["active_task"] = slug
1290
2104
  save_state(root, state)
1291
2105
  print(f"active task -> '{slug}' (phase={state['tasks'][slug]['phase']})")
2106
+ print(_next_footer(root, state))
1292
2107
 
1293
2108
 
1294
2109
  def _find_cycle(tasks: dict) -> list[str] | None:
@@ -1370,7 +2185,7 @@ def _bar(num: int, den: int, cells: int, g: dict) -> str:
1370
2185
 
1371
2186
 
1372
2187
  def _phase_track(phase: str, g: dict) -> str:
1373
- """Compact 8-cell pipeline (no labels — a single legend explains it):
2188
+ """Compact 9-cell pipeline (no labels — a single legend explains it):
1374
2189
  reached · current · pending. A done task -> all reached."""
1375
2190
  try:
1376
2191
  ci = PHASES.index(phase)
@@ -1434,6 +2249,27 @@ def _project_goal(root: Path) -> str:
1434
2249
  return GOAL_UNSET
1435
2250
 
1436
2251
 
2252
+ def _project_autonomy_token(root: Path):
2253
+ """The RAW autonomy declaration in PROJECT.md — a recognized rung, None when no
2254
+ declaration line is present, or "?" for a real-but-unrecognized token. Uses the
2255
+ anchored _autonomy_level (a title/prose substring is never a declaration) with
2256
+ HTML comments stripped. Unreadable foundation -> None. Read-only and PURE."""
2257
+ try:
2258
+ text = (root / "PROJECT.md").read_text(encoding="utf-8")
2259
+ except OSError:
2260
+ return None
2261
+ return _autonomy_level(re.sub(r"<!--.*?-->", "", text, flags=re.S))
2262
+
2263
+
2264
def _project_autonomy(root: Path) -> str:
    """The autonomy rung a new task INHERITS from the project default. Fail-SAFE:
    with no declaration the result is "auto" (the method default; v7: absent = auto),
    an unrecognized token yields "conservative" (NEVER silently "auto"), and an
    unreadable foundation also yields "auto". Read-only and PURE — mirrors
    _project_goal; the seed source for cmd_new_task."""
    declared = _project_autonomy_token(root)
    if declared is None:
        return "auto"
    if declared == "?":
        return "conservative"
    return declared
2271
+
2272
+
1437
2273
  def _milestone_doc(root: Path, mslug: str) -> tuple[str, str]:
1438
2274
  """(title, goal) from MILESTONE.md; ('(unknown)','(unknown)') if the doc is gone."""
1439
2275
  f = root / "milestones" / mslug / MILESTONE_FILE
@@ -1463,6 +2299,41 @@ def _exit_criteria(root: Path, mslug: str) -> tuple[int, int]:
1463
2299
  return met, total
1464
2300
 
1465
2301
 
2302
+ # A non-empty `(verify: <citation>)` on an exit-criterion line — at least one non-whitespace
2303
+ # char inside, so a bare `(verify:)`/`(verify: )` does NOT count (the mid-text substring trap).
2304
+ _VERIFY_CITE_RE = re.compile(r"\(verify:\s*\S.*?\)", re.I)
2305
+
2306
+
2307
def _exit_criteria_cited(root: Path, mslug: str) -> tuple[int, int]:
    """Tally (cited, total) over the 'Exit criteria' section of MILESTONE.md.

    total counts every `- [ ]` / `- [x]` checkbox line inside the section (the
    section runs from its `## Exit criteria` heading to the next `## ` heading or
    end of file); cited counts those lines on which `_VERIFY_CITE_RE` finds a
    `(verify: <citation>)`. A missing file or a missing section yields (0, 0).
    Read-only: nothing is written.
    """
    doc = root / "milestones" / mslug / MILESTONE_FILE
    if not doc.exists():
        return 0, 0
    section = re.search(r"## Exit criteria.*?(?=\n## |\Z)",
                        doc.read_text(encoding="utf-8"), re.S)
    if section is None:
        return 0, 0
    # Keep only the checkbox lines, then classify them in a second pass.
    criteria = [line for line in section.group(0).splitlines()
                if re.match(r"\s*- \[[ x]\]", line)]
    cited = sum(1 for line in criteria if _VERIFY_CITE_RE.search(line))
    return cited, len(criteria)
2326
+
2327
+
2328
def _goal_auto_ready(root: Path, mslug: str) -> bool:
    """Report whether the milestone goal is AUTO-READY.

    True only when the Exit criteria section holds at least one criterion and
    every criterion carries a verifier citation (cited == total, as tallied by
    `_exit_criteria_cited`). A goal with zero criteria is never auto-ready:
    there is nothing to self-verify against.
    """
    cited, total = _exit_criteria_cited(root, mslug)
    if total < 1:
        return False
    return cited == total
2335
+
2336
+
1466
2337
  def _stage_criteria(root: Path) -> tuple[int, int]:
1467
2338
  """(met, total) checkbox tally inside PROJECT.md's 'Stage goal criteria' section — the
1468
2339
  PROJECT.md analog of _exit_criteria (v22): the human's stage-covered affirmation. Read-only
@@ -1507,11 +2378,17 @@ def _count_test_defs(f: Path) -> int:
1507
2378
  return 0
1508
2379
 
1509
2380
 
1510
- def _tests_count(root: Path, slug: str) -> int:
2381
+ def _primary_test_files(root: Path, slug: str) -> list[Path]:
2382
+ """The PRIMARY test set — *.py directly in the task's tests/ dir (the stable
2383
+ path). A list so the tamper tripwire can hash exactly what the engine counts."""
1511
2384
  d = root / "tasks" / slug / "tests"
1512
2385
  if not d.is_dir():
1513
- return 0
1514
- return sum(_count_test_defs(f) for f in d.glob("*.py"))
2386
+ return []
2387
+ return sorted(d.glob("*.py"))
2388
+
2389
+
2390
def _tests_count(root: Path, slug: str) -> int:
    """Total number of test definitions across the task's primary test files."""
    total = 0
    for test_file in _primary_test_files(root, slug):
        total += _count_test_defs(test_file)
    return total
1515
2392
 
1516
2393
 
1517
2394
  def _confined(p: Path, rootp: Path) -> bool:
@@ -1523,18 +2400,18 @@ def _confined(p: Path, rootp: Path) -> bool:
1523
2400
  return False
1524
2401
 
1525
2402
 
1526
- def _declared_tests_count(root: Path, slug: str) -> int:
1527
- """Count tests at the §4 'Tests live in:' declared path(s). PURE, fail-closed 0.
2403
+ def _declared_test_files(root: Path, slug: str) -> list[Path]:
2404
+ """Resolve the §4 'Tests live in:' declared path(s) to a deduped file list. PURE.
1528
2405
  Tokens are the backticked spans on the FIRST declaring line of the raw §4 body.
1529
2406
  Resolution: './…' -> task dir · contains '/' -> project root (parent of .add) ·
1530
2407
  bare name -> sibling of the previous resolved token (else task dir). A directory
1531
- token counts the *.py files directly inside it; resolved files are deduped.
1532
- v2 confinement: every file read must resolve inside the project root — '..'
1533
- traversal, absolute tokens, and symlink escapes all contribute 0, fail-closed."""
2408
+ token yields the *.py files directly inside it; resolved files are deduped.
2409
+ v2 confinement: every path must resolve inside the project root — '..' traversal,
2410
+ absolute tokens, and symlink escapes are all dropped, fail-closed."""
1534
2411
  body = _raw_phase_bodies(root, slug).get(4, "")
1535
2412
  m = re.search(r"^\s*Tests live in:.*$", body, re.M)
1536
2413
  if not m:
1537
- return 0
2414
+ return []
1538
2415
  tdir = root / "tasks" / slug
1539
2416
  rootp = root.parent.resolve()
1540
2417
  files: list[Path] = []
@@ -1560,7 +2437,12 @@ def _declared_tests_count(root: Path, slug: str) -> int:
1560
2437
  except OSError:
1561
2438
  continue
1562
2439
  files.extend(f for f in cand if f not in files)
1563
- return sum(_count_test_defs(f) for f in files)
2440
+ return files
2441
+
2442
+
2443
def _declared_tests_count(root: Path, slug: str) -> int:
    """Total number of test definitions found in the files that the §4
    'Tests live in:' declaration resolves to; 0 when that resolves to nothing."""
    total = 0
    for test_file in _declared_test_files(root, slug):
        total += _count_test_defs(test_file)
    return total
1564
2446
 
1565
2447
 
1566
2448
  def _tests_info(root: Path, slug: str) -> tuple[int, bool]:
@@ -1574,6 +2456,279 @@ def _tests_info(root: Path, slug: str) -> tuple[int, bool]:
1574
2456
  return (declared, True) if declared > 0 else (0, False)
1575
2457
 
1576
2458
 
2459
def _resolved_test_files(root: Path, slug: str) -> list[Path]:
    """Pick the file set treated as this task's tests.

    The primary set (files directly in the task's tests/ dir) is used whenever it
    contains at least one test definition; otherwise the §4-declared set is used.
    The tamper tripwire hashes exactly the list returned here.
    """
    primary = _primary_test_files(root, slug)
    defs_in_primary = 0
    for test_file in primary:
        defs_in_primary += _count_test_defs(test_file)
    return primary if defs_in_primary > 0 else _declared_test_files(root, slug)
2467
+
2468
+
2469
+ def _md5_text(s: str) -> str:
2470
+ return hashlib.md5(s.encode("utf-8")).hexdigest()
2471
+
2472
+
2473
+ def _md5_file(p: Path) -> str | None:
2474
+ """md5 of a file's bytes; None on ANY read error (fail-closed — a tracked file
2475
+ that cannot be read counts as DIVERGED at the gate, never a crash)."""
2476
+ try:
2477
+ return hashlib.md5(p.read_bytes()).hexdigest()
2478
+ except OSError:
2479
+ return None
2480
+
2481
+
2482
def _tripwire_snapshot(root: Path, slug: str, raw3: str) -> dict:
    """Build the tamper baseline for a task.

    Returns {"contract_md5": md5 of the raw §3 text, "tests": {path: md5}} where
    the tests map covers every file from `_resolved_test_files` that could be
    read. Keys are paths relative to the project root (the parent of *root*);
    when a file cannot be expressed that way its plain string path is used.
    Only bytes are hashed — no test is executed here.
    """
    project_root = root.parent.resolve()
    frozen: dict[str, str] = {}
    for test_file in _resolved_test_files(root, slug):
        digest = _md5_file(test_file)
        if digest is not None:  # an unreadable file is left out of the baseline
            try:
                key = str(test_file.resolve().relative_to(project_root))
            except (ValueError, OSError):
                key = str(test_file)
            frozen[key] = digest
    return {"contract_md5": _md5_text(raw3), "tests": frozen}
2499
+
2500
+
2501
def _tripwire_divergence(root: Path, slug: str, tw: dict) -> list[str]:
    """Compare the current bytes against a stored tripwire snapshot *tw*.

    Returns the list of tamper codes, empty when nothing diverged:
      * "contract_tampered"       — the raw §3 body no longer hashes to
                                    tw["contract_md5"]
      * "build_tampered:<path>"   — a tracked test file hashes differently, or
                                    cannot be read at all (its digest is None)
    Each tracked path is re-read directly from the snapshot's keys; the test
    directory is not globbed again.
    """
    codes: list[str] = []
    current_contract = _md5_text(_raw_phase_bodies(root, slug).get(3, ""))
    if current_contract != tw.get("contract_md5"):
        codes.append("contract_tampered")
    project_root = root.parent.resolve()
    tracked = tw.get("tests") or {}
    codes.extend(f"build_tampered:{rel}"
                 for rel, frozen in tracked.items()
                 if _md5_file(project_root / rel) != frozen)
    return codes
2513
+
2514
+
2515
+ # ── §5 scope gate (build-scope-lock): touched ⊆ declared, from bytes alone ──────────
2516
+ # The walk's NAMED exclusion set — ONE constant; widening it is an additive
2517
+ # change-request, never silent. `.add` is engine domain (tripwire + audit guard it);
2518
+ # the rest is VCS/bytecode/OS junk with no build signal.
2519
+ _SCOPE_EXCLUDE_DIRS = (".git", ".add", "__pycache__", "node_modules")
2520
+ _SCOPE_EXCLUDE_FILES = (".DS_Store",) # plus *.pyc by suffix
2521
+
2522
+
2523
def _declared_scope(root: Path, slug: str) -> list[str] | None:
    """Resolve the §5 'Scope (may touch):' line into project-root-relative strings.

    Tokens are the backticked spans on the FIRST 'Scope (may touch):' line of the
    raw §5 body. Each token resolves as:
      * './…'          -> relative to the task directory
      * contains '/'   -> relative to the project root (parent of *root*)
      * bare name      -> sibling of the previous accepted token's directory,
                          or the task directory when there is none yet
    A token that fails `_confined`, or raises OSError while resolving, is dropped.
    Directory tokens (a trailing '/' in the token, or a path that is a directory)
    are emitted with a trailing '/'; duplicates are removed, order is preserved.

    Returns None when no Scope line exists at all, and [] when a line exists but
    every token on it was dropped.
    """
    body = _raw_phase_bodies(root, slug).get(5, "")
    decl = re.search(r"^\s*Scope \(may touch\):.*$", body, re.M)
    if not decl:
        return None
    task_dir = root / "tasks" / slug
    project_root = root.parent.resolve()
    resolved_tokens: list[str] = []
    sibling_base = None
    for raw_tok in re.findall(r"`([^`]+)`", decl.group(0)):
        tok = raw_tok.strip()
        if tok.startswith("./"):
            candidate = task_dir / tok[2:]
        elif "/" in tok:
            candidate = root.parent / tok
        else:
            candidate = (sibling_base or task_dir) / tok
        try:
            if not _confined(candidate, project_root):
                continue
            target = candidate.resolve()
            rel = str(target.relative_to(project_root))
            names_dir = tok.endswith("/") or target.is_dir()
        except OSError:
            continue
        if names_dir:
            # a directory token anchors later bare names inside itself
            sibling_base = candidate
            rel = rel.rstrip("/") + "/"
        else:
            sibling_base = candidate.parent
        if rel not in resolved_tokens:
            resolved_tokens.append(rel)
    return resolved_tokens
2563
+
2564
+
2565
+ def _in_scope(rel: str, declared: list[str]) -> bool:
2566
+ """True when rel falls under any declared token — exact match for a file
2567
+ token, whole-subtree prefix containment for a directory token ('…/')."""
2568
+ for tok in declared:
2569
+ if tok.endswith("/"):
2570
+ if rel.startswith(tok) or rel == tok.rstrip("/"):
2571
+ return True
2572
+ elif rel == tok:
2573
+ return True
2574
+ return False
2575
+
2576
+
2577
def _scope_walk(rootp: Path) -> dict[str, str]:
    """Map every file under *rootp* to its md5, keyed by rootp-relative path.

    Directories named in _SCOPE_EXCLUDE_DIRS are pruned wherever they occur;
    files named in _SCOPE_EXCLUDE_FILES and *.pyc files are skipped. A file whose
    bytes cannot be read (digest None) is simply absent from the result.
    Works from bytes only — no VCS is consulted.
    """
    digests: dict[str, str] = {}
    for here, subdirs, names in os.walk(rootp):
        # in-place slice assignment is what makes os.walk skip the pruned dirs
        subdirs[:] = [d for d in subdirs if d not in _SCOPE_EXCLUDE_DIRS]
        base = Path(here)
        for name in names:
            if name in _SCOPE_EXCLUDE_FILES or name.endswith(".pyc"):
                continue
            path = base / name
            digest = _md5_file(path)
            if digest is None:
                continue
            try:
                key = str(path.relative_to(rootp))
            except ValueError:
                continue
            digests[key] = digest
    return digests
2597
+
2598
+
2599
def _scope_findings(root: Path, slug: str, anchor: dict) -> tuple[str | None, list[str]]:
    """Evaluate a scope-anchored task: returns (tamper_reason, out_of_scope_touches).

    The sidecar `scope-snapshot.json` is checked before it is trusted:
      * unreadable (OSError)                          -> ("missing", [])
      * md5 differs from anchor["snapshot_md5"]        -> ("diverged", [])
      * not JSON / no usable "files" mapping           -> ("unparseable", [])
    Otherwise tamper_reason is None and the second item lists, sorted, every path
    that changed, disappeared, or appeared relative to the snapshot and is not
    covered by anchor["declared"].
    """
    sidecar = root / "tasks" / slug / "scope-snapshot.json"
    try:
        raw = sidecar.read_text(encoding="utf-8")
    except OSError:
        return "missing", []
    if _md5_text(raw) != anchor.get("snapshot_md5"):
        return "diverged", []
    try:
        baseline = json.loads(raw).get("files", {})
    except (ValueError, AttributeError):
        return "unparseable", []
    if not isinstance(baseline, dict):
        return "unparseable", []
    current = _scope_walk(root.parent.resolve())
    # a deleted file shows up here too: current.get() gives None != its old digest
    changed_or_gone = {path for path, digest in baseline.items()
                       if current.get(path) != digest}
    added = {path for path in current if path not in baseline}
    touched = sorted(changed_or_gone | added)
    declared = anchor.get("declared") or []
    return None, [path for path in touched if not _in_scope(path, declared)]
2621
+
2622
+
2623
def _scope_guard(root: Path, state: dict, slug: str) -> None:
    """Refuse a completing gate when the build touched outside its declared §5 Scope.

    Decision table, evaluated in this order:
      1. No dict anchor under state["tasks"][slug]["scope"]:
           - a scope-snapshot.json sidecar still exists -> `_die`
             (scope_anchor_missing: the state-side baseline was erased);
           - no sidecar either -> return silently (undeclared / legacy task).
      2. `_scope_findings` reports the sidecar "missing" -> `_die`
         (scope_snapshot_tampered): a rebuild cannot recreate a deleted baseline.
      3. Any other tamper reason ("diverged" / "unparseable") -> routed through
         `_heal_or_escalate` with source "scope-tamper".
      4. Out-of-scope touches -> routed through `_heal_or_escalate` with source
         "scope"; at most the first five paths are shown in the reason.
    Every reason string starts with its named code so refusals stay greppable.
    """
    anchor = state["tasks"][slug].get("scope")
    if not isinstance(anchor, dict):
        sidecar = root / "tasks" / slug / "scope-snapshot.json"
        if sidecar.exists():
            _die(f"scope_anchor_missing: task '{slug}' carries a scope-snapshot.json "
                 "but no state.json anchor — the touch baseline was erased from "
                 "state; re-establish it (re-advance through tests->build) before "
                 "completing")
        return

    tamper, out = _scope_findings(root, slug, anchor)

    if tamper == "missing":
        # erased baseline — die in place, no heal attempt is spent on it
        _die(f"scope_snapshot_tampered: task '{slug}' — scope-snapshot.json is "
             "missing against its state.json anchor; the touch baseline is "
             "evidence and must survive the build untouched")

    if tamper:
        # present-but-wrong sidecar bytes can be reverted from build
        tamper_reason = (f"scope_snapshot_tampered: task '{slug}' — "
                         f"scope-snapshot.json is {tamper} against its "
                         "state.json anchor; revert it to the snapshot bytes")
        _heal_or_escalate(root, state, slug, source="scope-tamper",
                          reason=tamper_reason)

    if out:
        shown = " · ".join(out[:5])
        violation_reason = (f"scope_violation: task '{slug}' touched outside its "
                            f"declared §5 Scope — {shown} ({len(out)} total)")
        _heal_or_escalate(root, state, slug, source="scope",
                          reason=violation_reason)
2667
+
2668
+
2669
def _heal_or_escalate(root: Path, state: dict, slug: str, *, reason: str, source: str) -> None:
    """Bounded self-heal router: send the task back to BUILD, or escalate at the cap.

    *reason* and *source* are recorded verbatim in the task's heal history
    (callers visible in this file pass the sources "tamper", "scope-tamper" and
    "scope").

    attempts >= HEAL_CAP: append the history entry, set gate = "HARD-STOP" (the
    phase is left where it is), persist the state, then `_die` with a
    heal_exhausted message.

    attempts < HEAL_CAP: increment attempts, append the history entry, set the
    phase to "build" DIRECTLY (not through advance), sync the task marker,
    persist the state, print the return_to_build notice and raise SystemExit(3).
    The state is saved BEFORE the exit so the spent attempt is durable. Nothing
    in this function ever lowers the attempts counter.
    """
    task = state["tasks"][slug]
    ledger = task.setdefault("heal", {"attempts": 0, "history": []})
    record = {"at": _now(), "reason": reason, "source": source}
    used = ledger.get("attempts", 0)

    if used >= HEAL_CAP:
        ledger.setdefault("history", []).append(record)
        task["gate"] = "HARD-STOP"  # never a completing outcome; phase stays put
        task["updated"] = _now()
        save_state(root, state)  # the escalation verdict is durable
        _die(f"heal_exhausted: task '{slug}' — a confirmed cheat ({reason}) persisted past "
             f"{HEAL_CAP} honest re-build attempts. HARD-STOP escalated to the human: fix the "
             "spec (change-request -> re-freeze) or abandon. A gamed green is never auto-passed.")

    attempt_no = used + 1
    ledger["attempts"] = attempt_no
    ledger.setdefault("history", []).append(record)
    task["phase"] = "build"  # DIRECT — never via advance (no re-snapshot)
    task["updated"] = _now()
    _sync_task_marker(root, slug, "build")
    save_state(root, state)  # the increment is durable BEFORE the exit
    print(f"return_to_build: task '{slug}' — cheat detected ({reason}); RETURN TO BUILD for an "
          f"HONEST redo, attempt {attempt_no} of {HEAL_CAP}. Revert the tampered file or "
          "rebuild src honestly, then advance back to verify.")
    raise SystemExit(3)  # redo signal (distinct from _die's 1, argparse's 2)
2704
+
2705
+
2706
def _tamper_guard(root: Path, state: dict, slug: str) -> None:
    """Block a completing gate when the tamper tripwire shows a problem.

    Three cases, keyed on the task's "tripwire" snapshot and "flag_verified":
      * snapshot present and `_tripwire_divergence` reports codes -> hand over to
        `_heal_or_escalate` (source "tamper", reason "tamper_detected:<codes>");
      * snapshot absent but flag_verified is set -> `_die` with tripwire_missing
        (the baseline existed and was erased);
      * snapshot absent and not flag_verified -> nothing to check, return.
    A present snapshot with no divergence also returns quietly.
    """
    task = state["tasks"][slug]
    snapshot = task.get("tripwire")
    if snapshot is not None:
        codes = _tripwire_divergence(root, slug, snapshot)
        if codes:
            # the router either returns the task to build or escalates; both exit
            _heal_or_escalate(root, state, slug,
                              reason=f"tamper_detected:{','.join(codes)}",
                              source="tamper")
        return
    if task.get("flag_verified"):
        _die(f"tripwire_missing: task '{slug}' crossed tests->build "
             "(flag_verified) but carries no tamper snapshot — the evidence "
             "baseline was erased. Re-establish it (reopen -> re-advance through "
             "tests->build) before completing; a missing baseline is HARD-STOP.")
    # otherwise: legacy task, or one that never crossed tests->build
2730
+
2731
+
1577
2732
  def _task_prose(root: Path, slug: str) -> tuple[str, list[str]]:
1578
2733
  """(observe_delta, [delta lines]) from the task's TASK.md §7 — captured at FULL
1579
2734
  fidelity: both fields wrap across physical lines in real files, so continuation
@@ -1730,7 +2885,7 @@ def _phase_spans(text: str) -> dict[int, str]:
1730
2885
  m = head.match(ln)
1731
2886
  if m:
1732
2887
  n = int(m.group(1))
1733
- if 1 <= n <= 7 and n not in starts:
2888
+ if 0 <= n <= 7 and n not in starts:
1734
2889
  starts[n] = idx
1735
2890
  out: dict[int, str] = {}
1736
2891
  for n, idx in starts.items():
@@ -1754,23 +2909,23 @@ def _raw_phase_bodies(root: Path, slug: str) -> dict[int, str]:
1754
2909
 
1755
2910
 
1756
2911
def task_phases(root: Path, slug: str) -> list[dict]:
    """Parse a task's TASK.md §0–§7 into eight phase blocks, ground→observe.

    Each entry has the shape
        { "phase": <name>, "n": <0..7>, "body": <cleaned text | "(empty)"> }
    where n is the section number and also the index of the phase in PHASES
    ("done" is a terminal state, not a section, so it is left out).

    Section boundaries come from `_phase_spans`; each found body is passed through
    `_clean_phase_body`. A file that cannot be read (OSError) or a section that is
    not found yields "(empty)" for the affected entries. Performs no writes.
    """
    names = PHASES[:-1]  # ground..observe
    task_doc = root / "tasks" / slug / "TASK.md"
    try:
        text = task_doc.read_text(encoding="utf-8")
    except OSError:
        text = None  # missing OR unreadable -> every phase falls back to "(empty)"
    spans = {} if text is None else _phase_spans(text)
    blocks: list[dict] = []
    for n in range(8):
        body = _clean_phase_body(spans[n]) if n in spans else "(empty)"
        blocks.append({"phase": names[n], "n": n, "body": body})
    return blocks
2929
 
1775
2930
 
1776
2931
  def _task_title(root: Path, slug: str) -> str:
@@ -1846,7 +3001,7 @@ def render_task_detail(root: Path, state: dict, mslug: str, slug: str, *,
1846
3001
  L.append(f" PHASE {phase} GATE {gate}")
1847
3002
  L.append(banner)
1848
3003
  for p in task_phases(root, slug):
1849
- i = p["n"] - 1
3004
+ i = p["n"] # n IS the PHASES index now (ground=0 .. observe=7)
1850
3005
  mk = (g["reached"] if (phase == "done" or i < ci)
1851
3006
  else g["current"] if i == ci else g["pending"])
1852
3007
  L.append("")
@@ -1981,6 +3136,36 @@ def _contract_frozen(raw3: str) -> bool:
1981
3136
  return any(re.match(r"\s*Status:\s*FROZEN", ln) for ln in raw3.splitlines())
1982
3137
 
1983
3138
 
3139
+ def _section0_anchors(raw0: str) -> str | None:
3140
+ """The value of the §0 GROUND "Anchors the contract cites:" line, stripped.
3141
+ None when the §0 body carries no such line (no §0, or a malformed map). PURE."""
3142
+ for ln in raw0.splitlines():
3143
+ m = re.match(r"\s*Anchors the contract cites:\s*(.*)$", ln)
3144
+ if m:
3145
+ return m.group(1).strip()
3146
+ return None
3147
+
3148
+
3149
def _grounded_state(raw: dict[int, str]) -> bool | None:
    """Tri-state grounding measure over a task's RAW § bodies.

      None  — *raw* has no §0 entry, or its §0 has no "Anchors the contract
              cites:" line
      False — the Anchors line exists but is empty or begins with "<"
              (the unfilled placeholder form)
      True  — the Anchors line carries any other content
    Purely a measurement of the supplied mapping; it reads and writes no files.
    """
    try:
        section0 = raw[0]
    except KeyError:
        return None
    anchors = _section0_anchors(section0)
    if anchors is None:
        return None
    if not anchors:
        return False
    return not anchors.startswith("<")
3162
+
3163
+
3164
def _task_grounded(root: Path, slug: str) -> bool | None:
    """Grounding tri-state for one task: loads the task's RAW § bodies by slug
    and returns what `_grounded_state` makes of them. Read-only."""
    bodies = _raw_phase_bodies(root, slug)
    return _grounded_state(bodies)
3167
+
3168
+
1984
3169
  _FLAG_LABEL_RE = re.compile(r"Least-sure flag surfaced at freeze\s*:", re.I)
1985
3170
  _FLAG_PART_RE = re.compile(
1986
3171
  r"\[(?:spec|scenario|contract|test)(?:/(?:spec|scenario|contract|test))*\]")
@@ -2022,6 +3207,8 @@ def decide_data(root: Path, state: dict, mslug: str, slug: str) -> dict:
2022
3207
  gate = t.get("gate", "none")
2023
3208
  if gate != "none" or phase in ("observe", "done"):
2024
3209
  seam = "recorded"
3210
+ elif phase == "ground":
3211
+ seam = "ground"
2025
3212
  elif phase in _FRONT_PHASES:
2026
3213
  seam = "front"
2027
3214
  else:
@@ -2032,6 +3219,8 @@ def decide_data(root: Path, state: dict, mslug: str, slug: str) -> dict:
2032
3219
  judgment = _decision_markers(raw.get(6, ""), 6) + _decision_markers(raw.get(1, ""), 1)
2033
3220
  elif seam == "front" and not frozen:
2034
3221
  judgment = _decision_markers(raw.get(1, ""), 1) + _decision_markers(raw.get(3, ""), 3)
3222
+ elif seam == "ground":
3223
+ judgment = _decision_markers(raw.get(0, ""), 0)
2035
3224
  else:
2036
3225
  judgment = []
2037
3226
 
@@ -2051,6 +3240,9 @@ def decide_data(root: Path, state: dict, mslug: str, slug: str) -> dict:
2051
3240
  elif seam == "front":
2052
3241
  unlocks = "none"
2053
3242
  decide = "no decision pending — frozen; the run owns it. next decision point: verify gate"
3243
+ elif seam == "ground":
3244
+ unlocks = "gather the codebase -> advance to specify"
3245
+ decide = "gather the real codebase (the section 0 GROUND map), then: add.py advance"
2054
3246
  else:
2055
3247
  unlocks = "none"
2056
3248
  decide = f"no decision pending — recorded gate: {gate}"
@@ -2069,7 +3261,7 @@ def render_decide(root: Path, state: dict, mslug: str, slug: str, *,
2069
3261
  g = _ASCII if ascii else _UNICODE
2070
3262
  banner = g["h"] * width
2071
3263
  seam_label = {"gate": "VERIFY GATE", "front": "CONTRACT APPROVAL",
2072
- "recorded": "RECORDED"}[d["seam"]]
3264
+ "recorded": "RECORDED", "ground": "GROUND"}[d["seam"]]
2073
3265
  L = [banner, f" DECIDE · {mslug or '—'} · {slug} · decision point: {seam_label}", banner]
2074
3266
  if d["decide"].startswith("no decision pending"):
2075
3267
  L.append(f" {d['decide']}")
@@ -2134,14 +3326,22 @@ def _planned_hint(d: dict) -> str:
2134
3326
  return f" — {len(planned)} planned not yet scaffolded: " + " · ".join(planned)
2135
3327
 
2136
3328
 
2137
- def _decide_next_base(state: dict, d: dict) -> str:
3329
+ def _decide_next_pair(state: dict, d: dict) -> tuple[str, bool]:
3330
+ """(next-step text, human_stop) over the active-milestone rollup. `human_stop` is the
3331
+ driver behind the step (task gate-owner-marker): True for every DECISION point a human
3332
+ owns — decompose · resolve HARD-STOP · goal-not-met · consolidate/archive · approve
3333
+ contract · gate — and False ONLY for the run-in-progress fallthrough, the one branch
3334
+ where the AI just continues an in-flight run. Derived from the rollup `d`, never from
3335
+ the rendered prose (the §5 safety rule). The bare string is `_decide_next_base` below."""
2138
3336
  ms = d["milestone"]["slug"]
2139
3337
  rows = d["tasks"]
2140
3338
  if not rows:
2141
- return "none no tasks yet"
3339
+ # command-first (next-footer-engine): an empty milestone's next step is to
3340
+ # decompose it — name the command, not the dead-end "none — no tasks yet".
3341
+ return f"decompose into tasks — add.py new-task {ms}", True
2142
3342
  stopped = [r for r in rows if r["gate"] == "HARD-STOP"]
2143
3343
  if stopped:
2144
- return f"resolve HARD-STOP on {stopped[0]['slug']}"
3344
+ return f"resolve HARD-STOP on {stopped[0]['slug']}", True
2145
3345
  s = d["summary"]
2146
3346
  if s["tasks_done"] == s["tasks_total"]:
2147
3347
  # tasks complete — but the milestone holds while the goal (exit criteria) is
@@ -2151,8 +3351,8 @@ def _decide_next_base(state: dict, d: dict) -> str:
2151
3351
  met, total = ec.get("met", 0), ec.get("total", 0)
2152
3352
  if total > 0 and met < total:
2153
3353
  return (f"goal not met ({met}/{total} exit criteria) — propose next tasks "
2154
- f"from open deltas / the unscaffolded plan (add.py deltas)")
2155
- return f"consolidate learnings + archive-milestone {ms}"
3354
+ f"from open deltas / the unscaffolded plan (add.py deltas)"), True
3355
+ return f"consolidate learnings + archive-milestone {ms}", True
2156
3356
  active = state.get("active_task")
2157
3357
  order = sorted(rows, key=lambda r: 0 if r["slug"] == active else 1) # stable
2158
3358
  for r in order:
@@ -2160,11 +3360,58 @@ def _decide_next_base(state: dict, d: dict) -> str:
2160
3360
  continue
2161
3361
  if r["phase"] in _FRONT_PHASES:
2162
3362
  return (f"approve the contract of {r['slug']} — "
2163
- f"add.py report {ms} {r['slug']} --decide")
3363
+ f"add.py report {ms} {r['slug']} --decide"), True
2164
3364
  if r["phase"] == "verify" and r["gate"] == "none":
2165
- return f"gate {r['slug']} — add.py report {ms} {r['slug']} --decide"
3365
+ return f"gate {r['slug']} — add.py report {ms} {r['slug']} --decide", True
2166
3366
  r = next(x for x in order if not x["done"])
2167
- return f"none — run in progress ({r['slug']} at {r['phase']})"
3367
+ return f"none — run in progress ({r['slug']} at {r['phase']})", False
3368
+
3369
+
3370
def _decide_next_base(state: dict, d: dict) -> str:
    """Next-step TEXT only: a thin wrapper that returns the first element of the
    (text, human_stop) pair produced by `_decide_next_pair` and drops the flag."""
    text, _human_stop = _decide_next_pair(state, d)
    return text
3374
+
3375
+
3376
def _next_footer(root: Path, state: dict) -> str:
    """Compose the single `next:` footer line for the current state.

    Arm A — the active task is in flight (gate == "none" and phase != "done"):
        "next: <command> — <why><marker>", where <command> is the gate verbs at
        the verify phase and "add.py advance" otherwise, <why> is the leading
        clause of that phase's PHASE_GUIDE action, and <marker> comes from
        `_driver_marker(_driver_stop(...))`. A task whose gate is anything other
        than "none" (e.g. HARD-STOP) does not take this arm.
    Arm B — otherwise, when an active milestone is set: the next-step text and
        human_stop flag from `_decide_next_pair` over `report_data`, rendered as
        "next: <text><marker>".

    Fail-soft by design: ANY exception raised while resolving either arm is
    swallowed and the generic re-orient line is returned instead, so computing a
    footer can never abort the caller. That fallback line carries no marker.
    This function itself writes nothing.
    """
    try:
        active = state.get("active_task")
        tasks = state.get("tasks") or {}
        task = tasks.get(active) if active else None
        in_flight = (bool(task)
                     and task.get("gate", "none") == "none"
                     and task.get("phase") != "done")
        if in_flight:
            phase = task.get("phase")
            # keep only the short clause that precedes the first " — "
            why = PHASE_GUIDE[phase][0].split(" — ")[0].strip()
            if phase == "verify":
                command = "add.py gate PASS | RISK-ACCEPTED | HARD-STOP"
            else:
                command = "add.py advance"
            marker = _driver_marker(_driver_stop(root, state, active, phase))
            return f"next: {command} — {why}{marker}"
        milestone = state.get("active_milestone")
        if milestone:
            rollup = report_data(root, state, milestone)
            text, human_stop = _decide_next_pair(state, rollup)
            return "next: " + text + _driver_marker(human_stop)
    except Exception:
        pass  # deliberate: a footer never aborts the verb that produced it
    return "next: add.py status — re-orient"
2168
3415
 
2169
3416
 
2170
3417
  def render_decide_next(root: Path, state: dict, mslug: str, *,
@@ -2421,9 +3668,9 @@ def _audit_findings(root: Path, state: dict) -> tuple[int, list[dict]]:
2421
3668
  # catches post-gate header tampering and auto-resolved high-risk gates.
2422
3669
  hdr = _task_header(root, slug)
2423
3670
  if _RISK_HIGH_RE.search(hdr):
2424
- if not _AUTONOMY_CONSERVATIVE_RE.search(hdr):
3671
+ if not _autonomy_lowered(hdr):
2425
3672
  f(slug, "unguarded_high_risk_auto",
2426
- "risk: high declared but autonomy is not 'conservative'")
3673
+ "risk: high declared but autonomy is not lowered (manual or conservative)")
2427
3674
  elif rev and "auto-gate" in rev.group(1):
2428
3675
  f(slug, "unguarded_high_risk_auto",
2429
3676
  "risk: high task whose GATE RECORD reviewer is the auto-gate")
@@ -2799,11 +4046,11 @@ def build_parser() -> argparse.ArgumentParser:
2799
4046
  pp = sub.add_parser("phase", help="set a task's phase explicitly")
2800
4047
  pp.add_argument("phase", choices=PHASES)
2801
4048
  pp.add_argument("slug", nargs="?", default=None)
2802
- pp.set_defaults(func=cmd_phase)
4049
+ pp.set_defaults(func=cmd_phase, _opt_positionals=("slug",))
2803
4050
 
2804
4051
  pa = sub.add_parser("advance", help="move a task to the next phase")
2805
4052
  pa.add_argument("slug", nargs="?", default=None)
2806
- pa.set_defaults(func=cmd_advance)
4053
+ pa.set_defaults(func=cmd_advance, _opt_positionals=("slug",))
2807
4054
 
2808
4055
  pg = sub.add_parser("gate", help="record a verify gate outcome")
2809
4056
  pg.add_argument("outcome", choices=GATES)
@@ -2811,15 +4058,22 @@ def build_parser() -> argparse.ArgumentParser:
2811
4058
  pg.add_argument("--owner", help="RISK-ACCEPTED waiver: accountable owner")
2812
4059
  pg.add_argument("--ticket", help="RISK-ACCEPTED waiver: tracking ticket/link")
2813
4060
  pg.add_argument("--expires", help="RISK-ACCEPTED waiver: expiry date")
2814
- pg.set_defaults(func=cmd_gate)
4061
+ pg.set_defaults(func=cmd_gate, _opt_positionals=("slug",))
2815
4062
 
2816
4063
  pr = sub.add_parser("reopen", help="return a done task to an earlier phase with a recorded reason")
2817
4064
  pr.add_argument("slug", nargs="?", default=None)
2818
4065
  # --to / --reason are validated in-body (not argparse choices) so the named reject
2819
4066
  # codes fire (reopen_target_invalid / reopen_reason_required), not a bare exit-2.
2820
- pr.add_argument("--to", default=None, help="target phase (specify..observe)")
4067
+ pr.add_argument("--to", default=None, help="target phase (ground..observe)")
2821
4068
  pr.add_argument("--reason", default="", help="why the task is reopened (required, non-empty)")
2822
- pr.set_defaults(func=cmd_reopen)
4069
+ pr.set_defaults(func=cmd_reopen, _opt_positionals=("slug",))
4070
+
4071
+ ph = sub.add_parser("heal", help="report a confirmed cheat: bounded return-to-build, then escalate")
4072
+ ph.add_argument("slug", nargs="?", default=None)
4073
+ # --reason validated in-body so the named rejects fire (heal_reason_required /
4074
+ # heal_not_at_verify), not a bare argparse usage-2.
4075
+ ph.add_argument("--reason", default="", help="the refute-read finding (required, non-empty)")
4076
+ ph.set_defaults(func=cmd_heal, _opt_positionals=("slug",))
2823
4077
 
2824
4078
  ps = sub.add_parser("stage", help="set the project stage")
2825
4079
  ps.add_argument("stage", choices=STAGES)
@@ -2835,6 +4089,13 @@ def build_parser() -> argparse.ArgumentParser:
2835
4089
  pck.add_argument("--json", action="store_true", help="machine-readable JSON output")
2836
4090
  pck.set_defaults(func=cmd_check)
2837
4091
 
4092
+ pwv = sub.add_parser("wave-verify",
4093
+ help="read-only merge-time gate: every WAVE.md roster echo must match "
4094
+ "base (refuses unverified_fork_base) — run before the first merge-back")
4095
+ pwv.add_argument("milestone", nargs="?", default=None,
4096
+ help="milestone whose WAVE.md to verify (default: the single live ledger)")
4097
+ pwv.set_defaults(func=cmd_wave_verify, _opt_positionals=("milestone",))
4098
+
2838
4099
  psg = sub.add_parser("sync-guidelines",
2839
4100
  help="(re)write the ADD guideline block into AGENTS.md + CLAUDE.md")
2840
4101
  psg.set_defaults(func=cmd_sync_guidelines)
@@ -2842,7 +4103,7 @@ def build_parser() -> argparse.ArgumentParser:
2842
4103
  pgd = sub.add_parser("guide", help="print the one concrete next step for the active task")
2843
4104
  pgd.add_argument("slug", nargs="?", default=None, help="task slug (default: active task)")
2844
4105
  pgd.add_argument("--json", action="store_true", help="machine-readable JSON output")
2845
- pgd.set_defaults(func=cmd_guide)
4106
+ pgd.set_defaults(func=cmd_guide, _opt_positionals=("slug",))
2846
4107
 
2847
4108
  prp = sub.add_parser("report",
2848
4109
  help="capture/render a milestone's what-happened report (read-only)")
@@ -2862,7 +4123,7 @@ def build_parser() -> argparse.ArgumentParser:
2862
4123
  help="decision-point digest: what needs the human's judgment NOW "
2863
4124
  "(task -> decision digest; milestone -> DECIDE NEXT only; "
2864
4125
  "bare -> the active task)")
2865
- prp.set_defaults(func=cmd_report)
4126
+ prp.set_defaults(func=cmd_report, _opt_positionals=("milestone", "task"))
2866
4127
 
2867
4128
  pdt = sub.add_parser("deltas",
2868
4129
  help="read-only report: open lessons learned grouped by competency")
@@ -2888,9 +4149,33 @@ def build_parser() -> argparse.ArgumentParser:
2888
4149
  return p
2889
4150
 
2890
4151
 
4152
def _rebind_optional_positionals(parser: argparse.ArgumentParser,
                                 args: argparse.Namespace,
                                 extras: list[str]) -> argparse.Namespace:
    """Bind stranded positional tokens into still-empty optional positionals.

    Why this exists (argv portability, py<=3.12): once a REQUIRED positional has
    been consumed in an earlier block, argparse cannot bind an optional positional
    that trails value-taking flags — e.g.
    `gate RISK-ACCEPTED --owner X --ticket Y --expires Z <slug>` fails with
    `unrecognized arguments: <slug>` on 3.10/3.11/3.12 (3.13+ parses it natively).
    The caller uses parse_known_args, which leaves such a token in `extras`.

    Parameters:
        parser: the parser whose `error()` reports a refused re-bind.
        args:   the parsed namespace; its `_opt_positionals` attribute (set per
                subparser via set_defaults) lists the optional positional names
                in declaration order. A missing attribute means "no slots".
        extras: the tokens parse_known_args could not place.

    Returns:
        The same `args` object, with each unfilled (still None) slot assigned the
        next extra, in order.

    Safety rule: ANY flag-like extra (starts with "-") refuses the WHOLE re-bind,
    and so does having more extras than open slots; both report the stock
    `unrecognized arguments` error through `parser.error` (exit status 2). A
    typo'd flag's value must never be mis-bound as a slug — that would act on the
    WRONG task.
    """
    # Only positionals still at their None default are candidates for re-binding.
    open_slots = []
    for slot in getattr(args, "_opt_positionals", ()):
        if getattr(args, slot, None) is None:
            open_slots.append(slot)

    # One flag-looking token poisons the entire set of extras.
    saw_flag = False
    for token in extras:
        if token.startswith("-"):
            saw_flag = True
            break

    if saw_flag or len(extras) > len(open_slots):
        parser.error("unrecognized arguments: " + " ".join(extras))

    # Fill the open slots left-to-right; slots beyond the extras stay None.
    for slot, token in zip(open_slots, extras):
        setattr(args, slot, token)
    return args
4172
+
4173
+
2891
4174
def main(argv: list[str] | None = None) -> int:
    """CLI entry point: parse `argv`, run the selected subcommand, return 0.

    `argv` is handed straight to argparse (None lets argparse read the process
    arguments). Parsing uses parse_known_args rather than parse_args so that a
    positional argparse could not place is kept as a leftover instead of being
    rejected outright; any leftovers go through `_rebind_optional_positionals`,
    which either binds them or reports the usual unrecognized-arguments error.
    The handler is whatever the chosen subparser stored as `func`.
    """
    cli = build_parser()
    namespace, leftover = cli.parse_known_args(argv)
    if leftover:
        # Stranded tokens: bind into unfilled optional positionals, or refuse.
        namespace = _rebind_optional_positionals(cli, namespace, leftover)
    handler = namespace.func
    handler(namespace)
    return 0
2896
4181