npm - @linimin/pi-letscook - Versions diffs - 0.1.29 → 0.1.31 - Mend

@linimin/pi-letscook 0.1.29 → 0.1.31

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15)

package/CHANGELOG.md +22 -0
package/README.md +49 -2
package/agents/completion-auditor.md +17 -0
package/agents/completion-reviewer.md +17 -0
package/agents/completion-stop-judge.md +17 -0
package/extensions/completion/index.ts +749 -195
package/extensions/completion/role-reporting.js +356 -0
package/package.json +2 -1
package/scripts/context-proposal-test.sh +115 -6
package/scripts/refocus-test.sh +11 -0
package/scripts/release-check.sh +2 -0
package/scripts/rubric-contract-test.sh +249 -0
package/scripts/smoke-test.sh +154 -23
package/skills/completion-protocol/SKILL.md +39 -0
package/skills/completion-protocol/references/completion.md +71 -0

package/scripts/context-proposal-test.sh CHANGED Viewed

@@ -120,30 +120,57 @@ PY
 # No workflow yet: /cook with no goal should infer from recent discussion through analyst output.
 SESSION_ONE="$TMPDIR/session-one.jsonl"
 DISCUSSION_ONE="$DISCUSSION_ZERO"
-ANALYST_OUTPUT_ONE='{"mission":"Remove the completion status line while keeping the completion widget.","scope":["Keep the non-running completion widget.","Suppress the widget while a completion role is active."],"constraints":["Do not reintroduce any other completion status surface."],"acceptance":["Update README to match the shipped behavior.","Keep observability regression coverage truthful."],"confidence":0.94}'
+ANALYST_OUTPUT_ONE='{"mission":"Remove the completion status line while keeping the completion widget.","scope":["Keep the non-running completion widget.","Suppress the widget while a completion role is active."],"constraints":["Do not reintroduce any other completion status surface."],"acceptance":["Update README to match the shipped behavior.","Keep observability regression coverage truthful."],"critique":["Keep critique separate from the mission anchor so startup analysis does not rewrite the workflow goal."],"risks":["Stale widget-removal discussion could broaden the startup plan if it gets treated as mission text."],"task_type":"completion-workflow","evaluation_profile":"completion-rubric-v1","possible_noise":["older widget restyle ideas"],"confidence":0.94}'
+DISCUSSION_SNAPSHOT_ONE="$TMPDIR/context-proposal-discussion-hints.json"
 write_session "$SESSION_ONE" "$ROOT" "$DISCUSSION_ONE"
 PI_COMPLETION_CONTEXT_PROPOSAL_ACTION=accept \
 PI_COMPLETION_CONTEXT_PROPOSAL_ANALYST_OUTPUT="$ANALYST_OUTPUT_ONE" \
+PI_COMPLETION_TEST_CONTEXT_PROPOSAL_PATH="$DISCUSSION_SNAPSHOT_ONE" \
 PI_COMPLETION_SKIP_DRIVER_KICKOFF=1 \
 pi --session "$SESSION_ONE" -e "$PKG_ROOT" -p "/cook" >/tmp/pi-completion-context-proposal-bootstrap.out 2>/tmp/pi-completion-context-proposal-bootstrap.err
-python3 - <<'PY'
+python3 - "$DISCUSSION_SNAPSHOT_ONE" <<'PY'
 import json
+import sys
 from pathlib import Path
 mission = 'Remove the completion status line while keeping the completion widget.'
+expected_task_type = 'completion-workflow'
+expected_eval_profile = 'completion-rubric-v1'
 mission_text = Path('.agent/mission.md').read_text()
+profile = json.loads(Path('.agent/profile.json').read_text())
 state = json.loads(Path('.agent/state.json').read_text())
 plan = json.loads(Path('.agent/plan.json').read_text())
 active = json.loads(Path('.agent/active-slice.json').read_text())
+proposal = json.loads(Path(sys.argv[1]).read_text())
 assert mission in mission_text, '.agent/mission.md did not record the analyst-derived mission anchor'
+assert profile['task_type'] == expected_task_type, 'profile.json task_type mismatch after analyst-derived bootstrap'
+assert profile['evaluation_profile'] == expected_eval_profile, 'profile.json evaluation_profile mismatch after analyst-derived bootstrap'
 assert state['mission_anchor'] == mission, 'state.json mission_anchor mismatch after analyst-derived bootstrap'
+assert state['task_type'] == expected_task_type, 'state.json task_type mismatch after analyst-derived bootstrap'
+assert state['evaluation_profile'] == expected_eval_profile, 'state.json evaluation_profile mismatch after analyst-derived bootstrap'
 assert plan['mission_anchor'] == mission, 'plan.json mission_anchor mismatch after analyst-derived bootstrap'
+assert plan['task_type'] == expected_task_type, 'plan.json task_type mismatch after analyst-derived bootstrap'
+assert plan['evaluation_profile'] == expected_eval_profile, 'plan.json evaluation_profile mismatch after analyst-derived bootstrap'
 assert active['mission_anchor'] == mission, 'active-slice.json mission_anchor mismatch after analyst-derived bootstrap'
+assert active['task_type'] == expected_task_type, 'active-slice.json task_type mismatch after analyst-derived bootstrap'
+assert active['evaluation_profile'] == expected_eval_profile, 'active-slice.json evaluation_profile mismatch after analyst-derived bootstrap'
+assert proposal['mission'] == mission, 'discussion-only proposal snapshot should keep the inferred mission anchor'
+assert proposal['analysis']['taskType'] == expected_task_type, 'discussion-only proposal snapshot should expose task_type hints separately'
+assert proposal['analysis']['evaluationProfile'] == expected_eval_profile, 'discussion-only proposal snapshot should expose evaluation_profile hints separately'
+assert proposal['analysis']['critique'] == ['Keep critique separate from the mission anchor so startup analysis does not rewrite the workflow goal.'], 'discussion-only proposal snapshot should preserve critique hints'
+assert proposal['analysis']['risks'] == ['Stale widget-removal discussion could broaden the startup plan if it gets treated as mission text.'], 'discussion-only proposal snapshot should preserve risk hints'
+assert proposal['analysis']['possibleNoise'] == ['older widget restyle ideas'], 'discussion-only proposal snapshot should preserve possible_noise hints'
+assert 'Critique:' not in proposal['goalText'], 'goalText should keep critique separate from mission/scope/constraints/acceptance'
+assert 'Task type:' not in proposal['goalText'], 'goalText should keep task_type hints separate from the mission body'
 assert state['current_phase'] == 'reground', 'state.json current_phase should start at reground after analyst-derived bootstrap'
 assert state['next_mandatory_role'] == 'completion-regrounder', 'next_mandatory_role should start at completion-regrounder after analyst-derived bootstrap'
+assert state['continuation_reason'].startswith('User started workflow via /cook:'), 'initial startup should record the accepted startup routing in continuation_reason'
+assert 'task_type=completion-workflow' in state['continuation_reason'], 'initial startup should persist the selected task_type in continuation_reason'
+assert 'evaluation_profile=completion-rubric-v1' in state['continuation_reason'], 'initial startup should persist the selected evaluation_profile in continuation_reason'
+assert 'Keep critique separate from the mission anchor so startup analysis does not rewrite the workflow goal.' in state['continuation_reason'], 'initial startup should persist the accepted critique outcome in continuation_reason'
 PY
 # Completed workflow: /cook with no goal should infer the next round from recent discussion through analyst output.
@@ -164,21 +191,35 @@ import json
 from pathlib import Path
 mission = 'Ship the next workflow round for richer context-derived /cook startup.'
+expected_task_type = 'completion-workflow'
+expected_eval_profile = 'completion-rubric-v1'
 mission_text = Path('.agent/mission.md').read_text()
+profile = json.loads(Path('.agent/profile.json').read_text())
 state = json.loads(Path('.agent/state.json').read_text())
 plan = json.loads(Path('.agent/plan.json').read_text())
 active = json.loads(Path('.agent/active-slice.json').read_text())
 assert mission in mission_text, '.agent/mission.md did not update to the next-round context-derived mission anchor'
+assert profile['task_type'] == expected_task_type, 'profile.json task_type mismatch after next-round startup'
+assert profile['evaluation_profile'] == expected_eval_profile, 'profile.json evaluation_profile mismatch after next-round startup'
 assert state['mission_anchor'] == mission, 'state.json mission_anchor mismatch after starting the next workflow round'
+assert state['task_type'] == expected_task_type, 'state.json task_type mismatch after starting the next workflow round'
+assert state['evaluation_profile'] == expected_eval_profile, 'state.json evaluation_profile mismatch after starting the next workflow round'
 assert plan['mission_anchor'] == mission, 'plan.json mission_anchor mismatch after starting the next workflow round'
+assert plan['task_type'] == expected_task_type, 'plan.json task_type mismatch after starting the next workflow round'
+assert plan['evaluation_profile'] == expected_eval_profile, 'plan.json evaluation_profile mismatch after starting the next workflow round'
 assert active['mission_anchor'] == mission, 'active-slice.json mission_anchor mismatch after starting the next workflow round'
+assert active['task_type'] == expected_task_type, 'active-slice.json task_type mismatch after starting the next workflow round'
+assert active['evaluation_profile'] == expected_eval_profile, 'active-slice.json evaluation_profile mismatch after starting the next workflow round'
 assert state['current_phase'] == 'reground', 'state.json current_phase should reset to reground for the next workflow round'
 assert state['continuation_policy'] == 'continue', 'continuation_policy should reset to continue for the next workflow round'
 assert state['requires_reground'] is True, 'requires_reground should reset to true for the next workflow round'
 assert state['project_done'] is False, 'project_done should reset to false for the next workflow round'
 assert state['next_mandatory_role'] == 'completion-regrounder', 'next_mandatory_role should reset to completion-regrounder for the next workflow round'
 assert state['continuation_reason'].startswith('User refocused workflow via /cook:'), 'continuation_reason should record the next-round refocus'
+assert 'task_type=completion-workflow' in state['continuation_reason'], 'next-round refocus should persist the selected task_type'
+assert 'evaluation_profile=completion-rubric-v1' in state['continuation_reason'], 'next-round refocus should persist the selected evaluation_profile'
+assert 'critique outcome=accepted critique=none' in state['continuation_reason'], 'next-round refocus should persist that no critique notes were accepted'
 assert plan['plan_basis'] == 'user_refocus', 'plan_basis should reset to user_refocus for the next workflow round'
 assert active['status'] == 'idle', 'active-slice should reset to idle for the next workflow round'
 PY
@@ -200,19 +241,33 @@ import json
 from pathlib import Path
 mission = 'Explicit replacement mission for the active workflow.'
+expected_task_type = 'completion-workflow'
+expected_eval_profile = 'completion-rubric-v1'
 mission_text = Path('.agent/mission.md').read_text()
+profile = json.loads(Path('.agent/profile.json').read_text())
 state = json.loads(Path('.agent/state.json').read_text())
 plan = json.loads(Path('.agent/plan.json').read_text())
 active = json.loads(Path('.agent/active-slice.json').read_text())
 assert mission in mission_text, '.agent/mission.md did not update to the explicit replacement mission anchor'
+assert profile['task_type'] == expected_task_type, 'profile.json task_type mismatch after explicit-goal replacement'
+assert profile['evaluation_profile'] == expected_eval_profile, 'profile.json evaluation_profile mismatch after explicit-goal replacement'
 assert state['mission_anchor'] == mission, 'state.json mission_anchor mismatch after explicit-goal replacement'
+assert state['task_type'] == expected_task_type, 'state.json task_type mismatch after explicit-goal replacement'
+assert state['evaluation_profile'] == expected_eval_profile, 'state.json evaluation_profile mismatch after explicit-goal replacement'
 assert plan['mission_anchor'] == mission, 'plan.json mission_anchor mismatch after explicit-goal replacement'
+assert plan['task_type'] == expected_task_type, 'plan.json task_type mismatch after explicit-goal replacement'
+assert plan['evaluation_profile'] == expected_eval_profile, 'plan.json evaluation_profile mismatch after explicit-goal replacement'
 assert active['mission_anchor'] == mission, 'active-slice.json mission_anchor mismatch after explicit-goal replacement'
+assert active['task_type'] == expected_task_type, 'active-slice.json task_type mismatch after explicit-goal replacement'
+assert active['evaluation_profile'] == expected_eval_profile, 'active-slice.json evaluation_profile mismatch after explicit-goal replacement'
 assert state['current_phase'] == 'reground', 'current_phase should reset to reground after explicit-goal replacement'
 assert state['continuation_policy'] == 'continue', 'continuation_policy should stay continue after explicit-goal replacement'
 assert state['next_mandatory_role'] == 'completion-regrounder', 'next role should reset to completion-regrounder after explicit-goal replacement'
 assert state['continuation_reason'].startswith('User refocused workflow via /cook:'), 'continuation_reason should record the explicit-goal replacement'
+assert 'task_type=completion-workflow' in state['continuation_reason'], 'explicit-goal replacement should persist the selected task_type'
+assert 'evaluation_profile=completion-rubric-v1' in state['continuation_reason'], 'explicit-goal replacement should persist the selected evaluation_profile'
+assert 'critique outcome=accepted critique=none' in state['continuation_reason'], 'explicit-goal replacement should persist the accepted critique outcome even when no critique was derived'
 assert 'Preserve the richer proposal structure from discussion.' not in state['continuation_reason'], 'session scope should not be merged when analyst output is unavailable'
 assert 'Keep explicit goals as the mission anchor when they conflict with earlier text.' not in state['continuation_reason'], 'session constraints should not be merged when analyst output is unavailable'
 assert 'Refresh canonical state from the replacement mission.' not in state['continuation_reason'], 'session acceptance should not be merged when analyst output is unavailable'
@@ -226,35 +281,60 @@ mark_done
 SESSION_FOUR="$TMPDIR/session-four.jsonl"
 DISCUSSION_FOUR=$'Scope:\n- Add session-only scope.\n- Restyle widget.\nConstraints:\n- Keep rules.\nAcceptance:\n- Add test.'
-EXPLICIT_GOAL_FOUR=$'Mission: Filter scope by mission.\nScope:\n- Keep explicit scope.'
+EXPLICIT_GOAL_FOUR=$'Mission: Filter scope by mission.\nScope:\n- Keep explicit scope.\nCritique:\n- Keep critique notes separate from the mission anchor.\nRisks:\n- Session-only scope could leak into the next workflow round.\nTask type: completion-workflow\nEvaluation profile: completion-rubric-v1'
+EXPLICIT_SNAPSHOT_FOUR="$TMPDIR/context-proposal-explicit-hints.json"
 write_session "$SESSION_FOUR" "$ROOT" "$DISCUSSION_FOUR"
 PI_COMPLETION_CONTEXT_PROPOSAL_ACTION=accept \
 PI_COMPLETION_DISABLE_CONTEXT_PROPOSAL_ANALYST=1 \
+PI_COMPLETION_TEST_CONTEXT_PROPOSAL_PATH="$EXPLICIT_SNAPSHOT_FOUR" \
 PI_COMPLETION_SKIP_DRIVER_KICKOFF=1 \
 pi --session "$SESSION_FOUR" -e "$PKG_ROOT" -p "/cook $EXPLICIT_GOAL_FOUR" >/tmp/pi-completion-context-proposal-done-goal.out 2>/tmp/pi-completion-context-proposal-done-goal.err
-python3 - <<'PY'
+python3 - "$EXPLICIT_SNAPSHOT_FOUR" <<'PY'
 import json
+import sys
 from pathlib import Path
 mission = 'Filter scope by mission.'
+expected_task_type = 'completion-workflow'
+expected_eval_profile = 'completion-rubric-v1'
 mission_text = Path('.agent/mission.md').read_text()
+profile = json.loads(Path('.agent/profile.json').read_text())
 state = json.loads(Path('.agent/state.json').read_text())
 plan = json.loads(Path('.agent/plan.json').read_text())
 active = json.loads(Path('.agent/active-slice.json').read_text())
+proposal = json.loads(Path(sys.argv[1]).read_text())
 continuation_reason = state['continuation_reason']
 assert mission in mission_text, '.agent/mission.md did not update to the explicit next-round mission anchor'
+assert profile['task_type'] == expected_task_type, 'profile.json task_type mismatch after explicit-goal next-round start'
+assert profile['evaluation_profile'] == expected_eval_profile, 'profile.json evaluation_profile mismatch after explicit-goal next-round start'
 assert state['mission_anchor'] == mission, 'state.json mission_anchor mismatch after explicit-goal next-round start'
+assert state['task_type'] == expected_task_type, 'state.json task_type mismatch after explicit-goal next-round start'
+assert state['evaluation_profile'] == expected_eval_profile, 'state.json evaluation_profile mismatch after explicit-goal next-round start'
 assert plan['mission_anchor'] == mission, 'plan.json mission_anchor mismatch after explicit-goal next-round start'
+assert plan['task_type'] == expected_task_type, 'plan.json task_type mismatch after explicit-goal next-round start'
+assert plan['evaluation_profile'] == expected_eval_profile, 'plan.json evaluation_profile mismatch after explicit-goal next-round start'
 assert active['mission_anchor'] == mission, 'active-slice.json mission_anchor mismatch after explicit-goal next-round start'
+assert active['task_type'] == expected_task_type, 'active-slice.json task_type mismatch after explicit-goal next-round start'
+assert active['evaluation_profile'] == expected_eval_profile, 'active-slice.json evaluation_profile mismatch after explicit-goal next-round start'
+assert proposal['mission'] == mission, 'explicit-goal proposal snapshot should preserve the explicit mission anchor'
+assert proposal['analysis']['taskType'] == expected_task_type, 'explicit-goal proposal snapshot should preserve task_type hints from the goal text'
+assert proposal['analysis']['evaluationProfile'] == expected_eval_profile, 'explicit-goal proposal snapshot should preserve evaluation_profile hints from the goal text'
+assert proposal['analysis']['critique'] == ['Keep critique notes separate from the mission anchor.'], 'explicit-goal proposal snapshot should preserve critique hints from the goal text'
+assert proposal['analysis']['risks'] == ['Session-only scope could leak into the next workflow round.'], 'explicit-goal proposal snapshot should preserve risk hints from the goal text'
+assert 'Critique:' not in proposal['goalText'], 'goalText should keep critique notes separate from mission/scope/constraints/acceptance'
+assert 'Task type:' not in proposal['goalText'], 'goalText should keep task_type hints separate from the mission body'
 assert state['current_phase'] == 'reground', 'current_phase should reset to reground after explicit-goal next-round start'
 assert state['continuation_policy'] == 'continue', 'continuation_policy should reset to continue after explicit-goal next-round start'
 assert state['project_done'] is False, 'project_done should reset to false after explicit-goal next-round start'
 assert state['requires_reground'] is True, 'requires_reground should reset to true after explicit-goal next-round start'
 assert state['next_mandatory_role'] == 'completion-regrounder', 'next role should reset to completion-regrounder after explicit-goal next-round start'
 assert continuation_reason.startswith('User refocused workflow via /cook:'), 'continuation_reason should record the explicit-goal next-round start'
+assert 'task_type=completion-workflow' in continuation_reason, 'explicit-goal next-round start should persist the selected task_type'
+assert 'evaluation_profile=completion-rubric-v1' in continuation_reason, 'explicit-goal next-round start should persist the selected evaluation_profile'
+assert 'Keep critique notes separate from the mission anchor.' in continuation_reason, 'explicit-goal next-round start should persist the accepted critique outcome'
 assert 'Keep explicit scope.' in continuation_reason, 'explicit scope should remain in the explicit-goal proposal'
 assert 'Add session-only scope.' not in continuation_reason, 'session-derived scope should not be merged when analyst output is unavailable'
 assert 'Restyle widget.' not in continuation_reason, 'unrelated session-derived scope should not be merged when analyst output is unavailable'
@@ -283,19 +363,33 @@ import json
 from pathlib import Path
 mission = 'Use a proposal analyst to summarize natural discussion before /cook writes canonical state.'
+expected_task_type = 'completion-workflow'
+expected_eval_profile = 'completion-rubric-v1'
 mission_text = Path('.agent/mission.md').read_text()
+profile = json.loads(Path('.agent/profile.json').read_text())
 state = json.loads(Path('.agent/state.json').read_text())
 plan = json.loads(Path('.agent/plan.json').read_text())
 active = json.loads(Path('.agent/active-slice.json').read_text())
 continuation_reason = state['continuation_reason']
 assert mission in mission_text, '.agent/mission.md did not record the analyst-derived mission anchor'
+assert profile['task_type'] == expected_task_type, 'profile.json task_type mismatch after analyst-derived restart'
+assert profile['evaluation_profile'] == expected_eval_profile, 'profile.json evaluation_profile mismatch after analyst-derived restart'
 assert state['mission_anchor'] == mission, 'state.json mission_anchor mismatch after analyst-derived bootstrap'
+assert state['task_type'] == expected_task_type, 'state.json task_type mismatch after analyst-derived bootstrap'
+assert state['evaluation_profile'] == expected_eval_profile, 'state.json evaluation_profile mismatch after analyst-derived bootstrap'
 assert plan['mission_anchor'] == mission, 'plan.json mission_anchor mismatch after analyst-derived bootstrap'
+assert plan['task_type'] == expected_task_type, 'plan.json task_type mismatch after analyst-derived bootstrap'
+assert plan['evaluation_profile'] == expected_eval_profile, 'plan.json evaluation_profile mismatch after analyst-derived bootstrap'
 assert active['mission_anchor'] == mission, 'active-slice.json mission_anchor mismatch after analyst-derived bootstrap'
+assert active['task_type'] == expected_task_type, 'active-slice.json task_type mismatch after analyst-derived bootstrap'
+assert active['evaluation_profile'] == expected_eval_profile, 'active-slice.json evaluation_profile mismatch after analyst-derived bootstrap'
 assert state['current_phase'] == 'reground', 'current_phase should reset to reground after analyst-derived bootstrap'
 assert state['next_mandatory_role'] == 'completion-regrounder', 'next role should reset to completion-regrounder after analyst-derived bootstrap'
 assert continuation_reason.startswith('User refocused workflow via /cook:'), 'continuation_reason should record the analyst-derived restart'
+assert 'task_type=completion-workflow' in continuation_reason, 'analyst-derived restart should persist the selected task_type'
+assert 'evaluation_profile=completion-rubric-v1' in continuation_reason, 'analyst-derived restart should persist the selected evaluation_profile'
+assert 'critique outcome=accepted critique=none' in continuation_reason, 'analyst-derived restart should persist that no critique notes were accepted'
 assert 'Keep explicit goals anchored.' in continuation_reason, 'analyst-derived scope should be preserved'
 PY
@@ -307,7 +401,7 @@ git init -q
 UI_SESSION_START="$TMPDIR/ui-session-start.jsonl"
 UI_DISCUSSION_START=$'Mission: Replace the crowded selector with a clearer action layout.\nScope:\n- Separate proposal text from actions.\nConstraints:\n- Preserve Start/Edit/Cancel behavior.\nAcceptance:\n- Add regression coverage.'
-UI_ANALYST_OUTPUT_START='{"mission":"Replace the crowded selector with a clearer action layout.","scope":["Separate proposal text from actions."],"constraints":["Preserve Start/Edit/Cancel behavior."],"acceptance":["Add regression coverage."],"confidence":0.95}'
+UI_ANALYST_OUTPUT_START='{"mission":"Replace the crowded selector with a clearer action layout.","scope":["Separate proposal text from actions."],"constraints":["Preserve Start/Edit/Cancel behavior."],"acceptance":["Add regression coverage."],"critique":["Keep critique details separate from the editable proposal body."],"risks":["Bundling critique into the action list would make the confirmation harder to scan."],"task_type":"completion-workflow","evaluation_profile":"completion-rubric-v1","possible_noise":["old selector wording"],"confidence":0.95}'
 UI_SNAPSHOT_START="$TMPDIR/context-proposal-ui-start.json"
 write_session "$UI_SESSION_START" "$UI_ROOT_START" "$UI_DISCUSSION_START"
@@ -326,13 +420,25 @@ snapshot = json.loads(Path(sys.argv[1]).read_text())
 state = json.loads(Path('.agent/state.json').read_text())
 assert snapshot['proposalHeading'] == 'Proposed workflow', 'custom confirmation snapshot should expose a dedicated proposal section'
+assert snapshot['critiqueHeading'] == 'Critique and risks', 'custom confirmation snapshot should expose critique separately from the proposal body'
+assert snapshot['routingHeading'] == 'Routing recommendations', 'custom confirmation snapshot should expose routing recommendations separately from the proposal body'
+assert state['task_type'] == 'completion-workflow', 'start action should preserve canonical task_type'
+assert state['evaluation_profile'] == 'completion-rubric-v1', 'start action should preserve canonical evaluation_profile'
 assert 'Mission\nReplace the crowded selector with a clearer action layout.' in snapshot['proposalBody'], 'proposal body should be captured separately from the action list'
+assert 'Keep critique details separate from the editable proposal body.' not in snapshot['proposalBody'], 'critique notes should not be embedded in the proposal body'
+assert 'Critique\n- Keep critique details separate from the editable proposal body.' in snapshot['critiqueBody'], 'critique section should render accepted critique notes separately'
+assert 'Risks\n- Bundling critique into the action list would make the confirmation harder to scan.' in snapshot['critiqueBody'], 'critique section should render risk notes separately'
+assert 'Possible noise\n- old selector wording' in snapshot['critiqueBody'], 'critique section should render possible-noise notes separately'
+assert '- task_type: completion-workflow' in snapshot['routingBody'], 'routing section should render the recommended task_type'
+assert '- evaluation_profile: completion-rubric-v1' in snapshot['routingBody'], 'routing section should render the recommended evaluation_profile'
 assert [action['id'] for action in snapshot['actions']] == ['start', 'edit', 'cancel'], 'custom confirmation actions should stay Start/Edit/Cancel'
 assert [action['label'] for action in snapshot['actions']] == ['Start', 'Edit', 'Cancel'], 'custom confirmation action labels should be concise'
 for action in snapshot['actions']:
     assert 'Replace the crowded selector with a clearer action layout.' not in action['label'], 'proposal mission should not be embedded in action labels'
     assert 'Separate proposal text from actions.' not in action['description'], 'proposal scope should not be embedded in action descriptions'
 assert state['mission_anchor'] == 'Replace the crowded selector with a clearer action layout.', 'start action should still accept the proposed mission'
+assert state['continuation_reason'].startswith('User started workflow via /cook:'), 'start action should persist the startup routing outcome in continuation_reason'
+assert 'Keep critique details separate from the editable proposal body.' in state['continuation_reason'], 'start action should persist the accepted critique outcome canonically'
 PY
 # Custom confirmation UI: edit should keep the existing editor/parsing flow when the action comes from the new layout.
@@ -343,7 +449,7 @@ git init -q
 UI_SESSION_EDIT="$TMPDIR/ui-session-edit.jsonl"
 UI_DISCUSSION_EDIT=$'Mission: Keep editing support in the custom confirmation UI.\nScope:\n- Preserve the proposal editor.\nConstraints:\n- Keep parsing structured proposal text.\nAcceptance:\n- Update the mission anchor after edit.'
-UI_ANALYST_OUTPUT_EDIT='{"mission":"Keep editing support in the custom confirmation UI.","scope":["Preserve the proposal editor."],"constraints":["Keep parsing structured proposal text."],"acceptance":["Update the mission anchor after edit."],"confidence":0.94}'
+UI_ANALYST_OUTPUT_EDIT='{"mission":"Keep editing support in the custom confirmation UI.","scope":["Preserve the proposal editor."],"constraints":["Keep parsing structured proposal text."],"acceptance":["Update the mission anchor after edit."],"critique":["Keep critique persistence even when the operator edits the proposal body."],"task_type":"completion-workflow","evaluation_profile":"completion-rubric-v1","confidence":0.94}'
 UI_EDIT_TEXT=$'Mission: Edited mission from the custom confirmation UI.\nScope:\n- Preserve parsing after edit.\nConstraints:\n- Keep the shared confirmation flow.\nAcceptance:\n- Start the workflow from the edited proposal.'
 write_session "$UI_SESSION_EDIT" "$UI_ROOT_EDIT" "$UI_DISCUSSION_EDIT"
@@ -363,9 +469,12 @@ active = json.loads(Path('.agent/active-slice.json').read_text())
 mission = 'Edited mission from the custom confirmation UI.'
 assert state['mission_anchor'] == mission, 'edit action should still route through the proposal parser and update state.json'
+assert state['task_type'] == 'completion-workflow', 'edit action should preserve canonical task_type'
+assert state['evaluation_profile'] == 'completion-rubric-v1', 'edit action should preserve canonical evaluation_profile'
 assert plan['mission_anchor'] == mission, 'edit action should still route through the proposal parser and update plan.json'
 assert active['mission_anchor'] == mission, 'edit action should still route through the proposal parser and update active-slice.json'
 assert state['current_phase'] == 'reground', 'edit action should still bootstrap/reground the workflow'
+assert 'Keep critique persistence even when the operator edits the proposal body.' in state['continuation_reason'], 'edit action should preserve the accepted critique outcome canonically'
 PY
 # Custom confirmation UI: cancel should exit without writing canonical state.

package/scripts/refocus-test.sh CHANGED Viewed

@@ -44,15 +44,26 @@ import json
 from pathlib import Path
 new_anchor = 'refocused smoke-test mission with tests and docs parity.'
+expected_task_type = 'completion-workflow'
+expected_eval_profile = 'completion-rubric-v1'
 mission_text = Path('.agent/mission.md').read_text()
+profile = json.loads(Path('.agent/profile.json').read_text())
 state = json.loads(Path('.agent/state.json').read_text())
 plan = json.loads(Path('.agent/plan.json').read_text())
 active = json.loads(Path('.agent/active-slice.json').read_text())
 assert new_anchor in mission_text, '.agent/mission.md did not update to the refocused mission anchor'
+assert profile['task_type'] == expected_task_type, 'profile.json task_type mismatch after refocus'
+assert profile['evaluation_profile'] == expected_eval_profile, 'profile.json evaluation_profile mismatch after refocus'
 assert state['mission_anchor'] == new_anchor, 'state.json mission_anchor mismatch after refocus'
+assert state['task_type'] == expected_task_type, 'state.json task_type mismatch after refocus'
+assert state['evaluation_profile'] == expected_eval_profile, 'state.json evaluation_profile mismatch after refocus'
 assert plan['mission_anchor'] == new_anchor, 'plan.json mission_anchor mismatch after refocus'
+assert plan['task_type'] == expected_task_type, 'plan.json task_type mismatch after refocus'
+assert plan['evaluation_profile'] == expected_eval_profile, 'plan.json evaluation_profile mismatch after refocus'
 assert active['mission_anchor'] == new_anchor, 'active-slice.json mission_anchor mismatch after refocus'
+assert active['task_type'] == expected_task_type, 'active-slice.json task_type mismatch after refocus'
+assert active['evaluation_profile'] == expected_eval_profile, 'active-slice.json evaluation_profile mismatch after refocus'
 assert state['current_phase'] == 'reground', 'state.json current_phase should reset to reground after refocus'
 assert state['requires_reground'] is True, 'state.json requires_reground should be true after refocus'
 assert state['next_mandatory_role'] == 'completion-regrounder', 'next_mandatory_role should reset to completion-regrounder'

package/scripts/release-check.sh CHANGED Viewed

@@ -4,10 +4,12 @@ set -euo pipefail
 ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"  # absolute path of the parent of this script's directory
 cd "$ROOT"  # run every npm script below from that directory
+echo "[release-check] running startup/refocus/context regressions, including critique-aware /cook confirmation coverage"  # progress banner
 npm run smoke-test
 npm run refocus-test
 npm run context-proposal-test
 npm run observability-status-test
+npm run rubric-contract-test  # rubric contract checks, run after the four existing suites
 npm pack --dry-run >/dev/null  # packaging dry run; only stdout is discarded
 echo "release check passed"

package/scripts/rubric-contract-test.sh ADDED Viewed

@@ -0,0 +1,249 @@
+#!/usr/bin/env bash
+set -euo pipefail  # abort on a failing command, an unset variable, or a failing pipeline stage
+ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"  # absolute path of the parent of this script's directory
+cd "$ROOT"  # the relative file paths checked below are resolved from here
+node <<'NODE'  # quoted delimiter: the heredoc body reaches node without shell expansion
+const fs = require('node:fs');
+const read = (file) => fs.readFileSync(file, 'utf8'); // whole file as a UTF-8 string
+const assertIncludes = (file, snippet) => { // exits with status 1 unless `file` contains `snippet` verbatim
+  const text = read(file); // the file is re-read on every call; nothing is cached
+  if (!text.includes(snippet)) { // plain substring match, not a pattern
+    console.error(`${file} is missing required rubric-contract text: ${snippet}`);
+    process.exit(1); // fail fast: the first missing snippet ends the script
+  }
+};
+const rubricHeading = '## Structured Evaluation Rubric Foundation'; // heading required in both completion-protocol docs
+const rubricDimensions = [ // dimension names checked in the protocol docs, the agent files, and the README
+  'Contract coverage',
+  'Correctness risk',
+  'Verification evidence',
+  'Docs/state parity',
+];
+const verdictSnippets = [ // verdict definitions required in the completion-protocol docs
+  '`pass` — no material issue remains',
+  '`concern` — a real caveat or remaining gap exists',
+  '`fail` — a blocking issue or contradictory truth exists',
+];
+for (const file of [ // completion-protocol docs: heading, routing wording, rubric template, verdict definitions
+  'skills/completion-protocol/SKILL.md',
+  'skills/completion-protocol/references/completion.md',
+]) {
+  assertIncludes(file, rubricHeading);
+  assertIncludes(file, 'canonical `task_type` and `evaluation_profile` signaling');
+  assertIncludes(file, 'routing metadata only; later slices may still add stricter profile-aware rubric-output enforcement');
+  assertIncludes(file, '- `Rubric:`');
+  for (const dimension of rubricDimensions) {
+    assertIncludes(file, `- \`- ${dimension}: pass|concern|fail - ...\``); // one templated rubric bullet per dimension
+  }
+  for (const snippet of verdictSnippets) {
+    assertIncludes(file, snippet);
+  }
+}
+for (const file of [ // reviewer, auditor, and stop-judge agent files: rubric instructions and the same template
+  'agents/completion-reviewer.md',
+  'agents/completion-auditor.md',
+  'agents/completion-stop-judge.md',
+]) {
+  assertIncludes(file, 'Always emit the shared rubric section');
+  assertIncludes(file, 'Use these exact rubric dimension names and verdict words');
+  assertIncludes(file, '`evaluation_profile`');
+  assertIncludes(file, '`implementation_surfaces`');
+  assertIncludes(file, '`verification_commands`');
+  assertIncludes(file, '- `Rubric:`');
+  for (const dimension of rubricDimensions) {
+    assertIncludes(file, `- \`- ${dimension}: pass|concern|fail - ...\``);
+  }
+}
+assertIncludes('README.md', '## Structured evaluation rubrics'); // README: section heading, routing values, and feature wording
+assertIncludes('README.md', '- `task_type: completion-workflow`');
+assertIncludes('README.md', '- `evaluation_profile: completion-rubric-v1`');
+assertIncludes('README.md', 'kickoff/reminder/resume text and reviewer/auditor/stop-judge evaluation handoffs so downstream roles can rely on canonical signaling instead of prose inference alone.');
+assertIncludes('README.md', 'Reviewer, auditor, and stop-judge dispatch/reminder surfaces now also thread the current active-slice implementation contract');
+assertIncludes('README.md', 'Canonical reviewer/auditor/stop-judge transcription now fails closed on malformed rubric-bearing reports');
+assertIncludes('README.md', 'npm run rubric-contract-test`, which now exercises reviewer, auditor, and stop-judge transcription paths');
+for (const dimension of rubricDimensions) { // README lists each dimension as a backticked bullet
+  assertIncludes('README.md', `- \`${dimension}\``);
+}
+assertIncludes('CHANGELOG.md', 'shared structured evaluation-rubric contract'); // CHANGELOG wording for the rubric work
+assertIncludes('CHANGELOG.md', 'added canonical `task_type: completion-workflow` and `evaluation_profile: completion-rubric-v1` signaling across the packaged control-plane defaults, verifier schema, and kickoff/reminder/resume surfaces');
+assertIncludes('CHANGELOG.md', 'threaded canonical `evaluation_profile` plus the active-slice implementation contract into reviewer/auditor/stop-judge reminder and dispatch surfaces');
+assertIncludes('CHANGELOG.md', 'made reviewer/auditor/stop-judge transcription fail closed on malformed rubric-bearing outputs while still accepting valid reports');
+assertIncludes('extensions/completion/index.ts', 'Canonical routing profile:\\n- task_type: ${taskType}\\n- evaluation_profile: ${evaluationProfile}'); // single-quoted: "${...}" and the escaped "\\n" are compared as literal text, not interpolated
+assertIncludes('extensions/completion/index.ts', '`Task type: ${currentTaskType(snapshot) ?? "(missing)"}`');
+assertIncludes('extensions/completion/index.ts', '`Evaluation profile: ${currentEvaluationProfile(snapshot) ?? "(missing)"}`');
+assertIncludes('extensions/completion/index.ts', '`task_type: ${currentTaskType(snapshot) ?? "(missing)"}`');
+assertIncludes('extensions/completion/index.ts', '`evaluation_profile: ${currentEvaluationProfile(snapshot) ?? "(missing)"}`');
+assertIncludes('extensions/completion/index.ts', 'Canonical evaluation handoff for ${role}:');
+assertIncludes('extensions/completion/index.ts', 'buildEvaluationRoleReminderText(snapshot, nextRole)');
+assertIncludes('extensions/completion/index.ts', 'roleReporting.transcribeCanonicalRoleReport');
+assertIncludes('extensions/completion/role-reporting.js', 'Missing Rubric heading for ${role}.'); // error-message text expected in role-reporting.js source
+assertIncludes('extensions/completion/role-reporting.js', 'Reviewer output cannot mark \'Acceptable as-is: yes\' when any rubric line is fail.');
+assertIncludes('extensions/completion/role-reporting.js', 'Auditor output must answer \'Tracked and unignored worktree is clean\' with yes or no.');
+assertIncludes('extensions/completion/role-reporting.js', 'Auditor output must answer \'Stale or conflicting canonical state\' with yes or no.');
+assertIncludes('extensions/completion/role-reporting.js', 'Auditor output must answer \'Plan truthfully captures remaining slice backlog\' with yes or no.');
+assertIncludes('extensions/completion/role-reporting.js', 'Stop-judge output cannot mark \'Can the project stop now: yes\' when any rubric line is fail.');
+assertIncludes('extensions/completion/role-reporting.js', 'Stop-judge output must answer \'Docs/config/runbooks match shipped behavior\' with yes or no.');
+assertIncludes('extensions/completion/role-reporting.js', 'Stop-judge output must answer \'Tracked and unignored worktree is clean\' with yes or no.');
+assertIncludes('package.json', '"rubric-contract-test": "bash ./scripts/rubric-contract-test.sh"'); // wiring: the npm script entry for this test
+assertIncludes('scripts/release-check.sh', 'npm run rubric-contract-test'); // wiring: release-check must invoke this test
+assertIncludes('.agent/verify_completion_stop.sh', 'npm run release-check >/dev/null'); // wiring: this script must invoke release-check
+NODE
+node <<'NODE'
+const fs = require('node:fs');
+const path = require('node:path');
+const {
+  parseReportFields,
+  transcribeCanonicalRoleReport,
+} = require('./extensions/completion/role-reporting.js'); // module under test
+const tempRootBase = path.join(process.cwd(), '.agent', 'tmp'); // scratch parent under the current working directory
+fs.mkdirSync(tempRootBase, { recursive: true }); // create it, and any missing parents, if absent
+const tempRoot = fs.mkdtempSync(path.join(tempRootBase, 'rubric-role-reporting-')); // unique per-run directory, removed when the run ends
+const snapshotFiles = { // history file paths handed to transcribeCanonicalRoleReport
+  sliceHistoryPath: path.join(tempRoot, 'slice-history.jsonl'),
+  stopHistoryPath: path.join(tempRoot, 'stop-check-history.jsonl'),
+};
+fs.writeFileSync(snapshotFiles.sliceHistoryPath, ''); // both histories start empty so the record counts asserted later are exact
+fs.writeFileSync(snapshotFiles.stopHistoryPath, '');
+const readJsonl = (file) => fs.readFileSync(file, 'utf8').split('\n').filter(Boolean).map((line) => JSON.parse(line)); // split on newlines, drop empty lines, JSON.parse the rest
+const assert = (condition, message) => { // minimal assertion helper: throws Error(message) when condition is falsy
+  if (!condition) throw new Error(message);
+};
+const reviewerReport = `MISSION ANCHOR: test mission\nRemaining contract IDs: TEST-CONTRACT\nRubric:\n- Contract coverage: pass - Locked acceptance criteria match the committed slice.\n- Correctness risk: pass - No blocking regression is evident.\n- Verification evidence: pass - Deterministic proof was rerun successfully.\n- Docs/state parity: pass - Docs and canonical state are aligned.\nFindings: none.\nAcceptable as-is: yes\nSmallest follow-up slice: none.`; // valid reviewer report: all four rubric lines present
+const reviewerMalformed = `MISSION ANCHOR: test mission\nRemaining contract IDs: TEST-CONTRACT\nRubric:\n- Contract coverage: pass - Locked acceptance criteria match the committed slice.\n- Correctness risk: pass - No blocking regression is evident.\n- Verification evidence: pass - Deterministic proof was rerun successfully.\nFindings: none.\nAcceptable as-is: yes\nSmallest follow-up slice: none.`; // omits the "Docs/state parity" rubric line
+const auditorReport = `MISSION ANCHOR: test mission\nRemaining contract IDs: TEST-CONTRACT\nRubric:\n- Contract coverage: pass - The accepted slice remains satisfied on HEAD.\n- Correctness risk: concern - Remaining planned work still keeps the project open.\n- Verification evidence: pass - Verification was rerun for the accepted slice.\n- Docs/state parity: pass - Canonical state can be reconciled truthfully.\nWhy the project is still not done: One planned contract remains after this accepted slice.\nOpen top-level contract IDs: TEST-CONTRACT\nBlocker count: 0\nHigh-value gap count: 1\nTracked and unignored worktree is clean: yes\nWorktree blockers: none\nNext mandatory slice: next-slice\nStale or conflicting canonical state: no\nPlan truthfully captures remaining slice backlog: yes - one planned slice remains.`; // valid auditor report; one rubric line is "concern"
+const auditorMalformed = `MISSION ANCHOR: test mission\nRemaining contract IDs: TEST-CONTRACT\nWhy the project is still not done: One planned contract remains after this accepted slice.\nOpen top-level contract IDs: TEST-CONTRACT\nBlocker count: 0\nHigh-value gap count: 1\nTracked and unignored worktree is clean: yes\nWorktree blockers: none\nNext mandatory slice: next-slice\nStale or conflicting canonical state: no\nPlan truthfully captures remaining slice backlog: yes - one planned slice remains.`; // has no "Rubric:" section at all
+const stopJudgeReport = `MISSION ANCHOR: test mission\nRemaining contract IDs: none\nRubric:\n- Contract coverage: pass - All implementation slices are accepted on HEAD.\n- Correctness risk: pass - No remaining blocker or high-value gap is evident.\n- Verification evidence: pass - Final verification passes for the current head.\n- Docs/state parity: pass - Docs, config, and canonical state match shipped behavior.\nCan the project stop now: yes\nExact remaining open top-level contract IDs: none\nBlocker count: 0\nHigh-value gap count: 0\nLatest completed slice commit: abcdef1234567890abcdef1234567890abcdef12\nDocs/config/runbooks match shipped behavior: yes\nTracked and unignored worktree is clean: yes\nBrief justification: Current HEAD satisfies the stop criteria.`; // valid stop-judge report: every rubric line passes and the verdict is yes
+const stopJudgeMalformed = `MISSION ANCHOR: test mission\nRemaining contract IDs: none\nRubric:\n- Contract coverage: fail - A blocking contract is still open.\n- Correctness risk: pass - No additional risk was found.\n- Verification evidence: pass - Verification still passes.\n- Docs/state parity: pass - Docs and state match.\nCan the project stop now: yes\nExact remaining open top-level contract IDs: TEST-CONTRACT\nBlocker count: 1\nHigh-value gap count: 0\nLatest completed slice commit: abcdef1234567890abcdef1234567890abcdef12\nDocs/config/runbooks match shipped behavior: yes\nTracked and unignored worktree is clean: yes\nBrief justification: This should be rejected because the rubric blocks stop.`; // contradictory: a "fail" rubric line alongside "Can the project stop now: yes"
+const auditorMalformedYesNo = `MISSION ANCHOR: test mission\nRemaining contract IDs: TEST-CONTRACT\nRubric:\n- Contract coverage: pass - The accepted slice remains satisfied on HEAD.\n- Correctness risk: concern - Remaining planned work still keeps the project open.\n- Verification evidence: pass - Verification was rerun for the accepted slice.\n- Docs/state parity: pass - Canonical state can be reconciled truthfully.\nWhy the project is still not done: One planned contract remains after this accepted slice.\nOpen top-level contract IDs: TEST-CONTRACT\nBlocker count: 0\nHigh-value gap count: 1\nTracked and unignored worktree is clean: maybe\nWorktree blockers: none\nNext mandatory slice: next-slice\nStale or conflicting canonical state: perhaps\nPlan truthfully captures remaining slice backlog: sorta.`; // three yes/no fields answered with "maybe", "perhaps", and "sorta"
+const stopJudgeMalformedYesNo = `MISSION ANCHOR: test mission\nRemaining contract IDs: none\nRubric:\n- Contract coverage: pass - All implementation slices are accepted on HEAD.\n- Correctness risk: pass - No remaining blocker or high-value gap is evident.\n- Verification evidence: pass - Final verification passes for the current head.\n- Docs/state parity: pass - Docs, config, and canonical state match shipped behavior.\nCan the project stop now: no\nExact remaining open top-level contract IDs: TEST-CONTRACT\nBlocker count: 0\nHigh-value gap count: 1\nLatest completed slice commit: abcdef1234567890abcdef1234567890abcdef12\nDocs/config/runbooks match shipped behavior: maybe\nTracked and unignored worktree is clean: perhaps\nBrief justification: This should be rejected because malformed yes/no-style fields must fail closed.`; // docs-parity and worktree fields answered with "maybe" and "perhaps"
+(async () => { // scenarios run in order; each record-count assertion depends on the ones before it
+  const reviewed = await transcribeCanonicalRoleReport({ // 1. valid reviewer report is accepted
+    role: 'completion-reviewer',
+    output: reviewerReport,
+    reportFields: parseReportFields(reviewerReport),
+    snapshotFiles,
+    headSha: '1111111111111111111111111111111111111111',
+    sliceId: 'slice-review',
+    recordedAt: 1,
+  });
+  assert(reviewed.errors.length === 0, `reviewer valid report should transcribe cleanly: ${reviewed.errors.join(' | ')}`);
+  assert(reviewed.appended.includes('reviewed:slice-review'), 'reviewer transcription should append reviewed record');
+  assert(readJsonl(snapshotFiles.sliceHistoryPath).length === 1, 'reviewer transcription should create one slice-history record');
+  const reviewerRejected = await transcribeCanonicalRoleReport({ // 2. reviewer report missing a rubric line is rejected
+    role: 'completion-reviewer',
+    output: reviewerMalformed,
+    reportFields: parseReportFields(reviewerMalformed),
+    snapshotFiles,
+    headSha: '2222222222222222222222222222222222222222',
+    sliceId: 'slice-review',
+    recordedAt: 2,
+  });
+  assert(reviewerRejected.errors.some((error) => error.includes('Docs/state parity')), 'reviewer malformed report should be rejected for missing rubric line');
+  assert(readJsonl(snapshotFiles.sliceHistoryPath).length === 1, 'rejected reviewer report must not append history'); // count unchanged from scenario 1
+  const audited = await transcribeCanonicalRoleReport({ // 3. valid auditor report is accepted
+    role: 'completion-auditor',
+    output: auditorReport,
+    reportFields: parseReportFields(auditorReport),
+    snapshotFiles,
+    headSha: '3333333333333333333333333333333333333333',
+    sliceId: 'slice-audit',
+    recordedAt: 3,
+  });
+  assert(audited.errors.length === 0, `auditor valid report should transcribe cleanly: ${audited.errors.join(' | ')}`);
+  assert(audited.appended.includes('audited:slice-audit'), 'auditor transcription should append audited record');
+  assert(readJsonl(snapshotFiles.sliceHistoryPath).length === 2, 'auditor transcription should append a second slice-history record'); // reviewer and auditor records share the slice-history file
+  const auditorRejected = await transcribeCanonicalRoleReport({ // 4. auditor report without a Rubric section is rejected
+    role: 'completion-auditor',
+    output: auditorMalformed,
+    reportFields: parseReportFields(auditorMalformed),
+    snapshotFiles,
+    headSha: '4444444444444444444444444444444444444444',
+    sliceId: 'slice-audit',
+    recordedAt: 4,
+  });
+  assert(auditorRejected.errors.some((error) => error.includes('Missing Rubric heading')), 'auditor malformed report should be rejected without rubric heading');
+  assert(readJsonl(snapshotFiles.sliceHistoryPath).length === 2, 'rejected auditor report must not append history');
+  const judged = await transcribeCanonicalRoleReport({ // 5. valid stop-judge report is accepted; no sliceId is passed for this role
+    role: 'completion-stop-judge',
+    output: stopJudgeReport,
+    reportFields: parseReportFields(stopJudgeReport),
+    snapshotFiles,
+    headSha: '5555555555555555555555555555555555555555',
+    recordedAt: 5,
+  });
+  assert(judged.errors.length === 0, `stop-judge valid report should transcribe cleanly: ${judged.errors.join(' | ')}`);
+  assert(judged.appended.includes('judgment:555555555555'), 'stop-judge transcription should append judgment record'); // expected id appears to be "judgment:" plus a prefix of headSha
+  assert(readJsonl(snapshotFiles.stopHistoryPath).length === 1, 'stop-judge transcription should create one judgment record'); // judgments are counted in the stop-history file, not slice history
+  const judgeRejected = await transcribeCanonicalRoleReport({ // 6. "yes" verdict alongside a failing rubric line is rejected
+    role: 'completion-stop-judge',
+    output: stopJudgeMalformed,
+    reportFields: parseReportFields(stopJudgeMalformed),
+    snapshotFiles,
+    headSha: '6666666666666666666666666666666666666666',
+    recordedAt: 6,
+  });
+  assert(judgeRejected.errors.some((error) => error.includes("Can the project stop now: yes")), 'stop-judge malformed report should be rejected when fail rubric contradicts yes verdict');
+  assert(readJsonl(snapshotFiles.stopHistoryPath).length === 1, 'rejected stop-judge report must not append judgment history');
+  const auditorYesNoRejected = await transcribeCanonicalRoleReport({ // 7. auditor report with non yes/no answers: one error expected per field
+    role: 'completion-auditor',
+    output: auditorMalformedYesNo,
+    reportFields: parseReportFields(auditorMalformedYesNo),
+    snapshotFiles,
+    headSha: '7777777777777777777777777777777777777777',
+    sliceId: 'slice-audit',
+    recordedAt: 7,
+  });
+  assert(auditorYesNoRejected.errors.some((error) => error.includes("Tracked and unignored worktree is clean")), 'auditor malformed yes/no report should reject invalid worktree cleanliness values');
+  assert(auditorYesNoRejected.errors.some((error) => error.includes("Stale or conflicting canonical state")), 'auditor malformed yes/no report should reject invalid canonical-state values');
+  assert(auditorYesNoRejected.errors.some((error) => error.includes("Plan truthfully captures remaining slice backlog")), 'auditor malformed yes/no report should reject invalid backlog-truth values');
+  assert(readJsonl(snapshotFiles.sliceHistoryPath).length === 2, 'rejected auditor yes/no report must not append history');
+  const stopJudgeYesNoRejected = await transcribeCanonicalRoleReport({ // 8. stop-judge report with non yes/no answers is rejected
+    role: 'completion-stop-judge',
+    output: stopJudgeMalformedYesNo,
+    reportFields: parseReportFields(stopJudgeMalformedYesNo),
+    snapshotFiles,
+    headSha: '8888888888888888888888888888888888888888',
+    recordedAt: 8,
+  });
+  assert(stopJudgeYesNoRejected.errors.some((error) => error.includes("Docs/config/runbooks match shipped behavior")), 'stop-judge malformed yes/no report should reject invalid docs parity values');
+  assert(stopJudgeYesNoRejected.errors.some((error) => error.includes("Tracked and unignored worktree is clean")), 'stop-judge malformed yes/no report should reject invalid worktree cleanliness values');
+  assert(readJsonl(snapshotFiles.stopHistoryPath).length === 1, 'rejected stop-judge yes/no report must not append judgment history');
+  fs.rmSync(tempRoot, { recursive: true, force: true }); // success path: remove the per-run scratch directory
+})().catch((error) => { // a failed assertion or a rejected transcription call lands here
+  try {
+    fs.rmSync(tempRoot, { recursive: true, force: true });
+  } catch {} // best-effort cleanup: a removal error is ignored so the original failure is still reported
+  console.error(error instanceof Error ? error.message : String(error));
+  process.exit(1); // non-zero exit so the calling shell script fails
+});
+NODE
+echo "rubric contract test passed"  # reached only when both node scripts exit 0, because set -e is active