@linimin/pi-letscook 0.1.29 → 0.1.31

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -120,30 +120,57 @@ PY
120
120
  # No workflow yet: /cook with no goal should infer from recent discussion through analyst output.
121
121
  SESSION_ONE="$TMPDIR/session-one.jsonl"
122
122
  DISCUSSION_ONE="$DISCUSSION_ZERO"
123
- ANALYST_OUTPUT_ONE='{"mission":"Remove the completion status line while keeping the completion widget.","scope":["Keep the non-running completion widget.","Suppress the widget while a completion role is active."],"constraints":["Do not reintroduce any other completion status surface."],"acceptance":["Update README to match the shipped behavior.","Keep observability regression coverage truthful."],"confidence":0.94}'
123
+ ANALYST_OUTPUT_ONE='{"mission":"Remove the completion status line while keeping the completion widget.","scope":["Keep the non-running completion widget.","Suppress the widget while a completion role is active."],"constraints":["Do not reintroduce any other completion status surface."],"acceptance":["Update README to match the shipped behavior.","Keep observability regression coverage truthful."],"critique":["Keep critique separate from the mission anchor so startup analysis does not rewrite the workflow goal."],"risks":["Stale widget-removal discussion could broaden the startup plan if it gets treated as mission text."],"task_type":"completion-workflow","evaluation_profile":"completion-rubric-v1","possible_noise":["older widget restyle ideas"],"confidence":0.94}'
124
+ DISCUSSION_SNAPSHOT_ONE="$TMPDIR/context-proposal-discussion-hints.json"
124
125
  write_session "$SESSION_ONE" "$ROOT" "$DISCUSSION_ONE"
125
126
 
126
127
  PI_COMPLETION_CONTEXT_PROPOSAL_ACTION=accept \
127
128
  PI_COMPLETION_CONTEXT_PROPOSAL_ANALYST_OUTPUT="$ANALYST_OUTPUT_ONE" \
129
+ PI_COMPLETION_TEST_CONTEXT_PROPOSAL_PATH="$DISCUSSION_SNAPSHOT_ONE" \
128
130
  PI_COMPLETION_SKIP_DRIVER_KICKOFF=1 \
129
131
  pi --session "$SESSION_ONE" -e "$PKG_ROOT" -p "/cook" >/tmp/pi-completion-context-proposal-bootstrap.out 2>/tmp/pi-completion-context-proposal-bootstrap.err
130
132
 
131
- python3 - <<'PY'
133
+ python3 - "$DISCUSSION_SNAPSHOT_ONE" <<'PY'
132
134
  import json
135
+ import sys
133
136
  from pathlib import Path
134
137
 
135
138
  mission = 'Remove the completion status line while keeping the completion widget.'
139
+ expected_task_type = 'completion-workflow'
140
+ expected_eval_profile = 'completion-rubric-v1'
136
141
  mission_text = Path('.agent/mission.md').read_text()
142
+ profile = json.loads(Path('.agent/profile.json').read_text())
137
143
  state = json.loads(Path('.agent/state.json').read_text())
138
144
  plan = json.loads(Path('.agent/plan.json').read_text())
139
145
  active = json.loads(Path('.agent/active-slice.json').read_text())
146
+ proposal = json.loads(Path(sys.argv[1]).read_text())
140
147
 
141
148
  assert mission in mission_text, '.agent/mission.md did not record the analyst-derived mission anchor'
149
+ assert profile['task_type'] == expected_task_type, 'profile.json task_type mismatch after analyst-derived bootstrap'
150
+ assert profile['evaluation_profile'] == expected_eval_profile, 'profile.json evaluation_profile mismatch after analyst-derived bootstrap'
142
151
  assert state['mission_anchor'] == mission, 'state.json mission_anchor mismatch after analyst-derived bootstrap'
152
+ assert state['task_type'] == expected_task_type, 'state.json task_type mismatch after analyst-derived bootstrap'
153
+ assert state['evaluation_profile'] == expected_eval_profile, 'state.json evaluation_profile mismatch after analyst-derived bootstrap'
143
154
  assert plan['mission_anchor'] == mission, 'plan.json mission_anchor mismatch after analyst-derived bootstrap'
155
+ assert plan['task_type'] == expected_task_type, 'plan.json task_type mismatch after analyst-derived bootstrap'
156
+ assert plan['evaluation_profile'] == expected_eval_profile, 'plan.json evaluation_profile mismatch after analyst-derived bootstrap'
144
157
  assert active['mission_anchor'] == mission, 'active-slice.json mission_anchor mismatch after analyst-derived bootstrap'
158
+ assert active['task_type'] == expected_task_type, 'active-slice.json task_type mismatch after analyst-derived bootstrap'
159
+ assert active['evaluation_profile'] == expected_eval_profile, 'active-slice.json evaluation_profile mismatch after analyst-derived bootstrap'
160
+ assert proposal['mission'] == mission, 'discussion-only proposal snapshot should keep the inferred mission anchor'
161
+ assert proposal['analysis']['taskType'] == expected_task_type, 'discussion-only proposal snapshot should expose task_type hints separately'
162
+ assert proposal['analysis']['evaluationProfile'] == expected_eval_profile, 'discussion-only proposal snapshot should expose evaluation_profile hints separately'
163
+ assert proposal['analysis']['critique'] == ['Keep critique separate from the mission anchor so startup analysis does not rewrite the workflow goal.'], 'discussion-only proposal snapshot should preserve critique hints'
164
+ assert proposal['analysis']['risks'] == ['Stale widget-removal discussion could broaden the startup plan if it gets treated as mission text.'], 'discussion-only proposal snapshot should preserve risk hints'
165
+ assert proposal['analysis']['possibleNoise'] == ['older widget restyle ideas'], 'discussion-only proposal snapshot should preserve possible_noise hints'
166
+ assert 'Critique:' not in proposal['goalText'], 'goalText should keep critique separate from mission/scope/constraints/acceptance'
167
+ assert 'Task type:' not in proposal['goalText'], 'goalText should keep task_type hints separate from the mission body'
145
168
  assert state['current_phase'] == 'reground', 'state.json current_phase should start at reground after analyst-derived bootstrap'
146
169
  assert state['next_mandatory_role'] == 'completion-regrounder', 'next_mandatory_role should start at completion-regrounder after analyst-derived bootstrap'
170
+ assert state['continuation_reason'].startswith('User started workflow via /cook:'), 'initial startup should record the accepted startup routing in continuation_reason'
171
+ assert 'task_type=completion-workflow' in state['continuation_reason'], 'initial startup should persist the selected task_type in continuation_reason'
172
+ assert 'evaluation_profile=completion-rubric-v1' in state['continuation_reason'], 'initial startup should persist the selected evaluation_profile in continuation_reason'
173
+ assert 'Keep critique separate from the mission anchor so startup analysis does not rewrite the workflow goal.' in state['continuation_reason'], 'initial startup should persist the accepted critique outcome in continuation_reason'
147
174
  PY
148
175
 
149
176
  # Completed workflow: /cook with no goal should infer the next round from recent discussion through analyst output.
@@ -164,21 +191,35 @@ import json
164
191
  from pathlib import Path
165
192
 
166
193
  mission = 'Ship the next workflow round for richer context-derived /cook startup.'
194
+ expected_task_type = 'completion-workflow'
195
+ expected_eval_profile = 'completion-rubric-v1'
167
196
  mission_text = Path('.agent/mission.md').read_text()
197
+ profile = json.loads(Path('.agent/profile.json').read_text())
168
198
  state = json.loads(Path('.agent/state.json').read_text())
169
199
  plan = json.loads(Path('.agent/plan.json').read_text())
170
200
  active = json.loads(Path('.agent/active-slice.json').read_text())
171
201
 
172
202
  assert mission in mission_text, '.agent/mission.md did not update to the next-round context-derived mission anchor'
203
+ assert profile['task_type'] == expected_task_type, 'profile.json task_type mismatch after next-round startup'
204
+ assert profile['evaluation_profile'] == expected_eval_profile, 'profile.json evaluation_profile mismatch after next-round startup'
173
205
  assert state['mission_anchor'] == mission, 'state.json mission_anchor mismatch after starting the next workflow round'
206
+ assert state['task_type'] == expected_task_type, 'state.json task_type mismatch after starting the next workflow round'
207
+ assert state['evaluation_profile'] == expected_eval_profile, 'state.json evaluation_profile mismatch after starting the next workflow round'
174
208
  assert plan['mission_anchor'] == mission, 'plan.json mission_anchor mismatch after starting the next workflow round'
209
+ assert plan['task_type'] == expected_task_type, 'plan.json task_type mismatch after starting the next workflow round'
210
+ assert plan['evaluation_profile'] == expected_eval_profile, 'plan.json evaluation_profile mismatch after starting the next workflow round'
175
211
  assert active['mission_anchor'] == mission, 'active-slice.json mission_anchor mismatch after starting the next workflow round'
212
+ assert active['task_type'] == expected_task_type, 'active-slice.json task_type mismatch after starting the next workflow round'
213
+ assert active['evaluation_profile'] == expected_eval_profile, 'active-slice.json evaluation_profile mismatch after starting the next workflow round'
176
214
  assert state['current_phase'] == 'reground', 'state.json current_phase should reset to reground for the next workflow round'
177
215
  assert state['continuation_policy'] == 'continue', 'continuation_policy should reset to continue for the next workflow round'
178
216
  assert state['requires_reground'] is True, 'requires_reground should reset to true for the next workflow round'
179
217
  assert state['project_done'] is False, 'project_done should reset to false for the next workflow round'
180
218
  assert state['next_mandatory_role'] == 'completion-regrounder', 'next_mandatory_role should reset to completion-regrounder for the next workflow round'
181
219
  assert state['continuation_reason'].startswith('User refocused workflow via /cook:'), 'continuation_reason should record the next-round refocus'
220
+ assert 'task_type=completion-workflow' in state['continuation_reason'], 'next-round refocus should persist the selected task_type'
221
+ assert 'evaluation_profile=completion-rubric-v1' in state['continuation_reason'], 'next-round refocus should persist the selected evaluation_profile'
222
+ assert 'critique outcome=accepted critique=none' in state['continuation_reason'], 'next-round refocus should persist that no critique notes were accepted'
182
223
  assert plan['plan_basis'] == 'user_refocus', 'plan_basis should reset to user_refocus for the next workflow round'
183
224
  assert active['status'] == 'idle', 'active-slice should reset to idle for the next workflow round'
184
225
  PY
@@ -200,19 +241,33 @@ import json
200
241
  from pathlib import Path
201
242
 
202
243
  mission = 'Explicit replacement mission for the active workflow.'
244
+ expected_task_type = 'completion-workflow'
245
+ expected_eval_profile = 'completion-rubric-v1'
203
246
  mission_text = Path('.agent/mission.md').read_text()
247
+ profile = json.loads(Path('.agent/profile.json').read_text())
204
248
  state = json.loads(Path('.agent/state.json').read_text())
205
249
  plan = json.loads(Path('.agent/plan.json').read_text())
206
250
  active = json.loads(Path('.agent/active-slice.json').read_text())
207
251
 
208
252
  assert mission in mission_text, '.agent/mission.md did not update to the explicit replacement mission anchor'
253
+ assert profile['task_type'] == expected_task_type, 'profile.json task_type mismatch after explicit-goal replacement'
254
+ assert profile['evaluation_profile'] == expected_eval_profile, 'profile.json evaluation_profile mismatch after explicit-goal replacement'
209
255
  assert state['mission_anchor'] == mission, 'state.json mission_anchor mismatch after explicit-goal replacement'
256
+ assert state['task_type'] == expected_task_type, 'state.json task_type mismatch after explicit-goal replacement'
257
+ assert state['evaluation_profile'] == expected_eval_profile, 'state.json evaluation_profile mismatch after explicit-goal replacement'
210
258
  assert plan['mission_anchor'] == mission, 'plan.json mission_anchor mismatch after explicit-goal replacement'
259
+ assert plan['task_type'] == expected_task_type, 'plan.json task_type mismatch after explicit-goal replacement'
260
+ assert plan['evaluation_profile'] == expected_eval_profile, 'plan.json evaluation_profile mismatch after explicit-goal replacement'
211
261
  assert active['mission_anchor'] == mission, 'active-slice.json mission_anchor mismatch after explicit-goal replacement'
262
+ assert active['task_type'] == expected_task_type, 'active-slice.json task_type mismatch after explicit-goal replacement'
263
+ assert active['evaluation_profile'] == expected_eval_profile, 'active-slice.json evaluation_profile mismatch after explicit-goal replacement'
212
264
  assert state['current_phase'] == 'reground', 'current_phase should reset to reground after explicit-goal replacement'
213
265
  assert state['continuation_policy'] == 'continue', 'continuation_policy should stay continue after explicit-goal replacement'
214
266
  assert state['next_mandatory_role'] == 'completion-regrounder', 'next role should reset to completion-regrounder after explicit-goal replacement'
215
267
  assert state['continuation_reason'].startswith('User refocused workflow via /cook:'), 'continuation_reason should record the explicit-goal replacement'
268
+ assert 'task_type=completion-workflow' in state['continuation_reason'], 'explicit-goal replacement should persist the selected task_type'
269
+ assert 'evaluation_profile=completion-rubric-v1' in state['continuation_reason'], 'explicit-goal replacement should persist the selected evaluation_profile'
270
+ assert 'critique outcome=accepted critique=none' in state['continuation_reason'], 'explicit-goal replacement should persist the accepted critique outcome even when no critique was derived'
216
271
  assert 'Preserve the richer proposal structure from discussion.' not in state['continuation_reason'], 'session scope should not be merged when analyst output is unavailable'
217
272
  assert 'Keep explicit goals as the mission anchor when they conflict with earlier text.' not in state['continuation_reason'], 'session constraints should not be merged when analyst output is unavailable'
218
273
  assert 'Refresh canonical state from the replacement mission.' not in state['continuation_reason'], 'session acceptance should not be merged when analyst output is unavailable'
@@ -226,35 +281,60 @@ mark_done
226
281
 
227
282
  SESSION_FOUR="$TMPDIR/session-four.jsonl"
228
283
  DISCUSSION_FOUR=$'Scope:\n- Add session-only scope.\n- Restyle widget.\nConstraints:\n- Keep rules.\nAcceptance:\n- Add test.'
229
- EXPLICIT_GOAL_FOUR=$'Mission: Filter scope by mission.\nScope:\n- Keep explicit scope.'
284
+ EXPLICIT_GOAL_FOUR=$'Mission: Filter scope by mission.\nScope:\n- Keep explicit scope.\nCritique:\n- Keep critique notes separate from the mission anchor.\nRisks:\n- Session-only scope could leak into the next workflow round.\nTask type: completion-workflow\nEvaluation profile: completion-rubric-v1'
285
+ EXPLICIT_SNAPSHOT_FOUR="$TMPDIR/context-proposal-explicit-hints.json"
230
286
  write_session "$SESSION_FOUR" "$ROOT" "$DISCUSSION_FOUR"
231
287
 
232
288
  PI_COMPLETION_CONTEXT_PROPOSAL_ACTION=accept \
233
289
  PI_COMPLETION_DISABLE_CONTEXT_PROPOSAL_ANALYST=1 \
290
+ PI_COMPLETION_TEST_CONTEXT_PROPOSAL_PATH="$EXPLICIT_SNAPSHOT_FOUR" \
234
291
  PI_COMPLETION_SKIP_DRIVER_KICKOFF=1 \
235
292
  pi --session "$SESSION_FOUR" -e "$PKG_ROOT" -p "/cook $EXPLICIT_GOAL_FOUR" >/tmp/pi-completion-context-proposal-done-goal.out 2>/tmp/pi-completion-context-proposal-done-goal.err
236
293
 
237
- python3 - <<'PY'
294
+ python3 - "$EXPLICIT_SNAPSHOT_FOUR" <<'PY'
238
295
  import json
296
+ import sys
239
297
  from pathlib import Path
240
298
 
241
299
  mission = 'Filter scope by mission.'
300
+ expected_task_type = 'completion-workflow'
301
+ expected_eval_profile = 'completion-rubric-v1'
242
302
  mission_text = Path('.agent/mission.md').read_text()
303
+ profile = json.loads(Path('.agent/profile.json').read_text())
243
304
  state = json.loads(Path('.agent/state.json').read_text())
244
305
  plan = json.loads(Path('.agent/plan.json').read_text())
245
306
  active = json.loads(Path('.agent/active-slice.json').read_text())
307
+ proposal = json.loads(Path(sys.argv[1]).read_text())
246
308
  continuation_reason = state['continuation_reason']
247
309
 
248
310
  assert mission in mission_text, '.agent/mission.md did not update to the explicit next-round mission anchor'
311
+ assert profile['task_type'] == expected_task_type, 'profile.json task_type mismatch after explicit-goal next-round start'
312
+ assert profile['evaluation_profile'] == expected_eval_profile, 'profile.json evaluation_profile mismatch after explicit-goal next-round start'
249
313
  assert state['mission_anchor'] == mission, 'state.json mission_anchor mismatch after explicit-goal next-round start'
314
+ assert state['task_type'] == expected_task_type, 'state.json task_type mismatch after explicit-goal next-round start'
315
+ assert state['evaluation_profile'] == expected_eval_profile, 'state.json evaluation_profile mismatch after explicit-goal next-round start'
250
316
  assert plan['mission_anchor'] == mission, 'plan.json mission_anchor mismatch after explicit-goal next-round start'
317
+ assert plan['task_type'] == expected_task_type, 'plan.json task_type mismatch after explicit-goal next-round start'
318
+ assert plan['evaluation_profile'] == expected_eval_profile, 'plan.json evaluation_profile mismatch after explicit-goal next-round start'
251
319
  assert active['mission_anchor'] == mission, 'active-slice.json mission_anchor mismatch after explicit-goal next-round start'
320
+ assert active['task_type'] == expected_task_type, 'active-slice.json task_type mismatch after explicit-goal next-round start'
321
+ assert active['evaluation_profile'] == expected_eval_profile, 'active-slice.json evaluation_profile mismatch after explicit-goal next-round start'
322
+ assert proposal['mission'] == mission, 'explicit-goal proposal snapshot should preserve the explicit mission anchor'
323
+ assert proposal['analysis']['taskType'] == expected_task_type, 'explicit-goal proposal snapshot should preserve task_type hints from the goal text'
324
+ assert proposal['analysis']['evaluationProfile'] == expected_eval_profile, 'explicit-goal proposal snapshot should preserve evaluation_profile hints from the goal text'
325
+ assert proposal['analysis']['critique'] == ['Keep critique notes separate from the mission anchor.'], 'explicit-goal proposal snapshot should preserve critique hints from the goal text'
326
+ assert proposal['analysis']['risks'] == ['Session-only scope could leak into the next workflow round.'], 'explicit-goal proposal snapshot should preserve risk hints from the goal text'
327
+ assert 'Critique:' not in proposal['goalText'], 'goalText should keep critique notes separate from mission/scope/constraints/acceptance'
328
+ assert 'Task type:' not in proposal['goalText'], 'goalText should keep task_type hints separate from the mission body'
252
329
  assert state['current_phase'] == 'reground', 'current_phase should reset to reground after explicit-goal next-round start'
253
330
  assert state['continuation_policy'] == 'continue', 'continuation_policy should reset to continue after explicit-goal next-round start'
254
331
  assert state['project_done'] is False, 'project_done should reset to false after explicit-goal next-round start'
255
332
  assert state['requires_reground'] is True, 'requires_reground should reset to true after explicit-goal next-round start'
256
333
  assert state['next_mandatory_role'] == 'completion-regrounder', 'next role should reset to completion-regrounder after explicit-goal next-round start'
257
334
  assert continuation_reason.startswith('User refocused workflow via /cook:'), 'continuation_reason should record the explicit-goal next-round start'
335
+ assert 'task_type=completion-workflow' in continuation_reason, 'explicit-goal next-round start should persist the selected task_type'
336
+ assert 'evaluation_profile=completion-rubric-v1' in continuation_reason, 'explicit-goal next-round start should persist the selected evaluation_profile'
337
+ assert 'Keep critique notes separate from the mission anchor.' in continuation_reason, 'explicit-goal next-round start should persist the accepted critique outcome'
258
338
  assert 'Keep explicit scope.' in continuation_reason, 'explicit scope should remain in the explicit-goal proposal'
259
339
  assert 'Add session-only scope.' not in continuation_reason, 'session-derived scope should not be merged when analyst output is unavailable'
260
340
  assert 'Restyle widget.' not in continuation_reason, 'unrelated session-derived scope should not be merged when analyst output is unavailable'
@@ -283,19 +363,33 @@ import json
283
363
  from pathlib import Path
284
364
 
285
365
  mission = 'Use a proposal analyst to summarize natural discussion before /cook writes canonical state.'
366
+ expected_task_type = 'completion-workflow'
367
+ expected_eval_profile = 'completion-rubric-v1'
286
368
  mission_text = Path('.agent/mission.md').read_text()
369
+ profile = json.loads(Path('.agent/profile.json').read_text())
287
370
  state = json.loads(Path('.agent/state.json').read_text())
288
371
  plan = json.loads(Path('.agent/plan.json').read_text())
289
372
  active = json.loads(Path('.agent/active-slice.json').read_text())
290
373
  continuation_reason = state['continuation_reason']
291
374
 
292
375
  assert mission in mission_text, '.agent/mission.md did not record the analyst-derived mission anchor'
376
+ assert profile['task_type'] == expected_task_type, 'profile.json task_type mismatch after analyst-derived restart'
377
+ assert profile['evaluation_profile'] == expected_eval_profile, 'profile.json evaluation_profile mismatch after analyst-derived restart'
293
378
  assert state['mission_anchor'] == mission, 'state.json mission_anchor mismatch after analyst-derived bootstrap'
379
+ assert state['task_type'] == expected_task_type, 'state.json task_type mismatch after analyst-derived bootstrap'
380
+ assert state['evaluation_profile'] == expected_eval_profile, 'state.json evaluation_profile mismatch after analyst-derived bootstrap'
294
381
  assert plan['mission_anchor'] == mission, 'plan.json mission_anchor mismatch after analyst-derived bootstrap'
382
+ assert plan['task_type'] == expected_task_type, 'plan.json task_type mismatch after analyst-derived bootstrap'
383
+ assert plan['evaluation_profile'] == expected_eval_profile, 'plan.json evaluation_profile mismatch after analyst-derived bootstrap'
295
384
  assert active['mission_anchor'] == mission, 'active-slice.json mission_anchor mismatch after analyst-derived bootstrap'
385
+ assert active['task_type'] == expected_task_type, 'active-slice.json task_type mismatch after analyst-derived bootstrap'
386
+ assert active['evaluation_profile'] == expected_eval_profile, 'active-slice.json evaluation_profile mismatch after analyst-derived bootstrap'
296
387
  assert state['current_phase'] == 'reground', 'current_phase should reset to reground after analyst-derived bootstrap'
297
388
  assert state['next_mandatory_role'] == 'completion-regrounder', 'next role should reset to completion-regrounder after analyst-derived bootstrap'
298
389
  assert continuation_reason.startswith('User refocused workflow via /cook:'), 'continuation_reason should record the analyst-derived restart'
390
+ assert 'task_type=completion-workflow' in continuation_reason, 'analyst-derived restart should persist the selected task_type'
391
+ assert 'evaluation_profile=completion-rubric-v1' in continuation_reason, 'analyst-derived restart should persist the selected evaluation_profile'
392
+ assert 'critique outcome=accepted critique=none' in continuation_reason, 'analyst-derived restart should persist that no critique notes were accepted'
299
393
  assert 'Keep explicit goals anchored.' in continuation_reason, 'analyst-derived scope should be preserved'
300
394
  PY
301
395
 
@@ -307,7 +401,7 @@ git init -q
307
401
 
308
402
  UI_SESSION_START="$TMPDIR/ui-session-start.jsonl"
309
403
  UI_DISCUSSION_START=$'Mission: Replace the crowded selector with a clearer action layout.\nScope:\n- Separate proposal text from actions.\nConstraints:\n- Preserve Start/Edit/Cancel behavior.\nAcceptance:\n- Add regression coverage.'
310
- UI_ANALYST_OUTPUT_START='{"mission":"Replace the crowded selector with a clearer action layout.","scope":["Separate proposal text from actions."],"constraints":["Preserve Start/Edit/Cancel behavior."],"acceptance":["Add regression coverage."],"confidence":0.95}'
404
+ UI_ANALYST_OUTPUT_START='{"mission":"Replace the crowded selector with a clearer action layout.","scope":["Separate proposal text from actions."],"constraints":["Preserve Start/Edit/Cancel behavior."],"acceptance":["Add regression coverage."],"critique":["Keep critique details separate from the editable proposal body."],"risks":["Bundling critique into the action list would make the confirmation harder to scan."],"task_type":"completion-workflow","evaluation_profile":"completion-rubric-v1","possible_noise":["old selector wording"],"confidence":0.95}'
311
405
  UI_SNAPSHOT_START="$TMPDIR/context-proposal-ui-start.json"
312
406
  write_session "$UI_SESSION_START" "$UI_ROOT_START" "$UI_DISCUSSION_START"
313
407
 
@@ -326,13 +420,25 @@ snapshot = json.loads(Path(sys.argv[1]).read_text())
326
420
  state = json.loads(Path('.agent/state.json').read_text())
327
421
 
328
422
  assert snapshot['proposalHeading'] == 'Proposed workflow', 'custom confirmation snapshot should expose a dedicated proposal section'
423
+ assert snapshot['critiqueHeading'] == 'Critique and risks', 'custom confirmation snapshot should expose critique separately from the proposal body'
424
+ assert snapshot['routingHeading'] == 'Routing recommendations', 'custom confirmation snapshot should expose routing recommendations separately from the proposal body'
425
+ assert state['task_type'] == 'completion-workflow', 'start action should preserve canonical task_type'
426
+ assert state['evaluation_profile'] == 'completion-rubric-v1', 'start action should preserve canonical evaluation_profile'
329
427
  assert 'Mission\nReplace the crowded selector with a clearer action layout.' in snapshot['proposalBody'], 'proposal body should be captured separately from the action list'
428
+ assert 'Keep critique details separate from the editable proposal body.' not in snapshot['proposalBody'], 'critique notes should not be embedded in the proposal body'
429
+ assert 'Critique\n- Keep critique details separate from the editable proposal body.' in snapshot['critiqueBody'], 'critique section should render accepted critique notes separately'
430
+ assert 'Risks\n- Bundling critique into the action list would make the confirmation harder to scan.' in snapshot['critiqueBody'], 'critique section should render risk notes separately'
431
+ assert 'Possible noise\n- old selector wording' in snapshot['critiqueBody'], 'critique section should render possible-noise notes separately'
432
+ assert '- task_type: completion-workflow' in snapshot['routingBody'], 'routing section should render the recommended task_type'
433
+ assert '- evaluation_profile: completion-rubric-v1' in snapshot['routingBody'], 'routing section should render the recommended evaluation_profile'
330
434
  assert [action['id'] for action in snapshot['actions']] == ['start', 'edit', 'cancel'], 'custom confirmation actions should stay Start/Edit/Cancel'
331
435
  assert [action['label'] for action in snapshot['actions']] == ['Start', 'Edit', 'Cancel'], 'custom confirmation action labels should be concise'
332
436
  for action in snapshot['actions']:
333
437
  assert 'Replace the crowded selector with a clearer action layout.' not in action['label'], 'proposal mission should not be embedded in action labels'
334
438
  assert 'Separate proposal text from actions.' not in action['description'], 'proposal scope should not be embedded in action descriptions'
335
439
  assert state['mission_anchor'] == 'Replace the crowded selector with a clearer action layout.', 'start action should still accept the proposed mission'
440
+ assert state['continuation_reason'].startswith('User started workflow via /cook:'), 'start action should persist the startup routing outcome in continuation_reason'
441
+ assert 'Keep critique details separate from the editable proposal body.' in state['continuation_reason'], 'start action should persist the accepted critique outcome canonically'
336
442
  PY
337
443
 
338
444
  # Custom confirmation UI: edit should keep the existing editor/parsing flow when the action comes from the new layout.
@@ -343,7 +449,7 @@ git init -q
343
449
 
344
450
  UI_SESSION_EDIT="$TMPDIR/ui-session-edit.jsonl"
345
451
  UI_DISCUSSION_EDIT=$'Mission: Keep editing support in the custom confirmation UI.\nScope:\n- Preserve the proposal editor.\nConstraints:\n- Keep parsing structured proposal text.\nAcceptance:\n- Update the mission anchor after edit.'
346
- UI_ANALYST_OUTPUT_EDIT='{"mission":"Keep editing support in the custom confirmation UI.","scope":["Preserve the proposal editor."],"constraints":["Keep parsing structured proposal text."],"acceptance":["Update the mission anchor after edit."],"confidence":0.94}'
452
+ UI_ANALYST_OUTPUT_EDIT='{"mission":"Keep editing support in the custom confirmation UI.","scope":["Preserve the proposal editor."],"constraints":["Keep parsing structured proposal text."],"acceptance":["Update the mission anchor after edit."],"critique":["Keep critique persistence even when the operator edits the proposal body."],"task_type":"completion-workflow","evaluation_profile":"completion-rubric-v1","confidence":0.94}'
347
453
  UI_EDIT_TEXT=$'Mission: Edited mission from the custom confirmation UI.\nScope:\n- Preserve parsing after edit.\nConstraints:\n- Keep the shared confirmation flow.\nAcceptance:\n- Start the workflow from the edited proposal.'
348
454
  write_session "$UI_SESSION_EDIT" "$UI_ROOT_EDIT" "$UI_DISCUSSION_EDIT"
349
455
 
@@ -363,9 +469,12 @@ active = json.loads(Path('.agent/active-slice.json').read_text())
363
469
  mission = 'Edited mission from the custom confirmation UI.'
364
470
 
365
471
  assert state['mission_anchor'] == mission, 'edit action should still route through the proposal parser and update state.json'
472
+ assert state['task_type'] == 'completion-workflow', 'edit action should preserve canonical task_type'
473
+ assert state['evaluation_profile'] == 'completion-rubric-v1', 'edit action should preserve canonical evaluation_profile'
366
474
  assert plan['mission_anchor'] == mission, 'edit action should still route through the proposal parser and update plan.json'
367
475
  assert active['mission_anchor'] == mission, 'edit action should still route through the proposal parser and update active-slice.json'
368
476
  assert state['current_phase'] == 'reground', 'edit action should still bootstrap/reground the workflow'
477
+ assert 'Keep critique persistence even when the operator edits the proposal body.' in state['continuation_reason'], 'edit action should preserve the accepted critique outcome canonically'
369
478
  PY
370
479
 
371
480
  # Custom confirmation UI: cancel should exit without writing canonical state.
@@ -44,15 +44,26 @@ import json
44
44
  from pathlib import Path
45
45
 
46
46
  new_anchor = 'refocused smoke-test mission with tests and docs parity.'
47
+ expected_task_type = 'completion-workflow'
48
+ expected_eval_profile = 'completion-rubric-v1'
47
49
  mission_text = Path('.agent/mission.md').read_text()
50
+ profile = json.loads(Path('.agent/profile.json').read_text())
48
51
  state = json.loads(Path('.agent/state.json').read_text())
49
52
  plan = json.loads(Path('.agent/plan.json').read_text())
50
53
  active = json.loads(Path('.agent/active-slice.json').read_text())
51
54
 
52
55
  assert new_anchor in mission_text, '.agent/mission.md did not update to the refocused mission anchor'
56
+ assert profile['task_type'] == expected_task_type, 'profile.json task_type mismatch after refocus'
57
+ assert profile['evaluation_profile'] == expected_eval_profile, 'profile.json evaluation_profile mismatch after refocus'
53
58
  assert state['mission_anchor'] == new_anchor, 'state.json mission_anchor mismatch after refocus'
59
+ assert state['task_type'] == expected_task_type, 'state.json task_type mismatch after refocus'
60
+ assert state['evaluation_profile'] == expected_eval_profile, 'state.json evaluation_profile mismatch after refocus'
54
61
  assert plan['mission_anchor'] == new_anchor, 'plan.json mission_anchor mismatch after refocus'
62
+ assert plan['task_type'] == expected_task_type, 'plan.json task_type mismatch after refocus'
63
+ assert plan['evaluation_profile'] == expected_eval_profile, 'plan.json evaluation_profile mismatch after refocus'
55
64
  assert active['mission_anchor'] == new_anchor, 'active-slice.json mission_anchor mismatch after refocus'
65
+ assert active['task_type'] == expected_task_type, 'active-slice.json task_type mismatch after refocus'
66
+ assert active['evaluation_profile'] == expected_eval_profile, 'active-slice.json evaluation_profile mismatch after refocus'
56
67
  assert state['current_phase'] == 'reground', 'state.json current_phase should reset to reground after refocus'
57
68
  assert state['requires_reground'] is True, 'state.json requires_reground should be true after refocus'
58
69
  assert state['next_mandatory_role'] == 'completion-regrounder', 'next_mandatory_role should reset to completion-regrounder'
@@ -4,10 +4,12 @@ set -euo pipefail
4
4
  ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
5
5
  cd "$ROOT"
6
6
 
7
+ echo "[release-check] running startup/refocus/context regressions, including critique-aware /cook confirmation coverage"
7
8
  npm run smoke-test
8
9
  npm run refocus-test
9
10
  npm run context-proposal-test
10
11
  npm run observability-status-test
12
+ npm run rubric-contract-test
11
13
  npm pack --dry-run >/dev/null
12
14
 
13
15
  echo "release check passed"
@@ -0,0 +1,249 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+
4
+ ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
5
+ cd "$ROOT"
6
+
7
+ node <<'NODE'
8
+ const fs = require('node:fs');
9
+
10
+ const read = (file) => fs.readFileSync(file, 'utf8');
11
+ const assertIncludes = (file, snippet) => {
12
+ const text = read(file);
13
+ if (!text.includes(snippet)) {
14
+ console.error(`${file} is missing required rubric-contract text: ${snippet}`);
15
+ process.exit(1);
16
+ }
17
+ };
18
+
19
+ const rubricHeading = '## Structured Evaluation Rubric Foundation';
20
+ const rubricDimensions = [
21
+ 'Contract coverage',
22
+ 'Correctness risk',
23
+ 'Verification evidence',
24
+ 'Docs/state parity',
25
+ ];
26
+ const verdictSnippets = [
27
+ '`pass` — no material issue remains',
28
+ '`concern` — a real caveat or remaining gap exists',
29
+ '`fail` — a blocking issue or contradictory truth exists',
30
+ ];
31
+
32
+ for (const file of [
33
+ 'skills/completion-protocol/SKILL.md',
34
+ 'skills/completion-protocol/references/completion.md',
35
+ ]) {
36
+ assertIncludes(file, rubricHeading);
37
+ assertIncludes(file, 'canonical `task_type` and `evaluation_profile` signaling');
38
+ assertIncludes(file, 'routing metadata only; later slices may still add stricter profile-aware rubric-output enforcement');
39
+ assertIncludes(file, '- `Rubric:`');
40
+ for (const dimension of rubricDimensions) {
41
+ assertIncludes(file, `- \`- ${dimension}: pass|concern|fail - ...\``);
42
+ }
43
+ for (const snippet of verdictSnippets) {
44
+ assertIncludes(file, snippet);
45
+ }
46
+ }
47
+
48
+ for (const file of [
49
+ 'agents/completion-reviewer.md',
50
+ 'agents/completion-auditor.md',
51
+ 'agents/completion-stop-judge.md',
52
+ ]) {
53
+ assertIncludes(file, 'Always emit the shared rubric section');
54
+ assertIncludes(file, 'Use these exact rubric dimension names and verdict words');
55
+ assertIncludes(file, '`evaluation_profile`');
56
+ assertIncludes(file, '`implementation_surfaces`');
57
+ assertIncludes(file, '`verification_commands`');
58
+ assertIncludes(file, '- `Rubric:`');
59
+ for (const dimension of rubricDimensions) {
60
+ assertIncludes(file, `- \`- ${dimension}: pass|concern|fail - ...\``);
61
+ }
62
+ }
63
+
64
+ assertIncludes('README.md', '## Structured evaluation rubrics');
65
+ assertIncludes('README.md', '- `task_type: completion-workflow`');
66
+ assertIncludes('README.md', '- `evaluation_profile: completion-rubric-v1`');
67
+ assertIncludes('README.md', 'kickoff/reminder/resume text and reviewer/auditor/stop-judge evaluation handoffs so downstream roles can rely on canonical signaling instead of prose inference alone.');
68
+ assertIncludes('README.md', 'Reviewer, auditor, and stop-judge dispatch/reminder surfaces now also thread the current active-slice implementation contract');
69
+ assertIncludes('README.md', 'Canonical reviewer/auditor/stop-judge transcription now fails closed on malformed rubric-bearing reports');
70
+ assertIncludes('README.md', 'npm run rubric-contract-test`, which now exercises reviewer, auditor, and stop-judge transcription paths');
71
+ for (const dimension of rubricDimensions) {
72
+ assertIncludes('README.md', `- \`${dimension}\``);
73
+ }
74
+
75
+ assertIncludes('CHANGELOG.md', 'shared structured evaluation-rubric contract');
76
+ assertIncludes('CHANGELOG.md', 'added canonical `task_type: completion-workflow` and `evaluation_profile: completion-rubric-v1` signaling across the packaged control-plane defaults, verifier schema, and kickoff/reminder/resume surfaces');
77
+ assertIncludes('CHANGELOG.md', 'threaded canonical `evaluation_profile` plus the active-slice implementation contract into reviewer/auditor/stop-judge reminder and dispatch surfaces');
78
+ assertIncludes('CHANGELOG.md', 'made reviewer/auditor/stop-judge transcription fail closed on malformed rubric-bearing outputs while still accepting valid reports');
79
+ assertIncludes('extensions/completion/index.ts', 'Canonical routing profile:\\n- task_type: ${taskType}\\n- evaluation_profile: ${evaluationProfile}');
80
+ assertIncludes('extensions/completion/index.ts', '`Task type: ${currentTaskType(snapshot) ?? "(missing)"}`');
81
+ assertIncludes('extensions/completion/index.ts', '`Evaluation profile: ${currentEvaluationProfile(snapshot) ?? "(missing)"}`');
82
+ assertIncludes('extensions/completion/index.ts', '`task_type: ${currentTaskType(snapshot) ?? "(missing)"}`');
83
+ assertIncludes('extensions/completion/index.ts', '`evaluation_profile: ${currentEvaluationProfile(snapshot) ?? "(missing)"}`');
84
+ assertIncludes('extensions/completion/index.ts', 'Canonical evaluation handoff for ${role}:');
85
+ assertIncludes('extensions/completion/index.ts', 'buildEvaluationRoleReminderText(snapshot, nextRole)');
86
+ assertIncludes('extensions/completion/index.ts', 'roleReporting.transcribeCanonicalRoleReport');
87
+ assertIncludes('extensions/completion/role-reporting.js', 'Missing Rubric heading for ${role}.');
88
+ assertIncludes('extensions/completion/role-reporting.js', 'Reviewer output cannot mark \'Acceptable as-is: yes\' when any rubric line is fail.');
89
+ assertIncludes('extensions/completion/role-reporting.js', 'Auditor output must answer \'Tracked and unignored worktree is clean\' with yes or no.');
90
+ assertIncludes('extensions/completion/role-reporting.js', 'Auditor output must answer \'Stale or conflicting canonical state\' with yes or no.');
91
+ assertIncludes('extensions/completion/role-reporting.js', 'Auditor output must answer \'Plan truthfully captures remaining slice backlog\' with yes or no.');
92
+ assertIncludes('extensions/completion/role-reporting.js', 'Stop-judge output cannot mark \'Can the project stop now: yes\' when any rubric line is fail.');
93
+ assertIncludes('extensions/completion/role-reporting.js', 'Stop-judge output must answer \'Docs/config/runbooks match shipped behavior\' with yes or no.');
94
+ assertIncludes('extensions/completion/role-reporting.js', 'Stop-judge output must answer \'Tracked and unignored worktree is clean\' with yes or no.');
95
+ assertIncludes('package.json', '"rubric-contract-test": "bash ./scripts/rubric-contract-test.sh"');
96
+ assertIncludes('scripts/release-check.sh', 'npm run rubric-contract-test');
97
+ assertIncludes('.agent/verify_completion_stop.sh', 'npm run release-check >/dev/null');
98
+ NODE
99
+
100
+ node <<'NODE'
101
+ const fs = require('node:fs');
102
+ const path = require('node:path');
103
+ const {
104
+ parseReportFields,
105
+ transcribeCanonicalRoleReport,
106
+ } = require('./extensions/completion/role-reporting.js');
107
+
108
+ const tempRootBase = path.join(process.cwd(), '.agent', 'tmp');
109
+ fs.mkdirSync(tempRootBase, { recursive: true });
110
+ const tempRoot = fs.mkdtempSync(path.join(tempRootBase, 'rubric-role-reporting-'));
111
+ const snapshotFiles = {
112
+ sliceHistoryPath: path.join(tempRoot, 'slice-history.jsonl'),
113
+ stopHistoryPath: path.join(tempRoot, 'stop-check-history.jsonl'),
114
+ };
115
+ fs.writeFileSync(snapshotFiles.sliceHistoryPath, '');
116
+ fs.writeFileSync(snapshotFiles.stopHistoryPath, '');
117
+
118
+ const readJsonl = (file) => fs.readFileSync(file, 'utf8').split('\n').filter(Boolean).map((line) => JSON.parse(line));
119
+ const assert = (condition, message) => {
120
+ if (!condition) throw new Error(message);
121
+ };
122
+
123
+ const reviewerReport = `MISSION ANCHOR: test mission\nRemaining contract IDs: TEST-CONTRACT\nRubric:\n- Contract coverage: pass - Locked acceptance criteria match the committed slice.\n- Correctness risk: pass - No blocking regression is evident.\n- Verification evidence: pass - Deterministic proof was rerun successfully.\n- Docs/state parity: pass - Docs and canonical state are aligned.\nFindings: none.\nAcceptable as-is: yes\nSmallest follow-up slice: none.`;
124
+
125
+ const reviewerMalformed = `MISSION ANCHOR: test mission\nRemaining contract IDs: TEST-CONTRACT\nRubric:\n- Contract coverage: pass - Locked acceptance criteria match the committed slice.\n- Correctness risk: pass - No blocking regression is evident.\n- Verification evidence: pass - Deterministic proof was rerun successfully.\nFindings: none.\nAcceptable as-is: yes\nSmallest follow-up slice: none.`;
126
+
127
+ const auditorReport = `MISSION ANCHOR: test mission\nRemaining contract IDs: TEST-CONTRACT\nRubric:\n- Contract coverage: pass - The accepted slice remains satisfied on HEAD.\n- Correctness risk: concern - Remaining planned work still keeps the project open.\n- Verification evidence: pass - Verification was rerun for the accepted slice.\n- Docs/state parity: pass - Canonical state can be reconciled truthfully.\nWhy the project is still not done: One planned contract remains after this accepted slice.\nOpen top-level contract IDs: TEST-CONTRACT\nBlocker count: 0\nHigh-value gap count: 1\nTracked and unignored worktree is clean: yes\nWorktree blockers: none\nNext mandatory slice: next-slice\nStale or conflicting canonical state: no\nPlan truthfully captures remaining slice backlog: yes - one planned slice remains.`;
128
+
129
+ const auditorMalformed = `MISSION ANCHOR: test mission\nRemaining contract IDs: TEST-CONTRACT\nWhy the project is still not done: One planned contract remains after this accepted slice.\nOpen top-level contract IDs: TEST-CONTRACT\nBlocker count: 0\nHigh-value gap count: 1\nTracked and unignored worktree is clean: yes\nWorktree blockers: none\nNext mandatory slice: next-slice\nStale or conflicting canonical state: no\nPlan truthfully captures remaining slice backlog: yes - one planned slice remains.`;
130
+
131
+ const stopJudgeReport = `MISSION ANCHOR: test mission\nRemaining contract IDs: none\nRubric:\n- Contract coverage: pass - All implementation slices are accepted on HEAD.\n- Correctness risk: pass - No remaining blocker or high-value gap is evident.\n- Verification evidence: pass - Final verification passes for the current head.\n- Docs/state parity: pass - Docs, config, and canonical state match shipped behavior.\nCan the project stop now: yes\nExact remaining open top-level contract IDs: none\nBlocker count: 0\nHigh-value gap count: 0\nLatest completed slice commit: abcdef1234567890abcdef1234567890abcdef12\nDocs/config/runbooks match shipped behavior: yes\nTracked and unignored worktree is clean: yes\nBrief justification: Current HEAD satisfies the stop criteria.`;
132
+
133
+ const stopJudgeMalformed = `MISSION ANCHOR: test mission\nRemaining contract IDs: none\nRubric:\n- Contract coverage: fail - A blocking contract is still open.\n- Correctness risk: pass - No additional risk was found.\n- Verification evidence: pass - Verification still passes.\n- Docs/state parity: pass - Docs and state match.\nCan the project stop now: yes\nExact remaining open top-level contract IDs: TEST-CONTRACT\nBlocker count: 1\nHigh-value gap count: 0\nLatest completed slice commit: abcdef1234567890abcdef1234567890abcdef12\nDocs/config/runbooks match shipped behavior: yes\nTracked and unignored worktree is clean: yes\nBrief justification: This should be rejected because the rubric blocks stop.`;
134
+
135
+ const auditorMalformedYesNo = `MISSION ANCHOR: test mission\nRemaining contract IDs: TEST-CONTRACT\nRubric:\n- Contract coverage: pass - The accepted slice remains satisfied on HEAD.\n- Correctness risk: concern - Remaining planned work still keeps the project open.\n- Verification evidence: pass - Verification was rerun for the accepted slice.\n- Docs/state parity: pass - Canonical state can be reconciled truthfully.\nWhy the project is still not done: One planned contract remains after this accepted slice.\nOpen top-level contract IDs: TEST-CONTRACT\nBlocker count: 0\nHigh-value gap count: 1\nTracked and unignored worktree is clean: maybe\nWorktree blockers: none\nNext mandatory slice: next-slice\nStale or conflicting canonical state: perhaps\nPlan truthfully captures remaining slice backlog: sorta.`;
136
+
137
+ const stopJudgeMalformedYesNo = `MISSION ANCHOR: test mission\nRemaining contract IDs: none\nRubric:\n- Contract coverage: pass - All implementation slices are accepted on HEAD.\n- Correctness risk: pass - No remaining blocker or high-value gap is evident.\n- Verification evidence: pass - Final verification passes for the current head.\n- Docs/state parity: pass - Docs, config, and canonical state match shipped behavior.\nCan the project stop now: no\nExact remaining open top-level contract IDs: TEST-CONTRACT\nBlocker count: 0\nHigh-value gap count: 1\nLatest completed slice commit: abcdef1234567890abcdef1234567890abcdef12\nDocs/config/runbooks match shipped behavior: maybe\nTracked and unignored worktree is clean: perhaps\nBrief justification: This should be rejected because malformed yes/no-style fields must fail closed.`;
138
+
139
+ (async () => {
140
+ const reviewed = await transcribeCanonicalRoleReport({
141
+ role: 'completion-reviewer',
142
+ output: reviewerReport,
143
+ reportFields: parseReportFields(reviewerReport),
144
+ snapshotFiles,
145
+ headSha: '1111111111111111111111111111111111111111',
146
+ sliceId: 'slice-review',
147
+ recordedAt: 1,
148
+ });
149
+ assert(reviewed.errors.length === 0, `reviewer valid report should transcribe cleanly: ${reviewed.errors.join(' | ')}`);
150
+ assert(reviewed.appended.includes('reviewed:slice-review'), 'reviewer transcription should append reviewed record');
151
+ assert(readJsonl(snapshotFiles.sliceHistoryPath).length === 1, 'reviewer transcription should create one slice-history record');
152
+
153
+ const reviewerRejected = await transcribeCanonicalRoleReport({
154
+ role: 'completion-reviewer',
155
+ output: reviewerMalformed,
156
+ reportFields: parseReportFields(reviewerMalformed),
157
+ snapshotFiles,
158
+ headSha: '2222222222222222222222222222222222222222',
159
+ sliceId: 'slice-review',
160
+ recordedAt: 2,
161
+ });
162
+ assert(reviewerRejected.errors.some((error) => error.includes('Docs/state parity')), 'reviewer malformed report should be rejected for missing rubric line');
163
+ assert(readJsonl(snapshotFiles.sliceHistoryPath).length === 1, 'rejected reviewer report must not append history');
164
+
165
+ const audited = await transcribeCanonicalRoleReport({
166
+ role: 'completion-auditor',
167
+ output: auditorReport,
168
+ reportFields: parseReportFields(auditorReport),
169
+ snapshotFiles,
170
+ headSha: '3333333333333333333333333333333333333333',
171
+ sliceId: 'slice-audit',
172
+ recordedAt: 3,
173
+ });
174
+ assert(audited.errors.length === 0, `auditor valid report should transcribe cleanly: ${audited.errors.join(' | ')}`);
175
+ assert(audited.appended.includes('audited:slice-audit'), 'auditor transcription should append audited record');
176
+ assert(readJsonl(snapshotFiles.sliceHistoryPath).length === 2, 'auditor transcription should append a second slice-history record');
177
+
178
+ const auditorRejected = await transcribeCanonicalRoleReport({
179
+ role: 'completion-auditor',
180
+ output: auditorMalformed,
181
+ reportFields: parseReportFields(auditorMalformed),
182
+ snapshotFiles,
183
+ headSha: '4444444444444444444444444444444444444444',
184
+ sliceId: 'slice-audit',
185
+ recordedAt: 4,
186
+ });
187
+ assert(auditorRejected.errors.some((error) => error.includes('Missing Rubric heading')), 'auditor malformed report should be rejected without rubric heading');
188
+ assert(readJsonl(snapshotFiles.sliceHistoryPath).length === 2, 'rejected auditor report must not append history');
189
+
190
+ const judged = await transcribeCanonicalRoleReport({
191
+ role: 'completion-stop-judge',
192
+ output: stopJudgeReport,
193
+ reportFields: parseReportFields(stopJudgeReport),
194
+ snapshotFiles,
195
+ headSha: '5555555555555555555555555555555555555555',
196
+ recordedAt: 5,
197
+ });
198
+ assert(judged.errors.length === 0, `stop-judge valid report should transcribe cleanly: ${judged.errors.join(' | ')}`);
199
+ assert(judged.appended.includes('judgment:555555555555'), 'stop-judge transcription should append judgment record');
200
+ assert(readJsonl(snapshotFiles.stopHistoryPath).length === 1, 'stop-judge transcription should create one judgment record');
201
+
202
+ const judgeRejected = await transcribeCanonicalRoleReport({
203
+ role: 'completion-stop-judge',
204
+ output: stopJudgeMalformed,
205
+ reportFields: parseReportFields(stopJudgeMalformed),
206
+ snapshotFiles,
207
+ headSha: '6666666666666666666666666666666666666666',
208
+ recordedAt: 6,
209
+ });
210
+ assert(judgeRejected.errors.some((error) => error.includes("Can the project stop now: yes")), 'stop-judge malformed report should be rejected when fail rubric contradicts yes verdict');
211
+ assert(readJsonl(snapshotFiles.stopHistoryPath).length === 1, 'rejected stop-judge report must not append judgment history');
212
+
213
+ const auditorYesNoRejected = await transcribeCanonicalRoleReport({
214
+ role: 'completion-auditor',
215
+ output: auditorMalformedYesNo,
216
+ reportFields: parseReportFields(auditorMalformedYesNo),
217
+ snapshotFiles,
218
+ headSha: '7777777777777777777777777777777777777777',
219
+ sliceId: 'slice-audit',
220
+ recordedAt: 7,
221
+ });
222
+ assert(auditorYesNoRejected.errors.some((error) => error.includes("Tracked and unignored worktree is clean")), 'auditor malformed yes/no report should reject invalid worktree cleanliness values');
223
+ assert(auditorYesNoRejected.errors.some((error) => error.includes("Stale or conflicting canonical state")), 'auditor malformed yes/no report should reject invalid canonical-state values');
224
+ assert(auditorYesNoRejected.errors.some((error) => error.includes("Plan truthfully captures remaining slice backlog")), 'auditor malformed yes/no report should reject invalid backlog-truth values');
225
+ assert(readJsonl(snapshotFiles.sliceHistoryPath).length === 2, 'rejected auditor yes/no report must not append history');
226
+
227
+ const stopJudgeYesNoRejected = await transcribeCanonicalRoleReport({
228
+ role: 'completion-stop-judge',
229
+ output: stopJudgeMalformedYesNo,
230
+ reportFields: parseReportFields(stopJudgeMalformedYesNo),
231
+ snapshotFiles,
232
+ headSha: '8888888888888888888888888888888888888888',
233
+ recordedAt: 8,
234
+ });
235
+ assert(stopJudgeYesNoRejected.errors.some((error) => error.includes("Docs/config/runbooks match shipped behavior")), 'stop-judge malformed yes/no report should reject invalid docs parity values');
236
+ assert(stopJudgeYesNoRejected.errors.some((error) => error.includes("Tracked and unignored worktree is clean")), 'stop-judge malformed yes/no report should reject invalid worktree cleanliness values');
237
+ assert(readJsonl(snapshotFiles.stopHistoryPath).length === 1, 'rejected stop-judge yes/no report must not append judgment history');
238
+
239
+ fs.rmSync(tempRoot, { recursive: true, force: true });
240
+ })().catch((error) => {
241
+ try {
242
+ fs.rmSync(tempRoot, { recursive: true, force: true });
243
+ } catch {}
244
+ console.error(error instanceof Error ? error.message : String(error));
245
+ process.exit(1);
246
+ });
247
+ NODE
248
+
249
+ echo "rubric contract test passed"