@linimin/pi-letscook 0.1.30 → 0.1.32
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +23 -0
- package/README.md +48 -1
- package/agents/completion-auditor.md +17 -0
- package/agents/completion-reviewer.md +17 -0
- package/agents/completion-stop-judge.md +17 -0
- package/extensions/completion/index.ts +912 -205
- package/extensions/completion/role-reporting.js +356 -0
- package/package.json +2 -1
- package/scripts/context-proposal-test.sh +115 -6
- package/scripts/refocus-test.sh +11 -0
- package/scripts/release-check.sh +2 -0
- package/scripts/rubric-contract-test.sh +249 -0
- package/scripts/smoke-test.sh +175 -23
- package/skills/completion-protocol/SKILL.md +39 -0
- package/skills/completion-protocol/references/completion.md +71 -0
|
@@ -120,30 +120,57 @@ PY
|
|
|
120
120
|
# No workflow yet: /cook with no goal should infer from recent discussion through analyst output.
|
|
121
121
|
SESSION_ONE="$TMPDIR/session-one.jsonl"
|
|
122
122
|
DISCUSSION_ONE="$DISCUSSION_ZERO"
|
|
123
|
-
ANALYST_OUTPUT_ONE='{"mission":"Remove the completion status line while keeping the completion widget.","scope":["Keep the non-running completion widget.","Suppress the widget while a completion role is active."],"constraints":["Do not reintroduce any other completion status surface."],"acceptance":["Update README to match the shipped behavior.","Keep observability regression coverage truthful."],"confidence":0.94}'
|
|
123
|
+
ANALYST_OUTPUT_ONE='{"mission":"Remove the completion status line while keeping the completion widget.","scope":["Keep the non-running completion widget.","Suppress the widget while a completion role is active."],"constraints":["Do not reintroduce any other completion status surface."],"acceptance":["Update README to match the shipped behavior.","Keep observability regression coverage truthful."],"critique":["Keep critique separate from the mission anchor so startup analysis does not rewrite the workflow goal."],"risks":["Stale widget-removal discussion could broaden the startup plan if it gets treated as mission text."],"task_type":"completion-workflow","evaluation_profile":"completion-rubric-v1","possible_noise":["older widget restyle ideas"],"confidence":0.94}'
|
|
124
|
+
DISCUSSION_SNAPSHOT_ONE="$TMPDIR/context-proposal-discussion-hints.json"
|
|
124
125
|
write_session "$SESSION_ONE" "$ROOT" "$DISCUSSION_ONE"
|
|
125
126
|
|
|
126
127
|
PI_COMPLETION_CONTEXT_PROPOSAL_ACTION=accept \
|
|
127
128
|
PI_COMPLETION_CONTEXT_PROPOSAL_ANALYST_OUTPUT="$ANALYST_OUTPUT_ONE" \
|
|
129
|
+
PI_COMPLETION_TEST_CONTEXT_PROPOSAL_PATH="$DISCUSSION_SNAPSHOT_ONE" \
|
|
128
130
|
PI_COMPLETION_SKIP_DRIVER_KICKOFF=1 \
|
|
129
131
|
pi --session "$SESSION_ONE" -e "$PKG_ROOT" -p "/cook" >/tmp/pi-completion-context-proposal-bootstrap.out 2>/tmp/pi-completion-context-proposal-bootstrap.err
|
|
130
132
|
|
|
131
|
-
python3 - <<'PY'
|
|
133
|
+
python3 - "$DISCUSSION_SNAPSHOT_ONE" <<'PY'
|
|
132
134
|
import json
|
|
135
|
+
import sys
|
|
133
136
|
from pathlib import Path
|
|
134
137
|
|
|
135
138
|
mission = 'Remove the completion status line while keeping the completion widget.'
|
|
139
|
+
expected_task_type = 'completion-workflow'
|
|
140
|
+
expected_eval_profile = 'completion-rubric-v1'
|
|
136
141
|
mission_text = Path('.agent/mission.md').read_text()
|
|
142
|
+
profile = json.loads(Path('.agent/profile.json').read_text())
|
|
137
143
|
state = json.loads(Path('.agent/state.json').read_text())
|
|
138
144
|
plan = json.loads(Path('.agent/plan.json').read_text())
|
|
139
145
|
active = json.loads(Path('.agent/active-slice.json').read_text())
|
|
146
|
+
proposal = json.loads(Path(sys.argv[1]).read_text())
|
|
140
147
|
|
|
141
148
|
assert mission in mission_text, '.agent/mission.md did not record the analyst-derived mission anchor'
|
|
149
|
+
assert profile['task_type'] == expected_task_type, 'profile.json task_type mismatch after analyst-derived bootstrap'
|
|
150
|
+
assert profile['evaluation_profile'] == expected_eval_profile, 'profile.json evaluation_profile mismatch after analyst-derived bootstrap'
|
|
142
151
|
assert state['mission_anchor'] == mission, 'state.json mission_anchor mismatch after analyst-derived bootstrap'
|
|
152
|
+
assert state['task_type'] == expected_task_type, 'state.json task_type mismatch after analyst-derived bootstrap'
|
|
153
|
+
assert state['evaluation_profile'] == expected_eval_profile, 'state.json evaluation_profile mismatch after analyst-derived bootstrap'
|
|
143
154
|
assert plan['mission_anchor'] == mission, 'plan.json mission_anchor mismatch after analyst-derived bootstrap'
|
|
155
|
+
assert plan['task_type'] == expected_task_type, 'plan.json task_type mismatch after analyst-derived bootstrap'
|
|
156
|
+
assert plan['evaluation_profile'] == expected_eval_profile, 'plan.json evaluation_profile mismatch after analyst-derived bootstrap'
|
|
144
157
|
assert active['mission_anchor'] == mission, 'active-slice.json mission_anchor mismatch after analyst-derived bootstrap'
|
|
158
|
+
assert active['task_type'] == expected_task_type, 'active-slice.json task_type mismatch after analyst-derived bootstrap'
|
|
159
|
+
assert active['evaluation_profile'] == expected_eval_profile, 'active-slice.json evaluation_profile mismatch after analyst-derived bootstrap'
|
|
160
|
+
assert proposal['mission'] == mission, 'discussion-only proposal snapshot should keep the inferred mission anchor'
|
|
161
|
+
assert proposal['analysis']['taskType'] == expected_task_type, 'discussion-only proposal snapshot should expose task_type hints separately'
|
|
162
|
+
assert proposal['analysis']['evaluationProfile'] == expected_eval_profile, 'discussion-only proposal snapshot should expose evaluation_profile hints separately'
|
|
163
|
+
assert proposal['analysis']['critique'] == ['Keep critique separate from the mission anchor so startup analysis does not rewrite the workflow goal.'], 'discussion-only proposal snapshot should preserve critique hints'
|
|
164
|
+
assert proposal['analysis']['risks'] == ['Stale widget-removal discussion could broaden the startup plan if it gets treated as mission text.'], 'discussion-only proposal snapshot should preserve risk hints'
|
|
165
|
+
assert proposal['analysis']['possibleNoise'] == ['older widget restyle ideas'], 'discussion-only proposal snapshot should preserve possible_noise hints'
|
|
166
|
+
assert 'Critique:' not in proposal['goalText'], 'goalText should keep critique separate from mission/scope/constraints/acceptance'
|
|
167
|
+
assert 'Task type:' not in proposal['goalText'], 'goalText should keep task_type hints separate from the mission body'
|
|
145
168
|
assert state['current_phase'] == 'reground', 'state.json current_phase should start at reground after analyst-derived bootstrap'
|
|
146
169
|
assert state['next_mandatory_role'] == 'completion-regrounder', 'next_mandatory_role should start at completion-regrounder after analyst-derived bootstrap'
|
|
170
|
+
assert state['continuation_reason'].startswith('User started workflow via /cook:'), 'initial startup should record the accepted startup routing in continuation_reason'
|
|
171
|
+
assert 'task_type=completion-workflow' in state['continuation_reason'], 'initial startup should persist the selected task_type in continuation_reason'
|
|
172
|
+
assert 'evaluation_profile=completion-rubric-v1' in state['continuation_reason'], 'initial startup should persist the selected evaluation_profile in continuation_reason'
|
|
173
|
+
assert 'Keep critique separate from the mission anchor so startup analysis does not rewrite the workflow goal.' in state['continuation_reason'], 'initial startup should persist the accepted critique outcome in continuation_reason'
|
|
147
174
|
PY
|
|
148
175
|
|
|
149
176
|
# Completed workflow: /cook with no goal should infer the next round from recent discussion through analyst output.
|
|
@@ -164,21 +191,35 @@ import json
|
|
|
164
191
|
from pathlib import Path
|
|
165
192
|
|
|
166
193
|
mission = 'Ship the next workflow round for richer context-derived /cook startup.'
|
|
194
|
+
expected_task_type = 'completion-workflow'
|
|
195
|
+
expected_eval_profile = 'completion-rubric-v1'
|
|
167
196
|
mission_text = Path('.agent/mission.md').read_text()
|
|
197
|
+
profile = json.loads(Path('.agent/profile.json').read_text())
|
|
168
198
|
state = json.loads(Path('.agent/state.json').read_text())
|
|
169
199
|
plan = json.loads(Path('.agent/plan.json').read_text())
|
|
170
200
|
active = json.loads(Path('.agent/active-slice.json').read_text())
|
|
171
201
|
|
|
172
202
|
assert mission in mission_text, '.agent/mission.md did not update to the next-round context-derived mission anchor'
|
|
203
|
+
assert profile['task_type'] == expected_task_type, 'profile.json task_type mismatch after next-round startup'
|
|
204
|
+
assert profile['evaluation_profile'] == expected_eval_profile, 'profile.json evaluation_profile mismatch after next-round startup'
|
|
173
205
|
assert state['mission_anchor'] == mission, 'state.json mission_anchor mismatch after starting the next workflow round'
|
|
206
|
+
assert state['task_type'] == expected_task_type, 'state.json task_type mismatch after starting the next workflow round'
|
|
207
|
+
assert state['evaluation_profile'] == expected_eval_profile, 'state.json evaluation_profile mismatch after starting the next workflow round'
|
|
174
208
|
assert plan['mission_anchor'] == mission, 'plan.json mission_anchor mismatch after starting the next workflow round'
|
|
209
|
+
assert plan['task_type'] == expected_task_type, 'plan.json task_type mismatch after starting the next workflow round'
|
|
210
|
+
assert plan['evaluation_profile'] == expected_eval_profile, 'plan.json evaluation_profile mismatch after starting the next workflow round'
|
|
175
211
|
assert active['mission_anchor'] == mission, 'active-slice.json mission_anchor mismatch after starting the next workflow round'
|
|
212
|
+
assert active['task_type'] == expected_task_type, 'active-slice.json task_type mismatch after starting the next workflow round'
|
|
213
|
+
assert active['evaluation_profile'] == expected_eval_profile, 'active-slice.json evaluation_profile mismatch after starting the next workflow round'
|
|
176
214
|
assert state['current_phase'] == 'reground', 'state.json current_phase should reset to reground for the next workflow round'
|
|
177
215
|
assert state['continuation_policy'] == 'continue', 'continuation_policy should reset to continue for the next workflow round'
|
|
178
216
|
assert state['requires_reground'] is True, 'requires_reground should reset to true for the next workflow round'
|
|
179
217
|
assert state['project_done'] is False, 'project_done should reset to false for the next workflow round'
|
|
180
218
|
assert state['next_mandatory_role'] == 'completion-regrounder', 'next_mandatory_role should reset to completion-regrounder for the next workflow round'
|
|
181
219
|
assert state['continuation_reason'].startswith('User refocused workflow via /cook:'), 'continuation_reason should record the next-round refocus'
|
|
220
|
+
assert 'task_type=completion-workflow' in state['continuation_reason'], 'next-round refocus should persist the selected task_type'
|
|
221
|
+
assert 'evaluation_profile=completion-rubric-v1' in state['continuation_reason'], 'next-round refocus should persist the selected evaluation_profile'
|
|
222
|
+
assert 'critique outcome=accepted critique=none' in state['continuation_reason'], 'next-round refocus should persist that no critique notes were accepted'
|
|
182
223
|
assert plan['plan_basis'] == 'user_refocus', 'plan_basis should reset to user_refocus for the next workflow round'
|
|
183
224
|
assert active['status'] == 'idle', 'active-slice should reset to idle for the next workflow round'
|
|
184
225
|
PY
|
|
@@ -200,19 +241,33 @@ import json
|
|
|
200
241
|
from pathlib import Path
|
|
201
242
|
|
|
202
243
|
mission = 'Explicit replacement mission for the active workflow.'
|
|
244
|
+
expected_task_type = 'completion-workflow'
|
|
245
|
+
expected_eval_profile = 'completion-rubric-v1'
|
|
203
246
|
mission_text = Path('.agent/mission.md').read_text()
|
|
247
|
+
profile = json.loads(Path('.agent/profile.json').read_text())
|
|
204
248
|
state = json.loads(Path('.agent/state.json').read_text())
|
|
205
249
|
plan = json.loads(Path('.agent/plan.json').read_text())
|
|
206
250
|
active = json.loads(Path('.agent/active-slice.json').read_text())
|
|
207
251
|
|
|
208
252
|
assert mission in mission_text, '.agent/mission.md did not update to the explicit replacement mission anchor'
|
|
253
|
+
assert profile['task_type'] == expected_task_type, 'profile.json task_type mismatch after explicit-goal replacement'
|
|
254
|
+
assert profile['evaluation_profile'] == expected_eval_profile, 'profile.json evaluation_profile mismatch after explicit-goal replacement'
|
|
209
255
|
assert state['mission_anchor'] == mission, 'state.json mission_anchor mismatch after explicit-goal replacement'
|
|
256
|
+
assert state['task_type'] == expected_task_type, 'state.json task_type mismatch after explicit-goal replacement'
|
|
257
|
+
assert state['evaluation_profile'] == expected_eval_profile, 'state.json evaluation_profile mismatch after explicit-goal replacement'
|
|
210
258
|
assert plan['mission_anchor'] == mission, 'plan.json mission_anchor mismatch after explicit-goal replacement'
|
|
259
|
+
assert plan['task_type'] == expected_task_type, 'plan.json task_type mismatch after explicit-goal replacement'
|
|
260
|
+
assert plan['evaluation_profile'] == expected_eval_profile, 'plan.json evaluation_profile mismatch after explicit-goal replacement'
|
|
211
261
|
assert active['mission_anchor'] == mission, 'active-slice.json mission_anchor mismatch after explicit-goal replacement'
|
|
262
|
+
assert active['task_type'] == expected_task_type, 'active-slice.json task_type mismatch after explicit-goal replacement'
|
|
263
|
+
assert active['evaluation_profile'] == expected_eval_profile, 'active-slice.json evaluation_profile mismatch after explicit-goal replacement'
|
|
212
264
|
assert state['current_phase'] == 'reground', 'current_phase should reset to reground after explicit-goal replacement'
|
|
213
265
|
assert state['continuation_policy'] == 'continue', 'continuation_policy should stay continue after explicit-goal replacement'
|
|
214
266
|
assert state['next_mandatory_role'] == 'completion-regrounder', 'next role should reset to completion-regrounder after explicit-goal replacement'
|
|
215
267
|
assert state['continuation_reason'].startswith('User refocused workflow via /cook:'), 'continuation_reason should record the explicit-goal replacement'
|
|
268
|
+
assert 'task_type=completion-workflow' in state['continuation_reason'], 'explicit-goal replacement should persist the selected task_type'
|
|
269
|
+
assert 'evaluation_profile=completion-rubric-v1' in state['continuation_reason'], 'explicit-goal replacement should persist the selected evaluation_profile'
|
|
270
|
+
assert 'critique outcome=accepted critique=none' in state['continuation_reason'], 'explicit-goal replacement should persist the accepted critique outcome even when no critique was derived'
|
|
216
271
|
assert 'Preserve the richer proposal structure from discussion.' not in state['continuation_reason'], 'session scope should not be merged when analyst output is unavailable'
|
|
217
272
|
assert 'Keep explicit goals as the mission anchor when they conflict with earlier text.' not in state['continuation_reason'], 'session constraints should not be merged when analyst output is unavailable'
|
|
218
273
|
assert 'Refresh canonical state from the replacement mission.' not in state['continuation_reason'], 'session acceptance should not be merged when analyst output is unavailable'
|
|
@@ -226,35 +281,60 @@ mark_done
|
|
|
226
281
|
|
|
227
282
|
SESSION_FOUR="$TMPDIR/session-four.jsonl"
|
|
228
283
|
DISCUSSION_FOUR=$'Scope:\n- Add session-only scope.\n- Restyle widget.\nConstraints:\n- Keep rules.\nAcceptance:\n- Add test.'
|
|
229
|
-
EXPLICIT_GOAL_FOUR=$'Mission: Filter scope by mission.\nScope:\n- Keep explicit scope
|
|
284
|
+
EXPLICIT_GOAL_FOUR=$'Mission: Filter scope by mission.\nScope:\n- Keep explicit scope.\nCritique:\n- Keep critique notes separate from the mission anchor.\nRisks:\n- Session-only scope could leak into the next workflow round.\nTask type: completion-workflow\nEvaluation profile: completion-rubric-v1'
|
|
285
|
+
EXPLICIT_SNAPSHOT_FOUR="$TMPDIR/context-proposal-explicit-hints.json"
|
|
230
286
|
write_session "$SESSION_FOUR" "$ROOT" "$DISCUSSION_FOUR"
|
|
231
287
|
|
|
232
288
|
PI_COMPLETION_CONTEXT_PROPOSAL_ACTION=accept \
|
|
233
289
|
PI_COMPLETION_DISABLE_CONTEXT_PROPOSAL_ANALYST=1 \
|
|
290
|
+
PI_COMPLETION_TEST_CONTEXT_PROPOSAL_PATH="$EXPLICIT_SNAPSHOT_FOUR" \
|
|
234
291
|
PI_COMPLETION_SKIP_DRIVER_KICKOFF=1 \
|
|
235
292
|
pi --session "$SESSION_FOUR" -e "$PKG_ROOT" -p "/cook $EXPLICIT_GOAL_FOUR" >/tmp/pi-completion-context-proposal-done-goal.out 2>/tmp/pi-completion-context-proposal-done-goal.err
|
|
236
293
|
|
|
237
|
-
python3 - <<'PY'
|
|
294
|
+
python3 - "$EXPLICIT_SNAPSHOT_FOUR" <<'PY'
|
|
238
295
|
import json
|
|
296
|
+
import sys
|
|
239
297
|
from pathlib import Path
|
|
240
298
|
|
|
241
299
|
mission = 'Filter scope by mission.'
|
|
300
|
+
expected_task_type = 'completion-workflow'
|
|
301
|
+
expected_eval_profile = 'completion-rubric-v1'
|
|
242
302
|
mission_text = Path('.agent/mission.md').read_text()
|
|
303
|
+
profile = json.loads(Path('.agent/profile.json').read_text())
|
|
243
304
|
state = json.loads(Path('.agent/state.json').read_text())
|
|
244
305
|
plan = json.loads(Path('.agent/plan.json').read_text())
|
|
245
306
|
active = json.loads(Path('.agent/active-slice.json').read_text())
|
|
307
|
+
proposal = json.loads(Path(sys.argv[1]).read_text())
|
|
246
308
|
continuation_reason = state['continuation_reason']
|
|
247
309
|
|
|
248
310
|
assert mission in mission_text, '.agent/mission.md did not update to the explicit next-round mission anchor'
|
|
311
|
+
assert profile['task_type'] == expected_task_type, 'profile.json task_type mismatch after explicit-goal next-round start'
|
|
312
|
+
assert profile['evaluation_profile'] == expected_eval_profile, 'profile.json evaluation_profile mismatch after explicit-goal next-round start'
|
|
249
313
|
assert state['mission_anchor'] == mission, 'state.json mission_anchor mismatch after explicit-goal next-round start'
|
|
314
|
+
assert state['task_type'] == expected_task_type, 'state.json task_type mismatch after explicit-goal next-round start'
|
|
315
|
+
assert state['evaluation_profile'] == expected_eval_profile, 'state.json evaluation_profile mismatch after explicit-goal next-round start'
|
|
250
316
|
assert plan['mission_anchor'] == mission, 'plan.json mission_anchor mismatch after explicit-goal next-round start'
|
|
317
|
+
assert plan['task_type'] == expected_task_type, 'plan.json task_type mismatch after explicit-goal next-round start'
|
|
318
|
+
assert plan['evaluation_profile'] == expected_eval_profile, 'plan.json evaluation_profile mismatch after explicit-goal next-round start'
|
|
251
319
|
assert active['mission_anchor'] == mission, 'active-slice.json mission_anchor mismatch after explicit-goal next-round start'
|
|
320
|
+
assert active['task_type'] == expected_task_type, 'active-slice.json task_type mismatch after explicit-goal next-round start'
|
|
321
|
+
assert active['evaluation_profile'] == expected_eval_profile, 'active-slice.json evaluation_profile mismatch after explicit-goal next-round start'
|
|
322
|
+
assert proposal['mission'] == mission, 'explicit-goal proposal snapshot should preserve the explicit mission anchor'
|
|
323
|
+
assert proposal['analysis']['taskType'] == expected_task_type, 'explicit-goal proposal snapshot should preserve task_type hints from the goal text'
|
|
324
|
+
assert proposal['analysis']['evaluationProfile'] == expected_eval_profile, 'explicit-goal proposal snapshot should preserve evaluation_profile hints from the goal text'
|
|
325
|
+
assert proposal['analysis']['critique'] == ['Keep critique notes separate from the mission anchor.'], 'explicit-goal proposal snapshot should preserve critique hints from the goal text'
|
|
326
|
+
assert proposal['analysis']['risks'] == ['Session-only scope could leak into the next workflow round.'], 'explicit-goal proposal snapshot should preserve risk hints from the goal text'
|
|
327
|
+
assert 'Critique:' not in proposal['goalText'], 'goalText should keep critique notes separate from mission/scope/constraints/acceptance'
|
|
328
|
+
assert 'Task type:' not in proposal['goalText'], 'goalText should keep task_type hints separate from the mission body'
|
|
252
329
|
assert state['current_phase'] == 'reground', 'current_phase should reset to reground after explicit-goal next-round start'
|
|
253
330
|
assert state['continuation_policy'] == 'continue', 'continuation_policy should reset to continue after explicit-goal next-round start'
|
|
254
331
|
assert state['project_done'] is False, 'project_done should reset to false after explicit-goal next-round start'
|
|
255
332
|
assert state['requires_reground'] is True, 'requires_reground should reset to true after explicit-goal next-round start'
|
|
256
333
|
assert state['next_mandatory_role'] == 'completion-regrounder', 'next role should reset to completion-regrounder after explicit-goal next-round start'
|
|
257
334
|
assert continuation_reason.startswith('User refocused workflow via /cook:'), 'continuation_reason should record the explicit-goal next-round start'
|
|
335
|
+
assert 'task_type=completion-workflow' in continuation_reason, 'explicit-goal next-round start should persist the selected task_type'
|
|
336
|
+
assert 'evaluation_profile=completion-rubric-v1' in continuation_reason, 'explicit-goal next-round start should persist the selected evaluation_profile'
|
|
337
|
+
assert 'Keep critique notes separate from the mission anchor.' in continuation_reason, 'explicit-goal next-round start should persist the accepted critique outcome'
|
|
258
338
|
assert 'Keep explicit scope.' in continuation_reason, 'explicit scope should remain in the explicit-goal proposal'
|
|
259
339
|
assert 'Add session-only scope.' not in continuation_reason, 'session-derived scope should not be merged when analyst output is unavailable'
|
|
260
340
|
assert 'Restyle widget.' not in continuation_reason, 'unrelated session-derived scope should not be merged when analyst output is unavailable'
|
|
@@ -283,19 +363,33 @@ import json
|
|
|
283
363
|
from pathlib import Path
|
|
284
364
|
|
|
285
365
|
mission = 'Use a proposal analyst to summarize natural discussion before /cook writes canonical state.'
|
|
366
|
+
expected_task_type = 'completion-workflow'
|
|
367
|
+
expected_eval_profile = 'completion-rubric-v1'
|
|
286
368
|
mission_text = Path('.agent/mission.md').read_text()
|
|
369
|
+
profile = json.loads(Path('.agent/profile.json').read_text())
|
|
287
370
|
state = json.loads(Path('.agent/state.json').read_text())
|
|
288
371
|
plan = json.loads(Path('.agent/plan.json').read_text())
|
|
289
372
|
active = json.loads(Path('.agent/active-slice.json').read_text())
|
|
290
373
|
continuation_reason = state['continuation_reason']
|
|
291
374
|
|
|
292
375
|
assert mission in mission_text, '.agent/mission.md did not record the analyst-derived mission anchor'
|
|
376
|
+
assert profile['task_type'] == expected_task_type, 'profile.json task_type mismatch after analyst-derived restart'
|
|
377
|
+
assert profile['evaluation_profile'] == expected_eval_profile, 'profile.json evaluation_profile mismatch after analyst-derived restart'
|
|
293
378
|
assert state['mission_anchor'] == mission, 'state.json mission_anchor mismatch after analyst-derived bootstrap'
|
|
379
|
+
assert state['task_type'] == expected_task_type, 'state.json task_type mismatch after analyst-derived bootstrap'
|
|
380
|
+
assert state['evaluation_profile'] == expected_eval_profile, 'state.json evaluation_profile mismatch after analyst-derived bootstrap'
|
|
294
381
|
assert plan['mission_anchor'] == mission, 'plan.json mission_anchor mismatch after analyst-derived bootstrap'
|
|
382
|
+
assert plan['task_type'] == expected_task_type, 'plan.json task_type mismatch after analyst-derived bootstrap'
|
|
383
|
+
assert plan['evaluation_profile'] == expected_eval_profile, 'plan.json evaluation_profile mismatch after analyst-derived bootstrap'
|
|
295
384
|
assert active['mission_anchor'] == mission, 'active-slice.json mission_anchor mismatch after analyst-derived bootstrap'
|
|
385
|
+
assert active['task_type'] == expected_task_type, 'active-slice.json task_type mismatch after analyst-derived bootstrap'
|
|
386
|
+
assert active['evaluation_profile'] == expected_eval_profile, 'active-slice.json evaluation_profile mismatch after analyst-derived bootstrap'
|
|
296
387
|
assert state['current_phase'] == 'reground', 'current_phase should reset to reground after analyst-derived bootstrap'
|
|
297
388
|
assert state['next_mandatory_role'] == 'completion-regrounder', 'next role should reset to completion-regrounder after analyst-derived bootstrap'
|
|
298
389
|
assert continuation_reason.startswith('User refocused workflow via /cook:'), 'continuation_reason should record the analyst-derived restart'
|
|
390
|
+
assert 'task_type=completion-workflow' in continuation_reason, 'analyst-derived restart should persist the selected task_type'
|
|
391
|
+
assert 'evaluation_profile=completion-rubric-v1' in continuation_reason, 'analyst-derived restart should persist the selected evaluation_profile'
|
|
392
|
+
assert 'critique outcome=accepted critique=none' in continuation_reason, 'analyst-derived restart should persist that no critique notes were accepted'
|
|
299
393
|
assert 'Keep explicit goals anchored.' in continuation_reason, 'analyst-derived scope should be preserved'
|
|
300
394
|
PY
|
|
301
395
|
|
|
@@ -307,7 +401,7 @@ git init -q
|
|
|
307
401
|
|
|
308
402
|
UI_SESSION_START="$TMPDIR/ui-session-start.jsonl"
|
|
309
403
|
UI_DISCUSSION_START=$'Mission: Replace the crowded selector with a clearer action layout.\nScope:\n- Separate proposal text from actions.\nConstraints:\n- Preserve Start/Edit/Cancel behavior.\nAcceptance:\n- Add regression coverage.'
|
|
310
|
-
UI_ANALYST_OUTPUT_START='{"mission":"Replace the crowded selector with a clearer action layout.","scope":["Separate proposal text from actions."],"constraints":["Preserve Start/Edit/Cancel behavior."],"acceptance":["Add regression coverage."],"confidence":0.95}'
|
|
404
|
+
UI_ANALYST_OUTPUT_START='{"mission":"Replace the crowded selector with a clearer action layout.","scope":["Separate proposal text from actions."],"constraints":["Preserve Start/Edit/Cancel behavior."],"acceptance":["Add regression coverage."],"critique":["Keep critique details separate from the editable proposal body."],"risks":["Bundling critique into the action list would make the confirmation harder to scan."],"task_type":"completion-workflow","evaluation_profile":"completion-rubric-v1","possible_noise":["old selector wording"],"confidence":0.95}'
|
|
311
405
|
UI_SNAPSHOT_START="$TMPDIR/context-proposal-ui-start.json"
|
|
312
406
|
write_session "$UI_SESSION_START" "$UI_ROOT_START" "$UI_DISCUSSION_START"
|
|
313
407
|
|
|
@@ -326,13 +420,25 @@ snapshot = json.loads(Path(sys.argv[1]).read_text())
|
|
|
326
420
|
state = json.loads(Path('.agent/state.json').read_text())
|
|
327
421
|
|
|
328
422
|
assert snapshot['proposalHeading'] == 'Proposed workflow', 'custom confirmation snapshot should expose a dedicated proposal section'
|
|
423
|
+
assert snapshot['critiqueHeading'] == 'Critique and risks', 'custom confirmation snapshot should expose critique separately from the proposal body'
|
|
424
|
+
assert snapshot['routingHeading'] == 'Routing recommendations', 'custom confirmation snapshot should expose routing recommendations separately from the proposal body'
|
|
425
|
+
assert state['task_type'] == 'completion-workflow', 'start action should preserve canonical task_type'
|
|
426
|
+
assert state['evaluation_profile'] == 'completion-rubric-v1', 'start action should preserve canonical evaluation_profile'
|
|
329
427
|
assert 'Mission\nReplace the crowded selector with a clearer action layout.' in snapshot['proposalBody'], 'proposal body should be captured separately from the action list'
|
|
428
|
+
assert 'Keep critique details separate from the editable proposal body.' not in snapshot['proposalBody'], 'critique notes should not be embedded in the proposal body'
|
|
429
|
+
assert 'Critique\n- Keep critique details separate from the editable proposal body.' in snapshot['critiqueBody'], 'critique section should render accepted critique notes separately'
|
|
430
|
+
assert 'Risks\n- Bundling critique into the action list would make the confirmation harder to scan.' in snapshot['critiqueBody'], 'critique section should render risk notes separately'
|
|
431
|
+
assert 'Possible noise\n- old selector wording' in snapshot['critiqueBody'], 'critique section should render possible-noise notes separately'
|
|
432
|
+
assert '- task_type: completion-workflow' in snapshot['routingBody'], 'routing section should render the recommended task_type'
|
|
433
|
+
assert '- evaluation_profile: completion-rubric-v1' in snapshot['routingBody'], 'routing section should render the recommended evaluation_profile'
|
|
330
434
|
assert [action['id'] for action in snapshot['actions']] == ['start', 'edit', 'cancel'], 'custom confirmation actions should stay Start/Edit/Cancel'
|
|
331
435
|
assert [action['label'] for action in snapshot['actions']] == ['Start', 'Edit', 'Cancel'], 'custom confirmation action labels should be concise'
|
|
332
436
|
for action in snapshot['actions']:
|
|
333
437
|
assert 'Replace the crowded selector with a clearer action layout.' not in action['label'], 'proposal mission should not be embedded in action labels'
|
|
334
438
|
assert 'Separate proposal text from actions.' not in action['description'], 'proposal scope should not be embedded in action descriptions'
|
|
335
439
|
assert state['mission_anchor'] == 'Replace the crowded selector with a clearer action layout.', 'start action should still accept the proposed mission'
|
|
440
|
+
assert state['continuation_reason'].startswith('User started workflow via /cook:'), 'start action should persist the startup routing outcome in continuation_reason'
|
|
441
|
+
assert 'Keep critique details separate from the editable proposal body.' in state['continuation_reason'], 'start action should persist the accepted critique outcome canonically'
|
|
336
442
|
PY
|
|
337
443
|
|
|
338
444
|
# Custom confirmation UI: edit should keep the existing editor/parsing flow when the action comes from the new layout.
|
|
@@ -343,7 +449,7 @@ git init -q
|
|
|
343
449
|
|
|
344
450
|
UI_SESSION_EDIT="$TMPDIR/ui-session-edit.jsonl"
|
|
345
451
|
UI_DISCUSSION_EDIT=$'Mission: Keep editing support in the custom confirmation UI.\nScope:\n- Preserve the proposal editor.\nConstraints:\n- Keep parsing structured proposal text.\nAcceptance:\n- Update the mission anchor after edit.'
|
|
346
|
-
UI_ANALYST_OUTPUT_EDIT='{"mission":"Keep editing support in the custom confirmation UI.","scope":["Preserve the proposal editor."],"constraints":["Keep parsing structured proposal text."],"acceptance":["Update the mission anchor after edit."],"confidence":0.94}'
|
|
452
|
+
UI_ANALYST_OUTPUT_EDIT='{"mission":"Keep editing support in the custom confirmation UI.","scope":["Preserve the proposal editor."],"constraints":["Keep parsing structured proposal text."],"acceptance":["Update the mission anchor after edit."],"critique":["Keep critique persistence even when the operator edits the proposal body."],"task_type":"completion-workflow","evaluation_profile":"completion-rubric-v1","confidence":0.94}'
|
|
347
453
|
UI_EDIT_TEXT=$'Mission: Edited mission from the custom confirmation UI.\nScope:\n- Preserve parsing after edit.\nConstraints:\n- Keep the shared confirmation flow.\nAcceptance:\n- Start the workflow from the edited proposal.'
|
|
348
454
|
write_session "$UI_SESSION_EDIT" "$UI_ROOT_EDIT" "$UI_DISCUSSION_EDIT"
|
|
349
455
|
|
|
@@ -363,9 +469,12 @@ active = json.loads(Path('.agent/active-slice.json').read_text())
|
|
|
363
469
|
mission = 'Edited mission from the custom confirmation UI.'
|
|
364
470
|
|
|
365
471
|
assert state['mission_anchor'] == mission, 'edit action should still route through the proposal parser and update state.json'
|
|
472
|
+
assert state['task_type'] == 'completion-workflow', 'edit action should preserve canonical task_type'
|
|
473
|
+
assert state['evaluation_profile'] == 'completion-rubric-v1', 'edit action should preserve canonical evaluation_profile'
|
|
366
474
|
assert plan['mission_anchor'] == mission, 'edit action should still route through the proposal parser and update plan.json'
|
|
367
475
|
assert active['mission_anchor'] == mission, 'edit action should still route through the proposal parser and update active-slice.json'
|
|
368
476
|
assert state['current_phase'] == 'reground', 'edit action should still bootstrap/reground the workflow'
|
|
477
|
+
assert 'Keep critique persistence even when the operator edits the proposal body.' in state['continuation_reason'], 'edit action should preserve the accepted critique outcome canonically'
|
|
369
478
|
PY
|
|
370
479
|
|
|
371
480
|
# Custom confirmation UI: cancel should exit without writing canonical state.
|
package/scripts/refocus-test.sh
CHANGED
|
@@ -44,15 +44,26 @@ import json
|
|
|
44
44
|
from pathlib import Path
|
|
45
45
|
|
|
46
46
|
new_anchor = 'refocused smoke-test mission with tests and docs parity.'
|
|
47
|
+
expected_task_type = 'completion-workflow'
|
|
48
|
+
expected_eval_profile = 'completion-rubric-v1'
|
|
47
49
|
mission_text = Path('.agent/mission.md').read_text()
|
|
50
|
+
profile = json.loads(Path('.agent/profile.json').read_text())
|
|
48
51
|
state = json.loads(Path('.agent/state.json').read_text())
|
|
49
52
|
plan = json.loads(Path('.agent/plan.json').read_text())
|
|
50
53
|
active = json.loads(Path('.agent/active-slice.json').read_text())
|
|
51
54
|
|
|
52
55
|
assert new_anchor in mission_text, '.agent/mission.md did not update to the refocused mission anchor'
|
|
56
|
+
assert profile['task_type'] == expected_task_type, 'profile.json task_type mismatch after refocus'
|
|
57
|
+
assert profile['evaluation_profile'] == expected_eval_profile, 'profile.json evaluation_profile mismatch after refocus'
|
|
53
58
|
assert state['mission_anchor'] == new_anchor, 'state.json mission_anchor mismatch after refocus'
|
|
59
|
+
assert state['task_type'] == expected_task_type, 'state.json task_type mismatch after refocus'
|
|
60
|
+
assert state['evaluation_profile'] == expected_eval_profile, 'state.json evaluation_profile mismatch after refocus'
|
|
54
61
|
assert plan['mission_anchor'] == new_anchor, 'plan.json mission_anchor mismatch after refocus'
|
|
62
|
+
assert plan['task_type'] == expected_task_type, 'plan.json task_type mismatch after refocus'
|
|
63
|
+
assert plan['evaluation_profile'] == expected_eval_profile, 'plan.json evaluation_profile mismatch after refocus'
|
|
55
64
|
assert active['mission_anchor'] == new_anchor, 'active-slice.json mission_anchor mismatch after refocus'
|
|
65
|
+
assert active['task_type'] == expected_task_type, 'active-slice.json task_type mismatch after refocus'
|
|
66
|
+
assert active['evaluation_profile'] == expected_eval_profile, 'active-slice.json evaluation_profile mismatch after refocus'
|
|
56
67
|
assert state['current_phase'] == 'reground', 'state.json current_phase should reset to reground after refocus'
|
|
57
68
|
assert state['requires_reground'] is True, 'state.json requires_reground should be true after refocus'
|
|
58
69
|
assert state['next_mandatory_role'] == 'completion-regrounder', 'next_mandatory_role should reset to completion-regrounder'
|
package/scripts/release-check.sh
CHANGED
|
@@ -4,10 +4,12 @@ set -euo pipefail
|
|
|
4
4
|
ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
|
|
5
5
|
cd "$ROOT"
|
|
6
6
|
|
|
7
|
+
echo "[release-check] running startup/refocus/context regressions, including critique-aware /cook confirmation coverage"
|
|
7
8
|
npm run smoke-test
|
|
8
9
|
npm run refocus-test
|
|
9
10
|
npm run context-proposal-test
|
|
10
11
|
npm run observability-status-test
|
|
12
|
+
npm run rubric-contract-test
|
|
11
13
|
npm pack --dry-run >/dev/null
|
|
12
14
|
|
|
13
15
|
echo "release check passed"
|
|
@@ -0,0 +1,249 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
set -euo pipefail
|
|
3
|
+
|
|
4
|
+
ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
|
|
5
|
+
cd "$ROOT"
|
|
6
|
+
|
|
7
|
+
node <<'NODE'
|
|
8
|
+
const fs = require('node:fs');
|
|
9
|
+
|
|
10
|
+
const read = (file) => fs.readFileSync(file, 'utf8');
|
|
11
|
+
const assertIncludes = (file, snippet) => {
|
|
12
|
+
const text = read(file);
|
|
13
|
+
if (!text.includes(snippet)) {
|
|
14
|
+
console.error(`${file} is missing required rubric-contract text: ${snippet}`);
|
|
15
|
+
process.exit(1);
|
|
16
|
+
}
|
|
17
|
+
};
|
|
18
|
+
|
|
19
|
+
const rubricHeading = '## Structured Evaluation Rubric Foundation';
|
|
20
|
+
const rubricDimensions = [
|
|
21
|
+
'Contract coverage',
|
|
22
|
+
'Correctness risk',
|
|
23
|
+
'Verification evidence',
|
|
24
|
+
'Docs/state parity',
|
|
25
|
+
];
|
|
26
|
+
const verdictSnippets = [
|
|
27
|
+
'`pass` — no material issue remains',
|
|
28
|
+
'`concern` — a real caveat or remaining gap exists',
|
|
29
|
+
'`fail` — a blocking issue or contradictory truth exists',
|
|
30
|
+
];
|
|
31
|
+
|
|
32
|
+
for (const file of [
|
|
33
|
+
'skills/completion-protocol/SKILL.md',
|
|
34
|
+
'skills/completion-protocol/references/completion.md',
|
|
35
|
+
]) {
|
|
36
|
+
assertIncludes(file, rubricHeading);
|
|
37
|
+
assertIncludes(file, 'canonical `task_type` and `evaluation_profile` signaling');
|
|
38
|
+
assertIncludes(file, 'routing metadata only; later slices may still add stricter profile-aware rubric-output enforcement');
|
|
39
|
+
assertIncludes(file, '- `Rubric:`');
|
|
40
|
+
for (const dimension of rubricDimensions) {
|
|
41
|
+
assertIncludes(file, `- \`- ${dimension}: pass|concern|fail - ...\``);
|
|
42
|
+
}
|
|
43
|
+
for (const snippet of verdictSnippets) {
|
|
44
|
+
assertIncludes(file, snippet);
|
|
45
|
+
}
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
for (const file of [
|
|
49
|
+
'agents/completion-reviewer.md',
|
|
50
|
+
'agents/completion-auditor.md',
|
|
51
|
+
'agents/completion-stop-judge.md',
|
|
52
|
+
]) {
|
|
53
|
+
assertIncludes(file, 'Always emit the shared rubric section');
|
|
54
|
+
assertIncludes(file, 'Use these exact rubric dimension names and verdict words');
|
|
55
|
+
assertIncludes(file, '`evaluation_profile`');
|
|
56
|
+
assertIncludes(file, '`implementation_surfaces`');
|
|
57
|
+
assertIncludes(file, '`verification_commands`');
|
|
58
|
+
assertIncludes(file, '- `Rubric:`');
|
|
59
|
+
for (const dimension of rubricDimensions) {
|
|
60
|
+
assertIncludes(file, `- \`- ${dimension}: pass|concern|fail - ...\``);
|
|
61
|
+
}
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
assertIncludes('README.md', '## Structured evaluation rubrics');
|
|
65
|
+
assertIncludes('README.md', '- `task_type: completion-workflow`');
|
|
66
|
+
assertIncludes('README.md', '- `evaluation_profile: completion-rubric-v1`');
|
|
67
|
+
assertIncludes('README.md', 'kickoff/reminder/resume text and reviewer/auditor/stop-judge evaluation handoffs so downstream roles can rely on canonical signaling instead of prose inference alone.');
|
|
68
|
+
assertIncludes('README.md', 'Reviewer, auditor, and stop-judge dispatch/reminder surfaces now also thread the current active-slice implementation contract');
|
|
69
|
+
assertIncludes('README.md', 'Canonical reviewer/auditor/stop-judge transcription now fails closed on malformed rubric-bearing reports');
|
|
70
|
+
assertIncludes('README.md', 'npm run rubric-contract-test`, which now exercises reviewer, auditor, and stop-judge transcription paths');
|
|
71
|
+
for (const dimension of rubricDimensions) {
|
|
72
|
+
assertIncludes('README.md', `- \`${dimension}\``);
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
assertIncludes('CHANGELOG.md', 'shared structured evaluation-rubric contract');
|
|
76
|
+
assertIncludes('CHANGELOG.md', 'added canonical `task_type: completion-workflow` and `evaluation_profile: completion-rubric-v1` signaling across the packaged control-plane defaults, verifier schema, and kickoff/reminder/resume surfaces');
|
|
77
|
+
assertIncludes('CHANGELOG.md', 'threaded canonical `evaluation_profile` plus the active-slice implementation contract into reviewer/auditor/stop-judge reminder and dispatch surfaces');
|
|
78
|
+
assertIncludes('CHANGELOG.md', 'made reviewer/auditor/stop-judge transcription fail closed on malformed rubric-bearing outputs while still accepting valid reports');
|
|
79
|
+
assertIncludes('extensions/completion/index.ts', 'Canonical routing profile:\\n- task_type: ${taskType}\\n- evaluation_profile: ${evaluationProfile}');
|
|
80
|
+
assertIncludes('extensions/completion/index.ts', '`Task type: ${currentTaskType(snapshot) ?? "(missing)"}`');
|
|
81
|
+
assertIncludes('extensions/completion/index.ts', '`Evaluation profile: ${currentEvaluationProfile(snapshot) ?? "(missing)"}`');
|
|
82
|
+
assertIncludes('extensions/completion/index.ts', '`task_type: ${currentTaskType(snapshot) ?? "(missing)"}`');
|
|
83
|
+
assertIncludes('extensions/completion/index.ts', '`evaluation_profile: ${currentEvaluationProfile(snapshot) ?? "(missing)"}`');
|
|
84
|
+
assertIncludes('extensions/completion/index.ts', 'Canonical evaluation handoff for ${role}:');
|
|
85
|
+
assertIncludes('extensions/completion/index.ts', 'buildEvaluationRoleReminderText(snapshot, nextRole)');
|
|
86
|
+
assertIncludes('extensions/completion/index.ts', 'roleReporting.transcribeCanonicalRoleReport');
|
|
87
|
+
assertIncludes('extensions/completion/role-reporting.js', 'Missing Rubric heading for ${role}.');
|
|
88
|
+
assertIncludes('extensions/completion/role-reporting.js', 'Reviewer output cannot mark \'Acceptable as-is: yes\' when any rubric line is fail.');
|
|
89
|
+
assertIncludes('extensions/completion/role-reporting.js', 'Auditor output must answer \'Tracked and unignored worktree is clean\' with yes or no.');
|
|
90
|
+
assertIncludes('extensions/completion/role-reporting.js', 'Auditor output must answer \'Stale or conflicting canonical state\' with yes or no.');
|
|
91
|
+
assertIncludes('extensions/completion/role-reporting.js', 'Auditor output must answer \'Plan truthfully captures remaining slice backlog\' with yes or no.');
|
|
92
|
+
assertIncludes('extensions/completion/role-reporting.js', 'Stop-judge output cannot mark \'Can the project stop now: yes\' when any rubric line is fail.');
|
|
93
|
+
assertIncludes('extensions/completion/role-reporting.js', 'Stop-judge output must answer \'Docs/config/runbooks match shipped behavior\' with yes or no.');
|
|
94
|
+
assertIncludes('extensions/completion/role-reporting.js', 'Stop-judge output must answer \'Tracked and unignored worktree is clean\' with yes or no.');
|
|
95
|
+
assertIncludes('package.json', '"rubric-contract-test": "bash ./scripts/rubric-contract-test.sh"');
|
|
96
|
+
assertIncludes('scripts/release-check.sh', 'npm run rubric-contract-test');
|
|
97
|
+
assertIncludes('.agent/verify_completion_stop.sh', 'npm run release-check >/dev/null');
|
|
98
|
+
NODE
|
|
99
|
+
|
|
100
|
+
node <<'NODE'
|
|
101
|
+
const fs = require('node:fs');
|
|
102
|
+
const path = require('node:path');
|
|
103
|
+
const {
|
|
104
|
+
parseReportFields,
|
|
105
|
+
transcribeCanonicalRoleReport,
|
|
106
|
+
} = require('./extensions/completion/role-reporting.js');
|
|
107
|
+
|
|
108
|
+
const tempRootBase = path.join(process.cwd(), '.agent', 'tmp');
|
|
109
|
+
fs.mkdirSync(tempRootBase, { recursive: true });
|
|
110
|
+
const tempRoot = fs.mkdtempSync(path.join(tempRootBase, 'rubric-role-reporting-'));
|
|
111
|
+
const snapshotFiles = {
|
|
112
|
+
sliceHistoryPath: path.join(tempRoot, 'slice-history.jsonl'),
|
|
113
|
+
stopHistoryPath: path.join(tempRoot, 'stop-check-history.jsonl'),
|
|
114
|
+
};
|
|
115
|
+
fs.writeFileSync(snapshotFiles.sliceHistoryPath, '');
|
|
116
|
+
fs.writeFileSync(snapshotFiles.stopHistoryPath, '');
|
|
117
|
+
|
|
118
|
+
const readJsonl = (file) => fs.readFileSync(file, 'utf8').split('\n').filter(Boolean).map((line) => JSON.parse(line));
|
|
119
|
+
const assert = (condition, message) => {
|
|
120
|
+
if (!condition) throw new Error(message);
|
|
121
|
+
};
|
|
122
|
+
|
|
123
|
+
const reviewerReport = `MISSION ANCHOR: test mission\nRemaining contract IDs: TEST-CONTRACT\nRubric:\n- Contract coverage: pass - Locked acceptance criteria match the committed slice.\n- Correctness risk: pass - No blocking regression is evident.\n- Verification evidence: pass - Deterministic proof was rerun successfully.\n- Docs/state parity: pass - Docs and canonical state are aligned.\nFindings: none.\nAcceptable as-is: yes\nSmallest follow-up slice: none.`;
|
|
124
|
+
|
|
125
|
+
const reviewerMalformed = `MISSION ANCHOR: test mission\nRemaining contract IDs: TEST-CONTRACT\nRubric:\n- Contract coverage: pass - Locked acceptance criteria match the committed slice.\n- Correctness risk: pass - No blocking regression is evident.\n- Verification evidence: pass - Deterministic proof was rerun successfully.\nFindings: none.\nAcceptable as-is: yes\nSmallest follow-up slice: none.`;
|
|
126
|
+
|
|
127
|
+
const auditorReport = `MISSION ANCHOR: test mission\nRemaining contract IDs: TEST-CONTRACT\nRubric:\n- Contract coverage: pass - The accepted slice remains satisfied on HEAD.\n- Correctness risk: concern - Remaining planned work still keeps the project open.\n- Verification evidence: pass - Verification was rerun for the accepted slice.\n- Docs/state parity: pass - Canonical state can be reconciled truthfully.\nWhy the project is still not done: One planned contract remains after this accepted slice.\nOpen top-level contract IDs: TEST-CONTRACT\nBlocker count: 0\nHigh-value gap count: 1\nTracked and unignored worktree is clean: yes\nWorktree blockers: none\nNext mandatory slice: next-slice\nStale or conflicting canonical state: no\nPlan truthfully captures remaining slice backlog: yes - one planned slice remains.`;
|
|
128
|
+
|
|
129
|
+
const auditorMalformed = `MISSION ANCHOR: test mission\nRemaining contract IDs: TEST-CONTRACT\nWhy the project is still not done: One planned contract remains after this accepted slice.\nOpen top-level contract IDs: TEST-CONTRACT\nBlocker count: 0\nHigh-value gap count: 1\nTracked and unignored worktree is clean: yes\nWorktree blockers: none\nNext mandatory slice: next-slice\nStale or conflicting canonical state: no\nPlan truthfully captures remaining slice backlog: yes - one planned slice remains.`;
|
|
130
|
+
|
|
131
|
+
const stopJudgeReport = `MISSION ANCHOR: test mission\nRemaining contract IDs: none\nRubric:\n- Contract coverage: pass - All implementation slices are accepted on HEAD.\n- Correctness risk: pass - No remaining blocker or high-value gap is evident.\n- Verification evidence: pass - Final verification passes for the current head.\n- Docs/state parity: pass - Docs, config, and canonical state match shipped behavior.\nCan the project stop now: yes\nExact remaining open top-level contract IDs: none\nBlocker count: 0\nHigh-value gap count: 0\nLatest completed slice commit: abcdef1234567890abcdef1234567890abcdef12\nDocs/config/runbooks match shipped behavior: yes\nTracked and unignored worktree is clean: yes\nBrief justification: Current HEAD satisfies the stop criteria.`;
|
|
132
|
+
|
|
133
|
+
const stopJudgeMalformed = `MISSION ANCHOR: test mission\nRemaining contract IDs: none\nRubric:\n- Contract coverage: fail - A blocking contract is still open.\n- Correctness risk: pass - No additional risk was found.\n- Verification evidence: pass - Verification still passes.\n- Docs/state parity: pass - Docs and state match.\nCan the project stop now: yes\nExact remaining open top-level contract IDs: TEST-CONTRACT\nBlocker count: 1\nHigh-value gap count: 0\nLatest completed slice commit: abcdef1234567890abcdef1234567890abcdef12\nDocs/config/runbooks match shipped behavior: yes\nTracked and unignored worktree is clean: yes\nBrief justification: This should be rejected because the rubric blocks stop.`;
|
|
134
|
+
|
|
135
|
+
const auditorMalformedYesNo = `MISSION ANCHOR: test mission\nRemaining contract IDs: TEST-CONTRACT\nRubric:\n- Contract coverage: pass - The accepted slice remains satisfied on HEAD.\n- Correctness risk: concern - Remaining planned work still keeps the project open.\n- Verification evidence: pass - Verification was rerun for the accepted slice.\n- Docs/state parity: pass - Canonical state can be reconciled truthfully.\nWhy the project is still not done: One planned contract remains after this accepted slice.\nOpen top-level contract IDs: TEST-CONTRACT\nBlocker count: 0\nHigh-value gap count: 1\nTracked and unignored worktree is clean: maybe\nWorktree blockers: none\nNext mandatory slice: next-slice\nStale or conflicting canonical state: perhaps\nPlan truthfully captures remaining slice backlog: sorta.`;
|
|
136
|
+
|
|
137
|
+
const stopJudgeMalformedYesNo = `MISSION ANCHOR: test mission\nRemaining contract IDs: none\nRubric:\n- Contract coverage: pass - All implementation slices are accepted on HEAD.\n- Correctness risk: pass - No remaining blocker or high-value gap is evident.\n- Verification evidence: pass - Final verification passes for the current head.\n- Docs/state parity: pass - Docs, config, and canonical state match shipped behavior.\nCan the project stop now: no\nExact remaining open top-level contract IDs: TEST-CONTRACT\nBlocker count: 0\nHigh-value gap count: 1\nLatest completed slice commit: abcdef1234567890abcdef1234567890abcdef12\nDocs/config/runbooks match shipped behavior: maybe\nTracked and unignored worktree is clean: perhaps\nBrief justification: This should be rejected because malformed yes/no-style fields must fail closed.`;
|
|
138
|
+
|
|
139
|
+
(async () => {
|
|
140
|
+
const reviewed = await transcribeCanonicalRoleReport({
|
|
141
|
+
role: 'completion-reviewer',
|
|
142
|
+
output: reviewerReport,
|
|
143
|
+
reportFields: parseReportFields(reviewerReport),
|
|
144
|
+
snapshotFiles,
|
|
145
|
+
headSha: '1111111111111111111111111111111111111111',
|
|
146
|
+
sliceId: 'slice-review',
|
|
147
|
+
recordedAt: 1,
|
|
148
|
+
});
|
|
149
|
+
assert(reviewed.errors.length === 0, `reviewer valid report should transcribe cleanly: ${reviewed.errors.join(' | ')}`);
|
|
150
|
+
assert(reviewed.appended.includes('reviewed:slice-review'), 'reviewer transcription should append reviewed record');
|
|
151
|
+
assert(readJsonl(snapshotFiles.sliceHistoryPath).length === 1, 'reviewer transcription should create one slice-history record');
|
|
152
|
+
|
|
153
|
+
const reviewerRejected = await transcribeCanonicalRoleReport({
|
|
154
|
+
role: 'completion-reviewer',
|
|
155
|
+
output: reviewerMalformed,
|
|
156
|
+
reportFields: parseReportFields(reviewerMalformed),
|
|
157
|
+
snapshotFiles,
|
|
158
|
+
headSha: '2222222222222222222222222222222222222222',
|
|
159
|
+
sliceId: 'slice-review',
|
|
160
|
+
recordedAt: 2,
|
|
161
|
+
});
|
|
162
|
+
assert(reviewerRejected.errors.some((error) => error.includes('Docs/state parity')), 'reviewer malformed report should be rejected for missing rubric line');
|
|
163
|
+
assert(readJsonl(snapshotFiles.sliceHistoryPath).length === 1, 'rejected reviewer report must not append history');
|
|
164
|
+
|
|
165
|
+
const audited = await transcribeCanonicalRoleReport({
|
|
166
|
+
role: 'completion-auditor',
|
|
167
|
+
output: auditorReport,
|
|
168
|
+
reportFields: parseReportFields(auditorReport),
|
|
169
|
+
snapshotFiles,
|
|
170
|
+
headSha: '3333333333333333333333333333333333333333',
|
|
171
|
+
sliceId: 'slice-audit',
|
|
172
|
+
recordedAt: 3,
|
|
173
|
+
});
|
|
174
|
+
assert(audited.errors.length === 0, `auditor valid report should transcribe cleanly: ${audited.errors.join(' | ')}`);
|
|
175
|
+
assert(audited.appended.includes('audited:slice-audit'), 'auditor transcription should append audited record');
|
|
176
|
+
assert(readJsonl(snapshotFiles.sliceHistoryPath).length === 2, 'auditor transcription should append a second slice-history record');
|
|
177
|
+
|
|
178
|
+
const auditorRejected = await transcribeCanonicalRoleReport({
|
|
179
|
+
role: 'completion-auditor',
|
|
180
|
+
output: auditorMalformed,
|
|
181
|
+
reportFields: parseReportFields(auditorMalformed),
|
|
182
|
+
snapshotFiles,
|
|
183
|
+
headSha: '4444444444444444444444444444444444444444',
|
|
184
|
+
sliceId: 'slice-audit',
|
|
185
|
+
recordedAt: 4,
|
|
186
|
+
});
|
|
187
|
+
assert(auditorRejected.errors.some((error) => error.includes('Missing Rubric heading')), 'auditor malformed report should be rejected without rubric heading');
|
|
188
|
+
assert(readJsonl(snapshotFiles.sliceHistoryPath).length === 2, 'rejected auditor report must not append history');
|
|
189
|
+
|
|
190
|
+
const judged = await transcribeCanonicalRoleReport({
|
|
191
|
+
role: 'completion-stop-judge',
|
|
192
|
+
output: stopJudgeReport,
|
|
193
|
+
reportFields: parseReportFields(stopJudgeReport),
|
|
194
|
+
snapshotFiles,
|
|
195
|
+
headSha: '5555555555555555555555555555555555555555',
|
|
196
|
+
recordedAt: 5,
|
|
197
|
+
});
|
|
198
|
+
assert(judged.errors.length === 0, `stop-judge valid report should transcribe cleanly: ${judged.errors.join(' | ')}`);
|
|
199
|
+
assert(judged.appended.includes('judgment:555555555555'), 'stop-judge transcription should append judgment record');
|
|
200
|
+
assert(readJsonl(snapshotFiles.stopHistoryPath).length === 1, 'stop-judge transcription should create one judgment record');
|
|
201
|
+
|
|
202
|
+
const judgeRejected = await transcribeCanonicalRoleReport({
|
|
203
|
+
role: 'completion-stop-judge',
|
|
204
|
+
output: stopJudgeMalformed,
|
|
205
|
+
reportFields: parseReportFields(stopJudgeMalformed),
|
|
206
|
+
snapshotFiles,
|
|
207
|
+
headSha: '6666666666666666666666666666666666666666',
|
|
208
|
+
recordedAt: 6,
|
|
209
|
+
});
|
|
210
|
+
assert(judgeRejected.errors.some((error) => error.includes("Can the project stop now: yes")), 'stop-judge malformed report should be rejected when fail rubric contradicts yes verdict');
|
|
211
|
+
assert(readJsonl(snapshotFiles.stopHistoryPath).length === 1, 'rejected stop-judge report must not append judgment history');
|
|
212
|
+
|
|
213
|
+
const auditorYesNoRejected = await transcribeCanonicalRoleReport({
|
|
214
|
+
role: 'completion-auditor',
|
|
215
|
+
output: auditorMalformedYesNo,
|
|
216
|
+
reportFields: parseReportFields(auditorMalformedYesNo),
|
|
217
|
+
snapshotFiles,
|
|
218
|
+
headSha: '7777777777777777777777777777777777777777',
|
|
219
|
+
sliceId: 'slice-audit',
|
|
220
|
+
recordedAt: 7,
|
|
221
|
+
});
|
|
222
|
+
assert(auditorYesNoRejected.errors.some((error) => error.includes("Tracked and unignored worktree is clean")), 'auditor malformed yes/no report should reject invalid worktree cleanliness values');
|
|
223
|
+
assert(auditorYesNoRejected.errors.some((error) => error.includes("Stale or conflicting canonical state")), 'auditor malformed yes/no report should reject invalid canonical-state values');
|
|
224
|
+
assert(auditorYesNoRejected.errors.some((error) => error.includes("Plan truthfully captures remaining slice backlog")), 'auditor malformed yes/no report should reject invalid backlog-truth values');
|
|
225
|
+
assert(readJsonl(snapshotFiles.sliceHistoryPath).length === 2, 'rejected auditor yes/no report must not append history');
|
|
226
|
+
|
|
227
|
+
const stopJudgeYesNoRejected = await transcribeCanonicalRoleReport({
|
|
228
|
+
role: 'completion-stop-judge',
|
|
229
|
+
output: stopJudgeMalformedYesNo,
|
|
230
|
+
reportFields: parseReportFields(stopJudgeMalformedYesNo),
|
|
231
|
+
snapshotFiles,
|
|
232
|
+
headSha: '8888888888888888888888888888888888888888',
|
|
233
|
+
recordedAt: 8,
|
|
234
|
+
});
|
|
235
|
+
assert(stopJudgeYesNoRejected.errors.some((error) => error.includes("Docs/config/runbooks match shipped behavior")), 'stop-judge malformed yes/no report should reject invalid docs parity values');
|
|
236
|
+
assert(stopJudgeYesNoRejected.errors.some((error) => error.includes("Tracked and unignored worktree is clean")), 'stop-judge malformed yes/no report should reject invalid worktree cleanliness values');
|
|
237
|
+
assert(readJsonl(snapshotFiles.stopHistoryPath).length === 1, 'rejected stop-judge yes/no report must not append judgment history');
|
|
238
|
+
|
|
239
|
+
fs.rmSync(tempRoot, { recursive: true, force: true });
|
|
240
|
+
})().catch((error) => {
|
|
241
|
+
try {
|
|
242
|
+
fs.rmSync(tempRoot, { recursive: true, force: true });
|
|
243
|
+
} catch {}
|
|
244
|
+
console.error(error instanceof Error ? error.message : String(error));
|
|
245
|
+
process.exit(1);
|
|
246
|
+
});
|
|
247
|
+
NODE
|
|
248
|
+
|
|
249
|
+
echo "rubric contract test passed"
|