@linimin/pi-letscook 0.1.70 → 0.1.72

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/.agent/README.md CHANGED
@@ -22,6 +22,8 @@ This repository uses the `completion` workflow for long-running coding tasks.
22
22
  - `.agent/*.log`
23
23
  - `.agent/tmp/`
24
24
 
25
+ `.agent/profile.json` carries the stop-wave defaults for this repo, including `required_stop_judges` and `stop_aggregation_policy`. The packaged default is `required_stop_judges: 2` plus `stop_aggregation_policy: "unanimous-current-head-v1"`.
26
+
25
27
  `.agent/startup-brief.json` preserves the confirmed `/cook` startup intent as canonical intake for re-grounding. It does not replace `.agent/plan.json` or `.agent/active-slice.json`, which remain under regrounder authority.
26
28
 
27
29
  `.agent/verification-evidence.json` is the durable canonical record of deterministic verification for the selected slice or current HEAD. Recovery, review, audit, and stop-check reminder surfaces consume it instead of temp-only artifacts or conversational summaries when it is populated.
package/.agent/mission.md CHANGED
@@ -3,6 +3,6 @@
3
3
  Project: pi-letscook
4
4
 
5
5
  Mission anchor:
6
- Refactor the /cook startup boundary into the agreed mixed model: ordinary chat stays advisory-first by default with no default pre-/cook handoff capsule formation, while explicit /cook performs structured startup synthesis from recent discussion and preserves the approval-only Start/Cancel gate.
6
+ Change pi-letscook stop-wave behavior from 3 generic stop judges to 2 stop judges plus an explicit current-HEAD aggregation policy (`unanimous-current-head-v1`), then update control-plane defaults, protocol docs, and regression tests so final stop requires two valid current-HEAD `can_stop=yes` judgments and a passing stop verifier before reconciliation to done.
7
7
 
8
8
  This file is a tracked human-readable statement of the repo's completion mission. Re-grounders may refine this file when repo truth becomes clearer, but it must stay truthful to shipped behavior and the active completion objective.
@@ -2,7 +2,8 @@
2
2
  "schema_version": 1,
3
3
  "protocol_id": "completion",
4
4
  "project_name": "pi-letscook",
5
- "required_stop_judges": 3,
5
+ "required_stop_judges": 2,
6
+ "stop_aggregation_policy": "unanimous-current-head-v1",
6
7
  "priority_policy_id": "completion-default",
7
8
  "task_type": "completion-workflow",
8
9
  "evaluation_profile": "completion-rubric-v1",
@@ -101,12 +101,23 @@ for (const [file, record] of [
101
101
 
102
102
  const taskType = asString(profile.task_type);
103
103
  const evaluationProfile = asString(profile.evaluation_profile);
104
+ const requiredStopJudges = asNumber(profile.required_stop_judges);
105
+ const stopAggregationPolicy = asString(profile.stop_aggregation_policy);
106
+ if (!Number.isInteger(requiredStopJudges) || requiredStopJudges < 1) {
107
+ fail('.agent/profile.json required_stop_judges must be a positive integer');
108
+ }
109
+ if (stopAggregationPolicy !== 'unanimous-current-head-v1') {
110
+ fail('.agent/profile.json stop_aggregation_policy must be unanimous-current-head-v1');
111
+ }
104
112
  if (asString(state.task_type) !== taskType) fail('.agent/state.json task_type must match .agent/profile.json task_type');
105
113
  if (asString(plan.task_type) !== taskType) fail('.agent/plan.json task_type must match .agent/profile.json task_type');
106
114
  if (asString(active.task_type) !== taskType) fail('.agent/active-slice.json task_type must match .agent/profile.json task_type');
107
115
  if (asString(state.evaluation_profile) !== evaluationProfile) fail('.agent/state.json evaluation_profile must match .agent/profile.json evaluation_profile');
108
116
  if (asString(plan.evaluation_profile) !== evaluationProfile) fail('.agent/plan.json evaluation_profile must match .agent/profile.json evaluation_profile');
109
117
  if (asString(active.evaluation_profile) !== evaluationProfile) fail('.agent/active-slice.json evaluation_profile must match .agent/profile.json evaluation_profile');
118
+ const remainingStopJudges = asNumber(state.remaining_stop_judges);
119
+ if (remainingStopJudges === undefined) fail('.agent/state.json remaining_stop_judges must be numeric');
120
+ if (remainingStopJudges < 0) fail('.agent/state.json remaining_stop_judges must not be negative');
110
121
 
111
122
  if (asString(evidence.artifact_type) !== 'completion-verification-evidence') {
112
123
  fail('.agent/verification-evidence.json artifact_type must be completion-verification-evidence');
@@ -1,20 +1,108 @@
1
1
  #!/usr/bin/env bash
2
2
  set -euo pipefail
3
3
 
4
- ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
5
- cd "$ROOT"
6
-
7
- echo "[completion-stop] verifying control plane and .agent/verification-evidence.json parity"
4
+ # .agent/verification-evidence.json parity is enforced by .agent/verify_completion_control_plane.sh before stop-wave policy checks.
8
5
  bash .agent/verify_completion_control_plane.sh
9
6
 
10
- if [[ "${PI_COMPLETION_RUNNING_RELEASE_CHECK:-0}" == "1" ]]; then
11
- echo "[completion-stop] release-check is already in progress; skipping nested npm run release-check >/dev/null recursion"
12
- npm run evaluator-calibration-test >/dev/null
13
- echo "completion stop verification passed"
14
- exit 0
15
- fi
7
+ CURRENT_HEAD="$(git rev-parse HEAD 2>/dev/null || true)"
8
+ export COMPLETION_STOP_HEAD="$CURRENT_HEAD"
9
+
10
+ node <<'NODE'
11
+ const fs = require('node:fs');
12
+ const { spawnSync } = require('node:child_process');
13
+
14
+ function fail(message) {
15
+ console.error(message);
16
+ process.exit(1);
17
+ }
18
+
19
+ function readJson(file) {
20
+ try {
21
+ return JSON.parse(fs.readFileSync(file, 'utf8'));
22
+ } catch (error) {
23
+ fail('Failed to read ' + file + ': ' + error.message);
24
+ }
25
+ }
26
+
27
+ function asString(value) {
28
+ return typeof value === 'string' && value.trim().length > 0 ? value.trim() : undefined;
29
+ }
30
+
31
+ function asNumber(value) {
32
+ return typeof value === 'number' && Number.isFinite(value) ? value : undefined;
33
+ }
34
+
35
+ function gitHeadSha() {
36
+ const result = spawnSync('git', ['rev-parse', 'HEAD'], { encoding: 'utf8', stdio: ['ignore', 'pipe', 'pipe'] });
37
+ if (result.status !== 0) {
38
+ fail('git rev-parse HEAD failed: ' + (asString(result.stderr) ?? 'unknown git error'));
39
+ }
40
+ return asString(result.stdout);
41
+ }
42
+
43
+ const profile = readJson('.agent/profile.json');
44
+ const state = readJson('.agent/state.json');
45
+ const requiredStopJudges = asNumber(profile.required_stop_judges);
46
+ if (!Number.isInteger(requiredStopJudges) || requiredStopJudges < 1) {
47
+ fail('.agent/profile.json required_stop_judges must be a positive integer before stop verification can run.');
48
+ }
49
+ const stopAggregationPolicy = asString(profile.stop_aggregation_policy);
50
+ if (stopAggregationPolicy !== 'unanimous-current-head-v1') {
51
+ fail('.agent/profile.json stop_aggregation_policy must be unanimous-current-head-v1 before stop verification can run.');
52
+ }
53
+
54
+ const currentPhase = asString(state.current_phase) ?? 'unknown';
55
+ const stopWaveActive = currentPhase === 'stop_wave' || currentPhase === 'done';
56
+ const rawHistory = fs.existsSync('.agent/stop-check-history.jsonl') ? fs.readFileSync('.agent/stop-check-history.jsonl', 'utf8') : '';
57
+ const seededHeadSha = asString(process.env.COMPLETION_STOP_HEAD);
58
+ if (!seededHeadSha && !stopWaveActive && rawHistory.trim().length === 0) {
59
+ console.log('[completion] current phase ' + currentPhase + ' is not stop_wave/done; current-HEAD stop judgments are not required yet');
60
+ process.exit(0);
61
+ }
62
+ const headSha = seededHeadSha ?? gitHeadSha();
63
+ const currentHeadJudgments = [];
64
+ for (const [index, rawLine] of rawHistory.split(/\r?\n/).entries()) {
65
+ const line = rawLine.trim();
66
+ if (!line) continue;
67
+ let parsed;
68
+ try {
69
+ parsed = JSON.parse(line);
70
+ } catch (error) {
71
+ fail('.agent/stop-check-history.jsonl contains invalid JSON at line ' + (index + 1) + ': ' + error.message);
72
+ }
73
+ if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) {
74
+ fail('.agent/stop-check-history.jsonl line ' + (index + 1) + ' must be a JSON object judgment record.');
75
+ }
76
+ if (parsed.type !== 'judgment') continue;
77
+ if (asString(parsed.head_sha) !== headSha) continue;
78
+ if (typeof parsed.can_stop !== 'boolean') {
79
+ fail('Current-HEAD judgment at line ' + (index + 1) + ' must carry boolean can_stop.');
80
+ }
81
+ const blockerCount = asNumber(parsed.blocker_count);
82
+ const highValueGapCount = asNumber(parsed.high_value_gap_count);
83
+ if (blockerCount === undefined || highValueGapCount === undefined) {
84
+ fail('Current-HEAD judgment at line ' + (index + 1) + ' must carry numeric blocker_count and high_value_gap_count.');
85
+ }
86
+ if (parsed.can_stop === false) {
87
+ fail('Current HEAD has a can_stop=no judgment at line ' + (index + 1) + '.');
88
+ }
89
+ if (blockerCount > 0 || highValueGapCount > 0) {
90
+ fail('Current-HEAD judgment at line ' + (index + 1) + ' cannot pass stop verification while blocker_count or high_value_gap_count is non-zero.');
91
+ }
92
+ currentHeadJudgments.push(parsed);
93
+ }
94
+
95
+ if (!stopWaveActive && currentHeadJudgments.length === 0) {
96
+ console.log('[completion] current phase ' + currentPhase + ' is not stop_wave/done; current-HEAD stop judgments are not required yet');
97
+ process.exit(0);
98
+ }
99
+
100
+ if (currentHeadJudgments.length < requiredStopJudges) {
101
+ fail('Need ' + requiredStopJudges + ' valid current-HEAD judgments for HEAD ' + headSha + '; found ' + currentHeadJudgments.length + '.');
102
+ }
16
103
 
17
- echo "[completion-stop] delegating to npm run release-check >/dev/null for broad packaged verification, evaluator calibration, and contract coverage"
18
- PI_COMPLETION_RUNNING_RELEASE_CHECK=1 npm run release-check >/dev/null
104
+ console.log('[completion] stop-wave policy unanimous-current-head-v1 satisfied for HEAD ' + headSha + ' with ' + currentHeadJudgments.length + ' valid current-HEAD judgments');
105
+ NODE
19
106
 
20
- echo "completion stop verification passed"
107
+ echo "[completion] running repo-level verification: npm run release-check >/dev/null"
108
+ npm run release-check >/dev/null
package/CHANGELOG.md CHANGED
@@ -1,5 +1,20 @@
1
1
  # Changelog
2
2
 
3
+ ## 0.1.72
4
+
5
+ ### Fixed
6
+
7
+ - relaxed reviewer no-follow-up routing parsing so `Acceptable as-is: yes` now also accepts `none, proceed to completion-auditor` and `none - proceed to auditor` in addition to the original exact allowance, reducing avoidable completion transcription warnings without weakening the follow-up-slice guard
8
+ - fixed completion-role continuation gating so an already-active `/cook` workflow with `continuation_policy: continue` can keep dispatching mandatory follow-up roles even when the harness no longer recognizes the current turn text as an explicit `/cook` or workflow-driver prompt, while still blocking ordinary main-chat turns from calling `completion_role`
9
+ - added a dedicated `completion-role-gating-test` regression so release-check now fails if active-workflow continuation falls back to the old prompt-only dispatch gate or stops rejecting ordinary main-chat turns
10
+
11
+ ## 0.1.71
12
+
13
+ ### Changed
14
+
15
+ - clarified the packaged `completion-auditor` output contract so `Stale or conflicting canonical state` must begin with `yes` or `no`, matching the canonical transcription gate
16
+ - added rubric-contract coverage to keep the stricter auditor yes/no guidance from drifting and to reduce avoidable transcription warnings during audit
17
+
3
18
  ## 0.1.70
4
19
 
5
20
  ### Changed
package/README.md CHANGED
@@ -184,7 +184,7 @@ Deterministic verification now also persists a durable canonical artifact in `.a
184
184
 
185
185
  Canonical reviewer/auditor/stop-judge transcription now fails closed on malformed rubric-bearing reports: the shared rubric heading plus all four rubric dimensions must be present, required role fields must remain intact, and reviewer/stop-judge yes/no verdicts cannot contradict rubric `fail` lines.
186
186
 
187
- Evaluator calibration now also fails closed on semantically lenient but well-formed reports. `npm run evaluator-calibration-test` drives the packaged transcription path through reviewer yes-with-follow-up, auditor open-contracts-with-`Next mandatory slice: none`, and stop-judge yes-with-open-contracts fixtures while still accepting truthful passing reports. It also rejects the reproducible `none; ...` bypass family for reviewer follow-up, auditor worktree blockers, and stop-judge open-contract reporting, while still accepting only the exact reviewer routing text `Smallest follow-up slice: none; proceed to completion-auditor.` with terminal punctuation or whitespace only. Both `npm run release-check` and `bash .agent/verify_completion_stop.sh` include this calibration gate.
187
+ Evaluator calibration now also fails closed on semantically lenient but well-formed reports. `npm run evaluator-calibration-test` drives the packaged transcription path through reviewer yes-with-follow-up, auditor open-contracts-with-`Next mandatory slice: none`, and stop-judge yes-with-open-contracts fixtures while still accepting truthful passing reports. It also rejects the reproducible `none; ...` bypass family for reviewer follow-up, auditor worktree blockers, and stop-judge open-contract reporting, while still accepting the reviewer routing forms `Smallest follow-up slice: none; proceed to completion-auditor.`, `Smallest follow-up slice: none, proceed to completion-auditor.`, and `Smallest follow-up slice: none - proceed to auditor.` with terminal punctuation or whitespace only. Both `npm run release-check` and `bash .agent/verify_completion_stop.sh` include this calibration gate.
188
188
 
189
189
  Deterministic active-slice contract regression now lives in `bash scripts/active-slice-contract-test.sh`, and `npm run release-check` pulls it into the packaged release gate before `npm pack --dry-run`.
190
190
 
@@ -52,9 +52,11 @@ Answer only:
52
52
  - `Tracked and unignored worktree is clean: yes/no`
53
53
  - `Worktree blockers: ...`
54
54
  - `Next mandatory slice: ...`
55
- - `Stale or conflicting canonical state: ...`
55
+ - `Stale or conflicting canonical state: yes/no - ...`
56
56
  - `Plan truthfully captures remaining slice backlog: yes/no - ...`
57
57
 
58
+ For every yes/no audit field, start the value with exactly `yes` or `no`. Do not substitute `none`, `clear`, `fresh`, `unknown`, or other synonyms. For example: `Stale or conflicting canonical state: no - canonical state remains aligned with the active slice and backlog.`
59
+
58
60
  If the tracked and unignored worktree is dirty after the latest committed slice, report that as a blocker to next-slice progression, do not recommend a new next slice, and point the workflow back to reconciliation of the latest slice.
59
61
 
60
62
  If no remaining gap is evident, say so plainly instead of inventing one.
@@ -468,13 +468,15 @@ async function refocusCompletionMission(
468
468
  deps: CompletionDriverDeps,
469
469
  advisoryStartupBrief?: Record<string, unknown>,
470
470
  ): Promise<void> {
471
- const requiredStopJudges = asNumber(snapshot.profile?.required_stop_judges) ?? 3;
471
+ const requiredStopJudges = asNumber(snapshot.profile?.required_stop_judges) ?? 2;
472
+ const stopAggregationPolicy = asString(snapshot.profile?.stop_aggregation_policy) ?? "unanimous-current-head-v1";
472
473
  const root = snapshot.files.root;
473
474
  const routing = deps.finalizeContextProposalAnalysis(analysis, [rawGoal, missionAnchor]);
474
475
  const docsSurfaces = asStringArray(snapshot.profile?.docs_surfaces);
475
476
  const nextProfile = buildProfileRecord({
476
477
  projectName: asString(snapshot.profile?.project_name) ?? path.basename(root),
477
478
  requiredStopJudges,
479
+ stopAggregationPolicy,
478
480
  priorityPolicyId: asString(snapshot.profile?.priority_policy_id) ?? "completion-default",
479
481
  docsSurfaces: docsSurfaces.length > 0 ? docsSurfaces : await detectDocsSurfaces(root),
480
482
  taskType: routing.taskType,
@@ -485,7 +487,7 @@ async function refocusCompletionMission(
485
487
  taskType: routing.taskType,
486
488
  evaluationProfile: routing.evaluationProfile,
487
489
  continuationReason: deps.buildContextProposalContinuationReason("User refocused workflow via /cook:", rawGoal, routing),
488
- }, advisoryStartupBrief),
490
+ }, advisoryStartupBrief, { requiredStopJudges }),
489
491
  remaining_stop_judges: requiredStopJudges,
490
492
  next_mandatory_action: "Reconcile canonical state from current repo truth for the refocused mission",
491
493
  };
@@ -262,6 +262,29 @@ function isCompletionWorkflowSessionTurn(snapshot: CompletionStateSnapshot | und
262
262
  return isCompletionDriverPromptTurn(snapshot, ctx) || isCookCommandTurn(ctx);
263
263
  }
264
264
 
265
+ function isOrdinaryMainChatTurnDuringActiveWorkflow(
266
+ snapshot: CompletionStateSnapshot | undefined,
267
+ ctx: { sessionManager?: any },
268
+ ): boolean {
269
+ if (!hasActiveWorkflowEntry(snapshot)) return false;
270
+ const latest = latestUserOrCustomTurnText(ctx);
271
+ if (!latest) return false;
272
+ if (isCookCommandTurn(ctx)) return false;
273
+ if (isCompletionDriverPromptTurn(snapshot, ctx)) return false;
274
+ return true;
275
+ }
276
+
277
+ function isCompletionRoleDispatchAllowedTurn(
278
+ snapshot: CompletionStateSnapshot | undefined,
279
+ ctx: { sessionManager?: any },
280
+ ): boolean {
281
+ if (hasCompletionRoutingActivation(snapshot)) return true;
282
+ if (!hasActiveWorkflowEntry(snapshot)) return false;
283
+ if (isCompletionWorkflowSessionTurn(snapshot, ctx)) return true;
284
+ if (isOrdinaryMainChatTurnDuringActiveWorkflow(snapshot, ctx)) return false;
285
+ return asString(snapshot?.state?.continuation_policy) === "continue";
286
+ }
287
+
265
288
  function shouldInjectCompletionWorkflowContext(snapshot: CompletionStateSnapshot | undefined, ctx: { sessionManager?: any }): boolean {
266
289
  return isCompletionWorkflowSessionTurn(snapshot, ctx);
267
290
  }
@@ -1081,7 +1104,7 @@ export default function completionExtension(pi: ExtensionAPI) {
1081
1104
  const snapshot = await loadCompletionSnapshot(cwd);
1082
1105
  const completionActive = Boolean(snapshot) && asString(snapshot?.state?.continuation_policy) !== "done";
1083
1106
  const root = snapshot?.files.root ?? findRepoRoot(cwd) ?? cwd;
1084
- const completionRoleDispatchAllowed = Boolean(role) || isCompletionWorkflowSessionTurn(snapshot, ctx);
1107
+ const completionRoleDispatchAllowed = Boolean(role) || isCompletionRoleDispatchAllowedTurn(snapshot, ctx);
1085
1108
  const reason = toolCallBlockReason({
1086
1109
  toolName: event.toolName,
1087
1110
  input: isRecord(event.input) ? event.input : undefined,
@@ -402,6 +402,8 @@ export function buildEvaluationRoleContextLines(
402
402
  `Canonical evaluation handoff for ${role}:`,
403
403
  `- task_type: ${deps.currentTaskType(snapshot) ?? "(missing)"}`,
404
404
  `- evaluation_profile: ${deps.currentEvaluationProfile(snapshot) ?? "(missing)"}`,
405
+ `- required_stop_judges: ${snapshot.profile?.required_stop_judges ?? "(missing)"}`,
406
+ `- stop_aggregation_policy: ${deps.asString(snapshot.profile?.stop_aggregation_policy) ?? "(missing)"}`,
405
407
  `- latest_completed_slice: ${deps.asString(snapshot.state?.latest_completed_slice) ?? "(none)"}`,
406
408
  `- active_slice_id: ${context.sliceId ?? "(none)"}`,
407
409
  `- active_slice_status: ${context.status ?? "(unknown)"}`,
@@ -115,7 +115,7 @@ function isPureNoneLike(value) {
115
115
  function isReviewerProceedToAuditorRoutingValue(value) {
116
116
  const raw = asString(value);
117
117
  if (!raw) return false;
118
- return /^none\s*;\s*proceed to completion-auditor(?:[\p{P}\s]*)$/iu.test(raw);
118
+ return /^none(?:\s*[,;:/-]\s*|\s+)proceed to (?:completion-)?auditor(?:[\p{P}\s]*)$/iu.test(raw);
119
119
  }
120
120
 
121
121
  function isReviewerNoFollowUpValue(value) {
@@ -8,6 +8,8 @@ import type { CompletionStateSnapshot, JsonRecord } from "./types";
8
8
  const PROTOCOL_ID = "completion";
9
9
  const DEFAULT_TASK_TYPE = "completion-workflow";
10
10
  const DEFAULT_EVALUATION_PROFILE = "completion-rubric-v1";
11
+ const DEFAULT_REQUIRED_STOP_JUDGES = 2;
12
+ const DEFAULT_STOP_AGGREGATION_POLICY = "unanimous-current-head-v1";
11
13
  const TRACKED_CONTRACT_FILES = [
12
14
  ".agent/README.md",
13
15
  ".agent/mission.md",
@@ -197,6 +199,7 @@ async function detectVerifierCommand(root: string): Promise<string | undefined>
197
199
  const scripts = isRecord(packageJson.scripts) ? packageJson.scripts : undefined;
198
200
  const packageManager = asString((packageJson as JsonRecord).packageManager) ?? "";
199
201
  const runner = packageManager.startsWith("pnpm") ? "pnpm" : packageManager.startsWith("yarn") ? "yarn" : packageManager.startsWith("bun") ? "bun" : "npm";
202
+ if (scripts && asString(scripts["release-check"])) return runner === "npm" ? "npm run release-check >/dev/null" : `${runner} run release-check >/dev/null`;
200
203
  if (scripts && asString(scripts.test)) return runner === "npm" ? "npm test" : `${runner} test`;
201
204
  if (scripts && asString(scripts.check)) return runner === "npm" ? "npm run check" : `${runner} check`;
202
205
  if (scripts && asString(scripts.lint)) return runner === "npm" ? "npm run lint" : `${runner} lint`;
@@ -214,6 +217,7 @@ async function detectVerifierCommand(root: string): Promise<string | undefined>
214
217
  export function buildProfileRecord(args: {
215
218
  projectName: string;
216
219
  requiredStopJudges: number;
220
+ stopAggregationPolicy?: string;
217
221
  priorityPolicyId?: string;
218
222
  docsSurfaces: string[];
219
223
  taskType?: string;
@@ -224,6 +228,7 @@ export function buildProfileRecord(args: {
224
228
  protocol_id: PROTOCOL_ID,
225
229
  project_name: args.projectName,
226
230
  required_stop_judges: args.requiredStopJudges,
231
+ stop_aggregation_policy: args.stopAggregationPolicy ?? DEFAULT_STOP_AGGREGATION_POLICY,
227
232
  priority_policy_id: args.priorityPolicyId ?? "completion-default",
228
233
  task_type: args.taskType ?? DEFAULT_TASK_TYPE,
229
234
  evaluation_profile: args.evaluationProfile ?? DEFAULT_EVALUATION_PROFILE,
@@ -239,8 +244,10 @@ export function defaultState(
239
244
  missionAnchor: string,
240
245
  routing?: { taskType?: string; evaluationProfile?: string; continuationReason?: string },
241
246
  advisoryStartupBrief?: JsonRecord,
247
+ stopPolicy?: { requiredStopJudges?: number },
242
248
  ): JsonRecord {
243
249
  const confirmedAt = asString(advisoryStartupBrief?.captured_at) ?? new Date().toISOString();
250
+ const requiredStopJudges = stopPolicy?.requiredStopJudges ?? DEFAULT_REQUIRED_STOP_JUDGES;
244
251
  return {
245
252
  schema_version: 1,
246
253
  mission_anchor: missionAnchor,
@@ -264,7 +271,7 @@ export function defaultState(
264
271
  release_blocker_ids: [],
265
272
  next_mandatory_action: "Reconcile canonical state from current repo truth",
266
273
  next_mandatory_role: "completion-regrounder",
267
- remaining_stop_judges: 3,
274
+ remaining_stop_judges: requiredStopJudges,
268
275
  last_reground_at: null,
269
276
  last_auditor_verdict: null,
270
277
  contract_status: "unknown",
@@ -360,7 +367,7 @@ export function defaultVerificationEvidence(): JsonRecord {
360
367
  }
361
368
 
362
369
  export function buildAgentReadme(projectName: string): string {
363
- return `# Completion Control Plane\n\nThis repository uses the \`completion\` workflow for long-running coding tasks.\n\n## Canonical tracked contract files\n\n- \`.agent/README.md\`\n- \`.agent/mission.md\`\n- \`.agent/profile.json\`\n- \`.agent/verify_completion_stop.sh\`\n- \`.agent/verify_completion_control_plane.sh\`\n\n## Ignored canonical execution state\n\n- \`.agent/state.json\`\n- \`.agent/startup-brief.json\`\n- \`.agent/plan.json\`\n- \`.agent/active-slice.json\`\n- \`.agent/slice-history.jsonl\`\n- \`.agent/stop-check-history.jsonl\`\n- \`.agent/verification-evidence.json\`\n- \`.agent/*.log\`\n- \`.agent/tmp/\`\n\n\`.agent/startup-brief.json\` preserves the confirmed \`/cook\` startup intent as canonical intake for re-grounding. It does not replace \`.agent/plan.json\` or \`.agent/active-slice.json\`, which remain under regrounder authority.\n\n\`.agent/verification-evidence.json\` is the durable canonical record of deterministic verification for the selected slice or current HEAD. Recovery, review, audit, and stop-check reminder surfaces consume it instead of temp-only artifacts or conversational summaries when it is populated.\n\nThe source of truth for long-running completion work is canonical \`.agent/**\` state plus current repo truth.\n\nProject: ${projectName}\n`;
370
+ return `# Completion Control Plane\n\nThis repository uses the \`completion\` workflow for long-running coding tasks.\n\n## Canonical tracked contract files\n\n- \`.agent/README.md\`\n- \`.agent/mission.md\`\n- \`.agent/profile.json\`\n- \`.agent/verify_completion_stop.sh\`\n- \`.agent/verify_completion_control_plane.sh\`\n\n## Ignored canonical execution state\n\n- \`.agent/state.json\`\n- \`.agent/startup-brief.json\`\n- \`.agent/plan.json\`\n- \`.agent/active-slice.json\`\n- \`.agent/slice-history.jsonl\`\n- \`.agent/stop-check-history.jsonl\`\n- \`.agent/verification-evidence.json\`\n- \`.agent/*.log\`\n- \`.agent/tmp/\`\n\n\`.agent/profile.json\` carries the stop-wave defaults for this repo, including \`required_stop_judges\` and \`stop_aggregation_policy\`. The packaged default is \`required_stop_judges: 2\` plus \`stop_aggregation_policy: "${DEFAULT_STOP_AGGREGATION_POLICY}"\`.\n\n\`.agent/startup-brief.json\` preserves the confirmed \`/cook\` startup intent as canonical intake for re-grounding. It does not replace \`.agent/plan.json\` or \`.agent/active-slice.json\`, which remain under regrounder authority.\n\n\`.agent/verification-evidence.json\` is the durable canonical record of deterministic verification for the selected slice or current HEAD. Recovery, review, audit, and stop-check reminder surfaces consume it instead of temp-only artifacts or conversational summaries when it is populated.\n\nThe source of truth for long-running completion work is canonical \`.agent/**\` state plus current repo truth.\n\nProject: ${projectName}\n`;
364
371
  }
365
372
 
366
373
  export function buildMission(projectName: string, missionAnchor: string): string {
@@ -371,7 +378,114 @@ export function buildVerifyStopScript(verifierCommand?: string): string {
371
378
  const repoCheck = verifierCommand
372
379
  ? `echo "[completion] running repo-level verification: ${verifierCommand}"\n${verifierCommand}`
373
380
  : `echo "[completion] no repo-specific verifier auto-detected; control-plane verification only"`;
374
- return `#!/usr/bin/env bash\nset -euo pipefail\n\nbash .agent/verify_completion_control_plane.sh\n${repoCheck}\n`;
381
+ return `#!/usr/bin/env bash
382
+ set -euo pipefail
383
+
384
+ # .agent/verification-evidence.json parity is enforced by .agent/verify_completion_control_plane.sh before stop-wave policy checks.
385
+ bash .agent/verify_completion_control_plane.sh
386
+
387
+ CURRENT_HEAD="$(git rev-parse HEAD 2>/dev/null || true)"
388
+ export COMPLETION_STOP_HEAD="$CURRENT_HEAD"
389
+
390
+ node <<'NODE'
391
+ const fs = require('node:fs');
392
+ const { spawnSync } = require('node:child_process');
393
+
394
+ function fail(message) {
395
+ console.error(message);
396
+ process.exit(1);
397
+ }
398
+
399
+ function readJson(file) {
400
+ try {
401
+ return JSON.parse(fs.readFileSync(file, 'utf8'));
402
+ } catch (error) {
403
+ fail('Failed to read ' + file + ': ' + error.message);
404
+ }
405
+ }
406
+
407
+ function asString(value) {
408
+ return typeof value === 'string' && value.trim().length > 0 ? value.trim() : undefined;
409
+ }
410
+
411
+ function asNumber(value) {
412
+ return typeof value === 'number' && Number.isFinite(value) ? value : undefined;
413
+ }
414
+
415
+ function gitHeadSha() {
416
+ const result = spawnSync('git', ['rev-parse', 'HEAD'], { encoding: 'utf8', stdio: ['ignore', 'pipe', 'pipe'] });
417
+ if (result.status !== 0) {
418
+ fail('git rev-parse HEAD failed: ' + (asString(result.stderr) ?? 'unknown git error'));
419
+ }
420
+ return asString(result.stdout);
421
+ }
422
+
423
+ const profile = readJson('.agent/profile.json');
424
+ const state = readJson('.agent/state.json');
425
+ const requiredStopJudges = asNumber(profile.required_stop_judges);
426
+ if (!Number.isInteger(requiredStopJudges) || requiredStopJudges < 1) {
427
+ fail('.agent/profile.json required_stop_judges must be a positive integer before stop verification can run.');
428
+ }
429
+ const stopAggregationPolicy = asString(profile.stop_aggregation_policy);
430
+ if (stopAggregationPolicy !== '${DEFAULT_STOP_AGGREGATION_POLICY}') {
431
+ fail('.agent/profile.json stop_aggregation_policy must be ${DEFAULT_STOP_AGGREGATION_POLICY} before stop verification can run.');
432
+ }
433
+
434
+ const currentPhase = asString(state.current_phase) ?? 'unknown';
435
+ const stopWaveActive = currentPhase === 'stop_wave' || currentPhase === 'done';
436
+ const rawHistory = fs.existsSync('.agent/stop-check-history.jsonl') ? fs.readFileSync('.agent/stop-check-history.jsonl', 'utf8') : '';
437
+ const seededHeadSha = asString(process.env.COMPLETION_STOP_HEAD);
438
+ if (!seededHeadSha && !stopWaveActive && rawHistory.trim().length === 0) {
439
+ console.log('[completion] current phase ' + currentPhase + ' is not stop_wave/done; current-HEAD stop judgments are not required yet');
440
+ process.exit(0);
441
+ }
442
+ const headSha = seededHeadSha ?? gitHeadSha();
443
+ const currentHeadJudgments = [];
444
+ for (const [index, rawLine] of rawHistory.split(/\\r?\\n/).entries()) {
445
+ const line = rawLine.trim();
446
+ if (!line) continue;
447
+ let parsed;
448
+ try {
449
+ parsed = JSON.parse(line);
450
+ } catch (error) {
451
+ fail('.agent/stop-check-history.jsonl contains invalid JSON at line ' + (index + 1) + ': ' + error.message);
452
+ }
453
+ if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) {
454
+ fail('.agent/stop-check-history.jsonl line ' + (index + 1) + ' must be a JSON object judgment record.');
455
+ }
456
+ if (parsed.type !== 'judgment') continue;
457
+ if (asString(parsed.head_sha) !== headSha) continue;
458
+ if (typeof parsed.can_stop !== 'boolean') {
459
+ fail('Current-HEAD judgment at line ' + (index + 1) + ' must carry boolean can_stop.');
460
+ }
461
+ const blockerCount = asNumber(parsed.blocker_count);
462
+ const highValueGapCount = asNumber(parsed.high_value_gap_count);
463
+ if (blockerCount === undefined || highValueGapCount === undefined) {
464
+ fail('Current-HEAD judgment at line ' + (index + 1) + ' must carry numeric blocker_count and high_value_gap_count.');
465
+ }
466
+ if (parsed.can_stop === false) {
467
+ fail('Current HEAD has a can_stop=no judgment at line ' + (index + 1) + '.');
468
+ }
469
+ if (blockerCount > 0 || highValueGapCount > 0) {
470
+ fail('Current-HEAD judgment at line ' + (index + 1) + ' cannot pass stop verification while blocker_count or high_value_gap_count is non-zero.');
471
+ }
472
+ currentHeadJudgments.push(parsed);
473
+ }
474
+
475
+ if (!stopWaveActive && currentHeadJudgments.length === 0) {
476
+ console.log('[completion] current phase ' + currentPhase + ' is not stop_wave/done; current-HEAD stop judgments are not required yet');
477
+ process.exit(0);
478
+ }
479
+
480
+ if (currentHeadJudgments.length < requiredStopJudges) {
481
+ fail('Need ' + requiredStopJudges + ' valid current-HEAD judgments for HEAD ' + headSha + '; found ' + currentHeadJudgments.length + '.');
482
+ }
483
+
484
+ console.log('[completion] stop-wave policy ${DEFAULT_STOP_AGGREGATION_POLICY} satisfied for HEAD ' + headSha + ' with ' + currentHeadJudgments.length + ' valid current-HEAD judgments');
485
+ NODE
486
+
487
+ ${repoCheck}
488
+ `;
375
489
  }
376
490
 
377
491
  export function buildVerifyControlPlaneScript(): string {
@@ -644,18 +758,20 @@ export async function scaffoldCompletionFiles(
644
758
  const projectName = path.basename(root);
645
759
  const docsSurfaces = await detectDocsSurfaces(root);
646
760
  const verifierCommand = await detectVerifierCommand(root);
761
+ const requiredStopJudges = DEFAULT_REQUIRED_STOP_JUDGES;
762
+ const stopAggregationPolicy = DEFAULT_STOP_AGGREGATION_POLICY;
647
763
  const trackedFiles: Array<{ path: string; content: string; executable?: boolean }> = [
648
764
  { path: path.join(files.agentDir, "README.md"), content: buildAgentReadme(projectName) },
649
765
  { path: path.join(files.agentDir, "mission.md"), content: buildMission(projectName, missionAnchor) },
650
766
  {
651
767
  path: files.profilePath,
652
- content: `${JSON.stringify(buildProfileRecord({ projectName, requiredStopJudges: 3, docsSurfaces, taskType: options?.analysis?.taskType, evaluationProfile: options?.analysis?.evaluationProfile }), null, 2)}\n`,
768
+ content: `${JSON.stringify(buildProfileRecord({ projectName, requiredStopJudges, stopAggregationPolicy, docsSurfaces, taskType: options?.analysis?.taskType, evaluationProfile: options?.analysis?.evaluationProfile }), null, 2)}\n`,
653
769
  },
654
770
  { path: path.join(files.agentDir, "verify_completion_stop.sh"), content: buildVerifyStopScript(verifierCommand), executable: true },
655
771
  { path: path.join(files.agentDir, "verify_completion_control_plane.sh"), content: buildVerifyControlPlaneScript(), executable: true },
656
772
  {
657
773
  path: files.statePath,
658
- content: `${JSON.stringify(defaultState(missionAnchor, { taskType: options?.analysis?.taskType, evaluationProfile: options?.analysis?.evaluationProfile, continuationReason: options?.continuationReason }, options?.advisoryStartupBrief), null, 2)}\n`,
774
+ content: `${JSON.stringify(defaultState(missionAnchor, { taskType: options?.analysis?.taskType, evaluationProfile: options?.analysis?.evaluationProfile, continuationReason: options?.continuationReason }, options?.advisoryStartupBrief, { requiredStopJudges }), null, 2)}\n`,
659
775
  },
660
776
  {
661
777
  path: files.startupBriefPath,
@@ -423,6 +423,8 @@ export function buildCompletionStatusSurface(
423
423
  const releaseBlockerCount = asNumber(snapshot.state?.remaining_release_blockers) ?? 0;
424
424
  const highValueGapCount = asNumber(snapshot.state?.remaining_high_value_gaps) ?? 0;
425
425
  const remainingStopJudgeCount = asNumber(snapshot.state?.remaining_stop_judges) ?? 0;
426
+ const requiredStopJudges = asNumber(snapshot.profile?.required_stop_judges) ?? 0;
427
+ const stopAggregationPolicy = asString(snapshot.profile?.stop_aggregation_policy);
426
428
  const activeRole = liveActivity?.status === "running" ? liveActivity.role : undefined;
427
429
  const liveSignal = liveActivitySignal(liveActivity);
428
430
  const livePreview = livePreviewForStatus(liveActivity);
@@ -469,6 +471,8 @@ export function buildCompletionStatusSurface(
469
471
  releaseBlockerCount,
470
472
  highValueGapCount,
471
473
  remainingStopJudgeCount,
474
+ requiredStopJudges,
475
+ stopAggregationPolicy,
472
476
  activeRole,
473
477
  livePreview,
474
478
  liveState: liveSignal?.state,
@@ -74,6 +74,8 @@ export type CompletionStatusSurface = {
74
74
  releaseBlockerCount?: number;
75
75
  highValueGapCount?: number;
76
76
  remainingStopJudgeCount?: number;
77
+ requiredStopJudges?: number;
78
+ stopAggregationPolicy?: string;
77
79
  activeRole?: string;
78
80
  livePreview?: string;
79
81
  liveState?: "active" | "waiting" | "stalled";
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@linimin/pi-letscook",
3
- "version": "0.1.70",
3
+ "version": "0.1.72",
4
4
  "description": "Pi package for long-running completion workflows with canonical .agent state, role-based subagents, continuity, and verification helpers.",
5
5
  "license": "MIT",
6
6
  "private": false,
@@ -41,6 +41,7 @@
41
41
  "refocus-test": "bash ./scripts/refocus-test.sh",
42
42
  "context-proposal-test": "bash ./scripts/context-proposal-test.sh",
43
43
  "observability-status-test": "bash ./scripts/observability-status-test.sh",
44
+ "completion-role-gating-test": "bash ./scripts/completion-role-gating-test.sh",
44
45
  "evaluator-calibration-test": "bash ./scripts/evaluator-calibration-test.sh",
45
46
  "rubric-contract-test": "bash ./scripts/rubric-contract-test.sh",
46
47
  "release-check": "bash ./scripts/release-check.sh"
@@ -261,7 +261,7 @@ state = {
261
261
  'release_blocker_ids': [],
262
262
  'next_mandatory_action': 'Implement selected slice active-slice-fixture.',
263
263
  'next_mandatory_role': 'completion-implementer',
264
- 'remaining_stop_judges': 3,
264
+ 'remaining_stop_judges': 2,
265
265
  'last_reground_at': '2026-05-03T00:00:00Z',
266
266
  'last_auditor_verdict': None,
267
267
  'contract_status': 'selected_slice_pending_implementation',
@@ -319,7 +319,7 @@ state = {
319
319
  'release_blocker_ids': [],
320
320
  'next_mandatory_action': 'Implement selected slice evidence-fixture.',
321
321
  'next_mandatory_role': 'completion-implementer',
322
- 'remaining_stop_judges': 3,
322
+ 'remaining_stop_judges': 2,
323
323
  'last_reground_at': '2026-05-03T00:00:00Z',
324
324
  'last_auditor_verdict': None,
325
325
  'contract_status': 'selected_slice_pending_implementation',
@@ -0,0 +1,47 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+
4
+ ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
5
+ cd "$ROOT"
6
+
7
+ node <<'NODE'
8
+ const fs = require('node:fs');
9
+
10
+ const read = (file) => fs.readFileSync(file, 'utf8');
11
+ const assertIncludes = (file, snippet) => {
12
+ const text = read(file);
13
+ if (!text.includes(snippet)) {
14
+ throw new Error(`${file} is missing required completion-role gating text: ${snippet}`);
15
+ }
16
+ };
17
+ const assertNotIncludes = (file, snippet) => {
18
+ const text = read(file);
19
+ if (text.includes(snippet)) {
20
+ throw new Error(`${file} still contains stale completion-role gating text: ${snippet}`);
21
+ }
22
+ };
23
+
24
+ assertIncludes('extensions/completion/index.ts', 'function isOrdinaryMainChatTurnDuringActiveWorkflow(');
25
+ assertIncludes('extensions/completion/index.ts', 'function isCompletionRoleDispatchAllowedTurn(');
26
+ assertIncludes('extensions/completion/index.ts', 'if (isOrdinaryMainChatTurnDuringActiveWorkflow(snapshot, ctx)) return false;');
27
+ assertIncludes('extensions/completion/index.ts', 'return asString(snapshot?.state?.continuation_policy) === "continue";');
28
+ assertIncludes('extensions/completion/index.ts', 'const completionRoleDispatchAllowed = Boolean(role) || isCompletionRoleDispatchAllowedTurn(snapshot, ctx);');
29
+ assertIncludes('extensions/completion/index.ts', 'if (isCookCommandTurn(ctx)) return false;');
30
+ assertIncludes('extensions/completion/index.ts', 'if (isCompletionDriverPromptTurn(snapshot, ctx)) return false;');
31
+ assertIncludes('extensions/completion/policy-guards.ts', 'return "completion_role may only be used from an active /cook workflow session.";');
32
+ assertIncludes('CHANGELOG.md', 'fixed completion-role continuation gating so an already-active `/cook` workflow with `continuation_policy: continue` can keep dispatching mandatory follow-up roles');
33
+
34
+ assertNotIncludes(
35
+ 'extensions/completion/index.ts',
36
+ 'const completionRoleDispatchAllowed = Boolean(role) || isCompletionWorkflowSessionTurn(snapshot, ctx);',
37
+ );
38
+
39
+ const indexText = read('extensions/completion/index.ts');
40
+ const ordinaryGuardIndex = indexText.indexOf('if (isOrdinaryMainChatTurnDuringActiveWorkflow(snapshot, ctx)) return false;');
41
+ const continueFallbackIndex = indexText.indexOf('return asString(snapshot?.state?.continuation_policy) === "continue";');
42
+ if (ordinaryGuardIndex === -1 || continueFallbackIndex === -1 || ordinaryGuardIndex > continueFallbackIndex) {
43
+ throw new Error('extensions/completion/index.ts must reject ordinary main-chat turns before allowing the continuation_policy=continue fallback.');
44
+ }
45
+ NODE
46
+
47
+ echo "completion-role gating test passed"
@@ -360,6 +360,8 @@ proposal = json.loads(Path(sys.argv[1]).read_text())
360
360
  assert mission in mission_text, '.agent/mission.md did not record the explicit-handoff mission anchor'
361
361
  assert profile['task_type'] == expected_task_type, 'profile.json task_type mismatch after explicit-handoff bootstrap'
362
362
  assert profile['evaluation_profile'] == expected_eval_profile, 'profile.json evaluation_profile mismatch after explicit-handoff bootstrap'
363
+ assert profile['required_stop_judges'] == 2, 'profile.json required_stop_judges mismatch after explicit-handoff bootstrap'
364
+ assert profile['stop_aggregation_policy'] == 'unanimous-current-head-v1', 'profile.json stop_aggregation_policy mismatch after explicit-handoff bootstrap'
363
365
  assert state['mission_anchor'] == mission, 'state.json mission_anchor mismatch after explicit-handoff bootstrap'
364
366
  assert state['task_type'] == expected_task_type, 'state.json task_type mismatch after explicit-handoff bootstrap'
365
367
  assert state['evaluation_profile'] == expected_eval_profile, 'state.json evaluation_profile mismatch after explicit-handoff bootstrap'
@@ -386,6 +388,7 @@ assert proposal['source'] == 'handoff_capsule', 'explicit startup proposal snaps
386
388
  assert proposal['analysis']['taskType'] == expected_task_type, 'explicit startup proposal snapshot should expose task_type hints separately'
387
389
  assert proposal['analysis']['evaluationProfile'] == expected_eval_profile, 'explicit startup proposal snapshot should expose evaluation_profile hints separately'
388
390
  assert state['current_phase'] == 'reground', 'state.json current_phase should start at reground after explicit-handoff bootstrap'
391
+ assert state['remaining_stop_judges'] == 2, 'state.json remaining_stop_judges should seed from the profile stop policy after explicit-handoff bootstrap'
389
392
  assert state['next_mandatory_role'] == 'completion-regrounder', 'next_mandatory_role should start at completion-regrounder after explicit-handoff bootstrap'
390
393
  assert state['continuation_reason'].startswith('User started workflow via /cook:'), 'initial startup should record the accepted startup routing in continuation_reason'
391
394
  assert 'task_type=completion-workflow' in state['continuation_reason'], 'initial startup should persist the selected task_type in continuation_reason'
@@ -888,6 +891,8 @@ proposal = json.loads(Path(sys.argv[1]).read_text())
888
891
  assert mission in mission_text, '.agent/mission.md did not update to the next-round explicit-handoff mission anchor'
889
892
  assert profile['task_type'] == expected_task_type, 'profile.json task_type mismatch after next-round explicit handoff startup'
890
893
  assert profile['evaluation_profile'] == expected_eval_profile, 'profile.json evaluation_profile mismatch after next-round explicit handoff startup'
894
+ assert profile['required_stop_judges'] == 2, 'profile.json required_stop_judges mismatch after next-round explicit handoff startup'
895
+ assert profile['stop_aggregation_policy'] == 'unanimous-current-head-v1', 'profile.json stop_aggregation_policy mismatch after next-round explicit handoff startup'
891
896
  assert state['mission_anchor'] == mission, 'state.json mission_anchor mismatch after starting the next workflow round from explicit handoff'
892
897
  assert state['task_type'] == expected_task_type, 'state.json task_type mismatch after starting the next workflow round from explicit handoff'
893
898
  assert state['evaluation_profile'] == expected_eval_profile, 'state.json evaluation_profile mismatch after starting the next workflow round from explicit handoff'
@@ -902,6 +907,7 @@ assert active['evaluation_profile'] == expected_eval_profile, 'active-slice.json
902
907
  assert proposal['mission'] == mission, 'next-round explicit handoff proposal snapshot should preserve the handoff mission anchor'
903
908
  assert proposal['source'] == 'handoff_capsule', 'next-round explicit handoff proposal snapshot should record the handoff capsule source'
904
909
  assert state['current_phase'] == 'reground', 'state.json current_phase should reset to reground for the next workflow round'
910
+ assert state['remaining_stop_judges'] == 2, 'state.json remaining_stop_judges should reset from the profile stop policy for the next workflow round'
905
911
  assert state['continuation_policy'] == 'continue', 'continuation_policy should reset to continue for the next workflow round'
906
912
  assert state['requires_reground'] is True, 'requires_reground should reset to true for the next workflow round'
907
913
  assert state['project_done'] is False, 'project_done should reset to false for the next workflow round'
@@ -22,13 +22,16 @@ const assertIncludes = (file, snippet) => {
22
22
 
23
23
  assertIncludes('package.json', '"evaluator-calibration-test": "bash ./scripts/evaluator-calibration-test.sh"');
24
24
  assertIncludes('scripts/release-check.sh', 'npm run evaluator-calibration-test');
25
- assertIncludes('.agent/verify_completion_stop.sh', 'npm run evaluator-calibration-test >/dev/null');
25
+ assertIncludes('.agent/verify_completion_stop.sh', 'stop_aggregation_policy must be unanimous-current-head-v1');
26
+ assertIncludes('.agent/verify_completion_stop.sh', 'Current HEAD has a can_stop=no judgment');
27
+ assertIncludes('.agent/verify_completion_stop.sh', 'valid current-HEAD judgments');
28
+ assertIncludes('.agent/verify_completion_stop.sh', 'npm run release-check >/dev/null');
26
29
  assertIncludes('README.md', 'Evaluator calibration now also fails closed on semantically lenient but well-formed reports.');
27
30
  assertIncludes('README.md', '`npm run evaluator-calibration-test` drives the packaged transcription path through reviewer yes-with-follow-up, auditor open-contracts-with-`Next mandatory slice: none`, and stop-judge yes-with-open-contracts fixtures while still accepting truthful passing reports.');
28
- assertIncludes('README.md', 'It also rejects the reproducible `none; ...` bypass family for reviewer follow-up, auditor worktree blockers, and stop-judge open-contract reporting, while still accepting only the exact reviewer routing text `Smallest follow-up slice: none; proceed to completion-auditor.` with terminal punctuation or whitespace only.');
31
+ assertIncludes('README.md', 'It also rejects the reproducible `none; ...` bypass family for reviewer follow-up, auditor worktree blockers, and stop-judge open-contract reporting, while still accepting the reviewer routing forms `Smallest follow-up slice: none; proceed to completion-auditor.`, `Smallest follow-up slice: none, proceed to completion-auditor.`, and `Smallest follow-up slice: none - proceed to auditor.` with terminal punctuation or whitespace only.');
29
32
  assertIncludes('README.md', 'includes deterministic active-slice contract coverage plus observability coverage, evaluator calibration, and the rubric-contract regression');
30
33
  assertIncludes('CHANGELOG.md', 'added evaluator calibration fixtures for semantically lenient but well-formed reviewer/auditor/stop-judge reports');
31
- assertIncludes('CHANGELOG.md', 'tightened the reproducible `none; ...` reviewer/auditor/stop-judge bypass checks while still accepting only the exact reviewer `none; proceed to completion-auditor` routing allowance with terminal punctuation or whitespace only');
34
+ assertIncludes('CHANGELOG.md', 'relaxed reviewer no-follow-up routing parsing so `Acceptable as-is: yes` now also accepts `none, proceed to completion-auditor` and `none - proceed to auditor` in addition to the original exact allowance');
32
35
  assertIncludes('CHANGELOG.md', 'wired `npm run evaluator-calibration-test` into `npm run release-check` and `.agent/verify_completion_stop.sh`');
33
36
  assertIncludes('CHANGELOG.md', 'fixed the smoke auto-resume prompt regression');
34
37
  assertIncludes('extensions/completion/role-reporting.js', 'Reviewer output cannot mark \'Acceptable as-is: yes\' while naming a follow-up slice other than none.');
@@ -68,6 +71,28 @@ Findings: none.
68
71
  Acceptable as-is: yes
69
72
  Smallest follow-up slice: none; proceed to completion-auditor.`;
70
73
 
74
+ const reviewerCommaRoutingPass = `MISSION ANCHOR: test mission
75
+ Remaining contract IDs: TEST-CONTRACT
76
+ Rubric:
77
+ - Contract coverage: pass - Locked acceptance criteria match the committed slice.
78
+ - Correctness risk: pass - No blocking regression is evident.
79
+ - Verification evidence: pass - Deterministic proof was rerun successfully.
80
+ - Docs/state parity: pass - Docs and canonical state are aligned.
81
+ Findings: none.
82
+ Acceptable as-is: yes
83
+ Smallest follow-up slice: none, proceed to completion-auditor.`;
84
+
85
+ const reviewerShortAuditorRoutingPass = `MISSION ANCHOR: test mission
86
+ Remaining contract IDs: TEST-CONTRACT
87
+ Rubric:
88
+ - Contract coverage: pass - Locked acceptance criteria match the committed slice.
89
+ - Correctness risk: pass - No blocking regression is evident.
90
+ - Verification evidence: pass - Deterministic proof was rerun successfully.
91
+ - Docs/state parity: pass - Docs and canonical state are aligned.
92
+ Findings: none.
93
+ Acceptable as-is: yes
94
+ Smallest follow-up slice: none - proceed to auditor.`;
95
+
71
96
  const reviewerLenient = `MISSION ANCHOR: test mission
72
97
  Remaining contract IDs: TEST-CONTRACT
73
98
  Rubric:
@@ -214,6 +239,44 @@ Brief justification: This should be rejected because remaining contracts still e
214
239
  assert(reviewed.appended.includes('reviewed:slice-review'), 'reviewer passing fixture should append a reviewed record');
215
240
  assert(readJsonl(snapshotFiles.sliceHistoryPath).length === 1, 'reviewer passing fixture should create one slice-history record');
216
241
 
242
+ const reviewerCommaRoutingReviewed = await transcribeCanonicalRoleReport({
243
+ role: 'completion-reviewer',
244
+ output: reviewerCommaRoutingPass,
245
+ reportFields: parseReportFields(reviewerCommaRoutingPass),
246
+ snapshotFiles,
247
+ headSha: '1212121212121212121212121212121212121212',
248
+ sliceId: 'slice-review-comma',
249
+ recordedAt: 12,
250
+ });
251
+ assert(
252
+ reviewerCommaRoutingReviewed.errors.length === 0,
253
+ `reviewer comma-routing fixture should transcribe cleanly: ${reviewerCommaRoutingReviewed.errors.join(' | ')}`,
254
+ );
255
+ assert(
256
+ reviewerCommaRoutingReviewed.appended.includes('reviewed:slice-review-comma'),
257
+ 'reviewer comma-routing fixture should append a reviewed record',
258
+ );
259
+ assert(readJsonl(snapshotFiles.sliceHistoryPath).length === 2, 'reviewer comma-routing fixture should append a second slice-history record');
260
+
261
+ const reviewerShortAuditorRoutingReviewed = await transcribeCanonicalRoleReport({
262
+ role: 'completion-reviewer',
263
+ output: reviewerShortAuditorRoutingPass,
264
+ reportFields: parseReportFields(reviewerShortAuditorRoutingPass),
265
+ snapshotFiles,
266
+ headSha: '1313131313131313131313131313131313131313',
267
+ sliceId: 'slice-review-short-auditor',
268
+ recordedAt: 13,
269
+ });
270
+ assert(
271
+ reviewerShortAuditorRoutingReviewed.errors.length === 0,
272
+ `reviewer short-auditor-routing fixture should transcribe cleanly: ${reviewerShortAuditorRoutingReviewed.errors.join(' | ')}`,
273
+ );
274
+ assert(
275
+ reviewerShortAuditorRoutingReviewed.appended.includes('reviewed:slice-review-short-auditor'),
276
+ 'reviewer short-auditor-routing fixture should append a reviewed record',
277
+ );
278
+ assert(readJsonl(snapshotFiles.sliceHistoryPath).length === 3, 'reviewer short-auditor-routing fixture should append a third slice-history record');
279
+
217
280
  const reviewerRejected = await transcribeCanonicalRoleReport({
218
281
  role: 'completion-reviewer',
219
282
  output: reviewerLenient,
@@ -227,7 +290,7 @@ Brief justification: This should be rejected because remaining contracts still e
227
290
  reviewerRejected.errors.some((error) => error.includes('follow-up slice other than none')),
228
291
  `reviewer lenient fixture should be rejected for a yes verdict with a follow-up slice: ${reviewerRejected.errors.join(' | ')}`,
229
292
  );
230
- assert(readJsonl(snapshotFiles.sliceHistoryPath).length === 1, 'rejected reviewer fixture must not append history');
293
+ assert(readJsonl(snapshotFiles.sliceHistoryPath).length === 3, 'rejected reviewer fixture must not append history');
231
294
 
232
295
  const reviewerNonePrefixedRejected = await transcribeCanonicalRoleReport({
233
296
  role: 'completion-reviewer',
@@ -242,7 +305,7 @@ Brief justification: This should be rejected because remaining contracts still e
242
305
  reviewerNonePrefixedRejected.errors.some((error) => error.includes('follow-up slice other than none')),
243
306
  `reviewer none-prefixed lenient fixture should be rejected for a yes verdict with contradictory routing text: ${reviewerNonePrefixedRejected.errors.join(' | ')}`,
244
307
  );
245
- assert(readJsonl(snapshotFiles.sliceHistoryPath).length === 1, 'rejected none-prefixed reviewer fixture must not append history');
308
+ assert(readJsonl(snapshotFiles.sliceHistoryPath).length === 3, 'rejected none-prefixed reviewer fixture must not append history');
246
309
 
247
310
  const reviewerTrailingTextAfterRoutingRejected = await transcribeCanonicalRoleReport({
248
311
  role: 'completion-reviewer',
@@ -255,9 +318,9 @@ Brief justification: This should be rejected because remaining contracts still e
255
318
  });
256
319
  assert(
257
320
  reviewerTrailingTextAfterRoutingRejected.errors.some((error) => error.includes('follow-up slice other than none')),
258
- `reviewer routing-trailing-text fixture should be rejected for extra text after the exact completion-auditor allowance: ${reviewerTrailingTextAfterRoutingRejected.errors.join(' | ')}`,
321
+ `reviewer routing-trailing-text fixture should be rejected for extra text after the allowed completion-auditor routing forms: ${reviewerTrailingTextAfterRoutingRejected.errors.join(' | ')}`,
259
322
  );
260
- assert(readJsonl(snapshotFiles.sliceHistoryPath).length === 1, 'rejected reviewer routing-trailing-text fixture must not append history');
323
+ assert(readJsonl(snapshotFiles.sliceHistoryPath).length === 3, 'rejected reviewer routing-trailing-text fixture must not append history');
261
324
 
262
325
  const audited = await transcribeCanonicalRoleReport({
263
326
  role: 'completion-auditor',
@@ -270,7 +333,7 @@ Brief justification: This should be rejected because remaining contracts still e
270
333
  });
271
334
  assert(audited.errors.length === 0, `auditor passing fixture should transcribe cleanly: ${audited.errors.join(' | ')}`);
272
335
  assert(audited.appended.includes('audited:slice-audit'), 'auditor passing fixture should append an audited record');
273
- assert(readJsonl(snapshotFiles.sliceHistoryPath).length === 2, 'auditor passing fixture should append a second slice-history record');
336
+ assert(readJsonl(snapshotFiles.sliceHistoryPath).length === 4, 'auditor passing fixture should append the next slice-history record');
274
337
 
275
338
  const auditorRejected = await transcribeCanonicalRoleReport({
276
339
  role: 'completion-auditor',
@@ -289,7 +352,7 @@ Brief justification: This should be rejected because remaining contracts still e
289
352
  auditorRejected.errors.some((error) => error.includes("Next mandatory slice") && error.includes('none')),
290
353
  `auditor lenient fixture should reject open-work reports with no next mandatory slice: ${auditorRejected.errors.join(' | ')}`,
291
354
  );
292
- assert(readJsonl(snapshotFiles.sliceHistoryPath).length === 2, 'rejected auditor fixture must not append history');
355
+ assert(readJsonl(snapshotFiles.sliceHistoryPath).length === 4, 'rejected auditor fixture must not append history');
293
356
 
294
357
  const auditorNonePrefixedRejected = await transcribeCanonicalRoleReport({
295
358
  role: 'completion-auditor',
@@ -304,7 +367,7 @@ Brief justification: This should be rejected because remaining contracts still e
304
367
  auditorNonePrefixedRejected.errors.some((error) => error.includes('listing worktree blockers')),
305
368
  `auditor none-prefixed lenient fixture should reject clean-yes reports that smuggle blockers behind none: ${auditorNonePrefixedRejected.errors.join(' | ')}`,
306
369
  );
307
- assert(readJsonl(snapshotFiles.sliceHistoryPath).length === 2, 'rejected none-prefixed auditor fixture must not append history');
370
+ assert(readJsonl(snapshotFiles.sliceHistoryPath).length === 4, 'rejected none-prefixed auditor fixture must not append history');
308
371
 
309
372
  const judged = await transcribeCanonicalRoleReport({
310
373
  role: 'completion-stop-judge',
@@ -34,6 +34,8 @@ elif mode == 'static':
34
34
  assert data['releaseBlockerCount'] == 1, data
35
35
  assert data['highValueGapCount'] == 4, data
36
36
  assert data['remainingStopJudgeCount'] == 2, data
37
+ assert data['requiredStopJudges'] == 2, data
38
+ assert data['stopAggregationPolicy'] == 'unanimous-current-head-v1', data
37
39
  assert not data.get('statusText'), data
38
40
  widget = data['widgetLines']
39
41
  assert 'phase: implement' in widget, widget
@@ -55,6 +57,8 @@ elif mode == 'live':
55
57
  'tool activity separated from role judgment',
56
58
  'waiting threshold uses updatedAt timestamps',
57
59
  ], data
60
+ assert data['requiredStopJudges'] == 2, data
61
+ assert data['stopAggregationPolicy'] == 'unanimous-current-head-v1', data
58
62
  assert not data.get('statusText'), data
59
63
  widget = data['widgetLines']
60
64
  assert widget == [], widget
@@ -98,7 +102,8 @@ cat > .agent/profile.json <<'JSON'
98
102
  "schema_version": 1,
99
103
  "protocol_id": "completion",
100
104
  "project_name": "status-surface-fixture",
101
- "required_stop_judges": 3,
105
+ "required_stop_judges": 2,
106
+ "stop_aggregation_policy": "unanimous-current-head-v1",
102
107
  "priority_policy_id": "completion-default",
103
108
  "docs_surfaces": ["README.md"]
104
109
  }
@@ -5,7 +5,7 @@ ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
5
5
  cd "$ROOT"
6
6
  export PI_COMPLETION_RUNNING_RELEASE_CHECK=1
7
7
 
8
- echo "[release-check] running control-plane validation, tracked .agent contract coverage, slice-surface parity, explicit-/cook parity, startup/refocus/context regressions, canonical evidence artifact, active-slice contract, observability, legacy cleanup, evaluator calibration, and rubric contract coverage"
8
+ echo "[release-check] running control-plane validation, tracked .agent contract coverage, slice-surface parity, explicit-/cook parity, startup/refocus/context regressions, canonical evidence artifact, active-slice contract, observability, completion-role gating, legacy cleanup, evaluator calibration, and rubric contract coverage"
9
9
  bash .agent/verify_completion_control_plane.sh
10
10
  git ls-files --error-unmatch .agent/README.md .agent/mission.md .agent/profile.json .agent/verify_completion_stop.sh .agent/verify_completion_control_plane.sh >/dev/null
11
11
 
@@ -34,6 +34,8 @@ checks = {
34
34
  'description: "/cook workflow: start or replace workflow only from an explicit primary-agent handoff, or resume the current workflow from canonical state"',
35
35
  '"Do not call completion_role from ordinary chat; it is reserved for active /cook workflow sessions."',
36
36
  '`COMPLETION WORKFLOW DRIVER\\nStart or continue the completion workflow for this repo.',
37
+ 'function isCompletionRoleDispatchAllowedTurn(',
38
+ 'return asString(snapshot?.state?.continuation_policy) === "continue";',
37
39
  ],
38
40
  "extensions/completion/policy-guards.ts": [
39
41
  'return "completion_role may only be used from an active /cook workflow session.";',
@@ -81,6 +83,7 @@ bash ./scripts/role-runner-contract-test.sh
81
83
  bash ./scripts/canonical-evidence-artifact-test.sh
82
84
  bash ./scripts/active-slice-contract-test.sh
83
85
  npm run observability-status-test
86
+ npm run completion-role-gating-test
84
87
  bash ./scripts/legacy-cleanup-test.sh
85
88
  npm run evaluator-calibration-test
86
89
  npm run rubric-contract-test
@@ -81,6 +81,8 @@ assertIncludes('extensions/completion/prompt-surfaces.ts', '`Task type: ${args.t
81
81
  assertIncludes('extensions/completion/prompt-surfaces.ts', '`Evaluation profile: ${args.evaluationProfile ?? "(missing)"}`');
82
82
  assertIncludes('extensions/completion/prompt-surfaces.ts', '`- task_type: ${deps.currentTaskType(snapshot) ?? "(missing)"}`');
83
83
  assertIncludes('extensions/completion/prompt-surfaces.ts', '`- evaluation_profile: ${deps.currentEvaluationProfile(snapshot) ?? "(missing)"}`');
84
+ assertIncludes('extensions/completion/prompt-surfaces.ts', '`- required_stop_judges: ${snapshot.profile?.required_stop_judges ?? "(missing)"}`');
85
+ assertIncludes('extensions/completion/prompt-surfaces.ts', '`- stop_aggregation_policy: ${deps.asString(snapshot.profile?.stop_aggregation_policy) ?? "(missing)"}`');
84
86
  assertIncludes('extensions/completion/prompt-surfaces.ts', 'Canonical evaluation handoff for ${role}:');
85
87
  assertIncludes('extensions/completion/index.ts', 'buildEvaluationRoleReminderText(snapshot, nextRole)');
86
88
  assertIncludes('extensions/completion/role-runner.ts', 'import { parseReportFields, transcribeRoleOutput, type TranscriptionResult } from "./transcription";');
@@ -90,11 +92,16 @@ assertIncludes('extensions/completion/role-reporting.js', 'Reviewer output canno
90
92
  assertIncludes('extensions/completion/role-reporting.js', 'Auditor output must answer \'Tracked and unignored worktree is clean\' with yes or no.');
91
93
  assertIncludes('extensions/completion/role-reporting.js', 'Auditor output must answer \'Stale or conflicting canonical state\' with yes or no.');
92
94
  assertIncludes('extensions/completion/role-reporting.js', 'Auditor output must answer \'Plan truthfully captures remaining slice backlog\' with yes or no.');
95
+ assertIncludes('agents/completion-auditor.md', '`Stale or conflicting canonical state: yes/no - ...`');
96
+ assertIncludes('agents/completion-auditor.md', 'For every yes/no audit field, start the value with exactly `yes` or `no`.');
93
97
  assertIncludes('extensions/completion/role-reporting.js', 'Stop-judge output cannot mark \'Can the project stop now: yes\' when any rubric line is fail.');
94
98
  assertIncludes('extensions/completion/role-reporting.js', 'Stop-judge output must answer \'Docs/config/runbooks match shipped behavior\' with yes or no.');
95
99
  assertIncludes('extensions/completion/role-reporting.js', 'Stop-judge output must answer \'Tracked and unignored worktree is clean\' with yes or no.');
96
100
  assertIncludes('package.json', '"rubric-contract-test": "bash ./scripts/rubric-contract-test.sh"');
97
101
  assertIncludes('scripts/release-check.sh', 'npm run rubric-contract-test');
102
+ assertIncludes('.agent/verify_completion_stop.sh', 'stop_aggregation_policy must be unanimous-current-head-v1');
103
+ assertIncludes('.agent/verify_completion_stop.sh', 'Current HEAD has a can_stop=no judgment');
104
+ assertIncludes('.agent/verify_completion_stop.sh', 'valid current-HEAD judgments');
98
105
  assertIncludes('.agent/verify_completion_stop.sh', 'npm run release-check >/dev/null');
99
106
  NODE
100
107
 
@@ -34,6 +34,8 @@ This skill defines shared protocol facts only. Role-specific behavior belongs in
34
34
  - Before selecting or advancing to the next slice after a committed slice, the tracked and unignored worktree must be clean. If it is not clean, treat that dirty state as a blocker to next-slice progression and reopen or continue the latest slice for reconciliation.
35
35
  - Docs, config, and runbooks must stay truthful to shipped behavior.
36
36
  - `.agent/verify_completion_stop.sh` is a generated repo-level baseline verifier. Onboarding should create a working version from current repo truth rather than an unconditional failing placeholder.
37
+ - The packaged default stop policy is `required_stop_judges: 2` plus `stop_aggregation_policy: "unanimous-current-head-v1"` in `.agent/profile.json`.
38
+ - Under `unanimous-current-head-v1`, only current-HEAD `judgment` records count, any current-HEAD `can_stop = no` fails closed, and repo-level stop verification must wait until the required current-HEAD judgments are recorded.
37
39
  - Keep slice-specific proof in repo tests or deterministic checks. Refresh `.agent/verify_completion_stop.sh` only when the repo's top-level verification surfaces change or the verifier becomes stale.
38
40
  - The workflow topology is flat and primary-driven: the main pi session remains the workflow root and invokes at most one completion role at a time.
39
41
  - No completion role may invoke another completion role during the normal workflow.
@@ -83,7 +85,7 @@ If the workflow driver detects that the next mandatory action belongs to a compl
83
85
  6. If the latest committed slice lacks an audit result, invoke `completion-auditor`.
84
86
  7. If review or audit have returned and canonical reconciliation is needed, invoke `completion-regrounder`. `completion-regrounder` must not select or hand off a next slice while the latest committed slice leaves the tracked and unignored worktree dirty; instead it must reopen or continue that latest slice for reconciliation.
85
87
  8. If all planned slices are done and final closure is being evaluated, invoke the required `completion-stop-judge` sessions directly.
86
- 9. After each required `completion-stop-judge` result is faithfully recorded, rerun `bash .agent/verify_completion_stop.sh` and invoke `completion-regrounder` for final stop reconciliation.
88
+ 9. After all required current-HEAD `completion-stop-judge` results are faithfully recorded, rerun `bash .agent/verify_completion_stop.sh` and invoke `completion-regrounder` for final stop reconciliation.
87
89
 
88
90
  The workflow driver must not substitute itself for any mandatory dispatch target above.
89
91
 
@@ -55,7 +55,8 @@ Optional context only:
55
55
  "schema_version": 1,
56
56
  "protocol_id": "completion",
57
57
  "project_name": "<repo-name>",
58
- "required_stop_judges": 3,
58
+ "required_stop_judges": 2,
59
+ "stop_aggregation_policy": "unanimous-current-head-v1",
59
60
  "priority_policy_id": "completion-default",
60
61
  "task_type": "completion-workflow",
61
62
  "evaluation_profile": "completion-rubric-v1",
@@ -254,6 +255,21 @@ Minimum record shape:
254
255
 
255
256
  Empty history files are legal.
256
257
 
258
+ ## Final Stop Aggregation Policy
259
+
260
+ The packaged default stop policy is:
261
+
262
+ - `required_stop_judges: 2`
263
+ - `stop_aggregation_policy: "unanimous-current-head-v1"`
264
+
265
+ Policy meaning:
266
+
267
+ - count only `judgment` records whose `head_sha` matches the current `HEAD`
268
+ - require at least `required_stop_judges` (two by default) valid current-HEAD judgments before repo-level stop verification may run
269
+ - fail closed if any current-HEAD judgment has `can_stop = false`
270
+ - fail closed if a current-HEAD judgment is malformed or carries non-zero blocker/high-value-gap counts
271
+ - rerun `bash .agent/verify_completion_stop.sh` only after the required current-HEAD judgments are faithfully recorded, then hand final reconciliation back to `completion-regrounder`
272
+
257
273
  ## Structured Evaluation Rubric Foundation
258
274
 
259
275
  `completion-reviewer`, `completion-auditor`, and `completion-stop-judge` must emit rubric-backed evaluations using the same shared dimension names and verdict semantics.
@@ -344,7 +360,7 @@ It must not, while a slice is selected or in progress:
344
360
  6. If the latest committed slice lacks audit, invoke `completion-auditor`.
345
361
  7. If canonical reconciliation is needed after review or audit, invoke `completion-regrounder`.
346
362
  8. If all slices are done and final closure is under evaluation, invoke the required `completion-stop-judge` sessions directly.
347
- 9. After the required judgments are recorded, rerun `bash .agent/verify_completion_stop.sh` and invoke `completion-regrounder` for final stop reconciliation.
363
+ 9. After the required current-HEAD judgments are recorded, rerun `bash .agent/verify_completion_stop.sh` and invoke `completion-regrounder` for final stop reconciliation.
348
364
 
349
365
  ## Compaction And Recovery
350
366