@linimin/pi-letscook 0.1.71 → 0.1.73
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agent/README.md +2 -0
- package/.agent/mission.md +1 -1
- package/.agent/profile.json +2 -1
- package/.agent/verify_completion_control_plane.sh +11 -0
- package/.agent/verify_completion_stop.sh +101 -13
- package/CHANGELOG.md +16 -0
- package/README.md +1 -1
- package/extensions/completion/driver.ts +4 -2
- package/extensions/completion/index.ts +34 -1
- package/extensions/completion/prompt-surfaces.ts +2 -0
- package/extensions/completion/role-reporting.js +1 -1
- package/extensions/completion/state-store.ts +121 -5
- package/extensions/completion/status-surface.ts +4 -0
- package/extensions/completion/types.ts +2 -0
- package/package.json +2 -1
- package/scripts/active-slice-contract-test.sh +1 -1
- package/scripts/canonical-evidence-artifact-test.sh +1 -1
- package/scripts/completion-role-gating-test.sh +55 -0
- package/scripts/context-proposal-test.sh +6 -0
- package/scripts/evaluator-calibration-test.sh +73 -10
- package/scripts/observability-status-test.sh +6 -1
- package/scripts/release-check.sh +4 -1
- package/scripts/rubric-contract-test.sh +5 -0
- package/skills/completion-protocol/SKILL.md +3 -1
- package/skills/completion-protocol/references/completion.md +18 -2
package/.agent/README.md
CHANGED
|
@@ -22,6 +22,8 @@ This repository uses the `completion` workflow for long-running coding tasks.
|
|
|
22
22
|
- `.agent/*.log`
|
|
23
23
|
- `.agent/tmp/`
|
|
24
24
|
|
|
25
|
+
`.agent/profile.json` carries the stop-wave defaults for this repo, including `required_stop_judges` and `stop_aggregation_policy`. The packaged default is `required_stop_judges: 2` plus `stop_aggregation_policy: "unanimous-current-head-v1"`.
|
|
26
|
+
|
|
25
27
|
`.agent/startup-brief.json` preserves the confirmed `/cook` startup intent as canonical intake for re-grounding. It does not replace `.agent/plan.json` or `.agent/active-slice.json`, which remain under regrounder authority.
|
|
26
28
|
|
|
27
29
|
`.agent/verification-evidence.json` is the durable canonical record of deterministic verification for the selected slice or current HEAD. Recovery, review, audit, and stop-check reminder surfaces consume it instead of temp-only artifacts or conversational summaries when it is populated.
|
package/.agent/mission.md
CHANGED
|
@@ -3,6 +3,6 @@
|
|
|
3
3
|
Project: pi-letscook
|
|
4
4
|
|
|
5
5
|
Mission anchor:
|
|
6
|
-
|
|
6
|
+
Change pi-letscook stop-wave behavior from 3 generic stop judges to 2 stop judges plus an explicit current-HEAD aggregation policy (`unanimous-current-head-v1`), then update control-plane defaults, protocol docs, and regression tests so final stop requires two valid current-HEAD `can_stop=yes` judgments and a passing stop verifier before reconciliation to done.
|
|
7
7
|
|
|
8
8
|
This file is a tracked human-readable statement of the repo's completion mission. Re-grounders may refine this file when repo truth becomes clearer, but it must stay truthful to shipped behavior and the active completion objective.
|
package/.agent/profile.json
CHANGED
|
@@ -2,7 +2,8 @@
|
|
|
2
2
|
"schema_version": 1,
|
|
3
3
|
"protocol_id": "completion",
|
|
4
4
|
"project_name": "pi-letscook",
|
|
5
|
-
"required_stop_judges":
|
|
5
|
+
"required_stop_judges": 2,
|
|
6
|
+
"stop_aggregation_policy": "unanimous-current-head-v1",
|
|
6
7
|
"priority_policy_id": "completion-default",
|
|
7
8
|
"task_type": "completion-workflow",
|
|
8
9
|
"evaluation_profile": "completion-rubric-v1",
|
|
@@ -101,12 +101,23 @@ for (const [file, record] of [
|
|
|
101
101
|
|
|
102
102
|
const taskType = asString(profile.task_type);
|
|
103
103
|
const evaluationProfile = asString(profile.evaluation_profile);
|
|
104
|
+
const requiredStopJudges = asNumber(profile.required_stop_judges);
|
|
105
|
+
const stopAggregationPolicy = asString(profile.stop_aggregation_policy);
|
|
106
|
+
if (!Number.isInteger(requiredStopJudges) || requiredStopJudges < 1) {
|
|
107
|
+
fail('.agent/profile.json required_stop_judges must be a positive integer');
|
|
108
|
+
}
|
|
109
|
+
if (stopAggregationPolicy !== 'unanimous-current-head-v1') {
|
|
110
|
+
fail('.agent/profile.json stop_aggregation_policy must be unanimous-current-head-v1');
|
|
111
|
+
}
|
|
104
112
|
if (asString(state.task_type) !== taskType) fail('.agent/state.json task_type must match .agent/profile.json task_type');
|
|
105
113
|
if (asString(plan.task_type) !== taskType) fail('.agent/plan.json task_type must match .agent/profile.json task_type');
|
|
106
114
|
if (asString(active.task_type) !== taskType) fail('.agent/active-slice.json task_type must match .agent/profile.json task_type');
|
|
107
115
|
if (asString(state.evaluation_profile) !== evaluationProfile) fail('.agent/state.json evaluation_profile must match .agent/profile.json evaluation_profile');
|
|
108
116
|
if (asString(plan.evaluation_profile) !== evaluationProfile) fail('.agent/plan.json evaluation_profile must match .agent/profile.json evaluation_profile');
|
|
109
117
|
if (asString(active.evaluation_profile) !== evaluationProfile) fail('.agent/active-slice.json evaluation_profile must match .agent/profile.json evaluation_profile');
|
|
118
|
+
const remainingStopJudges = asNumber(state.remaining_stop_judges);
|
|
119
|
+
if (remainingStopJudges === undefined) fail('.agent/state.json remaining_stop_judges must be numeric');
|
|
120
|
+
if (remainingStopJudges < 0) fail('.agent/state.json remaining_stop_judges must not be negative');
|
|
110
121
|
|
|
111
122
|
if (asString(evidence.artifact_type) !== 'completion-verification-evidence') {
|
|
112
123
|
fail('.agent/verification-evidence.json artifact_type must be completion-verification-evidence');
|
|
@@ -1,20 +1,108 @@
|
|
|
1
1
|
#!/usr/bin/env bash
|
|
2
2
|
set -euo pipefail
|
|
3
3
|
|
|
4
|
-
|
|
5
|
-
cd "$ROOT"
|
|
6
|
-
|
|
7
|
-
echo "[completion-stop] verifying control plane and .agent/verification-evidence.json parity"
|
|
4
|
+
# .agent/verification-evidence.json parity is enforced by .agent/verify_completion_control_plane.sh before stop-wave policy checks.
|
|
8
5
|
bash .agent/verify_completion_control_plane.sh
|
|
9
6
|
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
7
|
+
CURRENT_HEAD="$(git rev-parse HEAD 2>/dev/null || true)"
|
|
8
|
+
export COMPLETION_STOP_HEAD="$CURRENT_HEAD"
|
|
9
|
+
|
|
10
|
+
node <<'NODE'
|
|
11
|
+
const fs = require('node:fs');
|
|
12
|
+
const { spawnSync } = require('node:child_process');
|
|
13
|
+
|
|
14
|
+
function fail(message) {
|
|
15
|
+
console.error(message);
|
|
16
|
+
process.exit(1);
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
function readJson(file) {
|
|
20
|
+
try {
|
|
21
|
+
return JSON.parse(fs.readFileSync(file, 'utf8'));
|
|
22
|
+
} catch (error) {
|
|
23
|
+
fail('Failed to read ' + file + ': ' + error.message);
|
|
24
|
+
}
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
function asString(value) {
|
|
28
|
+
return typeof value === 'string' && value.trim().length > 0 ? value.trim() : undefined;
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
function asNumber(value) {
|
|
32
|
+
return typeof value === 'number' && Number.isFinite(value) ? value : undefined;
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
function gitHeadSha() {
|
|
36
|
+
const result = spawnSync('git', ['rev-parse', 'HEAD'], { encoding: 'utf8', stdio: ['ignore', 'pipe', 'pipe'] });
|
|
37
|
+
if (result.status !== 0) {
|
|
38
|
+
fail('git rev-parse HEAD failed: ' + (asString(result.stderr) ?? 'unknown git error'));
|
|
39
|
+
}
|
|
40
|
+
return asString(result.stdout);
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
const profile = readJson('.agent/profile.json');
|
|
44
|
+
const state = readJson('.agent/state.json');
|
|
45
|
+
const requiredStopJudges = asNumber(profile.required_stop_judges);
|
|
46
|
+
if (!Number.isInteger(requiredStopJudges) || requiredStopJudges < 1) {
|
|
47
|
+
fail('.agent/profile.json required_stop_judges must be a positive integer before stop verification can run.');
|
|
48
|
+
}
|
|
49
|
+
const stopAggregationPolicy = asString(profile.stop_aggregation_policy);
|
|
50
|
+
if (stopAggregationPolicy !== 'unanimous-current-head-v1') {
|
|
51
|
+
fail('.agent/profile.json stop_aggregation_policy must be unanimous-current-head-v1 before stop verification can run.');
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
const currentPhase = asString(state.current_phase) ?? 'unknown';
|
|
55
|
+
const stopWaveActive = currentPhase === 'stop_wave' || currentPhase === 'done';
|
|
56
|
+
const rawHistory = fs.existsSync('.agent/stop-check-history.jsonl') ? fs.readFileSync('.agent/stop-check-history.jsonl', 'utf8') : '';
|
|
57
|
+
const seededHeadSha = asString(process.env.COMPLETION_STOP_HEAD);
|
|
58
|
+
if (!seededHeadSha && !stopWaveActive && rawHistory.trim().length === 0) {
|
|
59
|
+
console.log('[completion] current phase ' + currentPhase + ' is not stop_wave/done; current-HEAD stop judgments are not required yet');
|
|
60
|
+
process.exit(0);
|
|
61
|
+
}
|
|
62
|
+
const headSha = seededHeadSha ?? gitHeadSha();
|
|
63
|
+
const currentHeadJudgments = [];
|
|
64
|
+
for (const [index, rawLine] of rawHistory.split(/\r?\n/).entries()) {
|
|
65
|
+
const line = rawLine.trim();
|
|
66
|
+
if (!line) continue;
|
|
67
|
+
let parsed;
|
|
68
|
+
try {
|
|
69
|
+
parsed = JSON.parse(line);
|
|
70
|
+
} catch (error) {
|
|
71
|
+
fail('.agent/stop-check-history.jsonl contains invalid JSON at line ' + (index + 1) + ': ' + error.message);
|
|
72
|
+
}
|
|
73
|
+
if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) {
|
|
74
|
+
fail('.agent/stop-check-history.jsonl line ' + (index + 1) + ' must be a JSON object judgment record.');
|
|
75
|
+
}
|
|
76
|
+
if (parsed.type !== 'judgment') continue;
|
|
77
|
+
if (asString(parsed.head_sha) !== headSha) continue;
|
|
78
|
+
if (typeof parsed.can_stop !== 'boolean') {
|
|
79
|
+
fail('Current-HEAD judgment at line ' + (index + 1) + ' must carry boolean can_stop.');
|
|
80
|
+
}
|
|
81
|
+
const blockerCount = asNumber(parsed.blocker_count);
|
|
82
|
+
const highValueGapCount = asNumber(parsed.high_value_gap_count);
|
|
83
|
+
if (blockerCount === undefined || highValueGapCount === undefined) {
|
|
84
|
+
fail('Current-HEAD judgment at line ' + (index + 1) + ' must carry numeric blocker_count and high_value_gap_count.');
|
|
85
|
+
}
|
|
86
|
+
if (parsed.can_stop === false) {
|
|
87
|
+
fail('Current HEAD has a can_stop=no judgment at line ' + (index + 1) + '.');
|
|
88
|
+
}
|
|
89
|
+
if (blockerCount > 0 || highValueGapCount > 0) {
|
|
90
|
+
fail('Current-HEAD judgment at line ' + (index + 1) + ' cannot pass stop verification while blocker_count or high_value_gap_count is non-zero.');
|
|
91
|
+
}
|
|
92
|
+
currentHeadJudgments.push(parsed);
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
if (!stopWaveActive && currentHeadJudgments.length === 0) {
|
|
96
|
+
console.log('[completion] current phase ' + currentPhase + ' is not stop_wave/done; current-HEAD stop judgments are not required yet');
|
|
97
|
+
process.exit(0);
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
if (currentHeadJudgments.length < requiredStopJudges) {
|
|
101
|
+
fail('Need ' + requiredStopJudges + ' valid current-HEAD judgments for HEAD ' + headSha + '; found ' + currentHeadJudgments.length + '.');
|
|
102
|
+
}
|
|
16
103
|
|
|
17
|
-
|
|
18
|
-
|
|
104
|
+
console.log('[completion] stop-wave policy unanimous-current-head-v1 satisfied for HEAD ' + headSha + ' with ' + currentHeadJudgments.length + ' valid current-HEAD judgments');
|
|
105
|
+
NODE
|
|
19
106
|
|
|
20
|
-
echo "completion
|
|
107
|
+
echo "[completion] running repo-level verification: npm run release-check >/dev/null"
|
|
108
|
+
npm run release-check >/dev/null
|
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,21 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 0.1.73
|
|
4
|
+
|
|
5
|
+
### Fixed
|
|
6
|
+
|
|
7
|
+
- fixed `/cook` await-user-input resumptions so a user's exact reply in the active workflow can dispatch the mandatory follow-up completion role without forcing an extra `/cook` rerun
|
|
8
|
+
- kept active-workflow completion-role gating strict for ordinary main-chat turns while adding regression coverage that fails if await-user-input replies lose their workflow dispatch rights
|
|
9
|
+
|
|
10
|
+
## 0.1.72
|
|
11
|
+
|
|
12
|
+
### Fixed
|
|
13
|
+
|
|
14
|
+
- relaxed reviewer no-follow-up routing parsing so `Acceptable as-is: yes` now also accepts `none, proceed to completion-auditor` and `none - proceed to auditor` in addition to the original exact allowance, reducing avoidable completion transcription warnings without weakening the follow-up-slice guard
|
|
15
|
+
- fixed completion-role continuation gating so an already-active `/cook` workflow with `continuation_policy: continue` can keep dispatching mandatory follow-up roles even when the harness no longer recognizes the current turn text as an explicit `/cook` or workflow-driver prompt, while still blocking ordinary main-chat turns from calling `completion_role`
|
|
16
|
+
- fixed `/cook` await-user-input resumptions so a user's exact reply in the active workflow can dispatch the mandatory follow-up completion role without forcing an extra `/cook` rerun
|
|
17
|
+
- added a dedicated `completion-role-gating-test` regression so release-check now fails if active-workflow continuation falls back to the old prompt-only dispatch gate, await-user-input replies lose workflow dispatch rights, or ordinary main-chat turns stop being rejected
|
|
18
|
+
|
|
3
19
|
## 0.1.71
|
|
4
20
|
|
|
5
21
|
### Changed
|
package/README.md
CHANGED
|
@@ -184,7 +184,7 @@ Deterministic verification now also persists a durable canonical artifact in `.a
|
|
|
184
184
|
|
|
185
185
|
Canonical reviewer/auditor/stop-judge transcription now fails closed on malformed rubric-bearing reports: the shared rubric heading plus all four rubric dimensions must be present, required role fields must remain intact, and reviewer/stop-judge yes/no verdicts cannot contradict rubric `fail` lines.
|
|
186
186
|
|
|
187
|
-
Evaluator calibration now also fails closed on semantically lenient but well-formed reports. `npm run evaluator-calibration-test` drives the packaged transcription path through reviewer yes-with-follow-up, auditor open-contracts-with-`Next mandatory slice: none`, and stop-judge yes-with-open-contracts fixtures while still accepting truthful passing reports. It also rejects the reproducible `none; ...` bypass family for reviewer follow-up, auditor worktree blockers, and stop-judge open-contract reporting, while still accepting
|
|
187
|
+
Evaluator calibration now also fails closed on semantically lenient but well-formed reports. `npm run evaluator-calibration-test` drives the packaged transcription path through reviewer yes-with-follow-up, auditor open-contracts-with-`Next mandatory slice: none`, and stop-judge yes-with-open-contracts fixtures while still accepting truthful passing reports. It also rejects the reproducible `none; ...` bypass family for reviewer follow-up, auditor worktree blockers, and stop-judge open-contract reporting, while still accepting the reviewer routing forms `Smallest follow-up slice: none; proceed to completion-auditor.`, `Smallest follow-up slice: none, proceed to completion-auditor.`, and `Smallest follow-up slice: none - proceed to auditor.` with terminal punctuation or whitespace only. Both `npm run release-check` and `bash .agent/verify_completion_stop.sh` include this calibration gate.
|
|
188
188
|
|
|
189
189
|
Deterministic active-slice contract regression now lives in `bash scripts/active-slice-contract-test.sh`, and `npm run release-check` pulls it into the packaged release gate before `npm pack --dry-run`.
|
|
190
190
|
|
|
@@ -468,13 +468,15 @@ async function refocusCompletionMission(
|
|
|
468
468
|
deps: CompletionDriverDeps,
|
|
469
469
|
advisoryStartupBrief?: Record<string, unknown>,
|
|
470
470
|
): Promise<void> {
|
|
471
|
-
const requiredStopJudges = asNumber(snapshot.profile?.required_stop_judges) ??
|
|
471
|
+
const requiredStopJudges = asNumber(snapshot.profile?.required_stop_judges) ?? 2;
|
|
472
|
+
const stopAggregationPolicy = asString(snapshot.profile?.stop_aggregation_policy) ?? "unanimous-current-head-v1";
|
|
472
473
|
const root = snapshot.files.root;
|
|
473
474
|
const routing = deps.finalizeContextProposalAnalysis(analysis, [rawGoal, missionAnchor]);
|
|
474
475
|
const docsSurfaces = asStringArray(snapshot.profile?.docs_surfaces);
|
|
475
476
|
const nextProfile = buildProfileRecord({
|
|
476
477
|
projectName: asString(snapshot.profile?.project_name) ?? path.basename(root),
|
|
477
478
|
requiredStopJudges,
|
|
479
|
+
stopAggregationPolicy,
|
|
478
480
|
priorityPolicyId: asString(snapshot.profile?.priority_policy_id) ?? "completion-default",
|
|
479
481
|
docsSurfaces: docsSurfaces.length > 0 ? docsSurfaces : await detectDocsSurfaces(root),
|
|
480
482
|
taskType: routing.taskType,
|
|
@@ -485,7 +487,7 @@ async function refocusCompletionMission(
|
|
|
485
487
|
taskType: routing.taskType,
|
|
486
488
|
evaluationProfile: routing.evaluationProfile,
|
|
487
489
|
continuationReason: deps.buildContextProposalContinuationReason("User refocused workflow via /cook:", rawGoal, routing),
|
|
488
|
-
}, advisoryStartupBrief),
|
|
490
|
+
}, advisoryStartupBrief, { requiredStopJudges }),
|
|
489
491
|
remaining_stop_judges: requiredStopJudges,
|
|
490
492
|
next_mandatory_action: "Reconcile canonical state from current repo truth for the refocused mission",
|
|
491
493
|
};
|
|
@@ -262,6 +262,39 @@ function isCompletionWorkflowSessionTurn(snapshot: CompletionStateSnapshot | und
|
|
|
262
262
|
return isCompletionDriverPromptTurn(snapshot, ctx) || isCookCommandTurn(ctx);
|
|
263
263
|
}
|
|
264
264
|
|
|
265
|
+
function isOrdinaryMainChatTurnDuringActiveWorkflow(
|
|
266
|
+
snapshot: CompletionStateSnapshot | undefined,
|
|
267
|
+
ctx: { sessionManager?: any },
|
|
268
|
+
): boolean {
|
|
269
|
+
if (!hasActiveWorkflowEntry(snapshot)) return false;
|
|
270
|
+
const latest = latestUserOrCustomTurnText(ctx);
|
|
271
|
+
if (!latest) return false;
|
|
272
|
+
if (isCookCommandTurn(ctx)) return false;
|
|
273
|
+
if (isCompletionDriverPromptTurn(snapshot, ctx)) return false;
|
|
274
|
+
return true;
|
|
275
|
+
}
|
|
276
|
+
|
|
277
|
+
function isAwaitingUserInputWorkflowReplyTurn(
|
|
278
|
+
snapshot: CompletionStateSnapshot | undefined,
|
|
279
|
+
ctx: { sessionManager?: any },
|
|
280
|
+
): boolean {
|
|
281
|
+
if (!hasActiveWorkflowEntry(snapshot)) return false;
|
|
282
|
+
if (!isOrdinaryMainChatTurnDuringActiveWorkflow(snapshot, ctx)) return false;
|
|
283
|
+
return asString(snapshot?.state?.continuation_policy) === "await_user_input";
|
|
284
|
+
}
|
|
285
|
+
|
|
286
|
+
function isCompletionRoleDispatchAllowedTurn(
|
|
287
|
+
snapshot: CompletionStateSnapshot | undefined,
|
|
288
|
+
ctx: { sessionManager?: any },
|
|
289
|
+
): boolean {
|
|
290
|
+
if (hasCompletionRoutingActivation(snapshot)) return true;
|
|
291
|
+
if (!hasActiveWorkflowEntry(snapshot)) return false;
|
|
292
|
+
if (isCompletionWorkflowSessionTurn(snapshot, ctx)) return true;
|
|
293
|
+
if (isAwaitingUserInputWorkflowReplyTurn(snapshot, ctx)) return true;
|
|
294
|
+
if (isOrdinaryMainChatTurnDuringActiveWorkflow(snapshot, ctx)) return false;
|
|
295
|
+
return asString(snapshot?.state?.continuation_policy) === "continue";
|
|
296
|
+
}
|
|
297
|
+
|
|
265
298
|
function shouldInjectCompletionWorkflowContext(snapshot: CompletionStateSnapshot | undefined, ctx: { sessionManager?: any }): boolean {
|
|
266
299
|
return isCompletionWorkflowSessionTurn(snapshot, ctx);
|
|
267
300
|
}
|
|
@@ -1081,7 +1114,7 @@ export default function completionExtension(pi: ExtensionAPI) {
|
|
|
1081
1114
|
const snapshot = await loadCompletionSnapshot(cwd);
|
|
1082
1115
|
const completionActive = Boolean(snapshot) && asString(snapshot?.state?.continuation_policy) !== "done";
|
|
1083
1116
|
const root = snapshot?.files.root ?? findRepoRoot(cwd) ?? cwd;
|
|
1084
|
-
const completionRoleDispatchAllowed = Boolean(role) ||
|
|
1117
|
+
const completionRoleDispatchAllowed = Boolean(role) || isCompletionRoleDispatchAllowedTurn(snapshot, ctx);
|
|
1085
1118
|
const reason = toolCallBlockReason({
|
|
1086
1119
|
toolName: event.toolName,
|
|
1087
1120
|
input: isRecord(event.input) ? event.input : undefined,
|
|
@@ -402,6 +402,8 @@ export function buildEvaluationRoleContextLines(
|
|
|
402
402
|
`Canonical evaluation handoff for ${role}:`,
|
|
403
403
|
`- task_type: ${deps.currentTaskType(snapshot) ?? "(missing)"}`,
|
|
404
404
|
`- evaluation_profile: ${deps.currentEvaluationProfile(snapshot) ?? "(missing)"}`,
|
|
405
|
+
`- required_stop_judges: ${snapshot.profile?.required_stop_judges ?? "(missing)"}`,
|
|
406
|
+
`- stop_aggregation_policy: ${deps.asString(snapshot.profile?.stop_aggregation_policy) ?? "(missing)"}`,
|
|
405
407
|
`- latest_completed_slice: ${deps.asString(snapshot.state?.latest_completed_slice) ?? "(none)"}`,
|
|
406
408
|
`- active_slice_id: ${context.sliceId ?? "(none)"}`,
|
|
407
409
|
`- active_slice_status: ${context.status ?? "(unknown)"}`,
|
|
@@ -115,7 +115,7 @@ function isPureNoneLike(value) {
|
|
|
115
115
|
function isReviewerProceedToAuditorRoutingValue(value) {
|
|
116
116
|
const raw = asString(value);
|
|
117
117
|
if (!raw) return false;
|
|
118
|
-
return /^none\s
|
|
118
|
+
return /^none(?:\s*[,;:/-]\s*|\s+)proceed to (?:completion-)?auditor(?:[\p{P}\s]*)$/iu.test(raw);
|
|
119
119
|
}
|
|
120
120
|
|
|
121
121
|
function isReviewerNoFollowUpValue(value) {
|
|
@@ -8,6 +8,8 @@ import type { CompletionStateSnapshot, JsonRecord } from "./types";
|
|
|
8
8
|
const PROTOCOL_ID = "completion";
|
|
9
9
|
const DEFAULT_TASK_TYPE = "completion-workflow";
|
|
10
10
|
const DEFAULT_EVALUATION_PROFILE = "completion-rubric-v1";
|
|
11
|
+
const DEFAULT_REQUIRED_STOP_JUDGES = 2;
|
|
12
|
+
const DEFAULT_STOP_AGGREGATION_POLICY = "unanimous-current-head-v1";
|
|
11
13
|
const TRACKED_CONTRACT_FILES = [
|
|
12
14
|
".agent/README.md",
|
|
13
15
|
".agent/mission.md",
|
|
@@ -197,6 +199,7 @@ async function detectVerifierCommand(root: string): Promise<string | undefined>
|
|
|
197
199
|
const scripts = isRecord(packageJson.scripts) ? packageJson.scripts : undefined;
|
|
198
200
|
const packageManager = asString((packageJson as JsonRecord).packageManager) ?? "";
|
|
199
201
|
const runner = packageManager.startsWith("pnpm") ? "pnpm" : packageManager.startsWith("yarn") ? "yarn" : packageManager.startsWith("bun") ? "bun" : "npm";
|
|
202
|
+
if (scripts && asString(scripts["release-check"])) return runner === "npm" ? "npm run release-check >/dev/null" : `${runner} run release-check >/dev/null`;
|
|
200
203
|
if (scripts && asString(scripts.test)) return runner === "npm" ? "npm test" : `${runner} test`;
|
|
201
204
|
if (scripts && asString(scripts.check)) return runner === "npm" ? "npm run check" : `${runner} check`;
|
|
202
205
|
if (scripts && asString(scripts.lint)) return runner === "npm" ? "npm run lint" : `${runner} lint`;
|
|
@@ -214,6 +217,7 @@ async function detectVerifierCommand(root: string): Promise<string | undefined>
|
|
|
214
217
|
export function buildProfileRecord(args: {
|
|
215
218
|
projectName: string;
|
|
216
219
|
requiredStopJudges: number;
|
|
220
|
+
stopAggregationPolicy?: string;
|
|
217
221
|
priorityPolicyId?: string;
|
|
218
222
|
docsSurfaces: string[];
|
|
219
223
|
taskType?: string;
|
|
@@ -224,6 +228,7 @@ export function buildProfileRecord(args: {
|
|
|
224
228
|
protocol_id: PROTOCOL_ID,
|
|
225
229
|
project_name: args.projectName,
|
|
226
230
|
required_stop_judges: args.requiredStopJudges,
|
|
231
|
+
stop_aggregation_policy: args.stopAggregationPolicy ?? DEFAULT_STOP_AGGREGATION_POLICY,
|
|
227
232
|
priority_policy_id: args.priorityPolicyId ?? "completion-default",
|
|
228
233
|
task_type: args.taskType ?? DEFAULT_TASK_TYPE,
|
|
229
234
|
evaluation_profile: args.evaluationProfile ?? DEFAULT_EVALUATION_PROFILE,
|
|
@@ -239,8 +244,10 @@ export function defaultState(
|
|
|
239
244
|
missionAnchor: string,
|
|
240
245
|
routing?: { taskType?: string; evaluationProfile?: string; continuationReason?: string },
|
|
241
246
|
advisoryStartupBrief?: JsonRecord,
|
|
247
|
+
stopPolicy?: { requiredStopJudges?: number },
|
|
242
248
|
): JsonRecord {
|
|
243
249
|
const confirmedAt = asString(advisoryStartupBrief?.captured_at) ?? new Date().toISOString();
|
|
250
|
+
const requiredStopJudges = stopPolicy?.requiredStopJudges ?? DEFAULT_REQUIRED_STOP_JUDGES;
|
|
244
251
|
return {
|
|
245
252
|
schema_version: 1,
|
|
246
253
|
mission_anchor: missionAnchor,
|
|
@@ -264,7 +271,7 @@ export function defaultState(
|
|
|
264
271
|
release_blocker_ids: [],
|
|
265
272
|
next_mandatory_action: "Reconcile canonical state from current repo truth",
|
|
266
273
|
next_mandatory_role: "completion-regrounder",
|
|
267
|
-
remaining_stop_judges:
|
|
274
|
+
remaining_stop_judges: requiredStopJudges,
|
|
268
275
|
last_reground_at: null,
|
|
269
276
|
last_auditor_verdict: null,
|
|
270
277
|
contract_status: "unknown",
|
|
@@ -360,7 +367,7 @@ export function defaultVerificationEvidence(): JsonRecord {
|
|
|
360
367
|
}
|
|
361
368
|
|
|
362
369
|
export function buildAgentReadme(projectName: string): string {
|
|
363
|
-
return `# Completion Control Plane\n\nThis repository uses the \`completion\` workflow for long-running coding tasks.\n\n## Canonical tracked contract files\n\n- \`.agent/README.md\`\n- \`.agent/mission.md\`\n- \`.agent/profile.json\`\n- \`.agent/verify_completion_stop.sh\`\n- \`.agent/verify_completion_control_plane.sh\`\n\n## Ignored canonical execution state\n\n- \`.agent/state.json\`\n- \`.agent/startup-brief.json\`\n- \`.agent/plan.json\`\n- \`.agent/active-slice.json\`\n- \`.agent/slice-history.jsonl\`\n- \`.agent/stop-check-history.jsonl\`\n- \`.agent/verification-evidence.json\`\n- \`.agent/*.log\`\n- \`.agent/tmp/\`\n\n\`.agent/startup-brief.json\` preserves the confirmed \`/cook\` startup intent as canonical intake for re-grounding. It does not replace \`.agent/plan.json\` or \`.agent/active-slice.json\`, which remain under regrounder authority.\n\n\`.agent/verification-evidence.json\` is the durable canonical record of deterministic verification for the selected slice or current HEAD. Recovery, review, audit, and stop-check reminder surfaces consume it instead of temp-only artifacts or conversational summaries when it is populated.\n\nThe source of truth for long-running completion work is canonical \`.agent/**\` state plus current repo truth.\n\nProject: ${projectName}\n`;
|
|
370
|
+
return `# Completion Control Plane\n\nThis repository uses the \`completion\` workflow for long-running coding tasks.\n\n## Canonical tracked contract files\n\n- \`.agent/README.md\`\n- \`.agent/mission.md\`\n- \`.agent/profile.json\`\n- \`.agent/verify_completion_stop.sh\`\n- \`.agent/verify_completion_control_plane.sh\`\n\n## Ignored canonical execution state\n\n- \`.agent/state.json\`\n- \`.agent/startup-brief.json\`\n- \`.agent/plan.json\`\n- \`.agent/active-slice.json\`\n- \`.agent/slice-history.jsonl\`\n- \`.agent/stop-check-history.jsonl\`\n- \`.agent/verification-evidence.json\`\n- \`.agent/*.log\`\n- \`.agent/tmp/\`\n\n\`.agent/profile.json\` carries the stop-wave defaults for this repo, including \`required_stop_judges\` and \`stop_aggregation_policy\`. The packaged default is \`required_stop_judges: 2\` plus \`stop_aggregation_policy: "${DEFAULT_STOP_AGGREGATION_POLICY}"\`.\n\n\`.agent/startup-brief.json\` preserves the confirmed \`/cook\` startup intent as canonical intake for re-grounding. It does not replace \`.agent/plan.json\` or \`.agent/active-slice.json\`, which remain under regrounder authority.\n\n\`.agent/verification-evidence.json\` is the durable canonical record of deterministic verification for the selected slice or current HEAD. Recovery, review, audit, and stop-check reminder surfaces consume it instead of temp-only artifacts or conversational summaries when it is populated.\n\nThe source of truth for long-running completion work is canonical \`.agent/**\` state plus current repo truth.\n\nProject: ${projectName}\n`;
|
|
364
371
|
}
|
|
365
372
|
|
|
366
373
|
export function buildMission(projectName: string, missionAnchor: string): string {
|
|
@@ -371,7 +378,114 @@ export function buildVerifyStopScript(verifierCommand?: string): string {
|
|
|
371
378
|
const repoCheck = verifierCommand
|
|
372
379
|
? `echo "[completion] running repo-level verification: ${verifierCommand}"\n${verifierCommand}`
|
|
373
380
|
: `echo "[completion] no repo-specific verifier auto-detected; control-plane verification only"`;
|
|
374
|
-
return `#!/usr/bin/env bash
|
|
381
|
+
return `#!/usr/bin/env bash
|
|
382
|
+
set -euo pipefail
|
|
383
|
+
|
|
384
|
+
# .agent/verification-evidence.json parity is enforced by .agent/verify_completion_control_plane.sh before stop-wave policy checks.
|
|
385
|
+
bash .agent/verify_completion_control_plane.sh
|
|
386
|
+
|
|
387
|
+
CURRENT_HEAD="$(git rev-parse HEAD 2>/dev/null || true)"
|
|
388
|
+
export COMPLETION_STOP_HEAD="$CURRENT_HEAD"
|
|
389
|
+
|
|
390
|
+
node <<'NODE'
|
|
391
|
+
const fs = require('node:fs');
|
|
392
|
+
const { spawnSync } = require('node:child_process');
|
|
393
|
+
|
|
394
|
+
function fail(message) {
|
|
395
|
+
console.error(message);
|
|
396
|
+
process.exit(1);
|
|
397
|
+
}
|
|
398
|
+
|
|
399
|
+
function readJson(file) {
|
|
400
|
+
try {
|
|
401
|
+
return JSON.parse(fs.readFileSync(file, 'utf8'));
|
|
402
|
+
} catch (error) {
|
|
403
|
+
fail('Failed to read ' + file + ': ' + error.message);
|
|
404
|
+
}
|
|
405
|
+
}
|
|
406
|
+
|
|
407
|
+
function asString(value) {
|
|
408
|
+
return typeof value === 'string' && value.trim().length > 0 ? value.trim() : undefined;
|
|
409
|
+
}
|
|
410
|
+
|
|
411
|
+
function asNumber(value) {
|
|
412
|
+
return typeof value === 'number' && Number.isFinite(value) ? value : undefined;
|
|
413
|
+
}
|
|
414
|
+
|
|
415
|
+
function gitHeadSha() {
|
|
416
|
+
const result = spawnSync('git', ['rev-parse', 'HEAD'], { encoding: 'utf8', stdio: ['ignore', 'pipe', 'pipe'] });
|
|
417
|
+
if (result.status !== 0) {
|
|
418
|
+
fail('git rev-parse HEAD failed: ' + (asString(result.stderr) ?? 'unknown git error'));
|
|
419
|
+
}
|
|
420
|
+
return asString(result.stdout);
|
|
421
|
+
}
|
|
422
|
+
|
|
423
|
+
const profile = readJson('.agent/profile.json');
|
|
424
|
+
const state = readJson('.agent/state.json');
|
|
425
|
+
const requiredStopJudges = asNumber(profile.required_stop_judges);
|
|
426
|
+
if (!Number.isInteger(requiredStopJudges) || requiredStopJudges < 1) {
|
|
427
|
+
fail('.agent/profile.json required_stop_judges must be a positive integer before stop verification can run.');
|
|
428
|
+
}
|
|
429
|
+
const stopAggregationPolicy = asString(profile.stop_aggregation_policy);
|
|
430
|
+
if (stopAggregationPolicy !== '${DEFAULT_STOP_AGGREGATION_POLICY}') {
|
|
431
|
+
fail('.agent/profile.json stop_aggregation_policy must be ${DEFAULT_STOP_AGGREGATION_POLICY} before stop verification can run.');
|
|
432
|
+
}
|
|
433
|
+
|
|
434
|
+
const currentPhase = asString(state.current_phase) ?? 'unknown';
|
|
435
|
+
const stopWaveActive = currentPhase === 'stop_wave' || currentPhase === 'done';
|
|
436
|
+
const rawHistory = fs.existsSync('.agent/stop-check-history.jsonl') ? fs.readFileSync('.agent/stop-check-history.jsonl', 'utf8') : '';
|
|
437
|
+
const seededHeadSha = asString(process.env.COMPLETION_STOP_HEAD);
|
|
438
|
+
if (!seededHeadSha && !stopWaveActive && rawHistory.trim().length === 0) {
|
|
439
|
+
console.log('[completion] current phase ' + currentPhase + ' is not stop_wave/done; current-HEAD stop judgments are not required yet');
|
|
440
|
+
process.exit(0);
|
|
441
|
+
}
|
|
442
|
+
const headSha = seededHeadSha ?? gitHeadSha();
|
|
443
|
+
const currentHeadJudgments = [];
|
|
444
|
+
for (const [index, rawLine] of rawHistory.split(/\\r?\\n/).entries()) {
|
|
445
|
+
const line = rawLine.trim();
|
|
446
|
+
if (!line) continue;
|
|
447
|
+
let parsed;
|
|
448
|
+
try {
|
|
449
|
+
parsed = JSON.parse(line);
|
|
450
|
+
} catch (error) {
|
|
451
|
+
fail('.agent/stop-check-history.jsonl contains invalid JSON at line ' + (index + 1) + ': ' + error.message);
|
|
452
|
+
}
|
|
453
|
+
if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) {
|
|
454
|
+
fail('.agent/stop-check-history.jsonl line ' + (index + 1) + ' must be a JSON object judgment record.');
|
|
455
|
+
}
|
|
456
|
+
if (parsed.type !== 'judgment') continue;
|
|
457
|
+
if (asString(parsed.head_sha) !== headSha) continue;
|
|
458
|
+
if (typeof parsed.can_stop !== 'boolean') {
|
|
459
|
+
fail('Current-HEAD judgment at line ' + (index + 1) + ' must carry boolean can_stop.');
|
|
460
|
+
}
|
|
461
|
+
const blockerCount = asNumber(parsed.blocker_count);
|
|
462
|
+
const highValueGapCount = asNumber(parsed.high_value_gap_count);
|
|
463
|
+
if (blockerCount === undefined || highValueGapCount === undefined) {
|
|
464
|
+
fail('Current-HEAD judgment at line ' + (index + 1) + ' must carry numeric blocker_count and high_value_gap_count.');
|
|
465
|
+
}
|
|
466
|
+
if (parsed.can_stop === false) {
|
|
467
|
+
fail('Current HEAD has a can_stop=no judgment at line ' + (index + 1) + '.');
|
|
468
|
+
}
|
|
469
|
+
if (blockerCount > 0 || highValueGapCount > 0) {
|
|
470
|
+
fail('Current-HEAD judgment at line ' + (index + 1) + ' cannot pass stop verification while blocker_count or high_value_gap_count is non-zero.');
|
|
471
|
+
}
|
|
472
|
+
currentHeadJudgments.push(parsed);
|
|
473
|
+
}
|
|
474
|
+
|
|
475
|
+
if (!stopWaveActive && currentHeadJudgments.length === 0) {
|
|
476
|
+
console.log('[completion] current phase ' + currentPhase + ' is not stop_wave/done; current-HEAD stop judgments are not required yet');
|
|
477
|
+
process.exit(0);
|
|
478
|
+
}
|
|
479
|
+
|
|
480
|
+
if (currentHeadJudgments.length < requiredStopJudges) {
|
|
481
|
+
fail('Need ' + requiredStopJudges + ' valid current-HEAD judgments for HEAD ' + headSha + '; found ' + currentHeadJudgments.length + '.');
|
|
482
|
+
}
|
|
483
|
+
|
|
484
|
+
console.log('[completion] stop-wave policy ${DEFAULT_STOP_AGGREGATION_POLICY} satisfied for HEAD ' + headSha + ' with ' + currentHeadJudgments.length + ' valid current-HEAD judgments');
|
|
485
|
+
NODE
|
|
486
|
+
|
|
487
|
+
${repoCheck}
|
|
488
|
+
`;
|
|
375
489
|
}
|
|
376
490
|
|
|
377
491
|
export function buildVerifyControlPlaneScript(): string {
|
|
@@ -644,18 +758,20 @@ export async function scaffoldCompletionFiles(
|
|
|
644
758
|
const projectName = path.basename(root);
|
|
645
759
|
const docsSurfaces = await detectDocsSurfaces(root);
|
|
646
760
|
const verifierCommand = await detectVerifierCommand(root);
|
|
761
|
+
const requiredStopJudges = DEFAULT_REQUIRED_STOP_JUDGES;
|
|
762
|
+
const stopAggregationPolicy = DEFAULT_STOP_AGGREGATION_POLICY;
|
|
647
763
|
const trackedFiles: Array<{ path: string; content: string; executable?: boolean }> = [
|
|
648
764
|
{ path: path.join(files.agentDir, "README.md"), content: buildAgentReadme(projectName) },
|
|
649
765
|
{ path: path.join(files.agentDir, "mission.md"), content: buildMission(projectName, missionAnchor) },
|
|
650
766
|
{
|
|
651
767
|
path: files.profilePath,
|
|
652
|
-
content: `${JSON.stringify(buildProfileRecord({ projectName, requiredStopJudges
|
|
768
|
+
content: `${JSON.stringify(buildProfileRecord({ projectName, requiredStopJudges, stopAggregationPolicy, docsSurfaces, taskType: options?.analysis?.taskType, evaluationProfile: options?.analysis?.evaluationProfile }), null, 2)}\n`,
|
|
653
769
|
},
|
|
654
770
|
{ path: path.join(files.agentDir, "verify_completion_stop.sh"), content: buildVerifyStopScript(verifierCommand), executable: true },
|
|
655
771
|
{ path: path.join(files.agentDir, "verify_completion_control_plane.sh"), content: buildVerifyControlPlaneScript(), executable: true },
|
|
656
772
|
{
|
|
657
773
|
path: files.statePath,
|
|
658
|
-
content: `${JSON.stringify(defaultState(missionAnchor, { taskType: options?.analysis?.taskType, evaluationProfile: options?.analysis?.evaluationProfile, continuationReason: options?.continuationReason }, options?.advisoryStartupBrief), null, 2)}\n`,
|
|
774
|
+
content: `${JSON.stringify(defaultState(missionAnchor, { taskType: options?.analysis?.taskType, evaluationProfile: options?.analysis?.evaluationProfile, continuationReason: options?.continuationReason }, options?.advisoryStartupBrief, { requiredStopJudges }), null, 2)}\n`,
|
|
659
775
|
},
|
|
660
776
|
{
|
|
661
777
|
path: files.startupBriefPath,
|
|
@@ -423,6 +423,8 @@ export function buildCompletionStatusSurface(
|
|
|
423
423
|
const releaseBlockerCount = asNumber(snapshot.state?.remaining_release_blockers) ?? 0;
|
|
424
424
|
const highValueGapCount = asNumber(snapshot.state?.remaining_high_value_gaps) ?? 0;
|
|
425
425
|
const remainingStopJudgeCount = asNumber(snapshot.state?.remaining_stop_judges) ?? 0;
|
|
426
|
+
const requiredStopJudges = asNumber(snapshot.profile?.required_stop_judges) ?? 0;
|
|
427
|
+
const stopAggregationPolicy = asString(snapshot.profile?.stop_aggregation_policy);
|
|
426
428
|
const activeRole = liveActivity?.status === "running" ? liveActivity.role : undefined;
|
|
427
429
|
const liveSignal = liveActivitySignal(liveActivity);
|
|
428
430
|
const livePreview = livePreviewForStatus(liveActivity);
|
|
@@ -469,6 +471,8 @@ export function buildCompletionStatusSurface(
|
|
|
469
471
|
releaseBlockerCount,
|
|
470
472
|
highValueGapCount,
|
|
471
473
|
remainingStopJudgeCount,
|
|
474
|
+
requiredStopJudges,
|
|
475
|
+
stopAggregationPolicy,
|
|
472
476
|
activeRole,
|
|
473
477
|
livePreview,
|
|
474
478
|
liveState: liveSignal?.state,
|
|
@@ -74,6 +74,8 @@ export type CompletionStatusSurface = {
|
|
|
74
74
|
releaseBlockerCount?: number;
|
|
75
75
|
highValueGapCount?: number;
|
|
76
76
|
remainingStopJudgeCount?: number;
|
|
77
|
+
requiredStopJudges?: number;
|
|
78
|
+
stopAggregationPolicy?: string;
|
|
77
79
|
activeRole?: string;
|
|
78
80
|
livePreview?: string;
|
|
79
81
|
liveState?: "active" | "waiting" | "stalled";
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@linimin/pi-letscook",
|
|
3
|
-
"version": "0.1.71",
|
|
3
|
+
"version": "0.1.73",
|
|
4
4
|
"description": "Pi package for long-running completion workflows with canonical .agent state, role-based subagents, continuity, and verification helpers.",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"private": false,
|
|
@@ -41,6 +41,7 @@
|
|
|
41
41
|
"refocus-test": "bash ./scripts/refocus-test.sh",
|
|
42
42
|
"context-proposal-test": "bash ./scripts/context-proposal-test.sh",
|
|
43
43
|
"observability-status-test": "bash ./scripts/observability-status-test.sh",
|
|
44
|
+
"completion-role-gating-test": "bash ./scripts/completion-role-gating-test.sh",
|
|
44
45
|
"evaluator-calibration-test": "bash ./scripts/evaluator-calibration-test.sh",
|
|
45
46
|
"rubric-contract-test": "bash ./scripts/rubric-contract-test.sh",
|
|
46
47
|
"release-check": "bash ./scripts/release-check.sh"
|
|
@@ -261,7 +261,7 @@ state = {
|
|
|
261
261
|
'release_blocker_ids': [],
|
|
262
262
|
'next_mandatory_action': 'Implement selected slice active-slice-fixture.',
|
|
263
263
|
'next_mandatory_role': 'completion-implementer',
|
|
264
|
-
'remaining_stop_judges':
|
|
264
|
+
'remaining_stop_judges': 2,
|
|
265
265
|
'last_reground_at': '2026-05-03T00:00:00Z',
|
|
266
266
|
'last_auditor_verdict': None,
|
|
267
267
|
'contract_status': 'selected_slice_pending_implementation',
|
|
@@ -319,7 +319,7 @@ state = {
|
|
|
319
319
|
'release_blocker_ids': [],
|
|
320
320
|
'next_mandatory_action': 'Implement selected slice evidence-fixture.',
|
|
321
321
|
'next_mandatory_role': 'completion-implementer',
|
|
322
|
-
'remaining_stop_judges':
|
|
322
|
+
'remaining_stop_judges': 2,
|
|
323
323
|
'last_reground_at': '2026-05-03T00:00:00Z',
|
|
324
324
|
'last_auditor_verdict': None,
|
|
325
325
|
'contract_status': 'selected_slice_pending_implementation',
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
set -euo pipefail
|
|
3
|
+
|
|
4
|
+
ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
|
|
5
|
+
cd "$ROOT"
|
|
6
|
+
|
|
7
|
+
node <<'NODE'
|
|
8
|
+
const fs = require('node:fs');
|
|
9
|
+
|
|
10
|
+
const read = (file) => fs.readFileSync(file, 'utf8');
|
|
11
|
+
const assertIncludes = (file, snippet) => {
|
|
12
|
+
const text = read(file);
|
|
13
|
+
if (!text.includes(snippet)) {
|
|
14
|
+
throw new Error(`${file} is missing required completion-role gating text: ${snippet}`);
|
|
15
|
+
}
|
|
16
|
+
};
|
|
17
|
+
const assertNotIncludes = (file, snippet) => {
|
|
18
|
+
const text = read(file);
|
|
19
|
+
if (text.includes(snippet)) {
|
|
20
|
+
throw new Error(`${file} still contains stale completion-role gating text: ${snippet}`);
|
|
21
|
+
}
|
|
22
|
+
};
|
|
23
|
+
|
|
24
|
+
assertIncludes('extensions/completion/index.ts', 'function isOrdinaryMainChatTurnDuringActiveWorkflow(');
|
|
25
|
+
assertIncludes('extensions/completion/index.ts', 'function isCompletionRoleDispatchAllowedTurn(');
|
|
26
|
+
assertIncludes('extensions/completion/index.ts', 'function isAwaitingUserInputWorkflowReplyTurn(');
|
|
27
|
+
assertIncludes('extensions/completion/index.ts', 'if (isAwaitingUserInputWorkflowReplyTurn(snapshot, ctx)) return true;');
|
|
28
|
+
assertIncludes('extensions/completion/index.ts', 'if (isOrdinaryMainChatTurnDuringActiveWorkflow(snapshot, ctx)) return false;');
|
|
29
|
+
assertIncludes('extensions/completion/index.ts', 'return asString(snapshot?.state?.continuation_policy) === "await_user_input";');
|
|
30
|
+
assertIncludes('extensions/completion/index.ts', 'return asString(snapshot?.state?.continuation_policy) === "continue";');
|
|
31
|
+
assertIncludes('extensions/completion/index.ts', 'const completionRoleDispatchAllowed = Boolean(role) || isCompletionRoleDispatchAllowedTurn(snapshot, ctx);');
|
|
32
|
+
assertIncludes('extensions/completion/index.ts', 'if (isCookCommandTurn(ctx)) return false;');
|
|
33
|
+
assertIncludes('extensions/completion/index.ts', 'if (isCompletionDriverPromptTurn(snapshot, ctx)) return false;');
|
|
34
|
+
assertIncludes('extensions/completion/policy-guards.ts', 'return "completion_role may only be used from an active /cook workflow session.";');
|
|
35
|
+
assertIncludes('CHANGELOG.md', 'fixed completion-role continuation gating so an already-active `/cook` workflow with `continuation_policy: continue` can keep dispatching mandatory follow-up roles');
|
|
36
|
+
assertIncludes('CHANGELOG.md', 'fixed `/cook` await-user-input resumptions so a user\'s exact reply in the active workflow can dispatch the mandatory follow-up completion role without forcing an extra `/cook` rerun');
|
|
37
|
+
|
|
38
|
+
assertNotIncludes(
|
|
39
|
+
'extensions/completion/index.ts',
|
|
40
|
+
'const completionRoleDispatchAllowed = Boolean(role) || isCompletionWorkflowSessionTurn(snapshot, ctx);',
|
|
41
|
+
);
|
|
42
|
+
|
|
43
|
+
const indexText = read('extensions/completion/index.ts');
|
|
44
|
+
const ordinaryGuardIndex = indexText.indexOf('if (isOrdinaryMainChatTurnDuringActiveWorkflow(snapshot, ctx)) return false;');
|
|
45
|
+
const awaitReplyAllowIndex = indexText.indexOf('if (isAwaitingUserInputWorkflowReplyTurn(snapshot, ctx)) return true;');
|
|
46
|
+
const continueFallbackIndex = indexText.indexOf('return asString(snapshot?.state?.continuation_policy) === "continue";');
|
|
47
|
+
if (awaitReplyAllowIndex === -1 || ordinaryGuardIndex === -1 || awaitReplyAllowIndex > ordinaryGuardIndex) {
|
|
48
|
+
throw new Error('extensions/completion/index.ts must allow active await_user_input reply turns before the ordinary main-chat rejection guard.');
|
|
49
|
+
}
|
|
50
|
+
if (ordinaryGuardIndex === -1 || continueFallbackIndex === -1 || ordinaryGuardIndex > continueFallbackIndex) {
|
|
51
|
+
throw new Error('extensions/completion/index.ts must reject ordinary main-chat turns before allowing the continuation_policy=continue fallback.');
|
|
52
|
+
}
|
|
53
|
+
NODE
|
|
54
|
+
|
|
55
|
+
echo "completion-role gating test passed"
|
|
@@ -360,6 +360,8 @@ proposal = json.loads(Path(sys.argv[1]).read_text())
|
|
|
360
360
|
assert mission in mission_text, '.agent/mission.md did not record the explicit-handoff mission anchor'
|
|
361
361
|
assert profile['task_type'] == expected_task_type, 'profile.json task_type mismatch after explicit-handoff bootstrap'
|
|
362
362
|
assert profile['evaluation_profile'] == expected_eval_profile, 'profile.json evaluation_profile mismatch after explicit-handoff bootstrap'
|
|
363
|
+
assert profile['required_stop_judges'] == 2, 'profile.json required_stop_judges mismatch after explicit-handoff bootstrap'
|
|
364
|
+
assert profile['stop_aggregation_policy'] == 'unanimous-current-head-v1', 'profile.json stop_aggregation_policy mismatch after explicit-handoff bootstrap'
|
|
363
365
|
assert state['mission_anchor'] == mission, 'state.json mission_anchor mismatch after explicit-handoff bootstrap'
|
|
364
366
|
assert state['task_type'] == expected_task_type, 'state.json task_type mismatch after explicit-handoff bootstrap'
|
|
365
367
|
assert state['evaluation_profile'] == expected_eval_profile, 'state.json evaluation_profile mismatch after explicit-handoff bootstrap'
|
|
@@ -386,6 +388,7 @@ assert proposal['source'] == 'handoff_capsule', 'explicit startup proposal snaps
|
|
|
386
388
|
assert proposal['analysis']['taskType'] == expected_task_type, 'explicit startup proposal snapshot should expose task_type hints separately'
|
|
387
389
|
assert proposal['analysis']['evaluationProfile'] == expected_eval_profile, 'explicit startup proposal snapshot should expose evaluation_profile hints separately'
|
|
388
390
|
assert state['current_phase'] == 'reground', 'state.json current_phase should start at reground after explicit-handoff bootstrap'
|
|
391
|
+
assert state['remaining_stop_judges'] == 2, 'state.json remaining_stop_judges should seed from the profile stop policy after explicit-handoff bootstrap'
|
|
389
392
|
assert state['next_mandatory_role'] == 'completion-regrounder', 'next_mandatory_role should start at completion-regrounder after explicit-handoff bootstrap'
|
|
390
393
|
assert state['continuation_reason'].startswith('User started workflow via /cook:'), 'initial startup should record the accepted startup routing in continuation_reason'
|
|
391
394
|
assert 'task_type=completion-workflow' in state['continuation_reason'], 'initial startup should persist the selected task_type in continuation_reason'
|
|
@@ -888,6 +891,8 @@ proposal = json.loads(Path(sys.argv[1]).read_text())
|
|
|
888
891
|
assert mission in mission_text, '.agent/mission.md did not update to the next-round explicit-handoff mission anchor'
|
|
889
892
|
assert profile['task_type'] == expected_task_type, 'profile.json task_type mismatch after next-round explicit handoff startup'
|
|
890
893
|
assert profile['evaluation_profile'] == expected_eval_profile, 'profile.json evaluation_profile mismatch after next-round explicit handoff startup'
|
|
894
|
+
assert profile['required_stop_judges'] == 2, 'profile.json required_stop_judges mismatch after next-round explicit handoff startup'
|
|
895
|
+
assert profile['stop_aggregation_policy'] == 'unanimous-current-head-v1', 'profile.json stop_aggregation_policy mismatch after next-round explicit handoff startup'
|
|
891
896
|
assert state['mission_anchor'] == mission, 'state.json mission_anchor mismatch after starting the next workflow round from explicit handoff'
|
|
892
897
|
assert state['task_type'] == expected_task_type, 'state.json task_type mismatch after starting the next workflow round from explicit handoff'
|
|
893
898
|
assert state['evaluation_profile'] == expected_eval_profile, 'state.json evaluation_profile mismatch after starting the next workflow round from explicit handoff'
|
|
@@ -902,6 +907,7 @@ assert active['evaluation_profile'] == expected_eval_profile, 'active-slice.json
|
|
|
902
907
|
assert proposal['mission'] == mission, 'next-round explicit handoff proposal snapshot should preserve the handoff mission anchor'
|
|
903
908
|
assert proposal['source'] == 'handoff_capsule', 'next-round explicit handoff proposal snapshot should record the handoff capsule source'
|
|
904
909
|
assert state['current_phase'] == 'reground', 'state.json current_phase should reset to reground for the next workflow round'
|
|
910
|
+
assert state['remaining_stop_judges'] == 2, 'state.json remaining_stop_judges should reset from the profile stop policy for the next workflow round'
|
|
905
911
|
assert state['continuation_policy'] == 'continue', 'continuation_policy should reset to continue for the next workflow round'
|
|
906
912
|
assert state['requires_reground'] is True, 'requires_reground should reset to true for the next workflow round'
|
|
907
913
|
assert state['project_done'] is False, 'project_done should reset to false for the next workflow round'
|
|
@@ -22,13 +22,16 @@ const assertIncludes = (file, snippet) => {
|
|
|
22
22
|
|
|
23
23
|
assertIncludes('package.json', '"evaluator-calibration-test": "bash ./scripts/evaluator-calibration-test.sh"');
|
|
24
24
|
assertIncludes('scripts/release-check.sh', 'npm run evaluator-calibration-test');
|
|
25
|
-
assertIncludes('.agent/verify_completion_stop.sh', '
|
|
25
|
+
assertIncludes('.agent/verify_completion_stop.sh', 'stop_aggregation_policy must be unanimous-current-head-v1');
|
|
26
|
+
assertIncludes('.agent/verify_completion_stop.sh', 'Current HEAD has a can_stop=no judgment');
|
|
27
|
+
assertIncludes('.agent/verify_completion_stop.sh', 'valid current-HEAD judgments');
|
|
28
|
+
assertIncludes('.agent/verify_completion_stop.sh', 'npm run release-check >/dev/null');
|
|
26
29
|
assertIncludes('README.md', 'Evaluator calibration now also fails closed on semantically lenient but well-formed reports.');
|
|
27
30
|
assertIncludes('README.md', '`npm run evaluator-calibration-test` drives the packaged transcription path through reviewer yes-with-follow-up, auditor open-contracts-with-`Next mandatory slice: none`, and stop-judge yes-with-open-contracts fixtures while still accepting truthful passing reports.');
|
|
28
|
-
assertIncludes('README.md', 'It also rejects the reproducible `none; ...` bypass family for reviewer follow-up, auditor worktree blockers, and stop-judge open-contract reporting, while still accepting
|
|
31
|
+
assertIncludes('README.md', 'It also rejects the reproducible `none; ...` bypass family for reviewer follow-up, auditor worktree blockers, and stop-judge open-contract reporting, while still accepting the reviewer routing forms `Smallest follow-up slice: none; proceed to completion-auditor.`, `Smallest follow-up slice: none, proceed to completion-auditor.`, and `Smallest follow-up slice: none - proceed to auditor.` with terminal punctuation or whitespace only.');
|
|
29
32
|
assertIncludes('README.md', 'includes deterministic active-slice contract coverage plus observability coverage, evaluator calibration, and the rubric-contract regression');
|
|
30
33
|
assertIncludes('CHANGELOG.md', 'added evaluator calibration fixtures for semantically lenient but well-formed reviewer/auditor/stop-judge reports');
|
|
31
|
-
assertIncludes('CHANGELOG.md', '
|
|
34
|
+
assertIncludes('CHANGELOG.md', 'relaxed reviewer no-follow-up routing parsing so `Acceptable as-is: yes` now also accepts `none, proceed to completion-auditor` and `none - proceed to auditor` in addition to the original exact allowance');
|
|
32
35
|
assertIncludes('CHANGELOG.md', 'wired `npm run evaluator-calibration-test` into `npm run release-check` and `.agent/verify_completion_stop.sh`');
|
|
33
36
|
assertIncludes('CHANGELOG.md', 'fixed the smoke auto-resume prompt regression');
|
|
34
37
|
assertIncludes('extensions/completion/role-reporting.js', 'Reviewer output cannot mark \'Acceptable as-is: yes\' while naming a follow-up slice other than none.');
|
|
@@ -68,6 +71,28 @@ Findings: none.
|
|
|
68
71
|
Acceptable as-is: yes
|
|
69
72
|
Smallest follow-up slice: none; proceed to completion-auditor.`;
|
|
70
73
|
|
|
74
|
+
const reviewerCommaRoutingPass = `MISSION ANCHOR: test mission
|
|
75
|
+
Remaining contract IDs: TEST-CONTRACT
|
|
76
|
+
Rubric:
|
|
77
|
+
- Contract coverage: pass - Locked acceptance criteria match the committed slice.
|
|
78
|
+
- Correctness risk: pass - No blocking regression is evident.
|
|
79
|
+
- Verification evidence: pass - Deterministic proof was rerun successfully.
|
|
80
|
+
- Docs/state parity: pass - Docs and canonical state are aligned.
|
|
81
|
+
Findings: none.
|
|
82
|
+
Acceptable as-is: yes
|
|
83
|
+
Smallest follow-up slice: none, proceed to completion-auditor.`;
|
|
84
|
+
|
|
85
|
+
const reviewerShortAuditorRoutingPass = `MISSION ANCHOR: test mission
|
|
86
|
+
Remaining contract IDs: TEST-CONTRACT
|
|
87
|
+
Rubric:
|
|
88
|
+
- Contract coverage: pass - Locked acceptance criteria match the committed slice.
|
|
89
|
+
- Correctness risk: pass - No blocking regression is evident.
|
|
90
|
+
- Verification evidence: pass - Deterministic proof was rerun successfully.
|
|
91
|
+
- Docs/state parity: pass - Docs and canonical state are aligned.
|
|
92
|
+
Findings: none.
|
|
93
|
+
Acceptable as-is: yes
|
|
94
|
+
Smallest follow-up slice: none - proceed to auditor.`;
|
|
95
|
+
|
|
71
96
|
const reviewerLenient = `MISSION ANCHOR: test mission
|
|
72
97
|
Remaining contract IDs: TEST-CONTRACT
|
|
73
98
|
Rubric:
|
|
@@ -214,6 +239,44 @@ Brief justification: This should be rejected because remaining contracts still e
|
|
|
214
239
|
assert(reviewed.appended.includes('reviewed:slice-review'), 'reviewer passing fixture should append a reviewed record');
|
|
215
240
|
assert(readJsonl(snapshotFiles.sliceHistoryPath).length === 1, 'reviewer passing fixture should create one slice-history record');
|
|
216
241
|
|
|
242
|
+
const reviewerCommaRoutingReviewed = await transcribeCanonicalRoleReport({
|
|
243
|
+
role: 'completion-reviewer',
|
|
244
|
+
output: reviewerCommaRoutingPass,
|
|
245
|
+
reportFields: parseReportFields(reviewerCommaRoutingPass),
|
|
246
|
+
snapshotFiles,
|
|
247
|
+
headSha: '1212121212121212121212121212121212121212',
|
|
248
|
+
sliceId: 'slice-review-comma',
|
|
249
|
+
recordedAt: 12,
|
|
250
|
+
});
|
|
251
|
+
assert(
|
|
252
|
+
reviewerCommaRoutingReviewed.errors.length === 0,
|
|
253
|
+
`reviewer comma-routing fixture should transcribe cleanly: ${reviewerCommaRoutingReviewed.errors.join(' | ')}`,
|
|
254
|
+
);
|
|
255
|
+
assert(
|
|
256
|
+
reviewerCommaRoutingReviewed.appended.includes('reviewed:slice-review-comma'),
|
|
257
|
+
'reviewer comma-routing fixture should append a reviewed record',
|
|
258
|
+
);
|
|
259
|
+
assert(readJsonl(snapshotFiles.sliceHistoryPath).length === 2, 'reviewer comma-routing fixture should append a second slice-history record');
|
|
260
|
+
|
|
261
|
+
const reviewerShortAuditorRoutingReviewed = await transcribeCanonicalRoleReport({
|
|
262
|
+
role: 'completion-reviewer',
|
|
263
|
+
output: reviewerShortAuditorRoutingPass,
|
|
264
|
+
reportFields: parseReportFields(reviewerShortAuditorRoutingPass),
|
|
265
|
+
snapshotFiles,
|
|
266
|
+
headSha: '1313131313131313131313131313131313131313',
|
|
267
|
+
sliceId: 'slice-review-short-auditor',
|
|
268
|
+
recordedAt: 13,
|
|
269
|
+
});
|
|
270
|
+
assert(
|
|
271
|
+
reviewerShortAuditorRoutingReviewed.errors.length === 0,
|
|
272
|
+
`reviewer short-auditor-routing fixture should transcribe cleanly: ${reviewerShortAuditorRoutingReviewed.errors.join(' | ')}`,
|
|
273
|
+
);
|
|
274
|
+
assert(
|
|
275
|
+
reviewerShortAuditorRoutingReviewed.appended.includes('reviewed:slice-review-short-auditor'),
|
|
276
|
+
'reviewer short-auditor-routing fixture should append a reviewed record',
|
|
277
|
+
);
|
|
278
|
+
assert(readJsonl(snapshotFiles.sliceHistoryPath).length === 3, 'reviewer short-auditor-routing fixture should append a third slice-history record');
|
|
279
|
+
|
|
217
280
|
const reviewerRejected = await transcribeCanonicalRoleReport({
|
|
218
281
|
role: 'completion-reviewer',
|
|
219
282
|
output: reviewerLenient,
|
|
@@ -227,7 +290,7 @@ Brief justification: This should be rejected because remaining contracts still e
|
|
|
227
290
|
reviewerRejected.errors.some((error) => error.includes('follow-up slice other than none')),
|
|
228
291
|
`reviewer lenient fixture should be rejected for a yes verdict with a follow-up slice: ${reviewerRejected.errors.join(' | ')}`,
|
|
229
292
|
);
|
|
230
|
-
assert(readJsonl(snapshotFiles.sliceHistoryPath).length ===
|
|
293
|
+
assert(readJsonl(snapshotFiles.sliceHistoryPath).length === 3, 'rejected reviewer fixture must not append history');
|
|
231
294
|
|
|
232
295
|
const reviewerNonePrefixedRejected = await transcribeCanonicalRoleReport({
|
|
233
296
|
role: 'completion-reviewer',
|
|
@@ -242,7 +305,7 @@ Brief justification: This should be rejected because remaining contracts still e
|
|
|
242
305
|
reviewerNonePrefixedRejected.errors.some((error) => error.includes('follow-up slice other than none')),
|
|
243
306
|
`reviewer none-prefixed lenient fixture should be rejected for a yes verdict with contradictory routing text: ${reviewerNonePrefixedRejected.errors.join(' | ')}`,
|
|
244
307
|
);
|
|
245
|
-
assert(readJsonl(snapshotFiles.sliceHistoryPath).length ===
|
|
308
|
+
assert(readJsonl(snapshotFiles.sliceHistoryPath).length === 3, 'rejected none-prefixed reviewer fixture must not append history');
|
|
246
309
|
|
|
247
310
|
const reviewerTrailingTextAfterRoutingRejected = await transcribeCanonicalRoleReport({
|
|
248
311
|
role: 'completion-reviewer',
|
|
@@ -255,9 +318,9 @@ Brief justification: This should be rejected because remaining contracts still e
|
|
|
255
318
|
});
|
|
256
319
|
assert(
|
|
257
320
|
reviewerTrailingTextAfterRoutingRejected.errors.some((error) => error.includes('follow-up slice other than none')),
|
|
258
|
-
`reviewer routing-trailing-text fixture should be rejected for extra text after the
|
|
321
|
+
`reviewer routing-trailing-text fixture should be rejected for extra text after the allowed completion-auditor routing forms: ${reviewerTrailingTextAfterRoutingRejected.errors.join(' | ')}`,
|
|
259
322
|
);
|
|
260
|
-
assert(readJsonl(snapshotFiles.sliceHistoryPath).length ===
|
|
323
|
+
assert(readJsonl(snapshotFiles.sliceHistoryPath).length === 3, 'rejected reviewer routing-trailing-text fixture must not append history');
|
|
261
324
|
|
|
262
325
|
const audited = await transcribeCanonicalRoleReport({
|
|
263
326
|
role: 'completion-auditor',
|
|
@@ -270,7 +333,7 @@ Brief justification: This should be rejected because remaining contracts still e
|
|
|
270
333
|
});
|
|
271
334
|
assert(audited.errors.length === 0, `auditor passing fixture should transcribe cleanly: ${audited.errors.join(' | ')}`);
|
|
272
335
|
assert(audited.appended.includes('audited:slice-audit'), 'auditor passing fixture should append an audited record');
|
|
273
|
-
assert(readJsonl(snapshotFiles.sliceHistoryPath).length ===
|
|
336
|
+
assert(readJsonl(snapshotFiles.sliceHistoryPath).length === 4, 'auditor passing fixture should append the next slice-history record');
|
|
274
337
|
|
|
275
338
|
const auditorRejected = await transcribeCanonicalRoleReport({
|
|
276
339
|
role: 'completion-auditor',
|
|
@@ -289,7 +352,7 @@ Brief justification: This should be rejected because remaining contracts still e
|
|
|
289
352
|
auditorRejected.errors.some((error) => error.includes("Next mandatory slice") && error.includes('none')),
|
|
290
353
|
`auditor lenient fixture should reject open-work reports with no next mandatory slice: ${auditorRejected.errors.join(' | ')}`,
|
|
291
354
|
);
|
|
292
|
-
assert(readJsonl(snapshotFiles.sliceHistoryPath).length ===
|
|
355
|
+
assert(readJsonl(snapshotFiles.sliceHistoryPath).length === 4, 'rejected auditor fixture must not append history');
|
|
293
356
|
|
|
294
357
|
const auditorNonePrefixedRejected = await transcribeCanonicalRoleReport({
|
|
295
358
|
role: 'completion-auditor',
|
|
@@ -304,7 +367,7 @@ Brief justification: This should be rejected because remaining contracts still e
|
|
|
304
367
|
auditorNonePrefixedRejected.errors.some((error) => error.includes('listing worktree blockers')),
|
|
305
368
|
`auditor none-prefixed lenient fixture should reject clean-yes reports that smuggle blockers behind none: ${auditorNonePrefixedRejected.errors.join(' | ')}`,
|
|
306
369
|
);
|
|
307
|
-
assert(readJsonl(snapshotFiles.sliceHistoryPath).length ===
|
|
370
|
+
assert(readJsonl(snapshotFiles.sliceHistoryPath).length === 4, 'rejected none-prefixed auditor fixture must not append history');
|
|
308
371
|
|
|
309
372
|
const judged = await transcribeCanonicalRoleReport({
|
|
310
373
|
role: 'completion-stop-judge',
|
|
@@ -34,6 +34,8 @@ elif mode == 'static':
|
|
|
34
34
|
assert data['releaseBlockerCount'] == 1, data
|
|
35
35
|
assert data['highValueGapCount'] == 4, data
|
|
36
36
|
assert data['remainingStopJudgeCount'] == 2, data
|
|
37
|
+
assert data['requiredStopJudges'] == 2, data
|
|
38
|
+
assert data['stopAggregationPolicy'] == 'unanimous-current-head-v1', data
|
|
37
39
|
assert not data.get('statusText'), data
|
|
38
40
|
widget = data['widgetLines']
|
|
39
41
|
assert 'phase: implement' in widget, widget
|
|
@@ -55,6 +57,8 @@ elif mode == 'live':
|
|
|
55
57
|
'tool activity separated from role judgment',
|
|
56
58
|
'waiting threshold uses updatedAt timestamps',
|
|
57
59
|
], data
|
|
60
|
+
assert data['requiredStopJudges'] == 2, data
|
|
61
|
+
assert data['stopAggregationPolicy'] == 'unanimous-current-head-v1', data
|
|
58
62
|
assert not data.get('statusText'), data
|
|
59
63
|
widget = data['widgetLines']
|
|
60
64
|
assert widget == [], widget
|
|
@@ -98,7 +102,8 @@ cat > .agent/profile.json <<'JSON'
|
|
|
98
102
|
"schema_version": 1,
|
|
99
103
|
"protocol_id": "completion",
|
|
100
104
|
"project_name": "status-surface-fixture",
|
|
101
|
-
"required_stop_judges":
|
|
105
|
+
"required_stop_judges": 2,
|
|
106
|
+
"stop_aggregation_policy": "unanimous-current-head-v1",
|
|
102
107
|
"priority_policy_id": "completion-default",
|
|
103
108
|
"docs_surfaces": ["README.md"]
|
|
104
109
|
}
|
package/scripts/release-check.sh
CHANGED
|
@@ -5,7 +5,7 @@ ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
|
|
|
5
5
|
cd "$ROOT"
|
|
6
6
|
export PI_COMPLETION_RUNNING_RELEASE_CHECK=1
|
|
7
7
|
|
|
8
|
-
echo "[release-check] running control-plane validation, tracked .agent contract coverage, slice-surface parity, explicit-/cook parity, startup/refocus/context regressions, canonical evidence artifact, active-slice contract, observability, legacy cleanup, evaluator calibration, and rubric contract coverage"
|
|
8
|
+
echo "[release-check] running control-plane validation, tracked .agent contract coverage, slice-surface parity, explicit-/cook parity, startup/refocus/context regressions, canonical evidence artifact, active-slice contract, observability, completion-role gating, legacy cleanup, evaluator calibration, and rubric contract coverage"
|
|
9
9
|
bash .agent/verify_completion_control_plane.sh
|
|
10
10
|
git ls-files --error-unmatch .agent/README.md .agent/mission.md .agent/profile.json .agent/verify_completion_stop.sh .agent/verify_completion_control_plane.sh >/dev/null
|
|
11
11
|
|
|
@@ -34,6 +34,8 @@ checks = {
|
|
|
34
34
|
'description: "/cook workflow: start or replace workflow only from an explicit primary-agent handoff, or resume the current workflow from canonical state"',
|
|
35
35
|
'"Do not call completion_role from ordinary chat; it is reserved for active /cook workflow sessions."',
|
|
36
36
|
'`COMPLETION WORKFLOW DRIVER\\nStart or continue the completion workflow for this repo.',
|
|
37
|
+
'function isCompletionRoleDispatchAllowedTurn(',
|
|
38
|
+
'return asString(snapshot?.state?.continuation_policy) === "continue";',
|
|
37
39
|
],
|
|
38
40
|
"extensions/completion/policy-guards.ts": [
|
|
39
41
|
'return "completion_role may only be used from an active /cook workflow session.";',
|
|
@@ -81,6 +83,7 @@ bash ./scripts/role-runner-contract-test.sh
|
|
|
81
83
|
bash ./scripts/canonical-evidence-artifact-test.sh
|
|
82
84
|
bash ./scripts/active-slice-contract-test.sh
|
|
83
85
|
npm run observability-status-test
|
|
86
|
+
npm run completion-role-gating-test
|
|
84
87
|
bash ./scripts/legacy-cleanup-test.sh
|
|
85
88
|
npm run evaluator-calibration-test
|
|
86
89
|
npm run rubric-contract-test
|
|
@@ -81,6 +81,8 @@ assertIncludes('extensions/completion/prompt-surfaces.ts', '`Task type: ${args.t
|
|
|
81
81
|
assertIncludes('extensions/completion/prompt-surfaces.ts', '`Evaluation profile: ${args.evaluationProfile ?? "(missing)"}`');
|
|
82
82
|
assertIncludes('extensions/completion/prompt-surfaces.ts', '`- task_type: ${deps.currentTaskType(snapshot) ?? "(missing)"}`');
|
|
83
83
|
assertIncludes('extensions/completion/prompt-surfaces.ts', '`- evaluation_profile: ${deps.currentEvaluationProfile(snapshot) ?? "(missing)"}`');
|
|
84
|
+
assertIncludes('extensions/completion/prompt-surfaces.ts', '`- required_stop_judges: ${snapshot.profile?.required_stop_judges ?? "(missing)"}`');
|
|
85
|
+
assertIncludes('extensions/completion/prompt-surfaces.ts', '`- stop_aggregation_policy: ${deps.asString(snapshot.profile?.stop_aggregation_policy) ?? "(missing)"}`');
|
|
84
86
|
assertIncludes('extensions/completion/prompt-surfaces.ts', 'Canonical evaluation handoff for ${role}:');
|
|
85
87
|
assertIncludes('extensions/completion/index.ts', 'buildEvaluationRoleReminderText(snapshot, nextRole)');
|
|
86
88
|
assertIncludes('extensions/completion/role-runner.ts', 'import { parseReportFields, transcribeRoleOutput, type TranscriptionResult } from "./transcription";');
|
|
@@ -97,6 +99,9 @@ assertIncludes('extensions/completion/role-reporting.js', 'Stop-judge output mus
|
|
|
97
99
|
assertIncludes('extensions/completion/role-reporting.js', 'Stop-judge output must answer \'Tracked and unignored worktree is clean\' with yes or no.');
|
|
98
100
|
assertIncludes('package.json', '"rubric-contract-test": "bash ./scripts/rubric-contract-test.sh"');
|
|
99
101
|
assertIncludes('scripts/release-check.sh', 'npm run rubric-contract-test');
|
|
102
|
+
assertIncludes('.agent/verify_completion_stop.sh', 'stop_aggregation_policy must be unanimous-current-head-v1');
|
|
103
|
+
assertIncludes('.agent/verify_completion_stop.sh', 'Current HEAD has a can_stop=no judgment');
|
|
104
|
+
assertIncludes('.agent/verify_completion_stop.sh', 'valid current-HEAD judgments');
|
|
100
105
|
assertIncludes('.agent/verify_completion_stop.sh', 'npm run release-check >/dev/null');
|
|
101
106
|
NODE
|
|
102
107
|
|
|
@@ -34,6 +34,8 @@ This skill defines shared protocol facts only. Role-specific behavior belongs in
|
|
|
34
34
|
- Before selecting or advancing to the next slice after a committed slice, the tracked and unignored worktree must be clean. If it is not clean, treat that dirty state as a blocker to next-slice progression and reopen or continue the latest slice for reconciliation.
|
|
35
35
|
- Docs, config, and runbooks must stay truthful to shipped behavior.
|
|
36
36
|
- `.agent/verify_completion_stop.sh` is a generated repo-level baseline verifier. Onboarding should create a working version from current repo truth rather than an unconditional failing placeholder.
|
|
37
|
+
- The packaged default stop policy is `required_stop_judges: 2` plus `stop_aggregation_policy: "unanimous-current-head-v1"` in `.agent/profile.json`.
|
|
38
|
+
- Under `unanimous-current-head-v1`, only current-HEAD `judgment` records count, any current-HEAD `can_stop = no` fails closed, and repo-level stop verification must wait until the required current-HEAD judgments are recorded.
|
|
37
39
|
- Keep slice-specific proof in repo tests or deterministic checks. Refresh `.agent/verify_completion_stop.sh` only when the repo's top-level verification surfaces change or the verifier becomes stale.
|
|
38
40
|
- The workflow topology is flat and primary-driven: the main pi session remains the workflow root and invokes at most one completion role at a time.
|
|
39
41
|
- No completion role may invoke another completion role during the normal workflow.
|
|
@@ -83,7 +85,7 @@ If the workflow driver detects that the next mandatory action belongs to a compl
|
|
|
83
85
|
6. If the latest committed slice lacks an audit result, invoke `completion-auditor`.
|
|
84
86
|
7. If review or audit have returned and canonical reconciliation is needed, invoke `completion-regrounder`. `completion-regrounder` must not select or hand off a next slice while the latest committed slice leaves the tracked and unignored worktree dirty; instead it must reopen or continue that latest slice for reconciliation.
|
|
85
87
|
8. If all planned slices are done and final closure is being evaluated, invoke the required `completion-stop-judge` sessions directly.
|
|
86
|
-
9. After each required `completion-stop-judge` result is faithfully recorded, rerun `bash .agent/verify_completion_stop.sh` and invoke `completion-regrounder` for final stop reconciliation.
|
|
88
|
+
9. After all required current-HEAD `completion-stop-judge` results are faithfully recorded, rerun `bash .agent/verify_completion_stop.sh` and invoke `completion-regrounder` for final stop reconciliation.
|
|
87
89
|
|
|
88
90
|
The workflow driver must not substitute itself for any mandatory dispatch target above.
|
|
89
91
|
|
|
@@ -55,7 +55,8 @@ Optional context only:
|
|
|
55
55
|
"schema_version": 1,
|
|
56
56
|
"protocol_id": "completion",
|
|
57
57
|
"project_name": "<repo-name>",
|
|
58
|
-
"required_stop_judges":
|
|
58
|
+
"required_stop_judges": 2,
|
|
59
|
+
"stop_aggregation_policy": "unanimous-current-head-v1",
|
|
59
60
|
"priority_policy_id": "completion-default",
|
|
60
61
|
"task_type": "completion-workflow",
|
|
61
62
|
"evaluation_profile": "completion-rubric-v1",
|
|
@@ -254,6 +255,21 @@ Minimum record shape:
|
|
|
254
255
|
|
|
255
256
|
Empty history files are legal.
|
|
256
257
|
|
|
258
|
+
## Final Stop Aggregation Policy
|
|
259
|
+
|
|
260
|
+
The packaged default stop policy is:
|
|
261
|
+
|
|
262
|
+
- `required_stop_judges: 2`
|
|
263
|
+
- `stop_aggregation_policy: "unanimous-current-head-v1"`
|
|
264
|
+
|
|
265
|
+
Policy meaning:
|
|
266
|
+
|
|
267
|
+
- count only `judgment` records whose `head_sha` matches the current `HEAD`
|
|
268
|
+
- require at least two valid current-HEAD judgments before repo-level stop verification may run
|
|
269
|
+
- fail closed if any current-HEAD judgment has `can_stop = false`
|
|
270
|
+
- fail closed if a current-HEAD judgment is malformed or carries non-zero blocker/high-value-gap counts
|
|
271
|
+
- rerun `bash .agent/verify_completion_stop.sh` only after the required current-HEAD judgments are faithfully recorded, then hand final reconciliation back to `completion-regrounder`
|
|
272
|
+
|
|
257
273
|
## Structured Evaluation Rubric Foundation
|
|
258
274
|
|
|
259
275
|
`completion-reviewer`, `completion-auditor`, and `completion-stop-judge` must emit rubric-backed evaluations using the same shared dimension names and verdict semantics.
|
|
@@ -344,7 +360,7 @@ It must not, while a slice is selected or in progress:
|
|
|
344
360
|
6. If the latest committed slice lacks audit, invoke `completion-auditor`.
|
|
345
361
|
7. If canonical reconciliation is needed after review or audit, invoke `completion-regrounder`.
|
|
346
362
|
8. If all slices are done and final closure is under evaluation, invoke the required `completion-stop-judge` sessions directly.
|
|
347
|
-
9. After the required judgments are recorded, rerun `bash .agent/verify_completion_stop.sh` and invoke `completion-regrounder` for final stop reconciliation.
|
|
363
|
+
9. After the required current-HEAD judgments are recorded, rerun `bash .agent/verify_completion_stop.sh` and invoke `completion-regrounder` for final stop reconciliation.
|
|
348
364
|
|
|
349
365
|
## Compaction And Recovery
|
|
350
366
|
|