@linimin/pi-letscook 0.1.30 → 0.1.32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,31 +5,143 @@ PKG_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
5
5
  TMPDIR="$(mktemp -d)"
6
6
  trap 'rm -rf "$TMPDIR"' EXIT
7
7
 
8
- cd "$TMPDIR"
8
+ ROOT="$TMPDIR/repo"
9
+ KICKOFF_PROMPT="$TMPDIR/kickoff-prompt.txt"
10
+ RESUME_PROMPT="$TMPDIR/resume-prompt.txt"
11
+ AUTO_RESUME_PROMPT="$TMPDIR/auto-resume-prompt.txt"
12
+
13
+ mkdir -p "$ROOT"
14
+ cd "$ROOT"
9
15
  git init -q
10
16
 
11
- pi -e "$PKG_ROOT" -p "/cook smoke-test mission" >/tmp/pi-completion-smoke.out 2>/tmp/pi-completion-smoke.err &
12
- PI_PID=$!
13
- for _ in $(seq 1 60); do
14
- if [[ -f .agent/profile.json && -f .agent/state.json && -f .agent/plan.json && -f .agent/active-slice.json ]]; then
15
- break
16
- fi
17
- sleep 1
17
+ PI_COMPLETION_SKIP_DRIVER_KICKOFF=1 \
18
+ PI_COMPLETION_TEST_DRIVER_PROMPT_PATH="$KICKOFF_PROMPT" \
19
+ pi -e "$PKG_ROOT" -p "/cook smoke-test mission" \
20
+ >"$TMPDIR/pi-completion-smoke-bootstrap.out" 2>"$TMPDIR/pi-completion-smoke-bootstrap.err"
21
+
22
+ for file in .agent/profile.json .agent/state.json .agent/plan.json .agent/active-slice.json; do
23
+ [[ -f "$file" ]] || { echo "missing canonical bootstrap file: $file" >&2; exit 1; }
18
24
  done
19
- if [[ ! -f .agent/profile.json || ! -f .agent/state.json || ! -f .agent/plan.json || ! -f .agent/active-slice.json ]]; then
20
- echo "completion bootstrap did not materialize canonical files in time" >&2
21
- cat /tmp/pi-completion-smoke.err >&2 || true
22
- kill "$PI_PID" >/dev/null 2>&1 || true
23
- wait "$PI_PID" >/dev/null 2>&1 || true
24
- exit 1
25
- fi
26
- kill "$PI_PID" >/dev/null 2>&1 || true
27
- wait "$PI_PID" >/dev/null 2>&1 || true
28
25
 
29
26
  bash .agent/verify_completion_control_plane.sh >/dev/null
30
27
  bash .agent/verify_completion_stop.sh >/dev/null
31
28
 
32
- python3 - <<'PY2'
29
+ python3 - "$KICKOFF_PROMPT" <<'PY'
30
+ import json
31
+ import sys
32
+ from pathlib import Path
33
+
34
+ expected_task_type = 'completion-workflow'
35
+ expected_eval_profile = 'completion-rubric-v1'
36
+
37
+ profile = json.loads(Path('.agent/profile.json').read_text())
38
+ state = json.loads(Path('.agent/state.json').read_text())
39
+ plan = json.loads(Path('.agent/plan.json').read_text())
40
+ active = json.loads(Path('.agent/active-slice.json').read_text())
41
+ kickoff = Path(sys.argv[1]).read_text()
42
+
43
+ assert profile['task_type'] == expected_task_type, 'profile.json task_type mismatch after bootstrap'
44
+ assert profile['evaluation_profile'] == expected_eval_profile, 'profile.json evaluation_profile mismatch after bootstrap'
45
+ assert state['task_type'] == expected_task_type, 'state.json task_type mismatch after bootstrap'
46
+ assert state['evaluation_profile'] == expected_eval_profile, 'state.json evaluation_profile mismatch after bootstrap'
47
+ assert plan['task_type'] == expected_task_type, 'plan.json task_type mismatch after bootstrap'
48
+ assert plan['evaluation_profile'] == expected_eval_profile, 'plan.json evaluation_profile mismatch after bootstrap'
49
+ assert active['task_type'] == expected_task_type, 'active-slice.json task_type mismatch after bootstrap'
50
+ assert active['evaluation_profile'] == expected_eval_profile, 'active-slice.json evaluation_profile mismatch after bootstrap'
51
+ assert active['implementation_surfaces'] == [], 'active-slice.json should scaffold empty implementation_surfaces'
52
+ assert active['verification_commands'] == [], 'active-slice.json should scaffold empty verification_commands'
53
+ assert 'Canonical routing profile:' in kickoff, 'kickoff prompt should expose canonical routing profile'
54
+ assert f'- task_type: {expected_task_type}' in kickoff, 'kickoff prompt missing canonical task_type'
55
+ assert f'- evaluation_profile: {expected_eval_profile}' in kickoff, 'kickoff prompt missing canonical evaluation_profile'
56
+ PY
57
+
58
+ PI_COMPLETION_SKIP_DRIVER_KICKOFF=1 \
59
+ PI_COMPLETION_TEST_DRIVER_PROMPT_PATH="$RESUME_PROMPT" \
60
+ pi -e "$PKG_ROOT" -p "/cook" \
61
+ >"$TMPDIR/pi-completion-smoke-resume.out" 2>"$TMPDIR/pi-completion-smoke-resume.err"
62
+
63
+ python3 - "$RESUME_PROMPT" <<'PY'
64
+ import sys
65
+ from pathlib import Path
66
+
67
+ expected_task_type = 'completion-workflow'
68
+ expected_eval_profile = 'completion-rubric-v1'
69
+ resume = Path(sys.argv[1]).read_text()
70
+
71
+ assert 'Canonical routing profile:' in resume, 'resume prompt should expose canonical routing profile'
72
+ assert f'- task_type: {expected_task_type}' in resume, 'resume prompt missing canonical task_type'
73
+ assert f'- evaluation_profile: {expected_eval_profile}' in resume, 'resume prompt missing canonical evaluation_profile'
74
+ PY
75
+
76
+ PI_COMPLETION_SKIP_DRIVER_KICKOFF=1 \
77
+ PI_COMPLETION_TEST_AUTO_CONTINUE_ON_SESSION_START=1 \
78
+ PI_COMPLETION_TEST_AUTO_CONTINUE_PROMPT_PATH="$AUTO_RESUME_PROMPT" \
79
+ pi -e "$PKG_ROOT" -p "/cook" \
80
+ >"$TMPDIR/pi-completion-smoke-auto-resume.out" 2>"$TMPDIR/pi-completion-smoke-auto-resume.err"
81
+
82
+ python3 - "$AUTO_RESUME_PROMPT" <<'PY'
83
+ import sys
84
+ from pathlib import Path
85
+
86
+ expected_task_type = 'completion-workflow'
87
+ expected_eval_profile = 'completion-rubric-v1'
88
+ auto_resume = Path(sys.argv[1]).read_text()
89
+
90
+ assert 'Resume the completion workflow from canonical state.' in auto_resume, 'auto-resume prompt should use the canonical resume workflow prompt'
91
+ assert 'Canonical routing profile:' in auto_resume, 'auto-resume prompt should expose canonical routing profile'
92
+ assert f'- task_type: {expected_task_type}' in auto_resume, 'auto-resume prompt missing canonical task_type'
93
+ assert f'- evaluation_profile: {expected_eval_profile}' in auto_resume, 'auto-resume prompt missing canonical evaluation_profile'
94
+ PY
95
+
96
+ python3 - <<'PY'
97
+ import json
98
+ from pathlib import Path
99
+ path = Path('.agent/state.json')
100
+ state = json.loads(path.read_text())
101
+ state.pop('task_type', None)
102
+ path.write_text(json.dumps(state, indent=2) + '\n')
103
+ PY
104
+
105
+ if bash .agent/verify_completion_control_plane.sh >/dev/null 2>&1; then
106
+ echo "expected control-plane verification to fail when state.json omits task_type" >&2
107
+ exit 1
108
+ fi
109
+
110
+ python3 - <<'PY'
111
+ import json
112
+ from pathlib import Path
113
+ profile = json.loads(Path('.agent/profile.json').read_text())
114
+ state_path = Path('.agent/state.json')
115
+ state = json.loads(state_path.read_text())
116
+ state['task_type'] = profile['task_type']
117
+ state_path.write_text(json.dumps(state, indent=2) + '\n')
118
+ PY
119
+
120
+ python3 - <<'PY'
121
+ import json
122
+ from pathlib import Path
123
+ path = Path('.agent/active-slice.json')
124
+ active = json.loads(path.read_text())
125
+ active.pop('evaluation_profile', None)
126
+ path.write_text(json.dumps(active, indent=2) + '\n')
127
+ PY
128
+
129
+ if bash .agent/verify_completion_control_plane.sh >/dev/null 2>&1; then
130
+ echo "expected control-plane verification to fail when active-slice.json omits evaluation_profile" >&2
131
+ exit 1
132
+ fi
133
+
134
+ python3 - <<'PY'
135
+ import json
136
+ from pathlib import Path
137
+ profile = json.loads(Path('.agent/profile.json').read_text())
138
+ active_path = Path('.agent/active-slice.json')
139
+ active = json.loads(active_path.read_text())
140
+ active['evaluation_profile'] = profile['evaluation_profile']
141
+ active_path.write_text(json.dumps(active, indent=2) + '\n')
142
+ PY
143
+
144
+ python3 - <<'PY'
33
145
  import json
34
146
  from pathlib import Path
35
147
  path = Path('.agent/active-slice.json')
@@ -41,8 +153,10 @@ active.update({
41
153
  'contract_ids': ['smoke-contract'],
42
154
  'acceptance_criteria': ['criterion'],
43
155
  'blocked_on': [],
44
- 'locked_notes': [],
156
+ 'locked_notes': ['keep the change scoped to the selected active-slice contract'],
45
157
  'must_fix_findings': [],
158
+ 'implementation_surfaces': ['extensions/completion/index.ts', '.agent/verify_completion_control_plane.sh'],
159
+ 'verification_commands': ['bash .agent/verify_completion_control_plane.sh', 'npm run smoke-test'],
46
160
  'basis_commit': 'deadbeef',
47
161
  'remaining_contract_ids_before': ['smoke-contract'],
48
162
  'release_blocker_count_before': 1,
@@ -51,14 +165,14 @@ active.update({
51
165
  active.pop('priority', None)
52
166
  active.pop('why_now', None)
53
167
  path.write_text(json.dumps(active, indent=2) + '\n')
54
- PY2
168
+ PY
55
169
 
56
170
  if bash .agent/verify_completion_control_plane.sh >/dev/null 2>&1; then
57
171
  echo "expected control-plane verification to fail when selected active-slice omits priority/why_now" >&2
58
172
  exit 1
59
173
  fi
60
174
 
61
- python3 - <<'PY3'
175
+ python3 - <<'PY'
62
176
  import json
63
177
  from pathlib import Path
64
178
  path = Path('.agent/active-slice.json')
@@ -66,9 +180,47 @@ active = json.loads(path.read_text())
66
180
  active['priority'] = 1
67
181
  active['why_now'] = 'smoke test exact handoff'
68
182
  path.write_text(json.dumps(active, indent=2) + '\n')
69
- PY3
183
+ PY
184
+
185
+ python3 - <<'PY'
186
+ import json
187
+ from pathlib import Path
188
+ path = Path('.agent/active-slice.json')
189
+ active = json.loads(path.read_text())
190
+ active.pop('implementation_surfaces', None)
191
+ active.pop('verification_commands', None)
192
+ path.write_text(json.dumps(active, indent=2) + '\n')
193
+ PY
194
+
195
+ if bash .agent/verify_completion_control_plane.sh >/dev/null 2>&1; then
196
+ echo "expected control-plane verification to fail when selected active-slice omits implementation_surfaces/verification_commands" >&2
197
+ exit 1
198
+ fi
199
+
200
+ python3 - <<'PY'
201
+ import json
202
+ from pathlib import Path
203
+ path = Path('.agent/active-slice.json')
204
+ active = json.loads(path.read_text())
205
+ active['implementation_surfaces'] = ['extensions/completion/index.ts', '.agent/verify_completion_control_plane.sh']
206
+ active['verification_commands'] = ['bash .agent/verify_completion_control_plane.sh', 'npm run smoke-test']
207
+ path.write_text(json.dumps(active, indent=2) + '\n')
208
+ PY
70
209
 
71
210
  bash .agent/verify_completion_control_plane.sh >/dev/null
72
211
  bash .agent/verify_completion_stop.sh >/dev/null
73
212
 
74
- echo "smoke test passed: $TMPDIR"
213
+ python3 - "$PKG_ROOT" <<'PY'
214
+ import sys
215
+ from pathlib import Path
216
+
217
+ text = Path(sys.argv[1], 'extensions/completion', 'index.ts').read_text()
218
+ assert 'Active slice priority: ${activePriority}' in text, 'system reminder source should expose active-slice priority'
219
+ assert 'Active slice why_now: ${activeWhyNow}' in text, 'system reminder source should expose active-slice why_now'
220
+ assert 'Active implementation surfaces: ${implementationSurfaces.join(", ")}' in text, 'system reminder source should expose implementation_surfaces'
221
+ assert 'Active verification commands: ${verificationCommands.join(" | ")}' in text, 'system reminder source should expose verification_commands'
222
+ assert '`- implementation_surfaces: ${implementationSurfaces.join(" | ")}`' in text, 'resume capsule source should expose implementation_surfaces'
223
+ assert '`- verification_commands: ${verificationCommands.join(" | ")}`' in text, 'resume capsule source should expose verification_commands'
224
+ PY
225
+
226
+ echo "smoke test passed: $ROOT"
@@ -148,6 +148,8 @@ The workflow driver must invoke `completion-regrounder` before continuing whenev
148
148
  - acceptance criteria for the selected or active slice are missing or unclear
149
149
  - the exact implementer handoff snapshot in `.agent/active-slice.json` is missing, stale, or contradictory
150
150
 
151
+ The exact implementer handoff now includes implementation-scope surfaces and expected verification commands in addition to the locked slice goal, acceptance, notes, and before-slice counters.
152
+
151
153
  The workflow driver must not continue implementation, review, audit, or stop evaluation from compacted conversation memory alone.
152
154
 
153
155
  After compaction or recovery, `completion-implementer` must also re-read canonical `.agent/state.json`, `.agent/plan.json`, and `.agent/active-slice.json` before resuming work. If `.agent/active-slice.json` still contains a truthful exact handoff snapshot, continue from canonical state rather than asking the user to resend the original caller payload.
@@ -161,6 +163,43 @@ All completion reports must begin with:
161
163
 
162
164
  If a role-specific fixed format uses before/after wording, keep the same mission-anchor first line and then follow that role's exact format.
163
165
 
166
+ ## Structured Evaluation Rubric Foundation
167
+
168
+ `completion-reviewer`, `completion-auditor`, and `completion-stop-judge` must emit rubric-backed evaluations using the same shared dimension names and verdict semantics.
169
+
170
+ The shared rubric foundation now sits alongside canonical `task_type` and `evaluation_profile` signaling in the control plane. That signaling is routing metadata only; later slices may still add stricter profile-aware rubric-output enforcement.
171
+
172
+ Required rubric section:
173
+
174
+ - `Rubric:`
175
+ - `- Contract coverage: pass|concern|fail - ...`
176
+ - `- Correctness risk: pass|concern|fail - ...`
177
+ - `- Verification evidence: pass|concern|fail - ...`
178
+ - `- Docs/state parity: pass|concern|fail - ...`
179
+
180
+ Use the dimension names and verdict words exactly as written above.
181
+
182
+ Dimension meaning:
183
+
184
+ - `Contract coverage` — whether the slice or current HEAD satisfies the locked acceptance criteria and role-specific workflow obligations.
185
+ - `Correctness risk` — whether regressions, blocking defects, safety issues, or closure risks are still evident in current repo truth.
186
+ - `Verification evidence` — whether tests, deterministic proof, and rerun verification are strong enough for the role's decision.
187
+ - `Docs/state parity` — whether docs, config, runbooks, and canonical `.agent` state stay truthful to shipped behavior for the role's scope.
188
+
189
+ Verdict semantics:
190
+
191
+ - `pass` — no material issue remains for that dimension in the role's current decision.
192
+ - `concern` — a real caveat or remaining gap exists, but it does not by itself force rejection or `NO-STOP`; explain the follow-up plainly.
193
+ - `fail` — a blocking issue or contradictory truth exists and the role's final verdict must not be positive.
194
+
195
+ Decision alignment rules:
196
+
197
+ - Reviewer: any `fail` means `Acceptable as-is` must be `no`.
198
+ - Auditor: use `concern` or `fail` to explain why the project is not yet done and whether canonical backlog/state remain truthful.
199
+ - Stop judge: any `fail` means `Can the project stop now` must be `no`.
200
+
201
+ Always include all four rubric lines, even when every dimension is `pass`.
202
+
164
203
  ## References
165
204
 
166
205
  Read these bundled references when you need the full protocol or scaffolding material:
@@ -36,6 +36,8 @@
36
36
  "project_name": "<repo-name>",
37
37
  "required_stop_judges": 3,
38
38
  "priority_policy_id": "completion-default",
39
+ "task_type": "completion-workflow",
40
+ "evaluation_profile": "completion-rubric-v1",
39
41
  "docs_surfaces": ["README.md", "docs/"]
40
42
  }
41
43
  ```
@@ -48,6 +50,8 @@ Required fields:
48
50
 
49
51
  - `schema_version`
50
52
  - `mission_anchor`
53
+ - `task_type`
54
+ - `evaluation_profile`
51
55
  - `current_phase`
52
56
  - `continuation_policy`
53
57
  - `continuation_reason`
@@ -111,6 +115,8 @@ Required fields:
111
115
 
112
116
  - `schema_version`
113
117
  - `mission_anchor`
118
+ - `task_type`
119
+ - `evaluation_profile`
114
120
  - `last_reground_at`
115
121
  - `plan_basis`
116
122
  - `candidate_slices`
@@ -143,6 +149,27 @@ Rules:
143
149
 
144
150
  `active-slice.json` carries one current slice cursor.
145
151
 
152
+ Required base fields:
153
+
154
+ - `schema_version`
155
+ - `mission_anchor`
156
+ - `task_type`
157
+ - `evaluation_profile`
158
+ - `status`
159
+ - `slice_id`
160
+ - `goal`
161
+ - `contract_ids`
162
+ - `acceptance_criteria`
163
+ - `blocked_on`
164
+ - `locked_notes`
165
+ - `must_fix_findings`
166
+ - `implementation_surfaces`
167
+ - `verification_commands`
168
+ - `basis_commit`
169
+ - `remaining_contract_ids_before`
170
+ - `release_blocker_count_before`
171
+ - `high_value_gap_count_before`
172
+
146
173
  When `status` is `selected`, `in_progress`, `committed`, or `done`, `active-slice.json` must also carry the exact implementer handoff snapshot so `completion-implementer` can resume after compaction without asking the user to resend the original caller payload.
147
174
 
148
175
  Required exact handoff fields:
@@ -153,11 +180,18 @@ Required exact handoff fields:
153
180
  - `blocked_on`
154
181
  - `locked_notes`
155
182
  - `must_fix_findings`
183
+ - `implementation_surfaces`
184
+ - `verification_commands`
156
185
  - `basis_commit`
157
186
  - `remaining_contract_ids_before`
158
187
  - `release_blocker_count_before`
159
188
  - `high_value_gap_count_before`
160
189
 
190
+ Field meaning:
191
+
192
+ - `implementation_surfaces` — the repo files or surfaces this slice is expected to update or keep in parity, so implementers can resume on the right scope after compaction.
193
+ - `verification_commands` — the focused and broader deterministic checks expected before the slice is committed.
194
+
161
195
  Allowed `status` values:
162
196
 
163
197
  - `idle`
@@ -199,6 +233,43 @@ Minimum record shape:
199
233
 
200
234
  Empty history files are legal.
201
235
 
236
+ ## Structured Evaluation Rubric Foundation
237
+
238
+ `completion-reviewer`, `completion-auditor`, and `completion-stop-judge` must emit rubric-backed evaluations using the same shared dimension names and verdict semantics.
239
+
240
+ The shared rubric foundation now sits alongside canonical `task_type` and `evaluation_profile` signaling in `.agent/profile.json`, `.agent/state.json`, `.agent/plan.json`, and `.agent/active-slice.json`. That signaling is routing metadata only; later slices may still add stricter profile-aware rubric-output enforcement.
241
+
242
+ Required rubric section:
243
+
244
+ - `Rubric:`
245
+ - `- Contract coverage: pass|concern|fail - ...`
246
+ - `- Correctness risk: pass|concern|fail - ...`
247
+ - `- Verification evidence: pass|concern|fail - ...`
248
+ - `- Docs/state parity: pass|concern|fail - ...`
249
+
250
+ Use the dimension names and verdict words exactly as written above.
251
+
252
+ Dimension meaning:
253
+
254
+ - `Contract coverage` — whether the slice or current HEAD satisfies the locked acceptance criteria and role-specific workflow obligations.
255
+ - `Correctness risk` — whether regressions, blocking defects, safety issues, or closure risks are still evident in current repo truth.
256
+ - `Verification evidence` — whether tests, deterministic proof, and rerun verification are strong enough for the role's decision.
257
+ - `Docs/state parity` — whether docs, config, runbooks, and canonical `.agent` state stay truthful to shipped behavior for the role's scope.
258
+
259
+ Verdict semantics:
260
+
261
+ - `pass` — no material issue remains for that dimension in the role's current decision.
262
+ - `concern` — a real caveat or remaining gap exists, but it does not by itself force rejection or `NO-STOP`; explain the follow-up plainly.
263
+ - `fail` — a blocking issue or contradictory truth exists and the role's final verdict must not be positive.
264
+
265
+ Decision alignment rules:
266
+
267
+ - Reviewer: any `fail` means `Acceptable as-is` must be `no`.
268
+ - Auditor: use `concern` or `fail` to explain why the project is not yet done and whether canonical backlog/state remain truthful.
269
+ - Stop judge: any `fail` means `Can the project stop now` must be `no`.
270
+
271
+ Always include all four rubric lines, even when every dimension is `pass`.
272
+
202
273
  ## One-Slice Lifecycle
203
274
 
204
275
  1. Re-ground from current repo truth.