prizmkit 1.1.21 → 1.1.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26)
  1. package/bundled/VERSION.json +3 -3
  2. package/bundled/dev-pipeline/lib/heartbeat.sh +50 -7
  3. package/bundled/dev-pipeline/reset-bug.sh +21 -13
  4. package/bundled/dev-pipeline/reset-feature.sh +21 -13
  5. package/bundled/dev-pipeline/reset-refactor.sh +21 -13
  6. package/bundled/dev-pipeline/run-bugfix.sh +40 -2
  7. package/bundled/dev-pipeline/run-feature.sh +41 -1
  8. package/bundled/dev-pipeline/run-refactor.sh +40 -2
  9. package/bundled/dev-pipeline/scripts/detect-stuck.py +25 -14
  10. package/bundled/dev-pipeline/scripts/init-bugfix-pipeline.py +0 -5
  11. package/bundled/dev-pipeline/scripts/init-pipeline.py +0 -5
  12. package/bundled/dev-pipeline/scripts/init-refactor-pipeline.py +0 -5
  13. package/bundled/dev-pipeline/scripts/update-bug-status.py +40 -31
  14. package/bundled/dev-pipeline/scripts/update-feature-status.py +54 -60
  15. package/bundled/dev-pipeline/scripts/update-refactor-status.py +43 -34
  16. package/bundled/dev-pipeline/templates/bootstrap-tier1.md +50 -7
  17. package/bundled/dev-pipeline/templates/bootstrap-tier2.md +50 -7
  18. package/bundled/dev-pipeline/templates/bootstrap-tier3.md +50 -7
  19. package/bundled/dev-pipeline/templates/sections/context-budget-rules.md +20 -0
  20. package/bundled/dev-pipeline/templates/sections/phase-browser-verification.md +84 -5
  21. package/bundled/dev-pipeline/templates/sections/phase-implement-agent.md +7 -0
  22. package/bundled/dev-pipeline/templates/sections/phase-implement-full.md +7 -0
  23. package/bundled/dev-pipeline/templates/sections/phase-implement-lite.md +7 -0
  24. package/bundled/dev-pipeline/tests/test_auto_skip.py +10 -3
  25. package/bundled/skills/_metadata.json +1 -1
  26. package/package.json +1 -1
@@ -96,11 +96,10 @@ def now_iso():
96
96
 
97
97
 
98
98
  def _default_status(refactor_id):
99
- """Create a default refactor status object."""
99
+ """Create a default refactor runtime status object (no status field)."""
100
100
  now = now_iso()
101
101
  return {
102
102
  "refactor_id": refactor_id,
103
- "status": "pending",
104
103
  "retry_count": 0,
105
104
  "max_retries": 3,
106
105
  "sessions": [],
@@ -112,20 +111,42 @@ def _default_status(refactor_id):
112
111
 
113
112
 
114
113
  def load_refactor_status(state_dir, refactor_id):
114
+ """Load runtime state from status.json for a refactor.
115
+
116
+ Returns runtime fields only (retry_count, sessions, etc.).
117
+ The 'status' field is NOT included — status lives exclusively
118
+ in refactor-list.json.
119
+ """
115
120
  status_path = os.path.join(state_dir, "refactors", refactor_id, "status.json")
116
121
  if not os.path.isfile(status_path):
117
122
  return _default_status(refactor_id)
118
123
  data, err = load_json_file(status_path)
119
124
  if err:
120
125
  return _default_status(refactor_id)
126
+ # Defensively remove status if present (legacy data)
127
+ data.pop("status", None)
121
128
  return data
122
129
 
123
130
 
124
131
  def save_refactor_status(state_dir, refactor_id, status_data):
132
+ """Write the status.json for a refactor (runtime fields only)."""
133
+ # Defensively strip status — it belongs in refactor-list.json
134
+ status_data.pop("status", None)
125
135
  status_path = os.path.join(state_dir, "refactors", refactor_id, "status.json")
126
136
  return write_json_file(status_path, status_data)
127
137
 
128
138
 
139
+ def get_refactor_status_from_list(refactor_list_path, refactor_id):
140
+ """Read a single refactor's status from refactor-list.json."""
141
+ data, err = load_json_file(refactor_list_path)
142
+ if err:
143
+ return "pending"
144
+ for r in data.get("refactors", []):
145
+ if isinstance(r, dict) and r.get("id") == refactor_id:
146
+ return r.get("status", "pending")
147
+ return "pending"
148
+
149
+
129
150
  def update_refactor_in_list(refactor_list_path, refactor_id, new_status):
130
151
  data, err = load_json_file(refactor_list_path)
131
152
  if err:
@@ -179,7 +200,7 @@ def action_get_next(refactor_list_data, state_dir):
179
200
  print("PIPELINE_COMPLETE")
180
201
  return
181
202
 
182
- # Build status map and completed set
203
+ # Build status map from refactor-list.json (single source of truth)
183
204
  status_map = {}
184
205
  status_data_map = {}
185
206
  for r in refactors:
@@ -188,8 +209,8 @@ def action_get_next(refactor_list_data, state_dir):
188
209
  rid = r.get("id")
189
210
  if not rid:
190
211
  continue
212
+ status_map[rid] = r.get("status", "pending")
191
213
  rs = load_refactor_status(state_dir, rid)
192
- status_map[rid] = rs.get("status", "pending")
193
214
  status_data_map[rid] = rs
194
215
 
195
216
  completed_set = {rid for rid, st in status_map.items() if st in TERMINAL_STATUSES}
@@ -270,35 +291,30 @@ def action_update(args, refactor_list_path, state_dir):
270
291
 
271
292
  rs = load_refactor_status(state_dir, refactor_id)
272
293
 
294
+ # Track what status we write to refactor-list.json
295
+ new_status = get_refactor_status_from_list(refactor_list_path, refactor_id)
296
+
273
297
  if session_status == "success":
274
- rs["status"] = "completed"
298
+ new_status = "completed"
275
299
  rs["resume_from_phase"] = None
276
300
  err = update_refactor_in_list(refactor_list_path, refactor_id, "completed")
277
301
  if err:
278
302
  error_out("Failed to update .prizmkit/plans/refactor-list.json: {}".format(err))
279
303
  return
280
304
  elif session_status in ("commit_missing", "docs_missing", "merge_conflict"):
281
- # Degraded outcome: keep artifacts for retry.
282
- # Write schema-valid status to refactor-list.json ("pending" for retry,
283
- # "failed" if max retries exceeded). Store the granular degraded reason
284
- # in status.json only (internal pipeline state, not schema-bound).
285
305
  rs["retry_count"] = rs.get("retry_count", 0) + 1
286
306
 
287
307
  if rs["retry_count"] >= max_retries:
288
- rs["status"] = "failed"
289
- target_status = "failed"
308
+ new_status = "failed"
290
309
  else:
291
- # status.json keeps the granular degraded reason for diagnostics
292
- rs["status"] = session_status
293
- # refactor-list.json gets schema-valid "pending" (will be retried)
294
- target_status = "pending"
310
+ new_status = "pending"
295
311
 
296
312
  rs["degraded_reason"] = session_status
297
313
  rs["resume_from_phase"] = None
298
314
  rs["sessions"] = []
299
315
  rs["last_session_id"] = None
300
316
 
301
- err = update_refactor_in_list(refactor_list_path, refactor_id, target_status)
317
+ err = update_refactor_in_list(refactor_list_path, refactor_id, new_status)
302
318
  if err:
303
319
  error_out("Failed to update .prizmkit/plans/refactor-list.json: {}".format(err))
304
320
  return
@@ -312,17 +328,15 @@ def action_update(args, refactor_list_path, state_dir):
312
328
  )
313
329
 
314
330
  if rs["retry_count"] >= max_retries:
315
- rs["status"] = "failed"
316
- target_status = "failed"
331
+ new_status = "failed"
317
332
  else:
318
- rs["status"] = "pending"
319
- target_status = "pending"
333
+ new_status = "pending"
320
334
 
321
335
  rs["resume_from_phase"] = None
322
336
  rs["sessions"] = []
323
337
  rs["last_session_id"] = None
324
338
 
325
- err = update_refactor_in_list(refactor_list_path, refactor_id, target_status)
339
+ err = update_refactor_in_list(refactor_list_path, refactor_id, new_status)
326
340
  if err:
327
341
  error_out("Failed to update .prizmkit/plans/refactor-list.json: {}".format(err))
328
342
  return
@@ -343,7 +357,7 @@ def action_update(args, refactor_list_path, state_dir):
343
357
 
344
358
  # Auto-skip downstream refactors when this refactor is marked as failed or skipped
345
359
  auto_skipped_refactors = []
346
- if rs["status"] in ("failed", "skipped"):
360
+ if new_status in ("failed", "skipped"):
347
361
  auto_skipped_refactors = auto_skip_blocked_refactors(
348
362
  refactor_list_path, state_dir, refactor_id
349
363
  )
@@ -352,7 +366,7 @@ def action_update(args, refactor_list_path, state_dir):
352
366
  "action": "update",
353
367
  "refactor_id": refactor_id,
354
368
  "session_status": session_status,
355
- "new_status": rs["status"],
369
+ "new_status": new_status,
356
370
  "retry_count": rs["retry_count"],
357
371
  "resume_from_phase": rs.get("resume_from_phase"),
358
372
  "updated_at": rs["updated_at"],
@@ -496,10 +510,9 @@ def auto_skip_blocked_refactors(refactor_list_path, state_dir, failed_refactor_i
496
510
  r["status"] = "auto_skipped"
497
511
  write_json_file(refactor_list_path, data)
498
512
 
499
- # Sync status.json for each auto-skipped refactor
513
+ # Update timestamps in status.json for each auto-skipped refactor
500
514
  for rid in to_skip:
501
515
  rs = load_refactor_status(state_dir, rid)
502
- rs["status"] = "auto_skipped"
503
516
  rs["updated_at"] = now_iso()
504
517
  save_refactor_status(state_dir, rid, rs)
505
518
 
@@ -589,8 +602,8 @@ def action_status(refactor_list_data, state_dir):
589
602
  if not rid:
590
603
  continue
591
604
 
605
+ rstatus = r.get("status", "pending")
592
606
  rs = load_refactor_status(state_dir, rid)
593
- rstatus = rs.get("status", "pending")
594
607
  retry_count = rs.get("retry_count", 0)
595
608
  max_retries_val = rs.get("max_retries", 3)
596
609
  resume_phase = rs.get("resume_from_phase")
@@ -688,10 +701,9 @@ def action_reset(args, refactor_list_path, state_dir):
688
701
  return
689
702
 
690
703
  rs = load_refactor_status(state_dir, refactor_id)
691
- old_status = rs.get("status", "unknown")
704
+ old_status = get_refactor_status_from_list(refactor_list_path, refactor_id)
692
705
  old_retry = rs.get("retry_count", 0)
693
706
 
694
- rs["status"] = "pending"
695
707
  rs["retry_count"] = 0
696
708
  rs["sessions"] = []
697
709
  rs["last_session_id"] = None
@@ -760,10 +772,9 @@ def action_clean(args, refactor_list_path, state_dir):
760
772
 
761
773
  # 4. Reset status
762
774
  rs = load_refactor_status(state_dir, refactor_id)
763
- old_status = rs.get("status", "unknown")
775
+ old_status = get_refactor_status_from_list(refactor_list_path, refactor_id)
764
776
  old_retry = rs.get("retry_count", 0)
765
777
 
766
- rs["status"] = "pending"
767
778
  rs["retry_count"] = 0
768
779
  rs["sessions"] = []
769
780
  rs["last_session_id"] = None
@@ -834,9 +845,8 @@ def action_start(args, refactor_list_path, state_dir):
834
845
  return
835
846
 
836
847
  rs = load_refactor_status(state_dir, refactor_id)
837
- old_status = rs.get("status", "pending")
848
+ old_status = get_refactor_status_from_list(refactor_list_path, refactor_id)
838
849
 
839
- rs["status"] = "in_progress"
840
850
  rs["updated_at"] = now_iso()
841
851
 
842
852
  err = save_refactor_status(state_dir, refactor_id, rs)
@@ -988,10 +998,9 @@ def action_unskip(args, refactor_list_path, state_dir):
988
998
  error_out("Failed to write .prizmkit/plans/refactor-list.json: {}".format(err))
989
999
  return
990
1000
 
991
- # Reset status.json for each refactor
1001
+ # Reset runtime fields in status.json for each refactor
992
1002
  for rid in to_reset:
993
1003
  rs = load_refactor_status(state_dir, rid)
994
- rs["status"] = "pending"
995
1004
  rs["retry_count"] = 0
996
1005
  rs["sessions"] = []
997
1006
  rs["last_session_id"] = None
@@ -189,6 +189,49 @@ Round 5: 3 failures [test_b, test_d, test_e] ← plateau 3/3 → STOP
189
189
 
190
190
  You MUST execute this phase. Do NOT skip it. Do NOT mark it as completed without actually running playwright-cli.
191
191
 
192
+ **CRITICAL CONSTRAINT — playwright-cli ONLY, NO Playwright MCP**:
193
+ - You MUST use `playwright-cli` (the CLI tool) for ALL browser interactions in this phase
194
+ - **NEVER** use Playwright MCP server, Playwright MCP tools, or any MCP-based browser automation
195
+ - If you have Playwright MCP configured, IGNORE it entirely — use the CLI command `playwright-cli` exclusively
196
+ - All browser actions go through `playwright-cli <command>` in the Bash tool, not through any MCP tool call
197
+
198
+ **Step 0 — Playwright CLI Readiness Check (BLOCKING — must pass before any browser action)**:
199
+
200
+ 0a. Check if `playwright-cli` is installed:
201
+ ```bash
202
+ which playwright-cli 2>/dev/null && playwright-cli --version 2>/dev/null || echo "NOT_INSTALLED"
203
+ ```
204
+ If output is `NOT_INSTALLED`, install it:
205
+ ```bash
206
+ npm install -g @playwright/cli@latest
207
+ ```
208
+ Then verify installation succeeded: `playwright-cli --version`. If installation fails, log `## Browser Verification: SKIPPED — playwright-cli installation failed` in context-snapshot.md and proceed to the next phase.
209
+
210
+ 0b. Learn playwright-cli usage (run once per session):
211
+ ```bash
212
+ playwright-cli --help
213
+ ```
214
+
215
+ 0c. Check if playwright-cli skill is installed for the current AI platform:
216
+ ```bash
217
+ CURRENT_PLATFORM=""
218
+ if which claude >/dev/null 2>&1; then
219
+ CURRENT_PLATFORM="claude"; SKILL_DIR="$HOME/.claude/skills"
220
+ elif which cbc >/dev/null 2>&1; then
221
+ CURRENT_PLATFORM="codebuddy"; SKILL_DIR="$HOME/.cbc/skills"
222
+ else
223
+ CURRENT_PLATFORM="unknown"
224
+ fi
225
+ if [ -d "$SKILL_DIR/playwright-cli" ] || ls "$SKILL_DIR"/playwright* 2>/dev/null | grep -q .; then
226
+ echo "SKILL_EXISTS"
227
+ else
228
+ echo "SKILL_MISSING"
229
+ fi
230
+ ```
231
+ If `SKILL_MISSING`: run `playwright-cli install --skills`. If the current platform is `codebuddy` (i.e. NOT claude), copy the installed skill from `$HOME/.claude/skills/playwright-cli` to `$SKILL_DIR/playwright-cli`. If the platform is `unknown`, `$SKILL_DIR` is unset, so skip the copy.
232
+
233
+ 0d. Read the installed playwright-cli skill (SKILL.md) for workflow guidance. Use its recommended patterns to construct your verification flow.
234
+
192
235
  **Step 1 — Start Dev Server**:
193
236
 
194
237
  You know this project's tech stack. Detect and start the dev server yourself:
@@ -196,9 +239,7 @@ You know this project's tech stack. Detect and start the dev server yourself:
196
239
  1. Identify the dev server start command from project config (`package.json` scripts, `Makefile`, `docker-compose.yml`, etc.)
197
240
  2. **Detect the dev server port** — use the pre-detected port from pipeline if available, otherwise extract from project config. Do NOT hardcode or guess the port:
198
241
  ```bash
199
- # Use pipeline-injected port if available, otherwise extract from package.json
200
242
  DEV_PORT={{DEV_PORT}}
201
- # If DEV_PORT is still a placeholder, detect at runtime:
202
243
  if [ "$DEV_PORT" = "{{DEV_PORT}}" ]; then
203
244
  DEV_PORT=$(node -e "const s=require('./package.json').scripts.dev; const m=s.match(/-p\s+(\d+)/); console.log(m?m[1]:'')")
204
245
  if [ -z "$DEV_PORT" ]; then
@@ -225,14 +266,14 @@ You know this project's tech stack. Detect and start the dev server yourself:
225
266
 
226
267
  Use `playwright-cli snapshot` on the running app to discover actual element refs, then verify these goals:
227
268
  {{BROWSER_VERIFY_STEPS}}
228
- Decide the concrete playwright-cli actions (click, fill, assert, etc.) yourself based on the snapshot output and your knowledge of the implemented code. The goals above describe WHAT to verify — you determine HOW.
229
269
 
230
- Take a final screenshot for evidence.
270
+ Construct your verification workflow based on: (1) the playwright-cli skill documentation, (2) the `--help` output, (3) the current task's acceptance criteria. Decide the concrete playwright-cli actions yourself. Take a final screenshot: `playwright-cli screenshot`.
231
271
 
232
272
  **Step 3 — Cleanup (REQUIRED — you started it, you stop it)**:
233
273
 
234
- 1. Kill the dev server process: `kill $DEV_SERVER_PID 2>/dev/null || true`
235
- 2. Verify port is released: `lsof -ti:$DEV_PORT | xargs kill -9 2>/dev/null || true`
274
+ 1. Close the playwright-cli browser: `playwright-cli close`
275
+ 2. Kill the dev server process: `kill $DEV_SERVER_PID 2>/dev/null || true`
276
+ 3. Verify port is released: `lsof -ti:$DEV_PORT | xargs kill -9 2>/dev/null || true`
236
277
 
237
278
  **Step 4 — Reporting**:
238
279
 
@@ -241,10 +282,12 @@ Append results to `context-snapshot.md`:
241
282
  ## Browser Verification
242
283
  URL: http://localhost:$DEV_PORT
243
284
  Dev Server Command: <actual command used>
244
- Steps executed: [list]
285
+ playwright-cli version: <version>
286
+ Steps executed: [list of playwright-cli commands used]
245
287
  Screenshot: [path]
246
288
  Result: PASS / FAIL (reason)
247
289
  Server cleanup: confirmed
290
+ Browser cleanup: confirmed
248
291
  ```
249
292
 
250
293
  If verification fails, log the failure details but continue to commit. Failures do NOT block the commit, but you MUST attempt verification and MUST clean up both the playwright-cli browser and the dev server.
@@ -287,6 +287,49 @@ If GATE:MISSING — send message to Reviewer (re-spawn if needed): "Write review
287
287
 
288
288
  You MUST execute this phase. Do NOT skip it. Do NOT mark it as completed without actually running playwright-cli.
289
289
 
290
+ **CRITICAL CONSTRAINT — playwright-cli ONLY, NO Playwright MCP**:
291
+ - You MUST use `playwright-cli` (the CLI tool) for ALL browser interactions in this phase
292
+ - **NEVER** use Playwright MCP server, Playwright MCP tools, or any MCP-based browser automation
293
+ - If you have Playwright MCP configured, IGNORE it entirely — use the CLI command `playwright-cli` exclusively
294
+ - All browser actions go through `playwright-cli <command>` in the Bash tool, not through any MCP tool call
295
+
296
+ **Step 0 — Playwright CLI Readiness Check (BLOCKING — must pass before any browser action)**:
297
+
298
+ 0a. Check if `playwright-cli` is installed:
299
+ ```bash
300
+ which playwright-cli 2>/dev/null && playwright-cli --version 2>/dev/null || echo "NOT_INSTALLED"
301
+ ```
302
+ If output is `NOT_INSTALLED`, install it:
303
+ ```bash
304
+ npm install -g @playwright/cli@latest
305
+ ```
306
+ Then verify installation succeeded: `playwright-cli --version`. If installation fails, log `## Browser Verification: SKIPPED — playwright-cli installation failed` in context-snapshot.md and proceed to the next phase.
307
+
308
+ 0b. Learn playwright-cli usage (run once per session):
309
+ ```bash
310
+ playwright-cli --help
311
+ ```
312
+
313
+ 0c. Check if playwright-cli skill is installed for the current AI platform:
314
+ ```bash
315
+ CURRENT_PLATFORM=""
316
+ if which claude >/dev/null 2>&1; then
317
+ CURRENT_PLATFORM="claude"; SKILL_DIR="$HOME/.claude/skills"
318
+ elif which cbc >/dev/null 2>&1; then
319
+ CURRENT_PLATFORM="codebuddy"; SKILL_DIR="$HOME/.cbc/skills"
320
+ else
321
+ CURRENT_PLATFORM="unknown"
322
+ fi
323
+ if [ -d "$SKILL_DIR/playwright-cli" ] || ls "$SKILL_DIR"/playwright* 2>/dev/null | grep -q .; then
324
+ echo "SKILL_EXISTS"
325
+ else
326
+ echo "SKILL_MISSING"
327
+ fi
328
+ ```
329
+ If `SKILL_MISSING`: run `playwright-cli install --skills`. If the current platform is `codebuddy` (i.e. NOT claude), copy the installed skill from `$HOME/.claude/skills/playwright-cli` to `$SKILL_DIR/playwright-cli`. If the platform is `unknown`, `$SKILL_DIR` is unset, so skip the copy.
330
+
331
+ 0d. Read the installed playwright-cli skill (SKILL.md) for workflow guidance. Use its recommended patterns to construct your verification flow.
332
+
290
333
  **Step 1 — Start Dev Server**:
291
334
 
292
335
  You know this project's tech stack. Detect and start the dev server yourself:
@@ -294,9 +337,7 @@ You know this project's tech stack. Detect and start the dev server yourself:
294
337
  1. Identify the dev server start command from project config (`package.json` scripts, `Makefile`, `docker-compose.yml`, etc.)
295
338
  2. **Detect the dev server port** — use the pre-detected port from pipeline if available, otherwise extract from project config. Do NOT hardcode or guess the port:
296
339
  ```bash
297
- # Use pipeline-injected port if available, otherwise extract from package.json
298
340
  DEV_PORT={{DEV_PORT}}
299
- # If DEV_PORT is still a placeholder, detect at runtime:
300
341
  if [ "$DEV_PORT" = "{{DEV_PORT}}" ]; then
301
342
  DEV_PORT=$(node -e "const s=require('./package.json').scripts.dev; const m=s.match(/-p\s+(\d+)/); console.log(m?m[1]:'')")
302
343
  if [ -z "$DEV_PORT" ]; then
@@ -323,14 +364,14 @@ You know this project's tech stack. Detect and start the dev server yourself:
323
364
 
324
365
  Use `playwright-cli snapshot` on the running app to discover actual element refs, then verify these goals:
325
366
  {{BROWSER_VERIFY_STEPS}}
326
- Decide the concrete playwright-cli actions (click, fill, assert, etc.) yourself based on the snapshot output and your knowledge of the implemented code. The goals above describe WHAT to verify — you determine HOW.
327
367
 
328
- Take a final screenshot for evidence.
368
+ Construct your verification workflow based on: (1) the playwright-cli skill documentation, (2) the `--help` output, (3) the current task's acceptance criteria. Decide the concrete playwright-cli actions yourself. Take a final screenshot: `playwright-cli screenshot`.
329
369
 
330
370
  **Step 3 — Cleanup (REQUIRED — you started it, you stop it)**:
331
371
 
332
- 1. Kill the dev server process: `kill $DEV_SERVER_PID 2>/dev/null || true`
333
- 2. Verify port is released: `lsof -ti:$DEV_PORT | xargs kill -9 2>/dev/null || true`
372
+ 1. Close the playwright-cli browser: `playwright-cli close`
373
+ 2. Kill the dev server process: `kill $DEV_SERVER_PID 2>/dev/null || true`
374
+ 3. Verify port is released: `lsof -ti:$DEV_PORT | xargs kill -9 2>/dev/null || true`
334
375
 
335
376
  **Step 4 — Reporting**:
336
377
 
@@ -339,10 +380,12 @@ Append results to `context-snapshot.md`:
339
380
  ## Browser Verification
340
381
  URL: http://localhost:$DEV_PORT
341
382
  Dev Server Command: <actual command used>
342
- Steps executed: [list]
383
+ playwright-cli version: <version>
384
+ Steps executed: [list of playwright-cli commands used]
343
385
  Screenshot: [path]
344
386
  Result: PASS / FAIL (reason)
345
387
  Server cleanup: confirmed
388
+ Browser cleanup: confirmed
346
389
  ```
347
390
 
348
391
  If verification fails, log the failure details but continue to commit. Failures do NOT block the commit, but you MUST attempt verification and MUST clean up both the playwright-cli browser and the dev server.
@@ -359,6 +359,49 @@ If GATE:MISSING — send message to Reviewer (re-spawn if needed): "Write review
359
359
 
360
360
  You MUST execute this phase. Do NOT skip it. Do NOT mark it as completed without actually running playwright-cli.
361
361
 
362
+ **CRITICAL CONSTRAINT — playwright-cli ONLY, NO Playwright MCP**:
363
+ - You MUST use `playwright-cli` (the CLI tool) for ALL browser interactions in this phase
364
+ - **NEVER** use Playwright MCP server, Playwright MCP tools, or any MCP-based browser automation
365
+ - If you have Playwright MCP configured, IGNORE it entirely — use the CLI command `playwright-cli` exclusively
366
+ - All browser actions go through `playwright-cli <command>` in the Bash tool, not through any MCP tool call
367
+
368
+ **Step 0 — Playwright CLI Readiness Check (BLOCKING — must pass before any browser action)**:
369
+
370
+ 0a. Check if `playwright-cli` is installed:
371
+ ```bash
372
+ which playwright-cli 2>/dev/null && playwright-cli --version 2>/dev/null || echo "NOT_INSTALLED"
373
+ ```
374
+ If output is `NOT_INSTALLED`, install it:
375
+ ```bash
376
+ npm install -g @playwright/cli@latest
377
+ ```
378
+ Then verify installation succeeded: `playwright-cli --version`. If installation fails, log `## Browser Verification: SKIPPED — playwright-cli installation failed` in context-snapshot.md and proceed to the next phase.
379
+
380
+ 0b. Learn playwright-cli usage (run once per session):
381
+ ```bash
382
+ playwright-cli --help
383
+ ```
384
+
385
+ 0c. Check if playwright-cli skill is installed for the current AI platform:
386
+ ```bash
387
+ CURRENT_PLATFORM=""
388
+ if which claude >/dev/null 2>&1; then
389
+ CURRENT_PLATFORM="claude"; SKILL_DIR="$HOME/.claude/skills"
390
+ elif which cbc >/dev/null 2>&1; then
391
+ CURRENT_PLATFORM="codebuddy"; SKILL_DIR="$HOME/.cbc/skills"
392
+ else
393
+ CURRENT_PLATFORM="unknown"
394
+ fi
395
+ if [ -d "$SKILL_DIR/playwright-cli" ] || ls "$SKILL_DIR"/playwright* 2>/dev/null | grep -q .; then
396
+ echo "SKILL_EXISTS"
397
+ else
398
+ echo "SKILL_MISSING"
399
+ fi
400
+ ```
401
+ If `SKILL_MISSING`: run `playwright-cli install --skills`. If the current platform is `codebuddy` (i.e. NOT claude), copy the installed skill from `$HOME/.claude/skills/playwright-cli` to `$SKILL_DIR/playwright-cli`. If the platform is `unknown`, `$SKILL_DIR` is unset, so skip the copy.
402
+
403
+ 0d. Read the installed playwright-cli skill (SKILL.md) for workflow guidance. Use its recommended patterns to construct your verification flow.
404
+
362
405
  **Step 1 — Start Dev Server**:
363
406
 
364
407
  You know this project's tech stack. Detect and start the dev server yourself:
@@ -366,9 +409,7 @@ You know this project's tech stack. Detect and start the dev server yourself:
366
409
  1. Identify the dev server start command from project config (`package.json` scripts, `Makefile`, `docker-compose.yml`, etc.)
367
410
  2. **Detect the dev server port** — use the pre-detected port from pipeline if available, otherwise extract from project config. Do NOT hardcode or guess the port:
368
411
  ```bash
369
- # Use pipeline-injected port if available, otherwise extract from package.json
370
412
  DEV_PORT={{DEV_PORT}}
371
- # If DEV_PORT is still a placeholder, detect at runtime:
372
413
  if [ "$DEV_PORT" = "{{DEV_PORT}}" ]; then
373
414
  DEV_PORT=$(node -e "const s=require('./package.json').scripts.dev; const m=s.match(/-p\s+(\d+)/); console.log(m?m[1]:'')")
374
415
  if [ -z "$DEV_PORT" ]; then
@@ -395,14 +436,14 @@ You know this project's tech stack. Detect and start the dev server yourself:
395
436
 
396
437
  Use `playwright-cli snapshot` on the running app to discover actual element refs, then verify these goals:
397
438
  {{BROWSER_VERIFY_STEPS}}
398
- Decide the concrete playwright-cli actions (click, fill, assert, etc.) yourself based on the snapshot output and your knowledge of the implemented code. The goals above describe WHAT to verify — you determine HOW.
399
439
 
400
- Take a final screenshot for evidence.
440
+ Construct your verification workflow based on: (1) the playwright-cli skill documentation, (2) the `--help` output, (3) the current task's acceptance criteria. Decide the concrete playwright-cli actions yourself. Take a final screenshot: `playwright-cli screenshot`.
401
441
 
402
442
  **Step 3 — Cleanup (REQUIRED — you started it, you stop it)**:
403
443
 
404
- 1. Kill the dev server process: `kill $DEV_SERVER_PID 2>/dev/null || true`
405
- 2. Verify port is released: `lsof -ti:$DEV_PORT | xargs kill -9 2>/dev/null || true`
444
+ 1. Close the playwright-cli browser: `playwright-cli close`
445
+ 2. Kill the dev server process: `kill $DEV_SERVER_PID 2>/dev/null || true`
446
+ 3. Verify port is released: `lsof -ti:$DEV_PORT | xargs kill -9 2>/dev/null || true`
406
447
 
407
448
  **Step 4 — Reporting**:
408
449
 
@@ -411,10 +452,12 @@ Append results to `context-snapshot.md`:
411
452
  ## Browser Verification
412
453
  URL: http://localhost:$DEV_PORT
413
454
  Dev Server Command: <actual command used>
414
- Steps executed: [list]
455
+ playwright-cli version: <version>
456
+ Steps executed: [list of playwright-cli commands used]
415
457
  Screenshot: [path]
416
458
  Result: PASS / FAIL (reason)
417
459
  Server cleanup: confirmed
460
+ Browser cleanup: confirmed
418
461
  ```
419
462
 
420
463
  If verification fails, log the failure details but continue to commit. Failures do NOT block the commit, but you MUST attempt verification and MUST clean up both the playwright-cli browser and the dev server.
@@ -11,3 +11,23 @@ You are running in **headless non-interactive mode** with a FINITE context windo
11
11
  5. **Minimize tool output** — Never load full command output into context. First capture to a temp file (`cmd 2>&1 | tee /tmp/out.txt | tail -20`), then scan the head/tail to identify relevant fields, and use targeted filtering (`grep`, `sed`, `awk`) to extract only the information needed for the current task. Only read the filtered result — never the raw full output.
12
12
  6. **No intermediate commits** — Do NOT run `git add`/`git commit` during implementation phases. All changes are committed once at the end via `/prizmkit-committer`.
13
13
  7. **Capture test output once** — When running test suites, always use `$TEST_CMD 2>&1 | tee /tmp/test-out.txt | tail -20`. Then grep `/tmp/test-out.txt` for details. Never re-run the suite just to apply a different filter.
14
+ 8. **Scaffold / generated file awareness (CRITICAL)** — When you run a scaffolding tool or package manager init command (`npm init`, `npx create-*`, `npm create vite`, `cargo init`, `go mod init`, `rails new`, `django-admin startproject`, `npx shadcn-ui init`, etc.), the output files are **generated boilerplate**. You MUST:
15
+ - Identify and mentally tag all files created by the tool as "scaffold files"
16
+ - Record the list of scaffold-generated files in context-snapshot.md under a `### Scaffold Files (do not re-read)` section
17
+ - **NEVER re-read scaffold files** after initial creation. Their content is standard boilerplate — you already know what they contain from the tool that generated them
18
+ - If you need to modify a scaffold file, make the edit directly without reading it first (you know the standard template content)
19
+ - This applies equally to `node_modules/`, `package-lock.json`, generated config files (`tsconfig.json`, `vite.config.ts`, `tailwind.config.js`, `.eslintrc`, etc.) produced by init commands
20
+ - When passing context to subagents, explicitly tell them which files are scaffold-generated so they skip reading them too
21
+ 9. **Package version verification (HARD CONSTRAINT — BLOCKING)** — Before writing ANY dependency version in `package.json`, `requirements.txt`, `Cargo.toml`, `go.mod`, `Gemfile`, `pyproject.toml`, or any other dependency manifest:
22
+ - You MUST verify the real version exists by querying the package registry first:
23
+ - npm/Node.js: `npm view <package> dist-tags.latest 2>/dev/null`
24
+ - Python/pip: `pip index versions <package> 2>/dev/null | head -1`
25
+ - Go: `go list -m -versions <module>@latest 2>/dev/null`
26
+ - Rust: `cargo search <crate> --limit 1 2>/dev/null`
27
+ - **NEVER guess or hallucinate version numbers**. If you cannot verify a version, use `"latest"` or `"*"` as a placeholder, or omit the version constraint entirely and let the package manager resolve it
28
+ - If the registry query fails (network issue, package not found), you MUST either:
29
+ (a) Use a known-safe version you have high confidence in, OR
30
+ (b) Skip that dependency and document it as a manual step, OR
31
+ (c) Use no version constraint (e.g., `"express": "*"`)
32
+ - **This is a BLOCKING gate**: do NOT run `npm install` / `pip install` / `cargo build` / `go mod tidy` until ALL versions in the manifest have been verified or use open constraints
33
+ - Batch version lookups: query multiple packages in parallel to save time (e.g., run multiple `npm view` commands concurrently)
@@ -2,6 +2,77 @@
2
2
 
3
3
  You MUST execute this phase. Do NOT skip it. Do NOT mark it as completed without actually running playwright-cli.
4
4
 
5
+ **CRITICAL CONSTRAINT — playwright-cli ONLY, NO Playwright MCP**:
6
+ - You MUST use `playwright-cli` (the CLI tool) for ALL browser interactions in this phase
7
+ - **NEVER** use Playwright MCP server, Playwright MCP tools, or any MCP-based browser automation
8
+ - If you have Playwright MCP configured, IGNORE it entirely — use the CLI command `playwright-cli` exclusively
9
+ - All browser actions go through `playwright-cli <command>` in the Bash tool, not through any MCP tool call
10
+
11
+ **Step 0 — Playwright CLI Readiness Check (BLOCKING — must pass before any browser action)**:
12
+
13
+ 0a. Check if `playwright-cli` is installed:
14
+ ```bash
15
+ which playwright-cli 2>/dev/null && playwright-cli --version 2>/dev/null || echo "NOT_INSTALLED"
16
+ ```
17
+ If output is `NOT_INSTALLED`, install it:
18
+ ```bash
19
+ npm install -g @playwright/cli@latest
20
+ ```
21
+ Then verify installation succeeded:
22
+ ```bash
23
+ playwright-cli --version
24
+ ```
25
+ If installation fails, log the error in context-snapshot.md under `## Browser Verification: SKIPPED — playwright-cli installation failed` and proceed to the next phase. Do NOT attempt browser verification without playwright-cli.
26
+
27
+ 0b. Learn playwright-cli usage (run once per session to understand available commands):
28
+ ```bash
29
+ playwright-cli --help
30
+ ```
31
+ Use this output to determine the correct commands for your verification steps. Do NOT guess command syntax — refer to the help output.
32
+
33
+ 0c. Check if playwright-cli skill is installed for the current AI platform:
34
+ ```bash
35
+ # Detect AI CLI platform
36
+ CURRENT_PLATFORM=""
37
+ if which claude >/dev/null 2>&1; then
38
+ CURRENT_PLATFORM="claude"
39
+ SKILL_DIR="$HOME/.claude/skills"
40
+ elif which cbc >/dev/null 2>&1; then
41
+ CURRENT_PLATFORM="codebuddy"
42
+ SKILL_DIR="$HOME/.cbc/skills"
43
+ else
44
+ # Try to detect from environment or config
45
+ CURRENT_PLATFORM="unknown"
46
+ fi
47
+
48
+ # Check if playwright-cli skill exists
49
+ if [ -d "$SKILL_DIR/playwright-cli" ] || ls "$SKILL_DIR"/playwright* 2>/dev/null | grep -q .; then
50
+ echo "SKILL_EXISTS"
51
+ else
52
+ echo "SKILL_MISSING"
53
+ fi
54
+ ```
55
+ If `SKILL_MISSING`:
56
+ ```bash
57
+ # Install playwright-cli skills (defaults to claude platform)
58
+ playwright-cli install --skills
59
+ ```
60
+ If the current platform is NOT claude, copy the installed skill files to the current platform's skill directory:
61
+ ```bash
62
+ # Skills are installed to claude's default location — move to current platform's skill dir
63
+ if [ "$CURRENT_PLATFORM" != "claude" ] && [ "$CURRENT_PLATFORM" != "unknown" ]; then
64
+ CLAUDE_SKILL_DIR="$HOME/.claude/skills"
65
+ if [ -d "$CLAUDE_SKILL_DIR/playwright-cli" ]; then
66
+ mkdir -p "$SKILL_DIR"
67
+ cp -r "$CLAUDE_SKILL_DIR/playwright-cli" "$SKILL_DIR/"
68
+ echo "Moved playwright-cli skill from claude to $CURRENT_PLATFORM"
69
+ fi
70
+ fi
71
+ ```
72
+
73
+ 0d. Read the installed playwright-cli skill for workflow guidance:
74
+ Once the skill is in place (already present or newly installed in Step 0c), read its SKILL.md to understand the recommended workflows and patterns. Use these patterns to construct your verification flow — do NOT invent your own patterns if the skill provides them.
75
+
5
76
  **Step 1 — Start Dev Server**:
6
77
 
7
78
  You know this project's tech stack. Detect and start the dev server yourself:
@@ -39,14 +110,20 @@ You know this project's tech stack. Detect and start the dev server yourself:
39
110
  Use `playwright-cli snapshot` on the running app to discover actual element refs, then verify these goals:
40
111
  {{BROWSER_VERIFY_STEPS}}
41
112
 
42
- Decide the concrete playwright-cli actions (click, fill, assert, etc.) yourself based on the snapshot output and your knowledge of the implemented code. The goals above describe WHAT to verify — you determine HOW.
113
+ Construct your verification workflow based on:
114
+ 1. The playwright-cli skill documentation (read in Step 0d)
115
+ 2. The `playwright-cli --help` output (captured in Step 0b)
116
+ 3. The current task's acceptance criteria and implemented features
117
+
118
+ Decide the concrete playwright-cli actions (click, fill, snapshot, screenshot, etc.) yourself based on the snapshot output and your knowledge of the implemented code. The goals above describe WHAT to verify — you determine HOW using playwright-cli commands.
43
119
 
44
- Take a final screenshot for evidence.
120
+ Take a final screenshot for evidence: `playwright-cli screenshot`
45
121
 
46
122
  **Step 3 — Cleanup (REQUIRED — you started it, you stop it)**:
47
123
 
48
- 1. Kill the dev server process: `kill $DEV_SERVER_PID 2>/dev/null || true`
49
- 2. Verify port is released: `lsof -ti:$DEV_PORT | xargs kill -9 2>/dev/null || true`
124
+ 1. Close the playwright-cli browser: `playwright-cli close`
125
+ 2. Kill the dev server process: `kill $DEV_SERVER_PID 2>/dev/null || true`
126
+ 3. Verify port is released: `lsof -ti:$DEV_PORT | xargs kill -9 2>/dev/null || true`
50
127
 
51
128
  **Step 4 — Reporting**:
52
129
 
@@ -55,10 +132,12 @@ Append results to `context-snapshot.md`:
55
132
  ## Browser Verification
56
133
  URL: http://localhost:$DEV_PORT
57
134
  Dev Server Command: <actual command used>
58
- Steps executed: [list]
135
+ playwright-cli version: <version>
136
+ Steps executed: [list of playwright-cli commands used]
59
137
  Screenshot: [path]
60
138
  Result: PASS / FAIL (reason)
61
139
  Server cleanup: confirmed
140
+ Browser cleanup: confirmed
62
141
  ```
63
142
 
64
143
  If verification fails, log the failure details but continue to commit. Failures do NOT block the commit, but you MUST attempt verification and MUST clean up the dev server.
@@ -2,6 +2,13 @@
2
2
 
3
3
  **Build artifacts rule** (passed to Dev): After any build/compile command (`go build`, `npm run build`, `tsc`, etc.), ensure the output binary or build directory is in `.gitignore`. Never commit compiled binaries, build output, or generated artifacts.
4
4
 
5
+ **Dependency version gate (BLOCKING — pass to Dev agent)**: Before running ANY package install command (`npm install`, `pip install`, `cargo build`, `go mod tidy`, `bundle install`, etc.):
6
+ 1. Every version number in the dependency manifest MUST be verified against the real registry (see Context Budget Rules §9)
7
+ 2. If a scaffold tool generated a `package.json` / `requirements.txt` / etc., verify the versions it wrote too — scaffold tools can emit outdated versions
8
+ 3. Do NOT proceed with install until all versions are confirmed real. Installing unverified versions wastes timeout cycles and can crash the session
9
+
10
+ **Scaffold file rule (pass to Dev agent)**: After running any init/scaffold command, record generated files in context-snapshot.md under `### Scaffold Files (do not re-read)`. Never re-read these files — their content is standard boilerplate (see Context Budget Rules §8). When spawning subagents, explicitly list scaffold files so they skip reading them.
11
+
5
12
  **Spawn Agent**:
6
13
  | Parameter | Value |
7
14
  |-----------|-------|