prizmkit 1.1.32 → 1.1.35
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bundled/VERSION.json +3 -3
- package/bundled/dev-pipeline/reset-bug.sh +0 -1
- package/bundled/dev-pipeline/reset-feature.sh +0 -1
- package/bundled/dev-pipeline/reset-refactor.sh +0 -1
- package/bundled/dev-pipeline/scripts/generate-bootstrap-prompt.py +26 -2
- package/bundled/dev-pipeline/scripts/generate-bugfix-prompt.py +24 -0
- package/bundled/dev-pipeline/scripts/generate-refactor-prompt.py +24 -0
- package/bundled/dev-pipeline/templates/agent-prompts/dev-implement.md +5 -8
- package/bundled/dev-pipeline/templates/agent-prompts/reviewer-review.md +1 -1
- package/bundled/dev-pipeline/templates/bootstrap-prompt.md +2 -0
- package/bundled/dev-pipeline/templates/bootstrap-tier1.md +10 -31
- package/bundled/dev-pipeline/templates/bootstrap-tier2.md +5 -3
- package/bundled/dev-pipeline/templates/bootstrap-tier3.md +4 -2
- package/bundled/dev-pipeline/templates/bug-fix-list-schema.json +5 -0
- package/bundled/dev-pipeline/templates/bugfix-bootstrap-prompt.md +2 -0
- package/bundled/dev-pipeline/templates/feature-list-schema.json +5 -0
- package/bundled/dev-pipeline/templates/refactor-bootstrap-prompt.md +2 -0
- package/bundled/dev-pipeline/templates/refactor-list-schema.json +5 -0
- package/bundled/dev-pipeline/templates/sections/context-budget-rules.md +1 -1
- package/bundled/dev-pipeline/templates/sections/feature-context.md +2 -0
- package/bundled/dev-pipeline/templates/sections/phase-implement-lite.md +1 -1
- package/bundled/dev-pipeline/templates/sections/phase0-test-baseline.md +1 -1
- package/bundled/dev-pipeline/templates/sections/test-failure-recovery-agent.md +2 -10
- package/bundled/dev-pipeline/templates/sections/test-failure-recovery-lite.md +2 -10
- package/bundled/skills/_metadata.json +1 -1
- package/bundled/skills/bug-planner/SKILL.md +14 -3
- package/bundled/skills/feature-planner/SKILL.md +14 -1
- package/bundled/skills/feature-workflow/SKILL.md +7 -1
- package/bundled/skills/refactor-planner/SKILL.md +14 -3
- package/bundled/skills/refactor-workflow/SKILL.md +7 -1
- package/package.json +1 -1
package/bundled/VERSION.json CHANGED
````diff
@@ -195,8 +195,8 @@ def detect_test_commands(project_root):
     except Exception:
         pass
 
-    # Return deduplicated
-    return " ".join(dict.fromkeys(test_commands)) if test_commands else ""
+    # Return deduplicated commands joined with && for correct shell execution
+    return " && ".join(dict.fromkeys(test_commands)) if test_commands else ""
 
 
 def extract_baseline_failures(test_cmd, project_root):
````
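With multiple detected test commands, the old space join produced a single malformed shell command; joining with `&&` runs them sequentially and fail-fast. A minimal standalone sketch of the fixed behavior (the command values here are hypothetical, not taken from the package):

```python
# dict.fromkeys() de-duplicates while preserving insertion order (Python 3.7+).
test_commands = ["go test ./...", "npm test", "go test ./..."]

joined = " && ".join(dict.fromkeys(test_commands))
print(joined)  # go test ./... && npm test

# The old " ".join(...) produced "go test ./... npm test", which a shell
# parses as one command with stray arguments instead of two commands.
```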
````diff
@@ -273,6 +273,29 @@ def format_global_context(global_context, project_root=None):
     return "\n".join(lines)
 
 
+def format_user_context(user_context):
+    """Format user_context array as a markdown section.
+
+    Returns empty string if user_context is empty or absent,
+    so the template placeholder resolves to nothing.
+    """
+    if not user_context or not isinstance(user_context, list):
+        return ""
+    items = [item for item in user_context if isinstance(item, str) and item.strip()]
+    if not items:
+        return ""
+    lines = [
+        "### User-Provided Context (HIGHEST PRIORITY)",
+        "",
+        "> The following materials were provided by the user. "
+        "They take precedence over AI inference.",
+        "",
+    ]
+    for item in items:
+        lines.append("- {}".format(item))
+    return "\n".join(lines)
+
+
 def get_completed_dependencies(features, feature):
     """Look up dependency features and list those with status=completed.
 
````
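What `format_user_context` renders, assuming the definition above is in scope (the input items are invented for illustration):

```python
rendered = format_user_context([
    "Always validate input with zod",  # inline rule, kept verbatim
    "src/auth/login.ts:42-78",         # file reference
    "   ",                             # whitespace-only entry: filtered out
    42,                                # non-string entry: filtered out
])
print(rendered)
# ### User-Provided Context (HIGHEST PRIORITY)
#
# > The following materials were provided by the user. They take precedence over AI inference.
#
# - Always validate input with zod
# - src/auth/login.ts:42-78
```

The two adjacent string literals in `lines` rely on Python's implicit concatenation, so the blockquote is a single list element and renders on one line.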
````diff
@@ -1352,6 +1375,7 @@ def build_replacements(args, feature, features, global_context, script_dir):
         "{{FEATURE_LIST_PATH}}": os.path.abspath(args.feature_list),
         "{{FEATURE_TITLE}}": feature.get("title", ""),
         "{{FEATURE_DESCRIPTION}}": feature.get("description", ""),
+        "{{USER_CONTEXT}}": format_user_context(feature.get("user_context", [])),
         "{{ACCEPTANCE_CRITERIA}}": format_acceptance_criteria(
             feature.get("acceptance_criteria", [])
         ),
````
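The new `{{USER_CONTEXT}}` key slots into the script's replacement map. This diff does not show how the map is applied; a plausible sketch, assuming simple string substitution:

```python
# Hypothetical template fragment and replacement map (illustrative only).
template = "{{FEATURE_DESCRIPTION}}\n\n{{USER_CONTEXT}}\n\n### Acceptance Criteria"
replacements = {
    "{{FEATURE_DESCRIPTION}}": "Add OAuth login.",
    "{{USER_CONTEXT}}": "",  # empty user_context: the placeholder vanishes
}
for placeholder, value in replacements.items():
    template = template.replace(placeholder, value)
print(template)
```

Because `format_user_context` returns an empty string for an absent or empty array, the placeholder collapses to a blank line rather than leaving stray markup.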
````diff
@@ -126,6 +126,29 @@ def format_error_source_details(error_source):
     return "\n".join(lines)
 
 
+def format_user_context(user_context):
+    """Format user_context array as a markdown section.
+
+    Returns empty string if user_context is empty or absent,
+    so the template placeholder resolves to nothing.
+    """
+    if not user_context or not isinstance(user_context, list):
+        return ""
+    items = [item for item in user_context if isinstance(item, str) and item.strip()]
+    if not items:
+        return ""
+    lines = [
+        "### User-Provided Context (HIGHEST PRIORITY)",
+        "",
+        "> The following materials were provided by the user. "
+        "They take precedence over AI inference.",
+        "",
+    ]
+    for item in items:
+        lines.append("- {}".format(item))
+    return "\n".join(lines)
+
+
 def format_environment(env):
     """Format environment dict as a key-value list."""
     if not env or not isinstance(env, dict):
````
````diff
@@ -245,6 +268,7 @@ def build_replacements(args, bug, global_context, script_dir):
         "{{SEVERITY}}": bug.get("severity", "medium"),
         "{{VERIFICATION_TYPE}}": vtype,
         "{{BUG_DESCRIPTION}}": bug.get("description", ""),
+        "{{USER_CONTEXT}}": format_user_context(bug.get("user_context", [])),
         "{{ERROR_SOURCE_TYPE}}": error_type,
         "{{ERROR_SOURCE_DETAILS}}": format_error_source_details(error_source),
         "{{ACCEPTANCE_CRITERIA}}": format_acceptance_criteria(
````
````diff
@@ -186,6 +186,29 @@ def format_global_context(global_context, project_root=None):
     return "\n".join(lines)
 
 
+def format_user_context(user_context):
+    """Format user_context array as a markdown section.
+
+    Returns empty string if user_context is empty or absent,
+    so the template placeholder resolves to nothing.
+    """
+    if not user_context or not isinstance(user_context, list):
+        return ""
+    items = [item for item in user_context if isinstance(item, str) and item.strip()]
+    if not items:
+        return ""
+    lines = [
+        "### User-Provided Context (HIGHEST PRIORITY)",
+        "",
+        "> The following materials were provided by the user. "
+        "They take precedence over AI inference.",
+        "",
+    ]
+    for item in items:
+        lines.append("- {}".format(item))
+    return "\n".join(lines)
+
+
 def format_scope(scope):
     """Format scope object into markdown detail lines."""
     if not scope or not isinstance(scope, dict):
````
````diff
@@ -411,6 +434,7 @@ def build_replacements(args, refactor, refactors, global_context, script_dir):
         "{{PRIORITY}}": refactor.get("priority", "medium"),
         "{{COMPLEXITY}}": refactor.get("complexity", "medium"),
         "{{REFACTOR_DESCRIPTION}}": refactor.get("description", ""),
+        "{{USER_CONTEXT}}": format_user_context(refactor.get("user_context", [])),
         "{{ACCEPTANCE_CRITERIA}}": format_acceptance_criteria(
             refactor.get("acceptance_criteria", [])
         ),
````
````diff
@@ -16,16 +16,13 @@ Update the AC Verification Checklist in context-snapshot.md by marking each item
 
 ## Test Failure Recovery (Convergence-Based)
 
-If tests fail
+If tests fail, use convergence recovery — keep fixing while progress is being made:
 
-1. **Run tests, record results**: count failures,
-2. **Check termination**:
-
-   - Plateau: same failures for 3 consecutive rounds → Cannot resolve, document and stop
-   - Failures decreased → Continue fixing
-3. **Fix and iterate**: analyze, apply fix, re-run `$TEST_CMD`, go back to step 1
+1. **Run tests, record results**: count failures, exclude baseline failures
+2. **Check termination**: All pass → done | Plateau (same failures 3 rounds) → stop | Failures decreased → continue
+3. **Fix and iterate**: analyze, apply fix, re-run `($TEST_CMD)`, go back to step 1
 
-**Key rule**: If failures decrease (even by 1),
+**Key rule**: If failures decrease (even by 1), plateau counter resets.
 **Do NOT block completion** if unable to resolve — only NEW REGRESSIONS (not in baseline) require fixing.
 **If any AC cannot be verified** due to test failure: the feature is incomplete, add to failure notes.
 
````
````diff
@@ -3,6 +3,6 @@
 2. Read `.prizmkit/specs/{{FEATURE_SLUG}}/plan.md` for architecture decisions and completed tasks
 3. Read `.prizm-docs/root.prizm` and relevant L1/L2 docs for RULES, PATTERNS, TRAPS
 4. Run /prizmkit-code-review with artifact_dir=.prizmkit/specs/{{FEATURE_SLUG}}/: Phase 1 diagnostic review across all applicable dimensions, then Phase 2 fix strategy for any findings. Read ONLY files referenced in completed plan.md tasks for diagnosis; MAY read additional files for impact analysis.
-5. Run the full test suite using `{{TEST_CMD}}`. When running tests: `{{TEST_CMD}} 2>&1 | tee /tmp/review-test-out.txt | tail -20`, then grep `/tmp/review-test-out.txt` for details — do NOT re-run the suite multiple times. Write and execute integration tests covering all goals from spec.md.
+5. Run the full test suite using `{{TEST_CMD}}`. When running tests: `({{TEST_CMD}}) 2>&1 | tee /tmp/review-test-out.txt | tail -20`, then grep `/tmp/review-test-out.txt` for details — do NOT re-run the suite multiple times. Write and execute integration tests covering all goals from spec.md.
 6. review-report.md will be written to .prizmkit/specs/{{FEATURE_SLUG}}/ by prizmkit-code-review.
 Report: number of findings found, or 'no findings' if clean."
````
````diff
@@ -18,6 +18,8 @@ You are the **session orchestrator**. Implement Feature {{FEATURE_ID}}: "{{FEATU
 
 {{FEATURE_DESCRIPTION}}
 
+{{USER_CONTEXT}}
+
 ### Acceptance Criteria
 
 {{ACCEPTANCE_CRITERIA}}
````
````diff
@@ -48,7 +50,7 @@ You are running in **headless non-interactive mode** with a FINITE context windo
 4. **One task at a time** — In Phase 3 (implement), complete and test one task before starting the next.
 5. **Minimize tool output** — Never load full command output into context. First capture to a temp file (`cmd 2>&1 | tee /tmp/out.txt | tail -20`), then scan the head/tail to identify relevant fields, and use targeted filtering (`grep`, `sed`, `awk`) to extract only the information needed for the current task. Only read the filtered result — never the raw full output.
 6. **No intermediate commits** — Do NOT run `git add`/`git commit` during Phase 1-3. All changes are committed once at the end in Phase 4 via `/prizmkit-committer`.
-7. **Capture test output once** — When running test suites, always use
+7. **Capture test output once** — When running test suites, always use `($TEST_CMD) 2>&1 | tee /tmp/test-out.txt | tail -20`. Then grep `/tmp/test-out.txt` for details. Never re-run the suite just to apply a different filter.
 
 ---
 
````
````diff
@@ -129,7 +131,7 @@ Never commit compiled binaries, build output, or generated artifacts.
 You know this project's tech stack. Identify ALL test commands that apply (e.g., `go test ./...`, `npm test`, `cargo test`, `pytest`, `make test`, etc.). Record them as `TEST_CMDS` (one or more commands). Then record baseline:
 ```bash
 # Run each test command, capture output
-$TEST_CMD 2>&1 | tee /tmp/test-baseline.txt | tail -20
+($TEST_CMD) 2>&1 | tee /tmp/test-baseline.txt | tail -20
 ```
 
 **3b.** Run `/prizmkit-implement` — this handles the full implementation cycle:
````
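Wrapping the command in parentheses is the companion fix to the `&&` join above: once the substituted test command can be a chain like `a && b`, a trailing pipe would otherwise attach only to the last command in the chain. A sketch of the grouping, driven from Python for a self-contained demo (the commands are placeholders):

```python
import subprocess

test_cmd = "echo unit-tests && echo e2e-tests"  # placeholder for a detected chain

# Without the subshell, "... && echo e2e-tests 2>&1 | tail -20" pipes only
# the final command; grouping with (...) lets the redirection and pipe see
# the combined output of every command in the chain.
pipeline = f"({test_cmd}) 2>&1 | tee /tmp/test-baseline.txt | tail -20"
subprocess.run(pipeline, shell=True, check=False)
```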
````diff
@@ -150,38 +152,15 @@ $TEST_CMD 2>&1 | tee /tmp/test-baseline.txt | tail -20
 
 **CP-2**: All acceptance criteria met, all tests pass.
 
-### Test Failure Recovery
-
-When tests fail during Phase 3, use **convergence-based recovery** — keep fixing as long as progress is being made.
-
-**Recovery Loop**:
-
-1. **Run tests and record results**: count total failures, note which tests failed. Exclude pre-existing baseline failures.
+### Test Failure Recovery (Convergence-Based)
 
-
-   - **All tests pass** → Done. Exit recovery loop.
-   - **Plateau detected** — same failure count AND same failing tests for 3 consecutive rounds → AI cannot resolve. Document and exit.
-   - **Still making progress** — failure count decreased vs. previous round → Continue fixing.
-   - **First round** — no history yet → Proceed to fix.
-
-3. **Fix and iterate**: analyze remaining failures, apply fix, re-run `$TEST_CMD`, go back to step 1.
-
-**Convergence tracking example**:
-```
-Round 1: 5 failures [test_a, test_b, test_c, test_d, test_e]
-Round 2: 3 failures [test_b, test_d, test_e] ← progress, continue
-Round 3: 3 failures [test_b, test_d, test_e] ← plateau 1/3
-Round 4: 3 failures [test_b, test_d, test_e] ← plateau 2/3
-Round 5: 3 failures [test_b, test_d, test_e] ← plateau 3/3 → STOP
-```
-**Key rule**: If failures decrease (even by 1), the plateau counter resets to 0.
+When tests fail, use convergence recovery — keep fixing while progress is being made:
 
-**
-
-
-- If any AC cannot be verified due to test failure: feature is incomplete
+1. **Run tests, record results**: count failures, exclude baseline failures
+2. **Check termination**: All pass → done | Plateau (same failures 3 rounds) → stop | Failures decreased → continue
+3. **Fix and iterate**: analyze, fix, re-run `($TEST_CMD)`, go back to step 1
 
-**
+**Key rule**: If failures decrease (even by 1), plateau counter resets. Do NOT block commit for unresolved failures — document and defer to next session.
 
 
 {{IF_BROWSER_INTERACTION}}
````
````diff
@@ -18,6 +18,8 @@ You are the **session orchestrator**. Implement Feature {{FEATURE_ID}}: "{{FEATU
 
 {{FEATURE_DESCRIPTION}}
 
+{{USER_CONTEXT}}
+
 ### Acceptance Criteria
 
 {{ACCEPTANCE_CRITERIA}}
````
````diff
@@ -48,7 +50,7 @@ You are running in **headless non-interactive mode** with a FINITE context windo
 4. **One task at a time** — In Phase 4 (implement), complete and test one task before starting the next.
 5. **Minimize tool output** — Never load full command output into context. First capture to a temp file (`cmd 2>&1 | tee /tmp/out.txt | tail -20`), then scan the head/tail to identify relevant fields, and use targeted filtering (`grep`, `sed`, `awk`) to extract only the information needed for the current task. Only read the filtered result — never the raw full output.
 6. **No intermediate commits** — Do NOT run `git add`/`git commit` during Phase 1-5. All changes are committed once at the end in Phase 6 via `/prizmkit-committer`.
-7. **Capture test output once** — When running test suites, always use
+7. **Capture test output once** — When running test suites, always use `($TEST_CMD) 2>&1 | tee /tmp/test-out.txt | tail -20`. Then grep `/tmp/test-out.txt` for details. Never re-run the suite just to apply a different filter.
 
 ---
 
````
````diff
@@ -88,7 +90,7 @@ If any agent times out:
 
 You know this project's tech stack. Identify ALL test commands that apply (e.g., `go test ./...`, `npm test`, `cargo test`, `pytest`, `make test`, etc.). Record them as `TEST_CMDS`. Then record baseline:
 ```bash
-$TEST_CMD 2>&1 | tee /tmp/test-baseline.txt | tail -20
+($TEST_CMD) 2>&1 | tee /tmp/test-baseline.txt | tail -20
 ```
 Save pre-existing failing tests as `BASELINE_FAILURES`.
 
````
````diff
@@ -263,7 +265,7 @@ Prompt:
 > 2. Read `.prizmkit/specs/{{FEATURE_SLUG}}/plan.md` for architecture decisions and completed tasks
 > 3. Read `.prizm-docs/root.prizm` and relevant L1/L2 docs for RULES, PATTERNS, TRAPS
 > 4. Run /prizmkit-code-review with artifact_dir=.prizmkit/specs/{{FEATURE_SLUG}}/: Phase 1 diagnostic review across all applicable dimensions, then Phase 2 fix strategy formulation for any findings. Read ONLY files referenced in completed plan.md tasks for diagnosis; MAY read additional files for impact analysis.
-> 5. Run the full test suite using `{{TEST_CMD}}`. When running: `{{TEST_CMD}} 2>&1 | tee /tmp/review-test-out.txt | tail -20`, then grep the file for details — do NOT re-run the suite multiple times. Write and execute integration tests covering all goals.
+> 5. Run the full test suite using `{{TEST_CMD}}`. When running: `({{TEST_CMD}}) 2>&1 | tee /tmp/review-test-out.txt | tail -20`, then grep the file for details — do NOT re-run the suite multiple times. Write and execute integration tests covering all goals.
 > 6. review-report.md will be written to .prizmkit/specs/{{FEATURE_SLUG}}/ by prizmkit-code-review.
 > Report: number of findings found, or 'no findings' if clean."
 
````
````diff
@@ -18,6 +18,8 @@ You are the **session orchestrator**. Implement Feature {{FEATURE_ID}}: "{{FEATU
 
 {{FEATURE_DESCRIPTION}}
 
+{{USER_CONTEXT}}
+
 ### Acceptance Criteria
 
 {{ACCEPTANCE_CRITERIA}}
````
````diff
@@ -89,7 +91,7 @@ If any agent times out:
 **Step 2 — Record pre-existing failure baseline**:
 ```bash
 # Run each test command, capture output
-$TEST_CMD 2>&1 | tee /tmp/test-baseline.txt | tail -20
+($TEST_CMD) 2>&1 | tee /tmp/test-baseline.txt | tail -20
 ```
 Save the list of **pre-existing failing tests** (if any) as `BASELINE_FAILURES`. These are known failures that existed before this session — Dev must NOT be blamed for them, but must list them in COMPLETION_SIGNAL.
 
````
````diff
@@ -328,7 +330,7 @@ Prompt:
 > 2. Read `.prizmkit/specs/{{FEATURE_SLUG}}/plan.md` for architecture decisions and completed tasks
 > 3. Read `.prizm-docs/root.prizm` and relevant L1/L2 docs for RULES, PATTERNS, TRAPS
 > 4. Run /prizmkit-code-review with artifact_dir=.prizmkit/specs/{{FEATURE_SLUG}}/: Phase 1 diagnostic review across all applicable dimensions, then Phase 2 fix strategy formulation for any findings. Read ONLY files referenced in completed plan.md tasks for diagnosis; MAY read additional files for impact analysis.
-> 5. Run the full test suite using `{{TEST_CMD}}`. When running tests: `{{TEST_CMD}} 2>&1 | tee /tmp/review-test-out.txt | tail -20`, then grep `/tmp/review-test-out.txt` for details — do NOT re-run the suite multiple times. Write and execute integration tests covering all goals from spec.md.
+> 5. Run the full test suite using `{{TEST_CMD}}`. When running tests: `({{TEST_CMD}}) 2>&1 | tee /tmp/review-test-out.txt | tail -20`, then grep `/tmp/review-test-out.txt` for details — do NOT re-run the suite multiple times. Write and execute integration tests covering all goals from spec.md.
 > 6. review-report.md will be written to .prizmkit/specs/{{FEATURE_SLUG}}/ by prizmkit-code-review.
 > Report: number of findings found, or 'no findings' if clean."
 
````
````diff
@@ -186,6 +186,11 @@
         3
       ]
     },
+    "user_context": {
+      "type": "array",
+      "items": { "type": "string" },
+      "description": "User-provided supplementary materials, preserved verbatim. Each entry is either inline content/rules (stored as-is) or a file reference (e.g. 'src/auth/login.ts:42-78', 'src/utils/ — focus on validation logic')."
+    },
     "model": {
       "type": "string",
       "description": "AI model ID for this bug fix. Overrides $MODEL env var."
````
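The same `user_context` field is added to all three plan schemas. A hypothetical bug entry that would satisfy the excerpt above (only `severity`, `description`, and `user_context` appear elsewhere in this diff; any other fields would come from the full schema, which is not shown):

```python
bug = {
    "severity": "high",
    "description": "Login form accepts an empty email address",
    "user_context": [
        "Reproduces only on Safari 17",  # inline note, stored verbatim
        "src/auth/login.ts:42-78",       # file reference, stored as a path string
    ],
}
```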
````diff
@@ -165,6 +165,11 @@
       },
       "description": "AI-generated summary of key changes from this feature session. Used to provide rich dependency context to downstream features. Each item is a concise statement about what was built/changed (e.g. APIs added, models created, key file paths)."
     },
+    "user_context": {
+      "type": "array",
+      "items": { "type": "string" },
+      "description": "User-provided supplementary materials, preserved verbatim. Each entry is either inline content/rules (stored as-is) or a file reference (e.g. 'src/auth/login.ts:42-78', 'src/utils/ — focus on validation logic')."
+    },
     "browser_interaction": {
       "type": "object",
       "description": "Browser verification config for features with UI. Requires playwright-cli. AI auto-detects dev server command, URL, and port from project config at runtime.",
````
````diff
@@ -184,6 +184,11 @@
       },
       "description": "AI-generated summary of key changes from this refactor session. Used to provide rich dependency context to downstream refactors. Each item is a concise statement about what was refactored (e.g. modules extracted, files restructured, interfaces changed)."
     },
+    "user_context": {
+      "type": "array",
+      "items": { "type": "string" },
+      "description": "User-provided supplementary materials, preserved verbatim. Each entry is either inline content/rules (stored as-is) or a file reference (e.g. 'src/auth/login.ts:42-78', 'src/utils/ — focus on validation logic')."
+    },
     "critic": {
       "type": "boolean",
       "description": "Enable adversarial critic review for this refactor. Default: true for critical/high priority refactors, false for others.",
````
````diff
@@ -10,7 +10,7 @@ You are running in **headless non-interactive mode** with a FINITE context windo
 4. **One task at a time** — Complete and test one task before starting the next.
 5. **Minimize tool output** — Never load full command output into context. First capture to a temp file (`cmd 2>&1 | tee /tmp/out.txt | tail -20`), then scan the head/tail to identify relevant fields, and use targeted filtering (`grep`, `sed`, `awk`) to extract only the information needed for the current task. Only read the filtered result — never the raw full output.
 6. **No intermediate commits** — Do NOT run `git add`/`git commit` during implementation phases. All changes are committed once at the end via `/prizmkit-committer`.
-7. **Capture test output once** — When running test suites, always use
+7. **Capture test output once** — When running test suites, always use `($TEST_CMD) 2>&1 | tee /tmp/test-out.txt | tail -20`. Then grep `/tmp/test-out.txt` for details. Never re-run the suite just to apply a different filter.
 8. **Scaffold / generated file awareness (CRITICAL)** — When you run a scaffolding tool or package manager init command (`npm init`, `npx create-*`, `vite create`, `cargo init`, `go mod init`, `rails new`, `django-admin startproject`, `npx shadcn-ui init`, etc.), the output files are **generated boilerplate**. You MUST:
    - Identify and mentally tag all files created by the tool as "scaffold files"
    - Record the list of scaffold-generated files in context-snapshot.md under a `### Scaffold Files (do not re-read)` section
````
````diff
@@ -19,7 +19,7 @@ Never commit compiled binaries, build output, or generated artifacts.
 You know this project's tech stack. Identify ALL test commands that apply (e.g., `go test ./...`, `npm test`, `cargo test`, `pytest`, `make test`, etc.). Record them as `TEST_CMDS`. Then record baseline:
 ```bash
 # Run each test command, capture output
-$TEST_CMD 2>&1 | tee /tmp/test-baseline.txt | tail -20
+($TEST_CMD) 2>&1 | tee /tmp/test-baseline.txt | tail -20
 ```
 
 **3b.** Run `/prizmkit-implement` — this handles the full implementation cycle:
````
````diff
@@ -5,7 +5,7 @@
 **Step 2 — Record pre-existing failure baseline**:
 ```bash
 # Run each test command, capture output
-$TEST_CMD 2>&1 | tee /tmp/test-baseline.txt | tail -20
+($TEST_CMD) 2>&1 | tee /tmp/test-baseline.txt | tail -20
 ```
 Save the list of **pre-existing failing tests** (if any) as `BASELINE_FAILURES`. These are known failures that existed before this session — Dev must NOT be blamed for them, but must list them in COMPLETION_SIGNAL.
 
````
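Recording the baseline before any changes is what lets the recovery loops below separate regressions from pre-existing failures. A minimal sketch of that comparison (test names are invented):

```python
baseline_failures = {"test_legacy_api"}  # BASELINE_FAILURES, recorded up front
current_failures = {"test_legacy_api", "test_new_login"}

# Only failures absent from the baseline count as new regressions;
# baseline failures are documented, not fixed.
regressions = current_failures - baseline_failures
print(regressions)  # {'test_new_login'}
```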
````diff
@@ -20,19 +20,11 @@ When tests fail during implementation (Phase 3 / Phase 4), use **convergence-bas
    - **Pre-existing baseline failure**: Expected, do NOT fix
    - **New regression**: Fix the code
    - **Brittle test**: Fix the test or environment setup
-   - Apply fix, re-run
+   - Apply fix, re-run `($TEST_CMD)`, go back to step 1
 
 ### Convergence Tracking
 
-
-
-```
-Round 1: 5 failures [test_a, test_b, test_c, test_d, test_e]
-Round 2: 3 failures [test_b, test_d, test_e] ← progress, continue
-Round 3: 3 failures [test_b, test_d, test_e] ← same as round 2 (plateau 1/3)
-Round 4: 3 failures [test_b, test_d, test_e] ← plateau 2/3
-Round 5: 3 failures [test_b, test_d, test_e] ← plateau 3/3 → STOP
-```
+Track failures each round. Example: 5→3→3→3→3 = plateau at round 3, stop at round 5 (3/3).
 
 **Key rule**: If failures decrease (even by 1), the plateau counter resets to 0.
 
````
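The plateau rule above is stated in prose; a minimal sketch of the same counting, not code from the package:

```python
def should_stop(history, plateau_limit=3):
    """history: the set of failing tests recorded each round, oldest first."""
    if history and not history[-1]:
        return True  # all tests pass
    plateau = 0
    for prev, cur in zip(history, history[1:]):
        # an unchanged failing set grows the plateau; any change resets it
        plateau = plateau + 1 if cur == prev else 0
    return plateau >= plateau_limit

rounds = [{"a", "b", "c", "d", "e"}, {"b", "d", "e"},
          {"b", "d", "e"}, {"b", "d", "e"}, {"b", "d", "e"}]
print(should_stop(rounds))  # True: 5→3→3→3→3, plateau 3/3 at round 5
```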
````diff
@@ -20,19 +20,11 @@ When tests fail during implementation, use **convergence-based recovery** — ke
    - **Pre-existing baseline failure**: Expected, do NOT fix
    - **New regression**: Fix the code
    - **Brittle test**: Fix the test or environment setup
-   - Apply fix, re-run
+   - Apply fix, re-run `($TEST_CMD)`, go back to step 1
 
 ### Convergence Tracking
 
-
-
-```
-Round 1: 5 failures [test_a, test_b, test_c, test_d, test_e]
-Round 2: 3 failures [test_b, test_d, test_e] ← progress, continue
-Round 3: 3 failures [test_b, test_d, test_e] ← same as round 2 (plateau 1/3)
-Round 4: 3 failures [test_b, test_d, test_e] ← plateau 2/3
-Round 5: 3 failures [test_b, test_d, test_e] ← plateau 3/3 → STOP
-```
+Track failures each round. Example: 5→3→3→3→3 = plateau at round 3, stop at round 5 (3/3).
 
 **Key rule**: If failures decrease (even by 1), the plateau counter resets to 0.
 
````
````diff
@@ -39,9 +39,18 @@ After planning is complete, you MUST:
 3. If the user wants to adjust → continue refining the bug list
 4. NEVER auto-execute the pipeline, launcher, or any fix step
 
-##
+## User-Provided Content Priority (Hard Rule)
+
+When the user provides detailed specifications, rules, or implementation requirements:
+
+1. **Verbatim preservation**: The user's exact wording MUST be preserved in `description` and `acceptance_criteria` fields. Do NOT paraphrase, summarize, abstract, or simplify.
+2. **No autonomous simplification**: A 200-word user specification must NOT become a 30-word description. Match the detail level of the user's input.
+3. **Clarify, don't assume**: If any user-provided rule is ambiguous or potentially conflicts with another, ASK the user to clarify. No limit on clarification rounds. Do NOT proceed with unresolved ambiguities.
+4. **Populate `user_context`**: ALL user-provided materials (supplementary content, rules, file path references) MUST be written into the `user_context` array of each bug in the generated `.prizmkit/plans/bug-fix-list.json`. Format:
+   - Supplementary content or rules → store as-is (verbatim text)
+   - File references → store as path string, e.g. `src/auth/login.ts:42-78` or `src/utils/validate.ts — focus on validateEmail function`
 
-
+## When to Use
 - "plan bug fixes", "report bugs", "create bug list"
 - "generate bug list", "I have some bugs to fix"
 - "these tests are failing", "here's an error log", "parse these errors"
````
````diff
@@ -109,7 +118,9 @@ Gather project metadata from the project's own configuration and documentation
 - If none found, ask the user
 3. **Identify testing framework**: Read from `.prizmkit/config.json` `tech_stack.testing`, or auto-detect from package.json/requirements.txt/etc., or ask user
 4. **Clarify context** — if the project context, affected systems, or bug scope is unclear, ask questions one at a time (cite the unclear point, give a recommended answer with rationale) until you fully understand the environment. No limit on rounds or number of questions.
-5. **Collect reference materials** —
+5. **Collect reference materials** — **Upfront Material Detection (Hard Rule)**: If the user has already provided materials (file paths, URLs, rules, specifications, code snippets) in the same message that invoked this skill: (a) Acknowledge what was received: "I received the following materials: [list]"; (b) Read/fetch all provided materials immediately; (c) You MUST still ask: "Are there any additional materials you'd like to provide?"; (d) NEVER skip this collection step just because the user already provided some materials.
+
+If the user has NOT provided any materials upfront, explicitly ask whether they have any supplementary materials for you to review before proceeding to bug collection:
 > "Do you have any reference materials I should review to better understand these bugs? This can include:
 > - **Code paths** — files or directories where the bugs likely originate
 > - **Documents** — related design docs, API specs, or architecture docs for the affected area
````
````diff
@@ -47,6 +47,17 @@ The user chose this skill intentionally. Respect that choice.
 3. If the user wants to adjust → continue refining `.prizmkit/plans/feature-list.json`
 4. **NEVER auto-execute** the pipeline, launcher, or any implementation step
 
+## User-Provided Content Priority (Hard Rule)
+
+When the user provides detailed specifications, rules, or implementation requirements:
+
+1. **Verbatim preservation**: The user's exact wording MUST be preserved in `description` and `acceptance_criteria` fields. Do NOT paraphrase, summarize, abstract, or simplify.
+2. **No autonomous simplification**: A 200-word user specification must NOT become a 30-word description. Match the detail level of the user's input.
+3. **Clarify, don't assume**: If any user-provided rule is ambiguous or potentially conflicts with another, ASK the user to clarify. No limit on clarification rounds. Do NOT proceed with unresolved ambiguities.
+4. **Populate `user_context`**: ALL user-provided materials (supplementary content, rules, file path references) MUST be written into the `user_context` array of each feature in the generated `.prizmkit/plans/feature-list.json`. Format:
+   - Supplementary content or rules → store as-is (verbatim text)
+   - File references → store as path string, e.g. `src/auth/login.ts:42-78` or `src/utils/validate.ts — focus on validateEmail function`
+
 ## When to Use
 
 Trigger this skill for requests like:
````
````diff
@@ -166,7 +177,9 @@ Execute the planning workflow in conversation mode with mandatory checkpoints:
 ### Interactive Phases
 1. Clarify scope and goals
 1.1 **Requirement clarification** — for ANY unclear aspect of the user's goals or scope, ask questions one at a time (cite the unclear point, give a recommended answer with rationale) until you fully understand. No limit on rounds. Do not proceed to Phase 2 with unresolved ambiguities.
-1.2 **Collect reference materials** —
+1.2 **Collect reference materials** — **Upfront Material Detection (Hard Rule)**: If the user has already provided materials (file paths, URLs, rules, specifications, code snippets) in the same message that invoked this skill: (a) Acknowledge what was received: "I received the following materials: [list]"; (b) Read/fetch all provided materials immediately; (c) You MUST still ask: "Are there any additional materials you'd like to provide?"; (d) NEVER skip this collection step just because the user already provided some materials.
+
+If the user has NOT provided any materials upfront, explicitly ask whether they have any supplementary materials for you to review. Present this as a single prompt covering all material types:
 > "Do you have any reference materials I should review before planning? This can include:
 > - **Code paths** — files or directories I should read to understand existing implementation
 > - **Documents** — design docs, PRDs, API specs, architecture proposals, or internal wiki pages
````
````diff
@@ -113,7 +113,13 @@ Ask the user to describe what they want to build. Listen for:
 
 ### Step 1.2: Collect Reference Materials
 
-**
+**Upfront Material Detection (Hard Rule)**: If the user has already provided materials (file paths, URLs, rules, specifications, code snippets) in the same message that invoked this skill:
+1. Acknowledge what was received: "I received the following materials: [list]"
+2. Read/fetch all provided materials immediately
+3. You MUST still ask: "Are there any additional materials you'd like to provide?"
+4. NEVER skip this collection step just because the user already provided some materials
+
+**If the user has NOT provided any materials upfront**, ask the user explicitly what resources they have. Do NOT skip this step — user-provided materials are far more valuable than blind directory scanning.
 
 Ask:
 1. **Existing code** — "Is there existing code I should look at? Which files or directories are relevant?"
````
````diff
@@ -44,9 +44,18 @@ The user chose this skill intentionally. Respect that choice.
 4. If the user wants to adjust → continue refining `.prizmkit/plans/refactor-list.json`
 5. **NEVER auto-execute** the pipeline, launcher, or any implementation step
 
-##
+## User-Provided Content Priority (Hard Rule)
+
+When the user provides detailed specifications, rules, or implementation requirements:
+
+1. **Verbatim preservation**: The user's exact wording MUST be preserved in `description` and `acceptance_criteria` fields. Do NOT paraphrase, summarize, abstract, or simplify.
+2. **No autonomous simplification**: A 200-word user specification must NOT become a 30-word description. Match the detail level of the user's input.
+3. **Clarify, don't assume**: If any user-provided rule is ambiguous or potentially conflicts with another, ASK the user to clarify. No limit on clarification rounds. Do NOT proceed with unresolved ambiguities.
+4. **Populate `user_context`**: ALL user-provided materials (supplementary content, rules, file path references) MUST be written into the `user_context` array of each refactor item in the generated `.prizmkit/plans/refactor-list.json`. Format:
+   - Supplementary content or rules → store as-is (verbatim text)
+   - File references → store as path string, e.g. `src/auth/login.ts:42-78` or `src/utils/validate.ts — focus on validateEmail function`
 
-
+## When to Use
 - "Plan refactoring", "Scope a restructuring"
 - "Prepare .prizmkit/plans/refactor-list.json", "Prepare dev-pipeline input for refactoring"
 - "Assess code for refactoring", "Identify refactoring targets"
````
````diff
@@ -127,7 +136,9 @@ Execute the planning workflow in conversation mode with mandatory checkpoints:
 2. Read `.prizmkit/config.json` for tech stack info
 3. Identify existing test suite and coverage
 4. Summarize project context to the user: "Here's what I found about your project..."
-5. **Collect reference materials** —
+5. **Collect reference materials** — **Upfront Material Detection (Hard Rule)**: If the user has already provided materials (file paths, URLs, rules, specifications, code snippets) in the same message that invoked this skill: (a) Acknowledge what was received: "I received the following materials: [list]"; (b) Read/fetch all provided materials immediately; (c) You MUST still ask: "Are there any additional materials you'd like to provide?"; (d) NEVER skip this collection step just because the user already provided some materials.
+
+If the user has NOT provided any materials upfront, explicitly ask whether they have any supplementary materials for you to review before planning the refactoring:
 > "Do you have any reference materials I should review before planning the refactoring? This can include:
 > - **Code paths** — specific files or directories that are refactoring targets or dependencies
 > - **Documents** — design docs, architecture proposals, refactoring RFCs, or technical debt analyses
````
````diff
@@ -121,7 +121,13 @@ Then ask:
 
 ### Step 1.2: Collect Reference Materials
 
-**
+**Upfront Material Detection (Hard Rule)**: If the user has already provided materials (file paths, URLs, rules, specifications, code snippets) in the same message that invoked this skill:
+1. Acknowledge what was received: "I received the following materials: [list]"
+2. Read/fetch all provided materials immediately
+3. You MUST still ask: "Are there any additional materials you'd like to provide?"
+4. NEVER skip this collection step just because the user already provided some materials
+
+**If the user has NOT provided any materials upfront**, ask the user explicitly what resources they have. Do NOT skip this step — user-provided materials are far more valuable than blind directory scanning.
 
 Ask:
 1. **Code paths** — "Which files or directories are the main targets? Any specific files I should look at?"
````