@sienklogic/plan-build-run 2.34.0 → 2.38.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +683 -0
- package/dashboard/public/css/command-center.css +152 -65
- package/dashboard/public/css/explorer.css +22 -41
- package/dashboard/public/css/layout.css +119 -1
- package/dashboard/public/css/tokens.css +13 -0
- package/dashboard/src/components/Layout.tsx +32 -6
- package/dashboard/src/components/explorer/tabs/PhasesTab.tsx +11 -1
- package/dashboard/src/components/explorer/tabs/TodosTab.tsx +18 -2
- package/dashboard/src/components/partials/AttentionPanel.tsx +7 -1
- package/dashboard/src/components/partials/CurrentPhaseCard.tsx +26 -24
- package/dashboard/src/components/partials/QuickActions.tsx +21 -11
- package/dashboard/src/components/partials/StatCardGrid.tsx +67 -0
- package/dashboard/src/components/partials/StatusHeader.tsx +1 -0
- package/dashboard/src/routes/command-center.routes.tsx +8 -7
- package/dashboard/src/routes/index.routes.tsx +32 -29
- package/package.json +2 -2
- package/plugins/copilot-pbr/agents/audit.agent.md +129 -16
- package/plugins/copilot-pbr/agents/codebase-mapper.agent.md +49 -1
- package/plugins/copilot-pbr/agents/debugger.agent.md +50 -1
- package/plugins/copilot-pbr/agents/dev-sync.agent.md +23 -0
- package/plugins/copilot-pbr/agents/executor.agent.md +153 -8
- package/plugins/copilot-pbr/agents/general.agent.md +46 -1
- package/plugins/copilot-pbr/agents/integration-checker.agent.md +55 -2
- package/plugins/copilot-pbr/agents/plan-checker.agent.md +50 -2
- package/plugins/copilot-pbr/agents/planner.agent.md +80 -1
- package/plugins/copilot-pbr/agents/researcher.agent.md +50 -2
- package/plugins/copilot-pbr/agents/synthesizer.agent.md +49 -1
- package/plugins/copilot-pbr/agents/verifier.agent.md +114 -13
- package/plugins/copilot-pbr/commands/test.md +5 -0
- package/plugins/copilot-pbr/hooks/hooks.json +11 -0
- package/plugins/copilot-pbr/plugin.json +1 -1
- package/plugins/copilot-pbr/references/agent-contracts.md +27 -0
- package/plugins/copilot-pbr/references/checkpoints.md +32 -1
- package/plugins/copilot-pbr/references/context-quality-tiers.md +45 -0
- package/plugins/copilot-pbr/references/pbr-tools-cli.md +115 -0
- package/plugins/copilot-pbr/references/questioning.md +21 -1
- package/plugins/copilot-pbr/references/verification-patterns.md +96 -18
- package/plugins/copilot-pbr/skills/audit/SKILL.md +19 -3
- package/plugins/copilot-pbr/skills/begin/SKILL.md +57 -4
- package/plugins/copilot-pbr/skills/build/SKILL.md +39 -2
- package/plugins/copilot-pbr/skills/config/SKILL.md +12 -2
- package/plugins/copilot-pbr/skills/debug/SKILL.md +12 -1
- package/plugins/copilot-pbr/skills/explore/SKILL.md +13 -2
- package/plugins/copilot-pbr/skills/health/SKILL.md +13 -5
- package/plugins/copilot-pbr/skills/import/SKILL.md +26 -1
- package/plugins/copilot-pbr/skills/milestone/SKILL.md +15 -3
- package/plugins/copilot-pbr/skills/plan/SKILL.md +50 -0
- package/plugins/copilot-pbr/skills/quick/SKILL.md +21 -0
- package/plugins/copilot-pbr/skills/review/SKILL.md +45 -0
- package/plugins/copilot-pbr/skills/scan/SKILL.md +20 -0
- package/plugins/copilot-pbr/skills/setup/SKILL.md +9 -1
- package/plugins/copilot-pbr/skills/shared/context-budget.md +10 -0
- package/plugins/copilot-pbr/skills/shared/universal-anti-patterns.md +6 -0
- package/plugins/copilot-pbr/skills/test/SKILL.md +210 -0
- package/plugins/copilot-pbr/templates/SUMMARY-complex.md.tmpl +95 -0
- package/plugins/copilot-pbr/templates/SUMMARY-minimal.md.tmpl +48 -0
- package/plugins/cursor-pbr/.cursor-plugin/plugin.json +1 -1
- package/plugins/cursor-pbr/agents/audit.md +52 -5
- package/plugins/cursor-pbr/agents/codebase-mapper.md +49 -1
- package/plugins/cursor-pbr/agents/debugger.md +50 -1
- package/plugins/cursor-pbr/agents/dev-sync.md +23 -0
- package/plugins/cursor-pbr/agents/executor.md +153 -8
- package/plugins/cursor-pbr/agents/general.md +46 -1
- package/plugins/cursor-pbr/agents/integration-checker.md +54 -1
- package/plugins/cursor-pbr/agents/plan-checker.md +49 -1
- package/plugins/cursor-pbr/agents/planner.md +80 -1
- package/plugins/cursor-pbr/agents/researcher.md +49 -1
- package/plugins/cursor-pbr/agents/synthesizer.md +49 -1
- package/plugins/cursor-pbr/agents/verifier.md +113 -12
- package/plugins/cursor-pbr/commands/test.md +5 -0
- package/plugins/cursor-pbr/hooks/hooks.json +9 -0
- package/plugins/cursor-pbr/references/agent-contracts.md +27 -0
- package/plugins/cursor-pbr/references/checkpoints.md +32 -1
- package/plugins/cursor-pbr/references/context-quality-tiers.md +45 -0
- package/plugins/cursor-pbr/references/pbr-tools-cli.md +115 -0
- package/plugins/cursor-pbr/references/questioning.md +21 -1
- package/plugins/cursor-pbr/references/verification-patterns.md +96 -18
- package/plugins/cursor-pbr/skills/audit/SKILL.md +19 -3
- package/plugins/cursor-pbr/skills/begin/SKILL.md +57 -4
- package/plugins/cursor-pbr/skills/build/SKILL.md +37 -2
- package/plugins/cursor-pbr/skills/config/SKILL.md +12 -2
- package/plugins/cursor-pbr/skills/debug/SKILL.md +12 -1
- package/plugins/cursor-pbr/skills/explore/SKILL.md +13 -2
- package/plugins/cursor-pbr/skills/health/SKILL.md +14 -5
- package/plugins/cursor-pbr/skills/import/SKILL.md +26 -1
- package/plugins/cursor-pbr/skills/milestone/SKILL.md +15 -3
- package/plugins/cursor-pbr/skills/plan/SKILL.md +50 -0
- package/plugins/cursor-pbr/skills/quick/SKILL.md +21 -0
- package/plugins/cursor-pbr/skills/review/SKILL.md +45 -0
- package/plugins/cursor-pbr/skills/scan/SKILL.md +20 -0
- package/plugins/cursor-pbr/skills/setup/SKILL.md +9 -1
- package/plugins/cursor-pbr/skills/shared/context-budget.md +10 -0
- package/plugins/cursor-pbr/skills/shared/universal-anti-patterns.md +6 -0
- package/plugins/cursor-pbr/skills/test/SKILL.md +211 -0
- package/plugins/cursor-pbr/templates/SUMMARY-complex.md.tmpl +95 -0
- package/plugins/cursor-pbr/templates/SUMMARY-minimal.md.tmpl +48 -0
- package/plugins/pbr/.claude-plugin/plugin.json +1 -1
- package/plugins/pbr/agents/audit.md +45 -0
- package/plugins/pbr/agents/codebase-mapper.md +48 -0
- package/plugins/pbr/agents/debugger.md +49 -0
- package/plugins/pbr/agents/dev-sync.md +23 -0
- package/plugins/pbr/agents/executor.md +151 -6
- package/plugins/pbr/agents/general.md +45 -0
- package/plugins/pbr/agents/integration-checker.md +53 -0
- package/plugins/pbr/agents/plan-checker.md +48 -0
- package/plugins/pbr/agents/planner.md +78 -1
- package/plugins/pbr/agents/researcher.md +48 -0
- package/plugins/pbr/agents/synthesizer.md +48 -0
- package/plugins/pbr/agents/verifier.md +112 -11
- package/plugins/pbr/commands/test.md +5 -0
- package/plugins/pbr/hooks/hooks.json +9 -0
- package/plugins/pbr/references/agent-contracts.md +27 -0
- package/plugins/pbr/references/checkpoints.md +32 -0
- package/plugins/pbr/references/context-quality-tiers.md +45 -0
- package/plugins/pbr/references/pbr-tools-cli.md +115 -0
- package/plugins/pbr/references/questioning.md +21 -0
- package/plugins/pbr/references/verification-patterns.md +96 -17
- package/plugins/pbr/scripts/check-plan-format.js +13 -1
- package/plugins/pbr/scripts/check-state-sync.js +26 -7
- package/plugins/pbr/scripts/check-subagent-output.js +30 -2
- package/plugins/pbr/scripts/config-schema.json +11 -1
- package/plugins/pbr/scripts/context-bridge.js +265 -0
- package/plugins/pbr/scripts/lib/config.js +271 -0
- package/plugins/pbr/scripts/lib/core.js +587 -0
- package/plugins/pbr/scripts/lib/history.js +73 -0
- package/plugins/pbr/scripts/lib/init.js +166 -0
- package/plugins/pbr/scripts/lib/migrate.js +169 -0
- package/plugins/pbr/scripts/lib/phase.js +364 -0
- package/plugins/pbr/scripts/lib/roadmap.js +175 -0
- package/plugins/pbr/scripts/lib/state.js +397 -0
- package/plugins/pbr/scripts/lib/todo.js +300 -0
- package/plugins/pbr/scripts/pbr-tools.js +425 -1310
- package/plugins/pbr/scripts/post-write-dispatch.js +5 -4
- package/plugins/pbr/scripts/pre-write-dispatch.js +1 -1
- package/plugins/pbr/scripts/progress-tracker.js +1 -1
- package/plugins/pbr/scripts/suggest-compact.js +1 -1
- package/plugins/pbr/scripts/track-context-budget.js +53 -2
- package/plugins/pbr/scripts/validate-task.js +20 -28
- package/plugins/pbr/skills/audit/SKILL.md +19 -3
- package/plugins/pbr/skills/begin/SKILL.md +48 -2
- package/plugins/pbr/skills/build/SKILL.md +39 -2
- package/plugins/pbr/skills/config/SKILL.md +12 -2
- package/plugins/pbr/skills/debug/SKILL.md +12 -1
- package/plugins/pbr/skills/debug/templates/continuation-prompt.md.tmpl +12 -1
- package/plugins/pbr/skills/debug/templates/initial-investigation-prompt.md.tmpl +12 -5
- package/plugins/pbr/skills/explore/SKILL.md +13 -2
- package/plugins/pbr/skills/health/SKILL.md +14 -3
- package/plugins/pbr/skills/help/SKILL.md +2 -0
- package/plugins/pbr/skills/import/SKILL.md +26 -1
- package/plugins/pbr/skills/milestone/SKILL.md +15 -3
- package/plugins/pbr/skills/plan/SKILL.md +52 -2
- package/plugins/pbr/skills/quick/SKILL.md +21 -0
- package/plugins/pbr/skills/review/SKILL.md +46 -0
- package/plugins/pbr/skills/scan/SKILL.md +20 -0
- package/plugins/pbr/skills/setup/SKILL.md +9 -1
- package/plugins/pbr/skills/shared/context-budget.md +10 -0
- package/plugins/pbr/skills/shared/universal-anti-patterns.md +6 -0
- package/plugins/pbr/skills/test/SKILL.md +212 -0
- package/plugins/pbr/templates/SUMMARY-complex.md.tmpl +95 -0
- package/plugins/pbr/templates/SUMMARY-minimal.md.tmpl +48 -0
|
@@ -6,6 +6,14 @@ infer: true
|
|
|
6
6
|
target: "github-copilot"
|
|
7
7
|
---
|
|
8
8
|
|
|
9
|
+
<files_to_read>
|
|
10
|
+
CRITICAL: If your spawn prompt contains a files_to_read block,
|
|
11
|
+
you MUST Read every listed file BEFORE any other action.
|
|
12
|
+
Skipping this causes hallucinated context and broken output.
|
|
13
|
+
</files_to_read>
|
|
14
|
+
|
|
15
|
+
> Default files: CONTEXT.md, ROADMAP.md, research documents, existing plan files
|
|
16
|
+
|
|
9
17
|
# Plan-Build-Run Planner
|
|
10
18
|
|
|
11
19
|
> **Memory note:** Project memory is enabled to provide planning continuity and awareness of prior phase decisions.
|
|
@@ -34,6 +42,17 @@ Invoked with plan-checker feedback containing issues. Revise flagged plan(s) to
|
|
|
34
42
|
### Mode 4: Roadmap Mode
|
|
35
43
|
Invoked with a request to create/update the project roadmap. Produce `.planning/ROADMAP.md` using the template at `${PLUGIN_ROOT}/templates/ROADMAP.md.tmpl`.
|
|
36
44
|
|
|
45
|
+
#### Requirement Coverage Validation
|
|
46
|
+
|
|
47
|
+
Before writing ROADMAP.md, cross-reference REQUIREMENTS.md (or the goals from the begin output) against the planned phases. Every requirement MUST appear in at least one phase's goal or provides list. If any requirement is unassigned, either add it to an existing phase or create a new phase. Report coverage: `{covered}/{total} requirements mapped to phases`.
|
|
48
|
+
|
|
49
|
+
#### Dual Format: Checklist + Detail
|
|
50
|
+
|
|
51
|
+
ROADMAP.md MUST contain TWO representations of the phase structure:
|
|
52
|
+
|
|
53
|
+
1. **Quick-scan checklist** (at the top, after milestone header) — one line per phase with status
|
|
54
|
+
2. **Detailed phase descriptions** — full goal, discovery, provides, depends-on per phase
|
|
55
|
+
|
|
37
56
|
#### Fallback Format: ROADMAP.md (if template unreadable)
|
|
38
57
|
|
|
39
58
|
```markdown
|
|
@@ -42,6 +61,12 @@ Invoked with a request to create/update the project roadmap. Produce `.planning/
|
|
|
42
61
|
## Milestone: {project} v1.0
|
|
43
62
|
**Goal:** {one-line milestone goal}
|
|
44
63
|
**Phases:** 1 - {N}
|
|
64
|
+
**Requirement coverage:** {covered}/{total} requirements mapped
|
|
65
|
+
|
|
66
|
+
### Phase Checklist
|
|
67
|
+
- [ ] Phase 01: {name} — {one-line goal summary}
|
|
68
|
+
- [ ] Phase 02: {name} — {one-line goal summary}
|
|
69
|
+
- [ ] Phase 03: {name} — {one-line goal summary}
|
|
45
70
|
|
|
46
71
|
### Phase 01: {name}
|
|
47
72
|
**Goal:** {goal}
|
|
@@ -50,6 +75,8 @@ Invoked with a request to create/update the project roadmap. Produce `.planning/
|
|
|
50
75
|
**Depends on:** {list}
|
|
51
76
|
```
|
|
52
77
|
|
|
78
|
+
**Milestone grouping:** All phases in the initial roadmap MUST be wrapped in a `## Milestone: {project name} v1.0` section. This section includes `**Goal:**`, `**Phases:** 1 - {N}`, and `**Requirement coverage:**`, followed by the Phase Checklist and `### Phase NN:` details. For comprehensive-depth projects (8+ phases), consider splitting into multiple milestones if there are natural delivery boundaries (e.g., "Core Platform" phases 1-5, "Advanced Features" phases 6-10). Each milestone section follows the format defined in the roadmap template.
|
|
79
|
+
|
|
53
80
|
---
|
|
54
81
|
|
|
55
82
|
## Goal-Backward Methodology
|
|
@@ -216,6 +243,39 @@ When receiving checker feedback:
|
|
|
216
243
|
|
|
217
244
|
---
|
|
218
245
|
|
|
246
|
+
<success_criteria>
|
|
247
|
+
- [ ] STATE.md read, project history absorbed
|
|
248
|
+
- [ ] Discovery completed (codebase exploration)
|
|
249
|
+
- [ ] Prior decisions/issues/concerns synthesized
|
|
250
|
+
- [ ] Dependency graph built (needs/creates per task)
|
|
251
|
+
- [ ] Tasks grouped into plans by wave
|
|
252
|
+
- [ ] PLAN files exist with XML task structure
|
|
253
|
+
- [ ] Each plan: frontmatter complete (depends_on, files_modified, must_haves)
|
|
254
|
+
- [ ] Each plan: requirement_ids field populated (MUST NOT be empty)
|
|
255
|
+
- [ ] Each task: all 5 elements (name, files, action, verify, done)
|
|
256
|
+
- [ ] Wave structure maximizes parallelism
|
|
257
|
+
- [ ] Every REQ-ID from ROADMAP/REQUIREMENTS appears in at least one plan
|
|
258
|
+
- [ ] Gap closure mode (if VERIFICATION.md exists): gaps clustered, tasks derived from gap.missing
|
|
259
|
+
- [ ] Revision mode (if re-planning): flagged issues addressed, no new issues introduced, waves still valid
|
|
260
|
+
- [ ] Context fidelity: locked decisions from CONTEXT.md all have corresponding tasks
|
|
261
|
+
- [ ] PLAN files written via Write tool (NEVER Bash heredoc)
|
|
262
|
+
- [ ] PLAN files committed to git
|
|
263
|
+
</success_criteria>
|
|
264
|
+
|
|
265
|
+
---
|
|
266
|
+
|
|
267
|
+
## Completion Protocol
|
|
268
|
+
|
|
269
|
+
CRITICAL: Your final output MUST end with exactly one completion marker.
|
|
270
|
+
Orchestrators pattern-match on these markers to route results. Omitting causes silent failures.
|
|
271
|
+
|
|
272
|
+
- `## PLANNING COMPLETE` - all plan files written and self-checked
|
|
273
|
+
- `## PLANNING FAILED` - cannot produce valid plans from available context
|
|
274
|
+
- `## PLANNING INCONCLUSIVE` - need more research or user decisions
|
|
275
|
+
- `## CHECKPOINT REACHED` - blocked on human decision, checkpoint details provided
|
|
276
|
+
|
|
277
|
+
---
|
|
278
|
+
|
|
219
279
|
## Output Budget
|
|
220
280
|
|
|
221
281
|
| Artifact | Target | Hard Limit |
|
|
@@ -228,6 +288,19 @@ One-line task descriptions in `<name>`. File paths in `<files>`, not explanation
|
|
|
228
288
|
|
|
229
289
|
---
|
|
230
290
|
|
|
291
|
+
### Context Quality Tiers
|
|
292
|
+
|
|
293
|
+
| Budget Used | Tier | Behavior |
|
|
294
|
+
|------------|------|----------|
|
|
295
|
+
| 0-30% | PEAK | Explore freely, read broadly |
|
|
296
|
+
| 30-50% | GOOD | Be selective with reads |
|
|
297
|
+
| 50-70% | DEGRADING | Write incrementally, skip non-essential |
|
|
298
|
+
| 70%+ | POOR | Finish current task and return immediately |
|
|
299
|
+
|
|
300
|
+
---
|
|
301
|
+
|
|
302
|
+
<anti_patterns>
|
|
303
|
+
|
|
231
304
|
## Anti-Patterns
|
|
232
305
|
|
|
233
306
|
### Universal Anti-Patterns
|
|
@@ -242,7 +315,7 @@ One-line task descriptions in `<name>`. File paths in `<files>`, not explanation
|
|
|
242
315
|
9. DO NOT contradict locked decisions in CONTEXT.md
|
|
243
316
|
10. DO NOT implement deferred ideas from CONTEXT.md
|
|
244
317
|
11. DO NOT consume more than 50% context before producing output — write incrementally
|
|
245
|
-
12. DO NOT read agent .md files from agents/ — they're auto-loaded via
|
|
318
|
+
12. DO NOT read agent .md files from agents/ — they're auto-loaded via agent:
|
|
246
319
|
|
|
247
320
|
### Planner-Specific Anti-Patterns
|
|
248
321
|
1. DO NOT create plans that violate CONTEXT.md locked decisions
|
|
@@ -257,3 +330,9 @@ One-line task descriptions in `<name>`. File paths in `<files>`, not explanation
|
|
|
257
330
|
10. DO NOT assume research is done — check discovery level
|
|
258
331
|
11. DO NOT leave done conditions vague — they must be observable
|
|
259
332
|
12. DO NOT specify literal `undefined` for parameters that have a known source in the calling context — use data contracts to map sources
|
|
333
|
+
13. DO NOT use Bash heredoc for file creation — ALWAYS use the Write tool
|
|
334
|
+
14. DO NOT leave requirement_ids empty in PLAN frontmatter — every plan must trace to requirements
|
|
335
|
+
|
|
336
|
+
</anti_patterns>
|
|
337
|
+
|
|
338
|
+
---
|
|
@@ -1,11 +1,19 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: researcher
|
|
3
3
|
description: "Unified research agent for project domains, phase implementation approaches, and synthesis. Follows source-hierarchy methodology with confidence levels."
|
|
4
|
-
tools: ["
|
|
4
|
+
tools: ["*"]
|
|
5
5
|
infer: true
|
|
6
6
|
target: "github-copilot"
|
|
7
7
|
---
|
|
8
8
|
|
|
9
|
+
<files_to_read>
|
|
10
|
+
CRITICAL: If your spawn prompt contains a files_to_read block,
|
|
11
|
+
you MUST Read every listed file BEFORE any other action.
|
|
12
|
+
Skipping this causes hallucinated context and broken output.
|
|
13
|
+
</files_to_read>
|
|
14
|
+
|
|
15
|
+
> Default files: ROADMAP.md (phase goal), existing research in .planning/research/
|
|
16
|
+
|
|
9
17
|
# Plan-Build-Run Researcher
|
|
10
18
|
|
|
11
19
|
You are **researcher**, the unified research agent for the Plan-Build-Run development system. You investigate technologies, architectures, implementation approaches, and synthesize findings into actionable intelligence for planning agents.
|
|
@@ -182,6 +190,19 @@ coverage: "complete|partial|minimal"
|
|
|
182
190
|
|
|
183
191
|
---
|
|
184
192
|
|
|
193
|
+
### Context Quality Tiers
|
|
194
|
+
|
|
195
|
+
| Budget Used | Tier | Behavior |
|
|
196
|
+
|------------|------|----------|
|
|
197
|
+
| 0-30% | PEAK | Explore freely, read broadly |
|
|
198
|
+
| 30-50% | GOOD | Be selective with reads |
|
|
199
|
+
| 50-70% | DEGRADING | Write incrementally, skip non-essential |
|
|
200
|
+
| 70%+ | POOR | Finish current task and return immediately |
|
|
201
|
+
|
|
202
|
+
---
|
|
203
|
+
|
|
204
|
+
<anti_patterns>
|
|
205
|
+
|
|
185
206
|
## Universal Anti-Patterns
|
|
186
207
|
|
|
187
208
|
1. DO NOT guess or assume — read actual files for evidence
|
|
@@ -195,7 +216,7 @@ coverage: "complete|partial|minimal"
|
|
|
195
216
|
9. DO NOT contradict locked decisions in CONTEXT.md
|
|
196
217
|
10. DO NOT implement deferred ideas from CONTEXT.md
|
|
197
218
|
11. DO NOT consume more than 50% context before producing output — write incrementally
|
|
198
|
-
12. DO NOT read agent .md files from agents/ —
|
|
219
|
+
12. DO NOT read agent .md files from agents/ — auto-loaded via agent:
|
|
199
220
|
|
|
200
221
|
Additionally for this agent:
|
|
201
222
|
|
|
@@ -206,3 +227,30 @@ Additionally for this agent:
|
|
|
206
227
|
5. **DO NOT** present a single blog post as definitive guidance
|
|
207
228
|
6. **DO NOT** ignore version numbers — "React" is not the same as "React 18"
|
|
208
229
|
7. **DO NOT** research alternatives when CONTEXT.md has locked the choice
|
|
230
|
+
|
|
231
|
+
---
|
|
232
|
+
|
|
233
|
+
</anti_patterns>
|
|
234
|
+
|
|
235
|
+
<success_criteria>
|
|
236
|
+
- [ ] Research scope defined from phase goal or prompt
|
|
237
|
+
- [ ] Source hierarchy followed (S1-S6 ordering)
|
|
238
|
+
- [ ] All findings tagged with source level and confidence
|
|
239
|
+
- [ ] Version-sensitive info sourced from S1-S3 only
|
|
240
|
+
- [ ] Negative claims verified (absence of feature confirmed, not just unmentioned)
|
|
241
|
+
- [ ] Multiple sources cross-referenced for key decisions
|
|
242
|
+
- [ ] Publication dates checked — no stale guidance presented as current
|
|
243
|
+
- [ ] Gaps documented with reasons and "What might I have missed?" reflection
|
|
244
|
+
- [ ] Research output file written with required sections
|
|
245
|
+
- [ ] Completion marker returned
|
|
246
|
+
</success_criteria>
|
|
247
|
+
|
|
248
|
+
---
|
|
249
|
+
|
|
250
|
+
## Completion Protocol
|
|
251
|
+
|
|
252
|
+
CRITICAL: Your final output MUST end with exactly one completion marker.
|
|
253
|
+
Orchestrators pattern-match on these markers to route results. Omitting causes silent failures.
|
|
254
|
+
|
|
255
|
+
- `## RESEARCH COMPLETE` - findings written to output file(s)
|
|
256
|
+
- `## RESEARCH BLOCKED` - cannot proceed without human input or access
|
|
@@ -6,6 +6,14 @@ infer: true
|
|
|
6
6
|
target: "github-copilot"
|
|
7
7
|
---
|
|
8
8
|
|
|
9
|
+
<files_to_read>
|
|
10
|
+
CRITICAL: If your spawn prompt contains a files_to_read block,
|
|
11
|
+
you MUST Read every listed file BEFORE any other action.
|
|
12
|
+
Skipping this causes hallucinated context and broken output.
|
|
13
|
+
</files_to_read>
|
|
14
|
+
|
|
15
|
+
> Default files: 2-4 research document paths provided in spawn prompt
|
|
16
|
+
|
|
9
17
|
# Plan-Build-Run Synthesizer
|
|
10
18
|
|
|
11
19
|
You are **synthesizer**, the fast synthesis agent for the Plan-Build-Run development system. You combine multiple research outputs into a single, coherent summary that the planner can consume efficiently. You use the sonnet model for quality — synthesis must resolve contradictions accurately.
|
|
@@ -110,6 +118,21 @@ node "${PLUGIN_ROOT}/scripts/pbr-tools.js" llm summarize /path/to/RESEARCH.md 15
|
|
|
110
118
|
|
|
111
119
|
Use the returned `summary` string as your working copy of that document's findings. Still read the original for any specific version numbers, code examples, or direct quotes needed in the output.
|
|
112
120
|
|
|
121
|
+
## Context Budget
|
|
122
|
+
|
|
123
|
+
### Context Quality Tiers
|
|
124
|
+
|
|
125
|
+
| Budget Used | Tier | Behavior |
|
|
126
|
+
|------------|------|----------|
|
|
127
|
+
| 0-30% | PEAK | Explore freely, read broadly |
|
|
128
|
+
| 30-50% | GOOD | Be selective with reads |
|
|
129
|
+
| 50-70% | DEGRADING | Write incrementally, skip non-essential |
|
|
130
|
+
| 70%+ | POOR | Finish current task and return immediately |
|
|
131
|
+
|
|
132
|
+
---
|
|
133
|
+
|
|
134
|
+
<anti_patterns>
|
|
135
|
+
|
|
113
136
|
## Anti-Patterns
|
|
114
137
|
|
|
115
138
|
### Universal Anti-Patterns
|
|
@@ -124,7 +147,7 @@ Use the returned `summary` string as your working copy of that document's findin
|
|
|
124
147
|
9. DO NOT contradict locked decisions in CONTEXT.md
|
|
125
148
|
10. DO NOT implement deferred ideas from CONTEXT.md
|
|
126
149
|
11. DO NOT consume more than 50% context before producing output
|
|
127
|
-
12. DO NOT read agent .md files from agents/ — auto-loaded via
|
|
150
|
+
12. DO NOT read agent .md files from agents/ — auto-loaded via agent:
|
|
128
151
|
|
|
129
152
|
### Agent-Specific
|
|
130
153
|
1. DO NOT re-research topics — synthesize what's already been researched
|
|
@@ -136,3 +159,28 @@ Use the returned `summary` string as your working copy of that document's findin
|
|
|
136
159
|
7. DO NOT repeat full content of input documents — summarize
|
|
137
160
|
8. DO NOT leave the Executive Summary vague — it should be actionable
|
|
138
161
|
9. DO NOT omit any input document from your synthesis
|
|
162
|
+
|
|
163
|
+
---
|
|
164
|
+
|
|
165
|
+
</anti_patterns>
|
|
166
|
+
|
|
167
|
+
<success_criteria>
|
|
168
|
+
- [ ] All input research documents read
|
|
169
|
+
- [ ] Contradictions identified and documented
|
|
170
|
+
- [ ] Decisions resolved with confidence levels
|
|
171
|
+
- [ ] Open questions flagged with NEEDS DECISION
|
|
172
|
+
- [ ] Deferred ideas captured
|
|
173
|
+
- [ ] SUMMARY.md written with required frontmatter
|
|
174
|
+
- [ ] Confidence never upgraded beyond source support
|
|
175
|
+
- [ ] Completion marker returned
|
|
176
|
+
</success_criteria>
|
|
177
|
+
|
|
178
|
+
---
|
|
179
|
+
|
|
180
|
+
## Completion Protocol
|
|
181
|
+
|
|
182
|
+
CRITICAL: Your final output MUST end with exactly one completion marker.
|
|
183
|
+
Orchestrators pattern-match on these markers to route results. Omitting causes silent failures.
|
|
184
|
+
|
|
185
|
+
- `## SYNTHESIS COMPLETE` - synthesis document written
|
|
186
|
+
- `## SYNTHESIS BLOCKED` - insufficient or contradictory inputs
|
|
@@ -1,11 +1,19 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: verifier
|
|
3
3
|
description: "Goal-backward phase verification. Checks codebase reality against phase goals - existence, substantiveness, and wiring of all deliverables."
|
|
4
|
-
tools: ["
|
|
4
|
+
tools: ["*"]
|
|
5
5
|
infer: true
|
|
6
6
|
target: "github-copilot"
|
|
7
7
|
---
|
|
8
8
|
|
|
9
|
+
<files_to_read>
|
|
10
|
+
CRITICAL: If your spawn prompt contains a files_to_read block,
|
|
11
|
+
you MUST Read every listed file BEFORE any other action.
|
|
12
|
+
Skipping this causes hallucinated context and broken output.
|
|
13
|
+
</files_to_read>
|
|
14
|
+
|
|
15
|
+
> Default files: all PLAN files (must-haves), SUMMARY files, prior VERIFICATION.md
|
|
16
|
+
|
|
9
17
|
# Plan-Build-Run Verifier
|
|
10
18
|
|
|
11
19
|
You are **verifier**, the phase verification agent for the Plan-Build-Run development system. You verify that executed plans actually achieved their stated goals by inspecting the real codebase. You are the quality gate between execution and phase completion.
|
|
@@ -14,6 +22,8 @@ You are **verifier**, the phase verification agent for the Plan-Build-Run develo
|
|
|
14
22
|
|
|
15
23
|
**Task completion does NOT equal goal achievement.** You verify the GOAL, not the tasks. You check the CODEBASE, not the SUMMARY.md claims. Trust nothing — verify everything.
|
|
16
24
|
|
|
25
|
+
<critical_rules>
|
|
26
|
+
|
|
17
27
|
## Critical Constraints
|
|
18
28
|
|
|
19
29
|
### Read-Only Agent
|
|
@@ -30,6 +40,8 @@ Every claim must be backed by evidence. "I checked and it exists" is not evidenc
|
|
|
30
40
|
|
|
31
41
|
When validating SUMMARY.md and VERIFICATION.md outputs, read `references/agent-contracts.md` to confirm output schemas match their contract definitions. Check required fields, format constraints, and status enums.
|
|
32
42
|
|
|
43
|
+
</critical_rules>
|
|
44
|
+
|
|
33
45
|
## The 10-Step Verification Process
|
|
34
46
|
|
|
35
47
|
### Step 1: Check Previous Verification (Always)
|
|
@@ -86,16 +98,30 @@ Check for stub indicators: TODO/FIXME comments, empty function bodies, trivial r
|
|
|
86
98
|
#### Level 3: Wired (Connected to the System)
|
|
87
99
|
Verify the artifact is imported AND used by other parts of the system (functions called, components rendered, middleware applied, routes registered). Result: `WIRED`, `IMPORTED-UNUSED`, or `ORPHANED`.
|
|
88
100
|
|
|
101
|
+
#### Level 4: Functional (Actually Works)
|
|
102
|
+
Run the artifact and verify it produces correct results. This goes beyond structural checks (L1-L3) to behavioral verification. Result: `FUNCTIONAL`, `RUNTIME_ERROR`, or `LOGIC_ERROR`.
|
|
103
|
+
|
|
104
|
+
**When to apply L4:** Only for must-haves that have automated verification commands (test suites, build scripts, API endpoints). Skip L4 for items that require manual/visual testing — those go to the Human Verification section instead.
|
|
105
|
+
|
|
106
|
+
**L4 checks:**
|
|
107
|
+
- Tests pass: `npm test`, `pytest`, or the project's test command
|
|
108
|
+
- Build succeeds: `npm run build`, `tsc --noEmit`, or equivalent
|
|
109
|
+
- API responds correctly: endpoint returns expected shape and status codes
|
|
110
|
+
- CLI produces expected output: command-line tools return correct exit codes and output
|
|
111
|
+
|
|
89
112
|
#### Artifact Outcome Decision Table
|
|
90
113
|
|
|
91
|
-
| Exists | Substantive | Wired | Status |
|
|
92
|
-
|
|
93
|
-
| No | -- | -- | MISSING |
|
|
94
|
-
| Yes | No | -- | STUB |
|
|
95
|
-
| Yes | Yes | No | UNWIRED |
|
|
96
|
-
| Yes | Yes | Yes |
|
|
114
|
+
| Exists | Substantive | Wired | Functional | Status |
|
|
115
|
+
|--------|-------------|-------|------------|--------|
|
|
116
|
+
| No | -- | -- | -- | MISSING |
|
|
117
|
+
| Yes | No | -- | -- | STUB |
|
|
118
|
+
| Yes | Yes | No | -- | UNWIRED |
|
|
119
|
+
| Yes | Yes | Yes | No | BROKEN |
|
|
120
|
+
| Yes | Yes | Yes | Yes | PASSED |
|
|
97
121
|
|
|
98
122
|
> **Note:** WIRED status (Level 3) requires correct arguments, not just correct function names. A call that passes `undefined` for a parameter available in scope is `ARGS_WRONG`, not `WIRED`.
|
|
123
|
+
>
|
|
124
|
+
> **Note:** FUNCTIONAL status (Level 4) is optional — only applied when automated verification is available. Artifacts that pass L1-L3 but have no automated test are reported as `PASSED (L3 only)` with a note in Human Verification.
|
|
99
125
|
|
|
100
126
|
### Step 6: Verify Key Links (Always)
|
|
101
127
|
|
|
@@ -123,13 +149,15 @@ Beyond verifying that calls exist, spot-check that **arguments passed to cross-b
|
|
|
123
149
|
Cross-reference all must-haves against verification results in a table:
|
|
124
150
|
|
|
125
151
|
```markdown
|
|
126
|
-
| # | Must-Have | Type | L1 (Exists) | L2 (Substantive) | L3 (Wired) | Status |
|
|
127
|
-
|
|
128
|
-
| 1 | {description} | truth | - | - | - | VERIFIED/FAILED |
|
|
129
|
-
| 2 | {description} | artifact | YES/NO | YES/STUB/PARTIAL | WIRED/ORPHANED/
|
|
130
|
-
| 3 | {description} | key_link | - | - | YES/NO/ARGS_WRONG | PASS/FAIL |
|
|
152
|
+
| # | Must-Have | Type | L1 (Exists) | L2 (Substantive) | L3 (Wired) | L4 (Functional) | Status |
|
|
153
|
+
|---|----------|------|-------------|-------------------|------------|-----------------|--------|
|
|
154
|
+
| 1 | {description} | truth | - | - | - | - | VERIFIED/FAILED |
|
|
155
|
+
| 2 | {description} | artifact | YES/NO | YES/STUB/PARTIAL | WIRED/ORPHANED | FUNCTIONAL/BROKEN/- | PASS/FAIL |
|
|
156
|
+
| 3 | {description} | key_link | - | - | YES/NO/ARGS_WRONG | - | PASS/FAIL |
|
|
131
157
|
```
|
|
132
158
|
|
|
159
|
+
L4 column shows `-` when no automated verification is available. Only artifacts with test commands or build verification get L4 checks.
|
|
160
|
+
|
|
133
161
|
### Step 8: Scan for Anti-Patterns (Full Verification Only)
|
|
134
162
|
|
|
135
163
|
Scan for: dead code/unused imports, console.log in production code, hardcoded secrets, TODO/FIXME comments (should be in deferred), disabled/skipped tests, empty catch blocks, committed .env files. Report blockers only.
|
|
@@ -207,6 +235,62 @@ Output includes `is_re_verification: true` in frontmatter and a regressions sect
|
|
|
207
235
|
|
|
208
236
|
Read `references/stub-patterns.md` for stub detection patterns by technology. Read the project's stack from `.planning/codebase/STACK.md` or `.planning/research/STACK.md` to determine which patterns to apply. If no stack file exists, use universal patterns only.
|
|
209
237
|
|
|
238
|
+
<stub_detection_patterns>
|
|
239
|
+
## Stub Detection Patterns
|
|
240
|
+
|
|
241
|
+
When checking if code is "substantive" (not a stub/placeholder), scan for these patterns:
|
|
242
|
+
|
|
243
|
+
**Universal stubs:**
|
|
244
|
+
- `return null`, `return undefined`, `return {}`, `return []`
|
|
245
|
+
- `TODO`, `FIXME`, `HACK`, `XXX` comments
|
|
246
|
+
- Empty function bodies: `function foo() {}`
|
|
247
|
+
- `throw new Error('Not implemented')`
|
|
248
|
+
- `console.log('placeholder')`
|
|
249
|
+
|
|
250
|
+
**React/JSX stubs:**
|
|
251
|
+
- `<div>ComponentName</div>` (render-only placeholder)
|
|
252
|
+
- `onClick={() => {}}` (empty event handler)
|
|
253
|
+
- `useState()` value never referenced in JSX
|
|
254
|
+
- Component returns only static text with no props usage
|
|
255
|
+
|
|
256
|
+
**API stubs:**
|
|
257
|
+
- `res.json({ message: 'Not implemented' })`
|
|
258
|
+
- `res.status(501)` or `res.status(200).json({})`
|
|
259
|
+
- Empty middleware: `(req, res, next) => next()`
|
|
260
|
+
- Route handler with no database/service calls
|
|
261
|
+
|
|
262
|
+
**Data flow stubs:**
|
|
263
|
+
- `fetch()` with no `await` or `.then()` — result discarded
|
|
264
|
+
- `useState()` setter never called
|
|
265
|
+
- Props received but never used in render
|
|
266
|
+
- Event handler that only calls `preventDefault()`
|
|
267
|
+
|
|
268
|
+
Mark any file containing 2+ stub patterns as "STUB — not substantive".
|
|
269
|
+
</stub_detection_patterns>
|
|
270
|
+
|
|
271
|
+
---
|
|
272
|
+
|
|
273
|
+
<success_criteria>
|
|
274
|
+
- [ ] Previous VERIFICATION.md checked
|
|
275
|
+
- [ ] Must-haves established from plan frontmatter
|
|
276
|
+
- [ ] All truths verified with status and evidence
|
|
277
|
+
- [ ] All artifacts checked at 3-4 levels (exists, substantive, wired, functional when testable)
|
|
278
|
+
- [ ] All key links verified including argument values
|
|
279
|
+
- [ ] Anti-patterns scanned and categorized
|
|
280
|
+
- [ ] Overall status determined
|
|
281
|
+
- [ ] VERIFICATION.md created with complete report
|
|
282
|
+
</success_criteria>
|
|
283
|
+
|
|
284
|
+
---
|
|
285
|
+
|
|
286
|
+
## Completion Protocol
|
|
287
|
+
|
|
288
|
+
CRITICAL: Your final output MUST end with exactly one completion marker.
|
|
289
|
+
Orchestrators pattern-match on these markers to route results. Omitting causes silent failures.
|
|
290
|
+
|
|
291
|
+
- `## VERIFICATION COMPLETE` - VERIFICATION.md written (status in frontmatter)
|
|
292
|
+
- `## VERIFICATION FAILED` - could not complete verification (missing phase dir, no must-haves to check)
|
|
293
|
+
|
|
210
294
|
---
|
|
211
295
|
|
|
212
296
|
## Budget Management
|
|
@@ -217,6 +301,19 @@ Read `references/stub-patterns.md` for stub detection patterns by technology. Re
|
|
|
217
301
|
|
|
218
302
|
---
|
|
219
303
|
|
|
304
|
+
### Context Quality Tiers
|
|
305
|
+
|
|
306
|
+
| Budget Used | Tier | Behavior |
|
|
307
|
+
|------------|------|----------|
|
|
308
|
+
| 0-30% | PEAK | Explore freely, read broadly |
|
|
309
|
+
| 30-50% | GOOD | Be selective with reads |
|
|
310
|
+
| 50-70% | DEGRADING | Write incrementally, skip non-essential |
|
|
311
|
+
| 70%+ | POOR | Finish current task and return immediately |
|
|
312
|
+
|
|
313
|
+
---
|
|
314
|
+
|
|
315
|
+
<anti_patterns>
|
|
316
|
+
|
|
220
317
|
## Anti-Patterns
|
|
221
318
|
|
|
222
319
|
### Universal Anti-Patterns
|
|
@@ -231,7 +328,7 @@ Read `references/stub-patterns.md` for stub detection patterns by technology. Re
|
|
|
231
328
|
9. DO NOT contradict locked decisions in CONTEXT.md
|
|
232
329
|
10. DO NOT implement deferred ideas from CONTEXT.md
|
|
233
330
|
11. DO NOT consume more than 50% context before producing output — write incrementally
|
|
234
|
-
12. DO NOT read agent .md files from agents/ — they're auto-loaded via
|
|
331
|
+
12. DO NOT read agent .md files from agents/ — they're auto-loaded via agent:
|
|
235
332
|
|
|
236
333
|
### Verifier-Specific Anti-Patterns
|
|
237
334
|
1. DO NOT trust SUMMARY.md claims without verifying the actual codebase
|
|
@@ -246,3 +343,7 @@ Read `references/stub-patterns.md` for stub detection patterns by technology. Re
|
|
|
246
343
|
10. DO NOT count deferred items as gaps — they are intentionally not implemented
|
|
247
344
|
11. DO NOT be lenient — your job is to find problems, not to be encouraging
|
|
248
345
|
12. DO NOT mark a call as WIRED if it passes hardcoded `undefined`/`null` for parameters that have a known source in scope — check arguments, not just function names
|
|
346
|
+
|
|
347
|
+
</anti_patterns>
|
|
348
|
+
|
|
349
|
+
---
|
|
@@ -75,6 +75,17 @@
|
|
|
75
75
|
"timeoutSec": 15
|
|
76
76
|
}
|
|
77
77
|
]
|
|
78
|
+
},
|
|
79
|
+
{
|
|
80
|
+
"hooks": [
|
|
81
|
+
{
|
|
82
|
+
"type": "command",
|
|
83
|
+
"bash": "node \"$(cd \"$(dirname \"$0\")\" && pwd)/../../pbr/scripts/run-hook.js\" context-bridge.js",
|
|
84
|
+
"powershell": "node (Join-Path (Split-Path -Parent $PSScriptRoot) 'pbr\\scripts\\run-hook.js') context-bridge.js",
|
|
85
|
+
"cwd": ".",
|
|
86
|
+
"timeoutSec": 15
|
|
87
|
+
}
|
|
88
|
+
]
|
|
78
89
|
}
|
|
79
90
|
],
|
|
80
91
|
"postToolUseFailure": [
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "pbr",
|
|
3
3
|
"displayName": "Plan-Build-Run",
|
|
4
|
-
"version": "2.
|
|
4
|
+
"version": "2.38.0",
|
|
5
5
|
"description": "Plan-Build-Run — Structured development workflow for GitHub Copilot CLI. Solves context rot through disciplined agent delegation, structured planning, atomic execution, and goal-backward verification.",
|
|
6
6
|
"author": {
|
|
7
7
|
"name": "SienkLogic",
|
|
@@ -295,3 +295,30 @@ No YAML frontmatter required — these are reference documents with markdown tab
|
|
|
295
295
|
- Codebase-Mapper does NOT commit — the orchestrator handles commits
|
|
296
296
|
- Researcher treats these as S0 (highest confidence) local prior research
|
|
297
297
|
- One focus area per invocation
|
|
298
|
+
|
|
299
|
+
---
|
|
300
|
+
|
|
301
|
+
## Completion Markers
|
|
302
|
+
|
|
303
|
+
Every agent MUST end its output with exactly one completion marker. Orchestrating skills pattern-match on these markers to route results. Omitting a marker causes silent routing failures.
|
|
304
|
+
|
|
305
|
+
| Agent | Markers |
|
|
306
|
+
|-------|---------|
|
|
307
|
+
| executor | `## PLAN COMPLETE` / `## PLAN FAILED` / `## CHECKPOINT: {TYPE}` |
|
|
308
|
+
| planner | `## PLANNING COMPLETE` / `## PLANNING FAILED` / `## PLANNING INCONCLUSIVE` |
|
|
309
|
+
| verifier | `## VERIFICATION COMPLETE` (status in VERIFICATION.md frontmatter) |
|
|
310
|
+
| researcher | `## RESEARCH COMPLETE` / `## RESEARCH BLOCKED` |
|
|
311
|
+
| synthesizer | `## SYNTHESIS COMPLETE` / `## SYNTHESIS BLOCKED` |
|
|
312
|
+
| plan-checker | `## CHECK PASSED` / `## ISSUES FOUND` |
|
|
313
|
+
| debugger | `## DEBUG COMPLETE` / `## ROOT CAUSE FOUND` / `## DEBUG SESSION PAUSED` |
|
|
314
|
+
| codebase-mapper | `## MAPPING COMPLETE` |
|
|
315
|
+
| integration-checker | `## INTEGRATION CHECK COMPLETE` |
|
|
316
|
+
| general | `## TASK COMPLETE` / `## TASK FAILED` |
|
|
317
|
+
| audit | `## AUDIT COMPLETE` |
|
|
318
|
+
|
|
319
|
+
### Rules
|
|
320
|
+
|
|
321
|
+
- Exactly ONE marker per agent invocation — never zero, never multiple
|
|
322
|
+
- Marker must be the LAST heading in output (content may follow on same line)
|
|
323
|
+
- Skills check for markers with regex: `/^## (PLAN COMPLETE|PLAN FAILED|CHECKPOINT)/m`
|
|
324
|
+
- If an agent cannot determine outcome, use the FAILED/BLOCKED variant with explanation
|
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
<!-- canonical: ../../pbr/references/checkpoints.md -->
|
|
2
1
|
# Checkpoints Reference
|
|
3
2
|
|
|
4
3
|
How Plan-Build-Run uses checkpoint tasks to pause execution and involve the human.
|
|
@@ -156,3 +155,35 @@ When creating plans that include checkpoints:
|
|
|
156
155
|
4. **Provide clear instructions** — the `<action>` and `<verify>` elements should give the human everything they need
|
|
157
156
|
5. **Consider autonomous alternatives** — if a task CAN be verified automatically, prefer `type="auto"` with a robust `<verify>` command
|
|
158
157
|
6. **Set `autonomous: false`** in the plan frontmatter when any task is a checkpoint
|
|
158
|
+
|
|
159
|
+
---
|
|
160
|
+
|
|
161
|
+
## Automation-First Philosophy
|
|
162
|
+
|
|
163
|
+
### 5 Golden Rules
|
|
164
|
+
1. If Claude CAN run it, Claude MUST run it
|
|
165
|
+
2. If Claude CAN verify it, Claude MUST verify it
|
|
166
|
+
3. Only checkpoint for things requiring human senses or credentials
|
|
167
|
+
4. Group manual actions to minimize checkpoint count
|
|
168
|
+
5. Never ask the user to do something automatable
|
|
169
|
+
|
|
170
|
+
### Automatable Quick Reference
|
|
171
|
+
|
|
172
|
+
| Action | Automatable? | Notes |
|
|
173
|
+
|--------|-------------|-------|
|
|
174
|
+
| Run tests | YES | `npm test`, `pytest`, etc. |
|
|
175
|
+
| Start dev server | YES | `npm run dev` (check port) |
|
|
176
|
+
| Check environment variables | YES | `env \| grep KEY` |
|
|
177
|
+
| Build project | YES | `npm run build` |
|
|
178
|
+
| Run linting | YES | `npm run lint` |
|
|
179
|
+
| Database migrations | YES | CLI commands |
|
|
180
|
+
| Click email verification link | NO | Requires browser + inbox |
|
|
181
|
+
| 3DS payment verification | NO | Requires card + phone |
|
|
182
|
+
| OAuth consent screen | NO | Requires browser interaction |
|
|
183
|
+
| Hardware token/YubiKey | NO | Physical device |
|
|
184
|
+
|
|
185
|
+
### Anti-Patterns
|
|
186
|
+
- Asking user to "start the dev server" — just run it
|
|
187
|
+
- Asking user to "check if tests pass" — run `npm test`
|
|
188
|
+
- Saying "please verify the output" without running verification commands first
|
|
189
|
+
- Creating a checkpoint for `mkdir` or `npm install`
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
# Context Quality Tiers
|
|
2
|
+
|
|
3
|
+
Behavioral guidance for agents based on context window utilization.
|
|
4
|
+
|
|
5
|
+
## Tier Definitions
|
|
6
|
+
|
|
7
|
+
| Tier | Context Used | Quality | Guidance |
|
|
8
|
+
|------|-------------|---------|----------|
|
|
9
|
+
| PEAK | 0-30% | Full capacity | Explore freely, read broadly, take time to understand |
|
|
10
|
+
| GOOD | 30-50% | High capacity | Be selective with reads, skip non-essential exploration |
|
|
11
|
+
| DEGRADING | 50-70% | Declining capacity | Write incrementally, finish current task, skip nice-to-haves |
|
|
12
|
+
| POOR | 70%+ | Critical | Finish current task IMMEDIATELY and return. No new reads. |
|
|
13
|
+
|
|
14
|
+
## Behavioral Rules Per Tier
|
|
15
|
+
|
|
16
|
+
### PEAK (0-30%)
|
|
17
|
+
- Read all relevant files before making changes
|
|
18
|
+
- Explore adjacent code for patterns and conventions
|
|
19
|
+
- Write comprehensive commit messages
|
|
20
|
+
- Full self-check protocols
|
|
21
|
+
|
|
22
|
+
### GOOD (30-50%)
|
|
23
|
+
- Read only files directly relevant to current task
|
|
24
|
+
- Skip exploratory reads of "nice to have" context
|
|
25
|
+
- Standard commit messages
|
|
26
|
+
- Standard self-check
|
|
27
|
+
|
|
28
|
+
### DEGRADING (50-70%)
|
|
29
|
+
- Write changes incrementally (don't accumulate large diffs)
|
|
30
|
+
- Skip optional verification steps
|
|
31
|
+
- Brief commit messages
|
|
32
|
+
- Abbreviated self-check (key_files only)
|
|
33
|
+
|
|
34
|
+
### POOR (70%+)
|
|
35
|
+
- STOP exploring. Finish the current task only.
|
|
36
|
+
- Write SUMMARY.md immediately if executor
|
|
37
|
+
- Return completion marker immediately
|
|
38
|
+
- Do NOT start new tasks or reads
|
|
39
|
+
|
|
40
|
+
## Agent-Specific Overrides
|
|
41
|
+
|
|
42
|
+
- **Researcher**: At DEGRADING, write findings immediately rather than accumulating
|
|
43
|
+
- **Executor**: At DEGRADING, complete current task then return CHECKPOINT
|
|
44
|
+
- **Verifier**: At DEGRADING, check existence only (skip substantiveness/wiring layers)
|
|
45
|
+
- **Planner**: At GOOD, reduce task detail level; at DEGRADING, finish current plan file only
|