@sienklogic/plan-build-run 2.22.2 → 2.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. package/CHANGELOG.md +42 -0
  2. package/dashboard/package.json +3 -2
  3. package/dashboard/src/middleware/errorHandler.js +12 -2
  4. package/dashboard/src/repositories/planning.repository.js +24 -12
  5. package/dashboard/src/routes/pages.routes.js +182 -4
  6. package/dashboard/src/server.js +4 -0
  7. package/dashboard/src/services/audit.service.js +42 -0
  8. package/dashboard/src/services/dashboard.service.js +1 -12
  9. package/dashboard/src/services/local-llm-metrics.service.js +81 -0
  10. package/dashboard/src/services/quick.service.js +62 -0
  11. package/dashboard/src/services/roadmap.service.js +1 -11
  12. package/dashboard/src/utils/strip-bom.js +8 -0
  13. package/dashboard/src/views/audit-detail.ejs +5 -0
  14. package/dashboard/src/views/audits.ejs +5 -0
  15. package/dashboard/src/views/partials/analytics-content.ejs +61 -0
  16. package/dashboard/src/views/partials/audit-detail-content.ejs +12 -0
  17. package/dashboard/src/views/partials/audits-content.ejs +34 -0
  18. package/dashboard/src/views/partials/quick-content.ejs +40 -0
  19. package/dashboard/src/views/partials/quick-detail-content.ejs +29 -0
  20. package/dashboard/src/views/partials/sidebar.ejs +16 -0
  21. package/dashboard/src/views/partials/todos-content.ejs +13 -3
  22. package/dashboard/src/views/quick-detail.ejs +5 -0
  23. package/dashboard/src/views/quick.ejs +5 -0
  24. package/package.json +1 -1
  25. package/plugins/copilot-pbr/agents/debugger.agent.md +15 -0
  26. package/plugins/copilot-pbr/agents/integration-checker.agent.md +9 -2
  27. package/plugins/copilot-pbr/agents/planner.agent.md +19 -0
  28. package/plugins/copilot-pbr/agents/researcher.agent.md +20 -0
  29. package/plugins/copilot-pbr/agents/synthesizer.agent.md +12 -0
  30. package/plugins/copilot-pbr/agents/verifier.agent.md +22 -2
  31. package/plugins/copilot-pbr/plugin.json +1 -1
  32. package/plugins/copilot-pbr/references/config-reference.md +89 -0
  33. package/plugins/copilot-pbr/references/plan-format.md +22 -0
  34. package/plugins/copilot-pbr/skills/health/SKILL.md +8 -1
  35. package/plugins/copilot-pbr/skills/help/SKILL.md +4 -4
  36. package/plugins/copilot-pbr/skills/milestone/SKILL.md +12 -12
  37. package/plugins/copilot-pbr/skills/status/SKILL.md +37 -1
  38. package/plugins/copilot-pbr/templates/INTEGRATION-REPORT.md.tmpl +18 -2
  39. package/plugins/copilot-pbr/templates/VERIFICATION-DETAIL.md.tmpl +2 -1
  40. package/plugins/cursor-pbr/.cursor-plugin/plugin.json +1 -1
  41. package/plugins/cursor-pbr/agents/debugger.md +15 -0
  42. package/plugins/cursor-pbr/agents/integration-checker.md +9 -2
  43. package/plugins/cursor-pbr/agents/planner.md +19 -0
  44. package/plugins/cursor-pbr/agents/researcher.md +20 -0
  45. package/plugins/cursor-pbr/agents/synthesizer.md +12 -0
  46. package/plugins/cursor-pbr/agents/verifier.md +22 -2
  47. package/plugins/cursor-pbr/references/config-reference.md +89 -0
  48. package/plugins/cursor-pbr/references/plan-format.md +22 -0
  49. package/plugins/cursor-pbr/skills/health/SKILL.md +8 -1
  50. package/plugins/cursor-pbr/skills/help/SKILL.md +4 -4
  51. package/plugins/cursor-pbr/skills/milestone/SKILL.md +12 -12
  52. package/plugins/cursor-pbr/skills/status/SKILL.md +37 -1
  53. package/plugins/cursor-pbr/templates/INTEGRATION-REPORT.md.tmpl +18 -2
  54. package/plugins/cursor-pbr/templates/VERIFICATION-DETAIL.md.tmpl +2 -1
  55. package/plugins/pbr/.claude-plugin/plugin.json +1 -1
  56. package/plugins/pbr/agents/debugger.md +15 -0
  57. package/plugins/pbr/agents/integration-checker.md +9 -2
  58. package/plugins/pbr/agents/planner.md +19 -0
  59. package/plugins/pbr/agents/researcher.md +20 -0
  60. package/plugins/pbr/agents/synthesizer.md +12 -0
  61. package/plugins/pbr/agents/verifier.md +22 -2
  62. package/plugins/pbr/references/config-reference.md +89 -0
  63. package/plugins/pbr/references/plan-format.md +22 -0
  64. package/plugins/pbr/scripts/check-config-change.js +33 -0
  65. package/plugins/pbr/scripts/check-plan-format.js +52 -4
  66. package/plugins/pbr/scripts/check-subagent-output.js +43 -3
  67. package/plugins/pbr/scripts/config-schema.json +48 -0
  68. package/plugins/pbr/scripts/local-llm/client.js +214 -0
  69. package/plugins/pbr/scripts/local-llm/health.js +217 -0
  70. package/plugins/pbr/scripts/local-llm/metrics.js +252 -0
  71. package/plugins/pbr/scripts/local-llm/operations/classify-artifact.js +76 -0
  72. package/plugins/pbr/scripts/local-llm/operations/classify-error.js +75 -0
  73. package/plugins/pbr/scripts/local-llm/operations/score-source.js +72 -0
  74. package/plugins/pbr/scripts/local-llm/operations/summarize-context.js +62 -0
  75. package/plugins/pbr/scripts/local-llm/operations/validate-task.js +59 -0
  76. package/plugins/pbr/scripts/local-llm/router.js +101 -0
  77. package/plugins/pbr/scripts/local-llm/shadow.js +60 -0
  78. package/plugins/pbr/scripts/local-llm/threshold-tuner.js +118 -0
  79. package/plugins/pbr/scripts/pbr-tools.js +120 -3
  80. package/plugins/pbr/scripts/post-write-dispatch.js +2 -2
  81. package/plugins/pbr/scripts/progress-tracker.js +29 -3
  82. package/plugins/pbr/scripts/session-cleanup.js +36 -1
  83. package/plugins/pbr/scripts/validate-task.js +30 -1
  84. package/plugins/pbr/skills/health/SKILL.md +8 -1
  85. package/plugins/pbr/skills/help/SKILL.md +4 -4
  86. package/plugins/pbr/skills/milestone/SKILL.md +12 -12
  87. package/plugins/pbr/skills/status/SKILL.md +38 -2
  88. package/plugins/pbr/templates/INTEGRATION-REPORT.md.tmpl +18 -2
  89. package/plugins/pbr/templates/VERIFICATION-DETAIL.md.tmpl +2 -1
  90. package/dashboard/src/views/coming-soon.ejs +0 -11
@@ -68,6 +68,31 @@ Read the following files (skip any that don't exist):
68
68
  5. **`.planning/REQUIREMENTS.md`** — Requirements (if exists)
69
69
  - Extract: requirement completion status if tracked
70
70
 
71
+ ### Step 1b: Read Local LLM Stats (advisory — skip on any error)
72
+
73
+ After loading config.json, check `local_llm.enabled`. If `true`:
74
+
75
+ ```bash
76
+ node ${PLUGIN_ROOT}/scripts/pbr-tools.js llm status
77
+ node ${PLUGIN_ROOT}/scripts/pbr-tools.js llm metrics
78
+ ```
79
+
80
+ Parse both JSON responses. Capture:
81
+
82
+ - `status.model` — model name
83
+ - `metrics.total_calls` — lifetime total calls
84
+ - `metrics.tokens_saved` — lifetime frontier tokens saved
85
+ - `metrics.cost_saved_usd` — lifetime cost estimate
86
+ - `metrics.avg_latency_ms` — lifetime average latency
87
+
88
+ Also run session-scoped metrics if `.planning/.session-start` exists:
89
+
90
+ ```bash
91
+ node ${PLUGIN_ROOT}/scripts/pbr-tools.js llm metrics --session <content-of-.session-start>
92
+ ```
93
+
94
+ If `local_llm.enabled` is `false` or commands fail, skip this step silently.
95
+
71
96
  ### Step 2: Scan Phase Directories
72
97
 
73
98
  For each phase listed in ROADMAP.md:
@@ -191,8 +216,18 @@ Todos: {count} pending. Run `/pbr:todo list` to see them.
191
216
 
192
217
  {If notes exist:}
193
218
  Notes: {count} quick capture(s). `/pbr:note list` to review.
219
+
220
+ {If local_llm.enabled AND total_calls > 0:}
221
+ Local LLM: enabled ({model}, avg {avg_ms}ms)
222
+ This session: {session_calls} calls, ~{session_tokens} frontier tokens saved
223
+ Lifetime: {total_calls} calls, ~{tokens_saved} tokens saved (~{cost_str} at $3/M)
224
+
225
+ {If local_llm.enabled AND total_calls == 0:}
226
+ Local LLM: enabled ({model}) — no calls recorded yet
194
227
  ```
195
228
 
229
+ The Local LLM block is **advisory only** — it never affects the routing decision or Next Up suggestion.
230
+
196
231
  ### Progress Bar
197
232
 
198
233
  Generate a 20-character progress bar:
@@ -342,9 +377,10 @@ This skill should be fast. It's a status check, not an analysis.
342
377
  - Cache nothing (always read fresh state)
343
378
 
344
379
  **DO NOT:**
380
+
345
381
  - Read full SUMMARY.md contents (frontmatter is enough)
346
382
  - Read plan file contents (just check existence)
347
- - Run any Bash commands
383
+ - Run Bash commands except for Step 1b (2-3 `pbr-tools` calls only when `local_llm.enabled: true`, skipped entirely otherwise)
348
384
  - Modify any files
349
385
  - Invoke any agents
350
386
 
@@ -112,7 +112,22 @@ Phase 03 (Core) ──provides──→ Phase 04 (Frontend)
112
112
  ### Flow 2: {Flow Name} - {STATUS}
113
113
  ...
114
114
 
115
- ## 5. Integration Issues Summary
115
+ ## 5. Data-Flow Propagation
116
+
117
+ ### Cross-Boundary Data Flows
118
+
119
+ | Data Field | Source | Intermediate Steps | Destination | Status |
120
+ |------------|--------|-------------------|-------------|--------|
121
+ | {field name} | {origin, e.g., hook stdin `data.session_id`} | {module1:L12 → module2:L45} | {dest, e.g., metrics.jsonl `session_id`} | PROPAGATED |
122
+ | {field name} | {origin} | {module1:L12 → module2:L45} | {dest} | DATA_DROPPED |
123
+
124
+ ### Data-Flow Issues
125
+
126
+ | Field | Dropped At | Available In Scope | Passed Instead | Fix |
127
+ |-------|-----------|-------------------|----------------|-----|
128
+ | {field} | {file:line} | `data.session_id` | `undefined` | Pass `data.session_id` |
129
+
130
+ ## 6. Integration Issues Summary
116
131
 
117
132
  ### Critical Issues (system cannot function)
118
133
 
@@ -131,7 +146,7 @@ Phase 03 (Core) ──provides──→ Phase 04 (Frontend)
131
146
  1. **{Issue}**: {description}
132
147
  - Fix: {recommended action}
133
148
 
134
- ## 6. Integration Score
149
+ ## 7. Integration Score
135
150
 
136
151
  | Category | Items Checked | Passed | Failed | Score |
137
152
  |----------|--------------|--------|--------|-------|
@@ -139,6 +154,7 @@ Phase 03 (Core) ──provides──→ Phase 04 (Frontend)
139
154
  | API coverage | {n} | {n} | {n} | {%} |
140
155
  | Auth protection | {n} | {n} | {n} | {%} |
141
156
  | E2E flows | {n} | {n} | {n} | {%} |
157
+ | Data-flow propagation | {n} | {n} | {n} | {%} |
142
158
  | **Overall** | {n} | {n} | {n} | **{%}** |
143
159
 
144
160
  ## Recommendations
@@ -54,8 +54,9 @@ anti_patterns:
54
54
 
55
55
  | # | Link Description | Source | Target | Status | Evidence |
56
56
  |---|-----------------|--------|--------|--------|----------|
57
- | 1 | {what connects to what} | `{source_file}` | `{target_file}` | WIRED | Import at L12, called at L45 |
57
+ | 1 | {what connects to what} | `{source_file}` | `{target_file}` | WIRED | Import at L12, called at L45, args correct |
58
58
  | 2 | {what connects to what} | `{source_file}` | `{target_file}` | BROKEN | Imported but never called |
59
+ | 3 | {what connects to what} | `{source_file}` | `{target_file}` | ARGS_WRONG | Called at L45 but passes undefined for sessionId (data.session_id in scope) |
59
60
 
60
61
  ## Gaps Found
61
62
 
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pbr",
3
- "version": "2.22.2",
3
+ "version": "2.24.0",
4
4
  "description": "Plan-Build-Run — Structured development workflow for Claude Code. Solves context rot through disciplined subagent delegation, structured planning, atomic execution, and goal-backward verification.",
5
5
  "author": {
6
6
  "name": "SienkLogic",
@@ -144,6 +144,21 @@ Then emit a `DECISION` checkpoint asking the user to approve, modify, or reject
144
144
 
145
145
  **Commit format**: `fix({scope}): {description}` with body: `Root cause: ...` and `Debug session: .planning/debug/{slug}.md`
146
146
 
147
+ ## Local LLM Error Classification (Optional)
148
+
149
+ When you receive an error message or stack trace, you MAY use the local LLM to classify it before starting hypothesis generation. This is advisory — skip it if unavailable.
150
+
151
+ ```bash
152
+ # Write the error to a temp file, then classify:
153
+ echo "Error text here" > /tmp/debug-error.txt
154
+ node "${CLAUDE_PLUGIN_ROOT}/scripts/pbr-tools.js" llm classify-error /tmp/debug-error.txt debugger 2>/dev/null
155
+ # Returns: {"category":"missing_output","confidence":0.91,"latency_ms":1840,"fallback_used":false}
156
+ ```
157
+
158
+ Categories: `connection_refused`, `timeout`, `missing_output`, `wrong_output_format`, `permission_error`, `unknown`.
159
+
160
+ If classification succeeds, use the returned category to bias your initial hypothesis ranking. If it returns null or fails, proceed with manual hypothesis generation as normal.
161
+
147
162
  ## Common Bug Patterns
148
163
 
149
164
  Reference: `references/common-bug-patterns.md` — covers off-by-one, null/undefined, async/timing, state management, import/module, environment, and data shape patterns.
@@ -40,6 +40,7 @@ You MUST perform all applicable categories (skip only if zero items exist for th
40
40
  3. **Auth Protection** — Every non-public route must have auth middleware. Frontend route guards must match backend protection.
41
41
  4. **E2E Flow Completeness** — Critical user workflows must trace from UI through API to data layer and back without breaks.
42
42
  5. **Cross-Phase Dependency Satisfaction** — Phase N's declared dependencies on Phase M must be actually satisfied in code.
43
+ 6. **Data-Flow Propagation** — Values originating at one boundary (hook stdin fields, API request params, env vars) must propagate correctly through the call chain to their destination (log entries, database records, API responses). A connected pipeline with missing data is a broken integration.
43
44
 
44
45
  > **First-phase edge case**: If no completed phases exist yet, focus on verifying the current phase's internal consistency — exports match imports within the phase, API contracts are self-consistent. Cross-phase checks are not applicable and should be skipped.
45
46
 
@@ -52,14 +53,19 @@ Read `references/agent-contracts.md` to validate agent-to-agent handoffs. Verify
52
53
  - **Write access for output artifact only** — you have Write access for your output artifact only. You CANNOT fix source code — you REPORT issues.
53
54
  - **Cross-phase scope** — unlike verifier (single phase), you check across phases.
54
55
 
55
- ## 6-Step Verification Process
56
+ ## 7-Step Verification Process
56
57
 
57
58
  1. **Build Export/Import Map**: Read each completed phase's SUMMARY.md frontmatter (`requires`, `provides`, `affects`). Grep actual exports/imports in source. Cross-reference declared vs actual — flag mismatches.
58
59
  2. **Verify Export Usage**: For each `provides` item: locate actual export (missing = `MISSING_EXPORT` ERROR), find consumers (none = `ORPHANED` WARNING), verify usage not just import (`IMPORTED_UNUSED` WARNING), check signature compatibility (`MISMATCHED` ERROR). Status `CONSUMED` = OK.
59
60
  3. **Verify API Coverage**: Discover routes, find frontend callers, match by method+path+body/params. Produce coverage table. See `references/integration-patterns.md` for framework-specific patterns.
60
61
  4. **Verify Auth Protection**: Identify auth mechanism, list all routes, classify (public vs protected), check frontend guards. Flag UNPROTECTED routes.
61
62
  5. **Verify E2E Flows**: Trace critical workflows step-by-step — verify each step exists and connects to the next (import/call/redirect). Record evidence (file:line). Flow status: COMPLETE | BROKEN | PARTIAL | UNTRACEABLE. See `references/integration-patterns.md` for flow templates.
62
- 6. **Compile Integration Report**: Produce final report with all findings by category.
63
+ 6. **Verify Data-Flow Propagation**: For each cross-boundary data field identified in plans or SUMMARY.md, trace the value from source through intermediate functions to destination. Verify the value is actually passed (not `undefined`/`null`/hardcoded) at each step.
64
+ - **Source examples**: hook stdin (`data.session_id`), API request params, environment variables, config fields
65
+ - **Destination examples**: log entries, database records, API responses, metric files
66
+ - **Method**: Grep each intermediate call site and inspect arguments. Flag `DATA_DROPPED` when a value available in scope is replaced by `undefined` or a placeholder.
67
+ - **Status**: `PROPAGATED` (value flows correctly) | `DATA_DROPPED` (value lost at some step) | `UNTRACEABLE` (cannot determine flow)
68
+ 7. **Compile Integration Report**: Produce final report with all findings by category.
63
69
 
64
70
  ## Output Format
65
71
 
@@ -124,3 +130,4 @@ See `references/integration-patterns.md` for grep/search patterns by framework.
124
130
  - "File exists" is not "component is integrated"
125
131
  - Auth middleware existing somewhere does not mean routes are protected
126
132
  - Always check error handling paths, not just happy paths
133
+ - Structural connectivity is not data-flow correctness — a connected pipeline can still drop data at any step
@@ -73,6 +73,23 @@ Each must-have maps to one or more tasks. Every task exists to make a must-have
73
73
 
74
74
  ---
75
75
 
76
+ ## Data Contracts for Cross-Boundary Parameters
77
+
78
+ When a function signature includes parameters that flow across module boundaries — session IDs from hook stdin, config objects from disk, auth tokens from environment — the plan **MUST** specify the **source** for each argument, not just the type.
79
+
80
+ For every cross-boundary call in a task's `<action>`, document:
81
+
82
+ | Parameter | Source | Context | Fallback |
83
+ |-----------|--------|---------|----------|
84
+ | `sessionId` | `data.session_id` (hook stdin) | Hook scripts only | `undefined` (CLI context) |
85
+ | `config` | `configLoad(planningDir)` | All callers | `resolveConfig(undefined)` |
86
+
87
+ **When to apply:** Any function call where the caller and callee live in different modules AND at least one argument originates from an external boundary (stdin, env, disk, network). Internal helper calls within the same module do not need contracts.
88
+
89
+ **Why this matters:** Without explicit source mapping, executors will use the type-correct but value-wrong default (e.g., `undefined` instead of `data.session_id`). The plan is the single source of truth for how data flows — if the plan says `undefined`, the executor will faithfully implement `undefined`.
90
+
91
+ ---
92
+
76
93
  ## Plan Structure
77
94
 
78
95
  Read `references/plan-format.md` for the complete plan file specification including:
@@ -172,6 +189,7 @@ When CONTEXT.md or RESEARCH-SUMMARY.md contains `[NEEDS DECISION]` flags from th
172
189
  - [ ] Dependencies are acyclic, no file conflicts within same wave
173
190
  - [ ] Locked decisions honored, no deferred ideas included
174
191
  - [ ] Verify commands are actually executable
192
+ - [ ] Cross-boundary parameters have documented sources (data contracts)
175
193
 
176
194
  ---
177
195
 
@@ -245,3 +263,4 @@ One-line task descriptions in `<name>`. File paths in `<files>`, not explanation
245
263
  9. DO NOT plan for features outside the current phase goal
246
264
  10. DO NOT assume research is done — check discovery level
247
265
  11. DO NOT leave done conditions vague — they must be observable
266
+ 12. DO NOT specify literal `undefined` for parameters that have a known source in the calling context — use data contracts to map sources
@@ -62,6 +62,26 @@ All claims must be attributed to a source level. Higher levels override lower le
62
62
 
63
63
  **Offline Fallback**: If web tools are unavailable (air-gapped environment, MCP not configured), rely on local sources: codebase analysis via Glob/Grep, existing documentation, and README files. Assign these S3-S4 confidence levels. Do not attempt WebFetch or WebSearch — note in the output header that external sources were unavailable.
64
64
 
65
+ ## Local LLM Source Scoring (Optional)
66
+
67
+ If local LLM offload is configured, you MAY use it to score source credibility instead of manually assigning S-levels. This is advisory — never wait on it or fail if it returns null.
68
+
69
+ Check availability first:
70
+
71
+ ```bash
72
+ node "${CLAUDE_PLUGIN_ROOT}/scripts/pbr-tools.js" llm status 2>/dev/null
73
+ ```
74
+
75
+ If `enabled: true`, score a source excerpt:
76
+
77
+ ```bash
78
+ echo "Source URL and content excerpt" > /tmp/source-excerpt.txt
79
+ node "${CLAUDE_PLUGIN_ROOT}/scripts/pbr-tools.js" llm score-source "https://example.com/docs" /tmp/source-excerpt.txt 2>/dev/null
80
+ # Returns: {"level":"S2","confidence":0.87,"reason":"Official library documentation page"}
81
+ ```
82
+
83
+ Use the returned `level` to set your source tag. If the call fails or returns `null`, assign the level manually per the hierarchy table above.
84
+
65
85
  ---
66
86
 
67
87
  ## Confidence Levels
@@ -100,6 +100,18 @@ conflicts: N
100
100
  - **Research gaps**: Add `[RESEARCH GAP]` flag, add to Open Questions with high impact, never fabricate
101
101
  - **Duplicates**: Consolidate into one entry, note multi-source agreement, reference all documents
102
102
 
103
+ ## Local LLM Context Summarization (Optional)
104
+
105
+ When input research documents are large (>2000 words combined), you MAY use the local LLM to pre-summarize each document before synthesis. This reduces your own context consumption. Advisory only — if unavailable, read documents normally.
106
+
107
+ ```bash
108
+ # Pre-summarize a large research document to ~150 words:
109
+ node "${CLAUDE_PLUGIN_ROOT}/scripts/pbr-tools.js" llm summarize /path/to/RESEARCH.md 150 2>/dev/null
110
+ # Returns: {"summary":"...plain text summary under 150 words...","latency_ms":2100,"fallback_used":false}
111
+ ```
112
+
113
+ Use the returned `summary` string as your working copy of that document's findings. Still read the original for any specific version numbers, code examples, or direct quotes needed in the output.
114
+
103
115
  ## Anti-Patterns
104
116
 
105
117
  ### Universal Anti-Patterns
@@ -101,10 +101,29 @@ Verify the artifact is imported AND used by other parts of the system (functions
101
101
  | Yes | Yes | No | UNWIRED |
102
102
  | Yes | Yes | Yes | PASSED |
103
103
 
104
+ > **Note:** WIRED status (Level 3) requires correct arguments, not just correct function names. A call that passes `undefined` for a parameter available in scope is `ARGS_WRONG`, not `WIRED`.
105
+
104
106
  ### Step 6: Verify Key Links (Always)
105
107
 
106
108
  For each key_link: identify source and target components, verify the import path resolves, verify the imported symbol is actually called/used, and verify call signatures match. Watch for: wrong import paths, imported-but-never-called symbols, defined-but-never-applied middleware, registered-but-never-triggered event handlers.
107
109
 
110
+ ### Step 6b: Argument-Level Spot Checks (Always)
111
+
112
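+ Beyond verifying that calls exist, spot-check that **arguments passed to cross-boundary calls carry the correct values**. A call with the right function but wrong arguments is functionally as broken as an unwired one — it must not be reported as `WIRED`; report it as `ARGS_WRONG` (see Classification below).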
113
+
114
+ **Focus on:** IDs (session, user, request), config objects, auth tokens, and context data that originate from external boundaries (stdin, env, disk).
115
+
116
+ **Method:**
117
+ 1. For each key_link verified in Step 6, grep the call site and inspect the arguments
118
+ 2. Compare each argument against the data source available in the calling scope
119
+ 3. Flag any argument that passes `undefined`, `null`, or a hardcoded placeholder when the calling scope has the real value available (e.g., `data.session_id` is in scope but `undefined` is passed)
120
+
121
+ **Classification:**
122
+ - `WIRED` requires both correct function AND correct arguments
123
+ - `ARGS_WRONG` = correct function called but one or more arguments are incorrect/missing — this is a key link gap
124
+
125
+ **Example:** A hook script receives `data` from stdin containing `session_id`. If it calls `logMetric(planningDir, { session_id: undefined })` instead of `logMetric(planningDir, { session_id: data.session_id })`, that is an `ARGS_WRONG` gap even though the call itself exists.
126
+
108
127
  ### Step 7: Check Requirements Coverage (Always)
109
128
 
110
129
  Cross-reference all must-haves against verification results in a table:
@@ -113,8 +132,8 @@ Cross-reference all must-haves against verification results in a table:
113
132
  | # | Must-Have | Type | L1 (Exists) | L2 (Substantive) | L3 (Wired) | Status |
114
133
  |---|----------|------|-------------|-------------------|------------|--------|
115
134
  | 1 | {description} | truth | - | - | - | VERIFIED/FAILED |
116
- | 2 | {description} | artifact | YES/NO | YES/STUB/PARTIAL | WIRED/ORPHANED | PASS/FAIL |
117
- | 3 | {description} | key_link | - | - | YES/NO | PASS/FAIL |
135
+ | 2 | {description} | artifact | YES/NO | YES/STUB/PARTIAL | WIRED/ORPHANED/ARGS_WRONG | PASS/FAIL |
136
+ | 3 | {description} | key_link | - | - | YES/NO/ARGS_WRONG | PASS/FAIL |
118
137
  ```
119
138
 
120
139
  ### Step 8: Scan for Anti-Patterns (Full Verification Only)
@@ -232,3 +251,4 @@ Read `references/stub-patterns.md` for stub detection patterns by technology. Re
232
251
  9. DO NOT give PASSED status if ANY must-have fails at ANY level
233
252
  10. DO NOT count deferred items as gaps — they are intentionally not implemented
234
253
  11. DO NOT be lenient — your job is to find problems, not to be encouraging
254
+ 12. DO NOT mark a call as WIRED if it passes hardcoded `undefined`/`null` for parameters that have a known source in scope — check arguments, not just function names
@@ -439,3 +439,92 @@ Run validation with: `node plugins/pbr/scripts/pbr-tools.js config validate`
439
439
  | `tdd_mode: true` + `depth: quick` | quick depth skips verification, which conflicts with TDD's verify-first approach |
440
440
  | `git.mode: disabled` + `atomic_commits: true` | atomic_commits has no effect when git is disabled |
441
441
  | `git.branching: phase` + `git.mode: disabled` | Branching settings are ignored when git is disabled |
442
+
443
+ ---
444
+
445
+ ## local_llm
446
+
447
+ Offloads selected PBR inference tasks to a locally running Ollama instance, reducing frontier model usage and latency for fast classification calls. The key `enabled` defaults to `false`, so users without Ollama see no change — all LLM calls continue routing to Claude as normal. When enabled, PBR uses a `local_first` routing strategy: fast tasks (artifact classification, task validation) go to the local model; complex tasks (planning, execution) stay on the frontier model.
448
+
449
+ ### Quick setup
450
+
451
+ 1. Install Ollama:
452
+ - **Linux/macOS**: `curl -fsSL https://ollama.com/install.sh | sh`
453
+ - **Windows**: Download from [ollama.com/download](https://ollama.com/download) and run the installer
454
+ 2. Pull the recommended model: `ollama pull qwen2.5-coder:7b`
455
+ 3. Add to `.planning/config.json`:
456
+
457
+ ```json
458
+ "local_llm": {
459
+ "enabled": true,
460
+ "model": "qwen2.5-coder:7b"
461
+ }
462
+ ```
463
+
464
+ 4. Verify connectivity: `node /path/to/plugins/pbr/scripts/pbr-tools.js llm health`
465
+
466
+ ### Field reference
467
+
468
+ | Property | Type | Default | Description |
469
+ |----------|------|---------|-------------|
470
+ | `local_llm.enabled` | boolean | `false` | Enable local LLM offloading; `false` = all calls use frontier |
471
+ | `local_llm.provider` | string | `"ollama"` | Backend provider; only `"ollama"` is supported |
472
+ | `local_llm.endpoint` | string | `"http://localhost:11434"` | Ollama API base URL |
473
+ | `local_llm.model` | string | `"qwen2.5-coder:7b"` | Model tag to use for local inference |
474
+ | `local_llm.timeout_ms` | integer | `3000` | Per-request timeout in milliseconds; must be >= 500 |
475
+ | `local_llm.max_retries` | integer | `1` | Number of retry attempts on failure before falling back |
476
+ | `local_llm.fallback` | string | `"frontier"` | What to use when local LLM fails: `"frontier"` or `"skip"` |
477
+ | `local_llm.routing_strategy` | string | `"local_first"` | `"local_first"` sends fast tasks local; `"always_local"` routes everything |
478
+
479
+ ### features sub-table
480
+
481
+ Controls which PBR tasks are eligible for local LLM offloading.
482
+
483
+ | Property | Default | Description |
484
+ |----------|---------|-------------|
485
+ | `artifact_classification` | `true` | Classify artifact types (PLAN, SUMMARY, VERIFICATION) locally |
486
+ | `task_validation` | `true` | Validate task scope and completeness locally |
487
+ | `context_summarization` | `false` | Summarize context windows locally (higher token demand) |
488
+ | `source_scoring` | `false` | Score research source credibility (S-level) locally |
489
+
490
+ ### advanced sub-table
491
+
492
+ | Property | Default | Description |
493
+ |----------|---------|-------------|
494
+ | `confidence_threshold` | `0.9` | Minimum confidence (0–1) for local output to be accepted; below this, falls back to frontier |
495
+ | `shadow_mode` | `false` | Run local LLM in parallel with frontier but discard local results — useful for tuning confidence thresholds without affecting output |
496
+ | `max_input_tokens` | `2000` | Truncate inputs longer than this before sending to local model |
497
+ | `keep_alive` | `"30m"` | How long Ollama keeps the model loaded between requests (Ollama format: `"5m"`, `"1h"`) |
498
+ | `num_ctx` | `4096` | Context window size passed to Ollama; **must be 4096 on Windows** (see Windows gotchas) |
499
+ | `disable_after_failures` | `3` | Automatically disable local LLM for the session after this many consecutive failures |
500
+
501
+ ### Hardware requirements
502
+
503
+ | Tier | Hardware | Notes |
504
+ |------|----------|-------|
505
+ | Recommended | RTX 3060+ with 8 GB VRAM | Full GPU acceleration; qwen2.5-coder:7b loads entirely in VRAM |
506
+ | Functional | GTX 1660+ with 6 GB VRAM | GPU acceleration with slight layer offload to RAM |
507
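+ | Marginal | CPU only, 32 GB RAM | Works but adds 5-20s latency per call — raise `local_llm.timeout_ms` well above its `3000` ms default, otherwise calls will exceed the timeout and fall back; disable context-heavy features |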
508
+
509
+ For GPU acceleration, ensure NVIDIA drivers are 520+ and CUDA 11.8+ is installed. AMD GPU support is available via ROCm on Linux only.
510
+
511
+ ### Windows gotchas
512
+
513
+ - **Smart App Control**: May block `ollama_llama_server.exe` on first run. Allow it via Security settings or disable Smart App Control.
514
+ - **Windows Defender**: Add an exclusion for `%LOCALAPPDATA%\Programs\Ollama\ollama_llama_server.exe` to prevent Defender from scanning inference calls in real time.
515
+ - **`num_ctx` must be 4096**: Higher values cause GPU memory fragmentation on Windows and result in OOM errors mid-session. Always set `advanced.num_ctx: 4096` in your config.
516
+ - **Firewall**: Ollama listens on `localhost:11434` by default. If you see connection refused errors, check that Windows Firewall is not blocking loopback connections.
517
+
518
+ ### Viewing metrics
519
+
520
+ After enabling local LLM, PBR logs per-call metrics to `.planning/logs/local-llm-metrics.jsonl`. Use the built-in subcommands to inspect them:
521
+
522
+ ```bash
523
+ # Show lifetime metrics summary (calls routed, latency, token savings); add --session <content-of-.session-start> for session-scoped totals
524
+ node plugins/pbr/scripts/pbr-tools.js llm metrics
525
+
526
+ # Suggest routing threshold adjustments based on recent accuracy
527
+ node plugins/pbr/scripts/pbr-tools.js llm adjust-thresholds
528
+ ```
529
+
530
+ Metrics include: routing decision, model used, latency ms, confidence score, whether the frontier fallback was triggered, and estimated tokens saved.
@@ -70,6 +70,28 @@ requirement_ids:
70
70
  | `consumes` | NO | array | What this plan needs from prior plans. Format: `"Thing (from plan XX-YY)"` |
71
71
  | `requirement_ids` | NO | array | Requirement IDs from REQUIREMENTS.md or ROADMAP.md goal IDs that this plan addresses. Enables bidirectional traceability between plans and requirements/goals. |
72
72
  | `dependency_fingerprints` | NO | object | Hashes of dependency phase SUMMARY.md files at plan-creation time. Used to detect stale plans. |
73
+ | `data_contracts` | NO | array | Cross-boundary parameter mappings for calls where arguments originate from external boundaries. Format: `"param: source (context) [fallback]"` |
74
+
75
+ ### Data Contracts
76
+
77
+ When a task's `<action>` includes calls across module boundaries where arguments come from external sources (hook stdin, env vars, API params, config files), document the parameter-to-source mapping both in the `data_contracts` frontmatter and in the `<action>` step itself.
78
+
79
+ Example frontmatter:
80
+
81
+ ```yaml
82
+ data_contracts:
83
+ - "sessionId: data.session_id (hook stdin) [undefined in CLI context]"
84
+ - "config: configLoad(planningDir) (disk) [resolveConfig(undefined)]"
85
+ ```
86
+
87
+ Example in `<action>`:
88
+
89
+ ```
90
+ 3. Call classifyArtifact(llmConfig, planningDir, content, fileType, data.session_id)
91
+ Data contract: sessionId ← data.session_id from hook stdin (undefined in CLI context)
92
+ ```
93
+
94
+ **When to apply:** Any call where caller and callee are in different modules AND at least one argument originates from an external boundary. Internal helper calls within the same module do not need contracts.
73
95
 
74
96
  ---
75
97
 
@@ -64,6 +64,18 @@ function validateConfig(configPath) {
64
64
  }
65
65
  }
66
66
 
67
+ // Advisory: suggest local_llm defaults if the key is absent
68
+ if (!config.local_llm) {
69
+ warnings.push(
70
+ 'local_llm config missing. To enable local LLM offload, add to config.json:\n' +
71
+ '"local_llm": {\n' +
72
+ ' "enabled": false,\n' +
73
+ ' "model": "qwen2.5-coder:7b",\n' +
74
+ ' "endpoint": "http://localhost:11434"\n' +
75
+ '} (set enabled: true after running: ollama pull qwen2.5-coder:7b)'
76
+ );
77
+ }
78
+
67
79
  // Check version
68
80
  if (config.version && config.version < 2) {
69
81
  warnings.push(`Config version ${config.version} is outdated — expected version 2+`);
@@ -90,6 +102,27 @@ function validateConfig(configPath) {
90
102
  }
91
103
  }
92
104
 
105
+ // Validate local_llm block
106
+ if (config.local_llm !== undefined) {
107
+ const llm = config.local_llm;
108
+ if (llm.enabled !== undefined && typeof llm.enabled !== 'boolean') {
109
+ warnings.push('local_llm.enabled must be a boolean');
110
+ }
111
+ if (llm.provider !== undefined && llm.provider !== 'ollama') {
112
+ warnings.push(`local_llm.provider "${llm.provider}" is not supported — use "ollama"`);
113
+ }
114
+ if (llm.timeout_ms !== undefined && (typeof llm.timeout_ms !== 'number' || llm.timeout_ms < 500)) {
115
+ warnings.push('local_llm.timeout_ms must be a number >= 500');
116
+ }
117
+ if (llm.advanced && llm.advanced.num_ctx !== undefined && llm.advanced.num_ctx !== 4096) {
118
+ warnings.push(`local_llm.advanced.num_ctx is ${llm.advanced.num_ctx} — strongly recommend 4096 to avoid GPU memory issues on Windows`);
119
+ }
120
+ if (llm.advanced && llm.advanced.disable_after_failures !== undefined &&
121
+ (typeof llm.advanced.disable_after_failures !== 'number' || llm.advanced.disable_after_failures < 1)) {
122
+ warnings.push('local_llm.advanced.disable_after_failures must be a number >= 1');
123
+ }
124
+ }
125
+
93
126
  return warnings;
94
127
  }
95
128
 
@@ -25,13 +25,29 @@ const path = require('path');
25
25
  const { logHook } = require('./hook-logger');
26
26
  const { logEvent } = require('./event-logger');
27
27
  const { atomicWrite } = require('./pbr-tools');
28
+ const { resolveConfig } = require('./local-llm/health');
29
+ const { classifyArtifact } = require('./local-llm/operations/classify-artifact');
28
30
 
29
- function main() {
31
+ /**
32
+ * Read .planning/config.json under process.cwd() and pass its local_llm block to resolveConfig().
33
+ * If the file cannot be read or parsed, returns resolveConfig(undefined) instead of throwing (presumably a disabled config — confirm in ./local-llm/health).
34
+ */
35
+ function loadLocalLlmConfig() {
36
+ try {
37
+ const configPath = path.join(process.cwd(), '.planning', 'config.json');
38
+ const parsed = JSON.parse(fs.readFileSync(configPath, 'utf8'));
39
+ return resolveConfig(parsed.local_llm);
40
+ } catch (_e) {
41
+ return resolveConfig(undefined);
42
+ }
43
+ }
44
+
45
+ async function main() {
30
46
  let input = '';
31
47
 
32
48
  process.stdin.setEncoding('utf8');
33
49
  process.stdin.on('data', (chunk) => { input += chunk; });
34
- process.stdin.on('end', () => {
50
+ process.stdin.on('end', async () => {
35
51
  try {
36
52
  const data = JSON.parse(input);
37
53
 
@@ -62,6 +78,22 @@ function main() {
62
78
  ? validateRoadmap(content, filePath)
63
79
  : validateSummary(content, filePath);
64
80
 
81
+ // LLM advisory enrichment — advisory only, never blocks
82
+ if ((isPlan || isSummary) && result.errors.length === 0) {
83
+ try {
84
+ const llmConfig = loadLocalLlmConfig();
85
+ const planningDir = path.join(process.cwd(), '.planning');
86
+ const fileType = isPlan ? 'PLAN' : 'SUMMARY';
87
+ const llmResult = await classifyArtifact(llmConfig, planningDir, content, fileType, data.session_id);
88
+ if (llmResult && llmResult.classification) {
89
+ const llmNote = `Local LLM: ${fileType} classified as "${llmResult.classification}" (confidence: ${(llmResult.confidence * 100).toFixed(0)}%)${llmResult.reason ? ' — ' + llmResult.reason : ''}`;
90
+ result.warnings.push(llmNote);
91
+ }
92
+ } catch (_llmErr) {
93
+ // Never propagate LLM errors
94
+ }
95
+ }
96
+
65
97
  const eventType = isPlan ? 'plan-validated' : isVerification ? 'verification-validated' : isRoadmap ? 'roadmap-validated' : 'summary-validated';
66
98
 
67
99
  if (result.errors.length > 0) {
@@ -227,9 +259,9 @@ function validateSummary(content, _filePath) {
227
259
  /**
228
260
  * Core plan/summary check logic for use by dispatchers.
229
261
  * @param {Object} data - Parsed hook input (tool_input, etc.)
230
- * @returns {null|{output: Object}} null if pass or not applicable, result otherwise
262
+ * @returns {Promise<null|{output: Object}>} null if pass or not applicable, result otherwise
231
263
  */
232
- function checkPlanWrite(data) {
264
+ async function checkPlanWrite(data) {
233
265
  const filePath = data.tool_input?.file_path || data.tool_input?.path || '';
234
266
  const basename = path.basename(filePath);
235
267
  const isPlan = basename.endsWith('PLAN.md');
@@ -249,6 +281,22 @@ function checkPlanWrite(data) {
249
281
  ? validateRoadmap(content, filePath)
250
282
  : validateSummary(content, filePath);
251
283
 
284
+ // LLM advisory enrichment — advisory only, never blocks
285
+ if ((isPlan || isSummary) && result.errors.length === 0) {
286
+ try {
287
+ const llmConfig = loadLocalLlmConfig();
288
+ const planningDir = path.join(process.cwd(), '.planning');
289
+ const fileType = isPlan ? 'PLAN' : 'SUMMARY';
290
+ const llmResult = await classifyArtifact(llmConfig, planningDir, content, fileType, data.session_id);
291
+ if (llmResult && llmResult.classification) {
292
+ const llmNote = `Local LLM: ${fileType} classified as "${llmResult.classification}" (confidence: ${(llmResult.confidence * 100).toFixed(0)}%)${llmResult.reason ? ' — ' + llmResult.reason : ''}`;
293
+ result.warnings.push(llmNote);
294
+ }
295
+ } catch (_llmErr) {
296
+ // Never propagate LLM errors
297
+ }
298
+ }
299
+
252
300
  const eventType = isPlan ? 'plan-validated' : isVerification ? 'verification-validated' : isRoadmap ? 'roadmap-validated' : 'summary-validated';
253
301
 
254
302
  if (result.errors.length > 0) {
@@ -20,6 +20,8 @@
20
20
  const fs = require('fs');
21
21
  const path = require('path');
22
22
  const { logHook } = require('./hook-logger');
23
+ const { resolveConfig } = require('./local-llm/health');
24
+ const { classifyError } = require('./local-llm/operations/classify-error');
23
25
 
24
26
  /**
25
27
  * Check if a file was modified recently (within thresholdMs).
@@ -310,7 +312,17 @@ function readStdin() {
310
312
  return {};
311
313
  }
312
314
 
313
- function main() {
315
+ function loadLocalLlmConfig(cwd) { // Resolve the local_llm block of <cwd>/.planning/config.json via resolveConfig()
316
+ try {
317
+ const configPath = path.join(cwd, '.planning', 'config.json');
318
+ const parsed = JSON.parse(fs.readFileSync(configPath, 'utf8'));
319
+ return resolveConfig(parsed.local_llm);
320
+ } catch (_) { // Any failure in the try block falls back to resolveConfig(undefined) rather than throwing
321
+ return resolveConfig(undefined);
322
+ }
323
+ }
324
+
325
+ async function main() {
314
326
  const data = readStdin();
315
327
  const cwd = process.cwd();
316
328
  const planningDir = path.join(cwd, '.planning');
@@ -426,8 +438,22 @@ function main() {
426
438
  agent_type: agentType,
427
439
  warnings: skillWarnings
428
440
  });
441
+ // LLM error classification — advisory enrichment
442
+ let llmCategoryNote = '';
443
+ try {
444
+ const llmConfig = loadLocalLlmConfig(cwd);
445
+ const errorText = (data.tool_output || '').substring(0, 500);
446
+ if (errorText) {
447
+ const llmResult = await classifyError(llmConfig, planningDir, errorText, agentType, data.session_id);
448
+ if (llmResult && llmResult.category) {
449
+ llmCategoryNote = `\nLLM error category: ${llmResult.category} (confidence: ${(llmResult.confidence * 100).toFixed(0)}%)`;
450
+ }
451
+ }
452
+ } catch (_llmErr) {
453
+ // Never propagate
454
+ }
429
455
  const msg = `Warning: Agent ${agentType} completed but no ${outputSpec.description} was found.\nSkill-specific warnings:\n` +
430
- skillWarnings.map(w => `- ${w}`).join('\n');
456
+ skillWarnings.map(w => `- ${w}`).join('\n') + llmCategoryNote;
431
457
  process.stdout.write(JSON.stringify({ additionalContext: msg }));
432
458
  } else if (genericMissing) {
433
459
  logHook('check-subagent-output', 'PostToolUse', 'warning', {
@@ -435,8 +461,22 @@ function main() {
435
461
  expected: outputSpec.description,
436
462
  found: 'none'
437
463
  });
464
+ // LLM error classification — advisory enrichment
465
+ let llmCategoryNote = '';
466
+ try {
467
+ const llmConfig = loadLocalLlmConfig(cwd);
468
+ const errorText = (data.tool_output || '').substring(0, 500);
469
+ if (errorText) {
470
+ const llmResult = await classifyError(llmConfig, planningDir, errorText, agentType, data.session_id);
471
+ if (llmResult && llmResult.category) {
472
+ llmCategoryNote = `\nLLM error category: ${llmResult.category} (confidence: ${(llmResult.confidence * 100).toFixed(0)}%)`;
473
+ }
474
+ }
475
+ } catch (_llmErr) {
476
+ // Never propagate
477
+ }
438
478
  const output = {
439
- additionalContext: `[WARN] Agent ${agentType} completed but no ${outputSpec.description} was found. Likely causes: (1) agent hit an error mid-run, (2) wrong working directory. To fix: re-run the parent skill — the executor gate will block until the output is present. Check the Task() output above for error details.`
479
+ additionalContext: `[WARN] Agent ${agentType} completed but no ${outputSpec.description} was found. Likely causes: (1) agent hit an error mid-run, (2) wrong working directory. To fix: re-run the parent skill — the executor gate will block until the output is present. Check the Task() output above for error details.` + llmCategoryNote
440
480
  };
441
481
  process.stdout.write(JSON.stringify(output));
442
482
  } else if (skillWarnings.length > 0) {