npm - @sienklogic/plan-build-run - Versions diffs - 2.22.2 → 2.24.0 - Mend

@sienklogic/plan-build-run 2.22.2 → 2.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (90)

package/CHANGELOG.md +42 -0
package/dashboard/package.json +3 -2
package/dashboard/src/middleware/errorHandler.js +12 -2
package/dashboard/src/repositories/planning.repository.js +24 -12
package/dashboard/src/routes/pages.routes.js +182 -4
package/dashboard/src/server.js +4 -0
package/dashboard/src/services/audit.service.js +42 -0
package/dashboard/src/services/dashboard.service.js +1 -12
package/dashboard/src/services/local-llm-metrics.service.js +81 -0
package/dashboard/src/services/quick.service.js +62 -0
package/dashboard/src/services/roadmap.service.js +1 -11
package/dashboard/src/utils/strip-bom.js +8 -0
package/dashboard/src/views/audit-detail.ejs +5 -0
package/dashboard/src/views/audits.ejs +5 -0
package/dashboard/src/views/partials/analytics-content.ejs +61 -0
package/dashboard/src/views/partials/audit-detail-content.ejs +12 -0
package/dashboard/src/views/partials/audits-content.ejs +34 -0
package/dashboard/src/views/partials/quick-content.ejs +40 -0
package/dashboard/src/views/partials/quick-detail-content.ejs +29 -0
package/dashboard/src/views/partials/sidebar.ejs +16 -0
package/dashboard/src/views/partials/todos-content.ejs +13 -3
package/dashboard/src/views/quick-detail.ejs +5 -0
package/dashboard/src/views/quick.ejs +5 -0
package/package.json +1 -1
package/plugins/copilot-pbr/agents/debugger.agent.md +15 -0
package/plugins/copilot-pbr/agents/integration-checker.agent.md +9 -2
package/plugins/copilot-pbr/agents/planner.agent.md +19 -0
package/plugins/copilot-pbr/agents/researcher.agent.md +20 -0
package/plugins/copilot-pbr/agents/synthesizer.agent.md +12 -0
package/plugins/copilot-pbr/agents/verifier.agent.md +22 -2
package/plugins/copilot-pbr/plugin.json +1 -1
package/plugins/copilot-pbr/references/config-reference.md +89 -0
package/plugins/copilot-pbr/references/plan-format.md +22 -0
package/plugins/copilot-pbr/skills/health/SKILL.md +8 -1
package/plugins/copilot-pbr/skills/help/SKILL.md +4 -4
package/plugins/copilot-pbr/skills/milestone/SKILL.md +12 -12
package/plugins/copilot-pbr/skills/status/SKILL.md +37 -1
package/plugins/copilot-pbr/templates/INTEGRATION-REPORT.md.tmpl +18 -2
package/plugins/copilot-pbr/templates/VERIFICATION-DETAIL.md.tmpl +2 -1
package/plugins/cursor-pbr/.cursor-plugin/plugin.json +1 -1
package/plugins/cursor-pbr/agents/debugger.md +15 -0
package/plugins/cursor-pbr/agents/integration-checker.md +9 -2
package/plugins/cursor-pbr/agents/planner.md +19 -0
package/plugins/cursor-pbr/agents/researcher.md +20 -0
package/plugins/cursor-pbr/agents/synthesizer.md +12 -0
package/plugins/cursor-pbr/agents/verifier.md +22 -2
package/plugins/cursor-pbr/references/config-reference.md +89 -0
package/plugins/cursor-pbr/references/plan-format.md +22 -0
package/plugins/cursor-pbr/skills/health/SKILL.md +8 -1
package/plugins/cursor-pbr/skills/help/SKILL.md +4 -4
package/plugins/cursor-pbr/skills/milestone/SKILL.md +12 -12
package/plugins/cursor-pbr/skills/status/SKILL.md +37 -1
package/plugins/cursor-pbr/templates/INTEGRATION-REPORT.md.tmpl +18 -2
package/plugins/cursor-pbr/templates/VERIFICATION-DETAIL.md.tmpl +2 -1
package/plugins/pbr/.claude-plugin/plugin.json +1 -1
package/plugins/pbr/agents/debugger.md +15 -0
package/plugins/pbr/agents/integration-checker.md +9 -2
package/plugins/pbr/agents/planner.md +19 -0
package/plugins/pbr/agents/researcher.md +20 -0
package/plugins/pbr/agents/synthesizer.md +12 -0
package/plugins/pbr/agents/verifier.md +22 -2
package/plugins/pbr/references/config-reference.md +89 -0
package/plugins/pbr/references/plan-format.md +22 -0
package/plugins/pbr/scripts/check-config-change.js +33 -0
package/plugins/pbr/scripts/check-plan-format.js +52 -4
package/plugins/pbr/scripts/check-subagent-output.js +43 -3
package/plugins/pbr/scripts/config-schema.json +48 -0
package/plugins/pbr/scripts/local-llm/client.js +214 -0
package/plugins/pbr/scripts/local-llm/health.js +217 -0
package/plugins/pbr/scripts/local-llm/metrics.js +252 -0
package/plugins/pbr/scripts/local-llm/operations/classify-artifact.js +76 -0
package/plugins/pbr/scripts/local-llm/operations/classify-error.js +75 -0
package/plugins/pbr/scripts/local-llm/operations/score-source.js +72 -0
package/plugins/pbr/scripts/local-llm/operations/summarize-context.js +62 -0
package/plugins/pbr/scripts/local-llm/operations/validate-task.js +59 -0
package/plugins/pbr/scripts/local-llm/router.js +101 -0
package/plugins/pbr/scripts/local-llm/shadow.js +60 -0
package/plugins/pbr/scripts/local-llm/threshold-tuner.js +118 -0
package/plugins/pbr/scripts/pbr-tools.js +120 -3
package/plugins/pbr/scripts/post-write-dispatch.js +2 -2
package/plugins/pbr/scripts/progress-tracker.js +29 -3
package/plugins/pbr/scripts/session-cleanup.js +36 -1
package/plugins/pbr/scripts/validate-task.js +30 -1
package/plugins/pbr/skills/health/SKILL.md +8 -1
package/plugins/pbr/skills/help/SKILL.md +4 -4
package/plugins/pbr/skills/milestone/SKILL.md +12 -12
package/plugins/pbr/skills/status/SKILL.md +38 -2
package/plugins/pbr/templates/INTEGRATION-REPORT.md.tmpl +18 -2
package/plugins/pbr/templates/VERIFICATION-DETAIL.md.tmpl +2 -1
package/dashboard/src/views/coming-soon.ejs +0 -11

package/plugins/cursor-pbr/skills/status/SKILL.md CHANGED Viewed

@@ -68,6 +68,31 @@ Read the following files (skip any that don't exist):
 5. **`.planning/REQUIREMENTS.md`** — Requirements (if exists)
    - Extract: requirement completion status if tracked
+### Step 1b: Read Local LLM Stats (advisory — skip on any error)
+After loading config.json, check `local_llm.enabled`. If `true`:
+```bash
+node ${PLUGIN_ROOT}/scripts/pbr-tools.js llm status
+node ${PLUGIN_ROOT}/scripts/pbr-tools.js llm metrics
+```
+Parse both JSON responses. Capture:
+- `status.model` — model name
+- `metrics.total_calls` — lifetime total calls
+- `metrics.tokens_saved` — lifetime frontier tokens saved
+- `metrics.cost_saved_usd` — lifetime cost estimate
+- `metrics.avg_latency_ms` — lifetime average latency
+Also run session-scoped metrics if `.planning/.session-start` exists:
+```bash
+node ${PLUGIN_ROOT}/scripts/pbr-tools.js llm metrics --session <content-of-.session-start>
+```
+If `local_llm.enabled` is `false` or commands fail, skip this step silently.
 ### Step 2: Scan Phase Directories
 For each phase listed in ROADMAP.md:
@@ -191,8 +216,18 @@ Todos: {count} pending. Run `/pbr:todo list` to see them.
 {If notes exist:}
 Notes: {count} quick capture(s). `/pbr:note list` to review.
+{If local_llm.enabled AND total_calls > 0:}
+Local LLM: enabled ({model}, avg {avg_ms}ms)
+This session: {session_calls} calls, ~{session_tokens} frontier tokens saved
+Lifetime: {total_calls} calls, ~{tokens_saved} tokens saved (~{cost_str} at $3/M)
+{If local_llm.enabled AND total_calls == 0:}
+Local LLM: enabled ({model}) — no calls yet
 ```
+The Local LLM block is **advisory only** — it never affects the routing decision or Next Up suggestion.
 ### Progress Bar
 Generate a 20-character progress bar:
@@ -342,9 +377,10 @@ This skill should be fast. It's a status check, not an analysis.
 - Cache nothing (always read fresh state)
 **DO NOT:**
 - Read full SUMMARY.md contents (frontmatter is enough)
 - Read plan file contents (just check existence)
-- Run any Bash commands
+- Run Bash commands except for Step 1b (2-3 `pbr-tools` calls only when `local_llm.enabled: true`, skipped entirely otherwise)
 - Modify any files
 - Invoke any agents

package/plugins/cursor-pbr/templates/INTEGRATION-REPORT.md.tmpl CHANGED Viewed

@@ -112,7 +112,22 @@ Phase 03 (Core)  ──provides──→ Phase 04 (Frontend)
 ### Flow 2: {Flow Name} - {STATUS}
 ...
-## 5. Integration Issues Summary
+## 5. Data-Flow Propagation
+### Cross-Boundary Data Flows
+| Data Field | Source | Intermediate Steps | Destination | Status |
+|------------|--------|-------------------|-------------|--------|
+| {field name} | {origin, e.g., hook stdin `data.session_id`} | {module1:L12 → module2:L45} | {dest, e.g., metrics.jsonl `session_id`} | PROPAGATED |
+| {field name} | {origin} | {module1:L12 → module2:L45} | {dest} | DATA_DROPPED |
+### Data-Flow Issues
+| Field | Dropped At | Available In Scope | Passed Instead | Fix |
+|-------|-----------|-------------------|----------------|-----|
+| {field} | {file:line} | `data.session_id` | `undefined` | Pass `data.session_id` |
+## 6. Integration Issues Summary
 ### Critical Issues (system cannot function)
@@ -131,7 +146,7 @@ Phase 03 (Core)  ──provides──→ Phase 04 (Frontend)
 1. **{Issue}**: {description}
    - Fix: {recommended action}
-## 6. Integration Score
+## 7. Integration Score
 | Category | Items Checked | Passed | Failed | Score |
 |----------|--------------|--------|--------|-------|
@@ -139,6 +154,7 @@ Phase 03 (Core)  ──provides──→ Phase 04 (Frontend)
 | API coverage | {n} | {n} | {n} | {%} |
 | Auth protection | {n} | {n} | {n} | {%} |
 | E2E flows | {n} | {n} | {n} | {%} |
+| Data-flow propagation | {n} | {n} | {n} | {%} |
 | **Overall** | {n} | {n} | {n} | **{%}** |
 ## Recommendations

package/plugins/cursor-pbr/templates/VERIFICATION-DETAIL.md.tmpl CHANGED Viewed

@@ -54,8 +54,9 @@ anti_patterns:
 | # | Link Description | Source | Target | Status | Evidence |
 |---|-----------------|--------|--------|--------|----------|
-| 1 | {what connects to what} | `{source_file}` | `{target_file}` | WIRED | Import at L12, called at L45 |
+| 1 | {what connects to what} | `{source_file}` | `{target_file}` | WIRED | Import at L12, called at L45, args correct |
 | 2 | {what connects to what} | `{source_file}` | `{target_file}` | BROKEN | Imported but never called |
+| 3 | {what connects to what} | `{source_file}` | `{target_file}` | ARGS_WRONG | Called at L45 but passes undefined for sessionId (data.session_id in scope) |
 ## Gaps Found

package/plugins/pbr/.claude-plugin/plugin.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "pbr",
-  "version": "2.22.2",
+  "version": "2.24.0",
   "description": "Plan-Build-Run — Structured development workflow for Claude Code. Solves context rot through disciplined subagent delegation, structured planning, atomic execution, and goal-backward verification.",
   "author": {
     "name": "SienkLogic",

package/plugins/pbr/agents/debugger.md CHANGED Viewed

@@ -144,6 +144,21 @@ Then emit a `DECISION` checkpoint asking the user to approve, modify, or reject
 **Commit format**: `fix({scope}): {description}` with body: `Root cause: ...` and `Debug session: .planning/debug/{slug}.md`
+## Local LLM Error Classification (Optional)
+When you receive an error message or stack trace, you MAY use the local LLM to classify it before starting hypothesis generation. This is advisory — skip it if unavailable.
+```bash
+# Write the error to a temp file, then classify:
+echo "Error text here" > /tmp/debug-error.txt
+node "${CLAUDE_PLUGIN_ROOT}/scripts/pbr-tools.js" llm classify-error /tmp/debug-error.txt debugger 2>/dev/null
+# Returns: {"category":"missing_output","confidence":0.91,"latency_ms":1840,"fallback_used":false}
+```
+Categories: `connection_refused`, `timeout`, `missing_output`, `wrong_output_format`, `permission_error`, `unknown`.
+If classification succeeds, use the returned category to bias your initial hypothesis ranking. If it returns null or fails, proceed with manual hypothesis generation as normal.
 ## Common Bug Patterns
 Reference: `references/common-bug-patterns.md` — covers off-by-one, null/undefined, async/timing, state management, import/module, environment, and data shape patterns.

package/plugins/pbr/agents/integration-checker.md CHANGED Viewed

@@ -40,6 +40,7 @@ You MUST perform all applicable categories (skip only if zero items exist for th
 3. **Auth Protection** — Every non-public route must have auth middleware. Frontend route guards must match backend protection.
 4. **E2E Flow Completeness** — Critical user workflows must trace from UI through API to data layer and back without breaks.
 5. **Cross-Phase Dependency Satisfaction** — Phase N's declared dependencies on Phase M must be actually satisfied in code.
+6. **Data-Flow Propagation** — Values originating at one boundary (hook stdin fields, API request params, env vars) must propagate correctly through the call chain to their destination (log entries, database records, API responses). A connected pipeline with missing data is a broken integration.
 > **First-phase edge case**: If no completed phases exist yet, focus on verifying the current phase's internal consistency — exports match imports within the phase, API contracts are self-consistent. Cross-phase checks are not applicable and should be skipped.
@@ -52,14 +53,19 @@ Read `references/agent-contracts.md` to validate agent-to-agent handoffs. Verify
 - **Write access for output artifact only** — you have Write access for your output artifact only. You CANNOT fix source code — you REPORT issues.
 - **Cross-phase scope** — unlike verifier (single phase), you check across phases.
-## 6-Step Verification Process
+## 7-Step Verification Process
 1. **Build Export/Import Map**: Read each completed phase's SUMMARY.md frontmatter (`requires`, `provides`, `affects`). Grep actual exports/imports in source. Cross-reference declared vs actual — flag mismatches.
 2. **Verify Export Usage**: For each `provides` item: locate actual export (missing = `MISSING_EXPORT` ERROR), find consumers (none = `ORPHANED` WARNING), verify usage not just import (`IMPORTED_UNUSED` WARNING), check signature compatibility (`MISMATCHED` ERROR). Status `CONSUMED` = OK.
 3. **Verify API Coverage**: Discover routes, find frontend callers, match by method+path+body/params. Produce coverage table. See `references/integration-patterns.md` for framework-specific patterns.
 4. **Verify Auth Protection**: Identify auth mechanism, list all routes, classify (public vs protected), check frontend guards. Flag UNPROTECTED routes.
 5. **Verify E2E Flows**: Trace critical workflows step-by-step — verify each step exists and connects to the next (import/call/redirect). Record evidence (file:line). Flow status: COMPLETE | BROKEN | PARTIAL | UNTRACEABLE. See `references/integration-patterns.md` for flow templates.
-6. **Compile Integration Report**: Produce final report with all findings by category.
+6. **Verify Data-Flow Propagation**: For each cross-boundary data field identified in plans or SUMMARY.md, trace the value from source through intermediate functions to destination. Verify the value is actually passed (not `undefined`/`null`/hardcoded) at each step.
+   - **Source examples**: hook stdin (`data.session_id`), API request params, environment variables, config fields
+   - **Destination examples**: log entries, database records, API responses, metric files
+   - **Method**: Grep each intermediate call site and inspect arguments. Flag `DATA_DROPPED` when a value available in scope is replaced by `undefined` or a placeholder.
+   - **Status**: `PROPAGATED` (value flows correctly) | `DATA_DROPPED` (value lost at some step) | `UNTRACEABLE` (cannot determine flow)
+7. **Compile Integration Report**: Produce final report with all findings by category.
 ## Output Format
@@ -124,3 +130,4 @@ See `references/integration-patterns.md` for grep/search patterns by framework.
 - "File exists" is not "component is integrated"
 - Auth middleware existing somewhere does not mean routes are protected
 - Always check error handling paths, not just happy paths
+- Structural connectivity is not data-flow correctness — a connected pipeline can still drop data at any step

package/plugins/pbr/agents/planner.md CHANGED Viewed

@@ -73,6 +73,23 @@ Each must-have maps to one or more tasks. Every task exists to make a must-have
 ---
+## Data Contracts for Cross-Boundary Parameters
+When a function signature includes parameters that flow across module boundaries — session IDs from hook stdin, config objects from disk, auth tokens from environment — the plan **MUST** specify the **source** for each argument, not just the type.
+For every cross-boundary call in a task's `<action>`, document:
+| Parameter | Source | Context | Fallback |
+|-----------|--------|---------|----------|
+| `sessionId` | `data.session_id` (hook stdin) | Hook scripts only | `undefined` (CLI context) |
+| `config` | `configLoad(planningDir)` | All callers | `resolveConfig(undefined)` |
+**When to apply:** Any function call where the caller and callee live in different modules AND at least one argument originates from an external boundary (stdin, env, disk, network). Internal helper calls within the same module do not need contracts.
+**Why this matters:** Without explicit source mapping, executors will use the type-correct but value-wrong default (e.g., `undefined` instead of `data.session_id`). The plan is the single source of truth for how data flows — if the plan says `undefined`, the executor will faithfully implement `undefined`.
+---
 ## Plan Structure
 Read `references/plan-format.md` for the complete plan file specification including:
@@ -172,6 +189,7 @@ When CONTEXT.md or RESEARCH-SUMMARY.md contains `[NEEDS DECISION]` flags from th
    - [ ] Dependencies are acyclic, no file conflicts within same wave
    - [ ] Locked decisions honored, no deferred ideas included
    - [ ] Verify commands are actually executable
+   - [ ] Cross-boundary parameters have documented sources (data contracts)
 ---
@@ -245,3 +263,4 @@ One-line task descriptions in `<name>`. File paths in `<files>`, not explanation
 9. DO NOT plan for features outside the current phase goal
 10. DO NOT assume research is done — check discovery level
 11. DO NOT leave done conditions vague — they must be observable
+12. DO NOT specify literal `undefined` for parameters that have a known source in the calling context — use data contracts to map sources

package/plugins/pbr/agents/researcher.md CHANGED Viewed

@@ -62,6 +62,26 @@ All claims must be attributed to a source level. Higher levels override lower le
 **Offline Fallback**: If web tools are unavailable (air-gapped environment, MCP not configured), rely on local sources: codebase analysis via Glob/Grep, existing documentation, and README files. Assign these S3-S4 confidence levels. Do not attempt WebFetch or WebSearch — note in the output header that external sources were unavailable.
+## Local LLM Source Scoring (Optional)
+If local LLM offload is configured, you MAY use it to score source credibility instead of manually assigning S-levels. This is advisory — never wait on it or fail if it returns null.
+Check availability first:
+```bash
+node "${CLAUDE_PLUGIN_ROOT}/scripts/pbr-tools.js" llm status 2>/dev/null
+```
+If `enabled: true`, score a source excerpt:
+```bash
+echo "Source URL and content excerpt" > /tmp/source-excerpt.txt
+node "${CLAUDE_PLUGIN_ROOT}/scripts/pbr-tools.js" llm score-source "https://example.com/docs" /tmp/source-excerpt.txt 2>/dev/null
+# Returns: {"level":"S2","confidence":0.87,"reason":"Official library documentation page"}
+```
+Use the returned `level` to set your source tag. If the call fails or returns `null`, assign the level manually per the hierarchy table above.
 ---
 ## Confidence Levels

package/plugins/pbr/agents/synthesizer.md CHANGED Viewed

@@ -100,6 +100,18 @@ conflicts: N
 - **Research gaps**: Add `[RESEARCH GAP]` flag, add to Open Questions with high impact, never fabricate
 - **Duplicates**: Consolidate into one entry, note multi-source agreement, reference all documents
+## Local LLM Context Summarization (Optional)
+When input research documents are large (>2000 words combined), you MAY use the local LLM to pre-summarize each document before synthesis. This reduces your own context consumption. Advisory only — if unavailable, read documents normally.
+```bash
+# Pre-summarize a large research document to ~150 words:
+node "${CLAUDE_PLUGIN_ROOT}/scripts/pbr-tools.js" llm summarize /path/to/RESEARCH.md 150 2>/dev/null
+# Returns: {"summary":"...plain text summary under 150 words...","latency_ms":2100,"fallback_used":false}
+```
+Use the returned `summary` string as your working copy of that document's findings. Still read the original for any specific version numbers, code examples, or direct quotes needed in the output.
 ## Anti-Patterns
 ### Universal Anti-Patterns

package/plugins/pbr/agents/verifier.md CHANGED Viewed

@@ -101,10 +101,29 @@ Verify the artifact is imported AND used by other parts of the system (functions
 | Yes | Yes | No | UNWIRED |
 | Yes | Yes | Yes | PASSED |
+> **Note:** WIRED status (Level 3) requires correct arguments, not just correct function names. A call that passes `undefined` for a parameter available in scope is `ARGS_WRONG`, not `WIRED`.
 ### Step 6: Verify Key Links (Always)
 For each key_link: identify source and target components, verify the import path resolves, verify the imported symbol is actually called/used, and verify call signatures match. Watch for: wrong import paths, imported-but-never-called symbols, defined-but-never-applied middleware, registered-but-never-triggered event handlers.
+### Step 6b: Argument-Level Spot Checks (Always)
+Beyond verifying that calls exist, spot-check that **arguments passed to cross-boundary calls carry the correct values**. A call with the right function but wrong arguments is effectively UNWIRED.
+**Focus on:** IDs (session, user, request), config objects, auth tokens, and context data that originate from external boundaries (stdin, env, disk).
+**Method:**
+1. For each key_link verified in Step 6, grep the call site and inspect the arguments
+2. Compare each argument against the data source available in the calling scope
+3. Flag any argument that passes `undefined`, `null`, or a hardcoded placeholder when the calling scope has the real value available (e.g., `data.session_id` is in scope but `undefined` is passed)
+**Classification:**
+- `WIRED` requires both correct function AND correct arguments
+- `ARGS_WRONG` = correct function called but one or more arguments are incorrect/missing — this is a key link gap
+**Example:** A hook script receives `data` from stdin containing `session_id`. If it calls `logMetric(planningDir, { session_id: undefined })` instead of `logMetric(planningDir, { session_id: data.session_id })`, that is an `ARGS_WRONG` gap even though the call itself exists.
 ### Step 7: Check Requirements Coverage (Always)
 Cross-reference all must-haves against verification results in a table:
@@ -113,8 +132,8 @@ Cross-reference all must-haves against verification results in a table:
 | # | Must-Have | Type | L1 (Exists) | L2 (Substantive) | L3 (Wired) | Status |
 |---|----------|------|-------------|-------------------|------------|--------|
 | 1 | {description} | truth | - | - | - | VERIFIED/FAILED |
-| 2 | {description} | artifact | YES/NO | YES/STUB/PARTIAL | WIRED/ORPHANED | PASS/FAIL |
-| 3 | {description} | key_link | - | - | YES/NO | PASS/FAIL |
+| 2 | {description} | artifact | YES/NO | YES/STUB/PARTIAL | WIRED/ORPHANED/ARGS_WRONG | PASS/FAIL |
+| 3 | {description} | key_link | - | - | YES/NO/ARGS_WRONG | PASS/FAIL |
 ```
 ### Step 8: Scan for Anti-Patterns (Full Verification Only)
@@ -232,3 +251,4 @@ Read `references/stub-patterns.md` for stub detection patterns by technology. Re
 9. DO NOT give PASSED status if ANY must-have fails at ANY level
 10. DO NOT count deferred items as gaps — they are intentionally not implemented
 11. DO NOT be lenient — your job is to find problems, not to be encouraging
+12. DO NOT mark a call as WIRED if it passes hardcoded `undefined`/`null` for parameters that have a known source in scope — check arguments, not just function names

package/plugins/pbr/references/config-reference.md CHANGED Viewed

@@ -439,3 +439,92 @@ Run validation with: `node plugins/pbr/scripts/pbr-tools.js config validate`
 | `tdd_mode: true` + `depth: quick` | quick depth skips verification, which conflicts with TDD's verify-first approach |
 | `git.mode: disabled` + `atomic_commits: true` | atomic_commits has no effect when git is disabled |
 | `git.branching: phase` + `git.mode: disabled` | Branching settings are ignored when git is disabled |
+---
+## local_llm
+Offloads selected PBR inference tasks to a locally running Ollama instance, reducing frontier model usage and latency for fast classification calls. The key `enabled` defaults to `false`, so users without Ollama see no change — all LLM calls continue routing to Claude as normal. When enabled, PBR uses a `local_first` routing strategy: fast tasks (artifact classification, task validation) go to the local model; complex tasks (planning, execution) stay on the frontier model.
+### Quick setup
+1. Install Ollama:
+   - **Linux/macOS**: `curl -fsSL https://ollama.com/install.sh | sh`
+   - **Windows**: Download from [ollama.com/download](https://ollama.com/download) and run the installer
+2. Pull the recommended model: `ollama pull qwen2.5-coder:7b`
+3. Add to `.planning/config.json`:
+   ```json
+   "local_llm": {
+     "enabled": true,
+     "model": "qwen2.5-coder:7b"
+   }
+   ```
+4. Verify connectivity: `node /path/to/plugins/pbr/scripts/pbr-tools.js llm health`
+### Field reference
+| Property | Type | Default | Description |
+|----------|------|---------|-------------|
+| `local_llm.enabled` | boolean | `false` | Enable local LLM offloading; `false` = all calls use frontier |
+| `local_llm.provider` | string | `"ollama"` | Backend provider; only `"ollama"` is supported |
+| `local_llm.endpoint` | string | `"http://localhost:11434"` | Ollama API base URL |
+| `local_llm.model` | string | `"qwen2.5-coder:7b"` | Model tag to use for local inference |
+| `local_llm.timeout_ms` | integer | `3000` | Per-request timeout in milliseconds; >= 500 |
+| `local_llm.max_retries` | integer | `1` | Number of retry attempts on failure before falling back |
+| `local_llm.fallback` | string | `"frontier"` | What to use when local LLM fails: `"frontier"` or `"skip"` |
+| `local_llm.routing_strategy` | string | `"local_first"` | `"local_first"` sends fast tasks local; `"always_local"` routes everything |
+### features sub-table
+Controls which PBR tasks are eligible for local LLM offloading.
+| Property | Default | Description |
+|----------|---------|-------------|
+| `artifact_classification` | `true` | Classify artifact types (PLAN, SUMMARY, VERIFICATION) locally |
+| `task_validation` | `true` | Validate task scope and completeness locally |
+| `context_summarization` | `false` | Summarize context windows locally (higher token demand) |
+| `source_scoring` | `false` | Score research source credibility (S-levels) locally |
+### advanced sub-table
+| Property | Default | Description |
+|----------|---------|-------------|
+| `confidence_threshold` | `0.9` | Minimum confidence (0–1) for local output to be accepted; below this, falls back to frontier |
+| `shadow_mode` | `false` | Run local LLM in parallel with frontier but discard local results — useful for tuning confidence thresholds without affecting output |
+| `max_input_tokens` | `2000` | Truncate inputs longer than this before sending to local model |
+| `keep_alive` | `"30m"` | How long Ollama keeps the model loaded between requests (Ollama format: `"5m"`, `"1h"`) |
+| `num_ctx` | `4096` | Context window size passed to Ollama; **must be 4096 on Windows** (see Windows gotchas) |
+| `disable_after_failures` | `3` | Automatically disable local LLM for the session after this many consecutive failures |
+### Hardware requirements
+| Tier | Hardware | Notes |
+|------|----------|-------|
+| Recommended | RTX 3060+ with 8 GB VRAM | Full GPU acceleration; qwen2.5-coder:7b loads entirely in VRAM |
+| Functional | GTX 1660+ with 6 GB VRAM | GPU acceleration with slight layer offload to RAM |
+| Marginal | CPU only, 32 GB RAM | Works but adds 5-20s latency per call; disable context-heavy features |
+For GPU acceleration, ensure NVIDIA drivers are 520+ and CUDA 11.8+ is installed. AMD GPU support is available via ROCm on Linux only.
+### Windows gotchas
+- **Smart App Control**: May block `ollama_llama_server.exe` on first run. Allow it via Security settings or disable Smart App Control.
+- **Windows Defender**: Add an exclusion for `%LOCALAPPDATA%\Programs\Ollama\ollama_llama_server.exe` to prevent Defender from scanning inference calls in real time.
+- **`num_ctx` must be 4096**: Higher values cause GPU memory fragmentation on Windows and result in OOM errors mid-session. Always set `advanced.num_ctx: 4096` in your config.
+- **Firewall**: Ollama listens on `localhost:11434` by default. If you see connection refused errors, check that Windows Firewall is not blocking loopback connections.
+### Viewing metrics
+After enabling local LLM, PBR logs per-call metrics to `.planning/logs/local-llm-metrics.jsonl`. Use the built-in subcommands to inspect them:
+```bash
+# Show lifetime metrics summary (calls routed, latency, token savings)
+node plugins/pbr/scripts/pbr-tools.js llm metrics
+# Suggest routing threshold adjustments based on recent accuracy
+node plugins/pbr/scripts/pbr-tools.js llm adjust-thresholds
+```
+Metrics include: routing decision, model used, latency ms, confidence score, whether the frontier fallback was triggered, and estimated tokens saved.

package/plugins/pbr/references/plan-format.md CHANGED Viewed

@@ -70,6 +70,28 @@ requirement_ids:
 | `consumes` | NO | array | What this plan needs from prior plans. Format: `"Thing (from plan XX-YY)"` |
 | `requirement_ids` | NO | array | Requirement IDs from REQUIREMENTS.md or ROADMAP.md goal IDs that this plan addresses. Enables bidirectional traceability between plans and requirements/goals. |
 | `dependency_fingerprints` | NO | object | Hashes of dependency phase SUMMARY.md files at plan-creation time. Used to detect stale plans. |
+| `data_contracts` | NO | array | Cross-boundary parameter mappings for calls where arguments originate from external boundaries. Format: `"param: source (context) [fallback]"` |
+### Data Contracts
+When a task's `<action>` includes calls across module boundaries where arguments come from external sources (hook stdin, env vars, API params, config files), document the parameter-to-source mapping in `data_contracts` frontmatter and in the `<action>` step itself.
+Example frontmatter:
+```yaml
+data_contracts:
+  - "sessionId: data.session_id (hook stdin) [undefined in CLI context]"
+  - "config: configLoad(planningDir) (disk) [resolveConfig(undefined)]"
+```
+Example in `<action>`:
+```
+3. Call classifyArtifact(llmConfig, planningDir, content, fileType, data.session_id)
+   Data contract: sessionId ← data.session_id from hook stdin (undefined in CLI context)
+```
+**When to apply:** Any call where caller and callee are in different modules AND at least one argument originates from an external boundary. Internal helper calls within the same module do not need contracts.
 ---

package/plugins/pbr/scripts/check-config-change.js CHANGED Viewed

@@ -64,6 +64,18 @@ function validateConfig(configPath) {
     }
   }
+  // Advisory: suggest local_llm defaults if the key is absent
+  if (!config.local_llm) {
+    warnings.push(
+      'local_llm config missing. To enable local LLM offload, add to config.json:\n' +
+      '"local_llm": {\n' +
+      '  "enabled": false,\n' +
+      '  "model": "qwen2.5-coder:7b",\n' +
+      '  "endpoint": "http://localhost:11434"\n' +
+      '} (set enabled: true after running: ollama pull qwen2.5-coder:7b)'
+    );
+  }
   // Check version
   if (config.version && config.version < 2) {
     warnings.push(`Config version ${config.version} is outdated — expected version 2+`);
@@ -90,6 +102,27 @@ function validateConfig(configPath) {
     }
   }
+  // Validate local_llm block
+  if (config.local_llm !== undefined) {
+    const llm = config.local_llm;
+    if (llm.enabled !== undefined && typeof llm.enabled !== 'boolean') {
+      warnings.push('local_llm.enabled must be a boolean');
+    }
+    if (llm.provider !== undefined && llm.provider !== 'ollama') {
+      warnings.push(`local_llm.provider "${llm.provider}" is not supported — use "ollama"`);
+    }
+    if (llm.timeout_ms !== undefined && (typeof llm.timeout_ms !== 'number' || llm.timeout_ms < 500)) {
+      warnings.push('local_llm.timeout_ms must be a number >= 500');
+    }
+    if (llm.advanced && llm.advanced.num_ctx !== undefined && llm.advanced.num_ctx !== 4096) {
+      warnings.push(`local_llm.advanced.num_ctx is ${llm.advanced.num_ctx} — strongly recommend 4096 to avoid GPU memory issues on Windows`);
+    }
+    if (llm.advanced && llm.advanced.disable_after_failures !== undefined &&
+        (typeof llm.advanced.disable_after_failures !== 'number' || llm.advanced.disable_after_failures < 1)) {
+      warnings.push('local_llm.advanced.disable_after_failures must be a number >= 1');
+    }
+  }
   return warnings;
 }

package/plugins/pbr/scripts/check-plan-format.js CHANGED Viewed

@@ -25,13 +25,29 @@ const path = require('path');
 const { logHook } = require('./hook-logger');
 const { logEvent } = require('./event-logger');
 const { atomicWrite } = require('./pbr-tools');
+const { resolveConfig } = require('./local-llm/health');
+const { classifyArtifact } = require('./local-llm/operations/classify-artifact');
-function main() {
+/**
+ * Load and resolve the local_llm config block from .planning/config.json.
+ * Returns a resolved config (always safe to use — disabled by default on error).
+ *
+ * The file is looked up under process.cwd(). Any exception raised while
+ * reading, parsing, or resolving the config is caught, and the result of
+ * resolveConfig(undefined) is returned instead.
+ *
+ * @returns {*} Whatever resolveConfig returns; its shape is defined in
+ *   ./local-llm/health, which is not visible here — confirm against that module.
+ */
+function loadLocalLlmConfig() {
+  try {
+    const configPath = path.join(process.cwd(), '.planning', 'config.json');
+    const parsed = JSON.parse(fs.readFileSync(configPath, 'utf8'));
+    return resolveConfig(parsed.local_llm); // local_llm is passed through as-is, including undefined when the key is absent
+  } catch (_e) { // best-effort: a missing/unreadable file or invalid JSON must not break the hook
+    return resolveConfig(undefined);
+  }
+}
+async function main() {
   let input = '';
   process.stdin.setEncoding('utf8');
   process.stdin.on('data', (chunk) => { input += chunk; });
-  process.stdin.on('end', () => {
+  process.stdin.on('end', async () => {
     try {
       const data = JSON.parse(input);
@@ -62,6 +78,22 @@ function main() {
             ? validateRoadmap(content, filePath)
             : validateSummary(content, filePath);
+      // LLM advisory enrichment — advisory only, never blocks
+      if ((isPlan || isSummary) && result.errors.length === 0) {
+        try {
+          const llmConfig = loadLocalLlmConfig();
+          const planningDir = path.join(process.cwd(), '.planning');
+          const fileType = isPlan ? 'PLAN' : 'SUMMARY';
+          const llmResult = await classifyArtifact(llmConfig, planningDir, content, fileType, data.session_id);
+          if (llmResult && llmResult.classification) {
+            const llmNote = `Local LLM: ${fileType} classified as "${llmResult.classification}" (confidence: ${(llmResult.confidence * 100).toFixed(0)}%)${llmResult.reason ? ' — ' + llmResult.reason : ''}`;
+            result.warnings.push(llmNote);
+          }
+        } catch (_llmErr) {
+          // Never propagate LLM errors
+        }
+      }
       const eventType = isPlan ? 'plan-validated' : isVerification ? 'verification-validated' : isRoadmap ? 'roadmap-validated' : 'summary-validated';
       if (result.errors.length > 0) {
@@ -227,9 +259,9 @@ function validateSummary(content, _filePath) {
 /**
  * Core plan/summary check logic for use by dispatchers.
  * @param {Object} data - Parsed hook input (tool_input, etc.)
- * @returns {null|{output: Object}} null if pass or not applicable, result otherwise
+ * @returns {Promise<null|{output: Object}>} null if pass or not applicable, result otherwise
  */
-function checkPlanWrite(data) {
+async function checkPlanWrite(data) {
   const filePath = data.tool_input?.file_path || data.tool_input?.path || '';
   const basename = path.basename(filePath);
   const isPlan = basename.endsWith('PLAN.md');
@@ -249,6 +281,22 @@ function checkPlanWrite(data) {
         ? validateRoadmap(content, filePath)
         : validateSummary(content, filePath);
+  // LLM advisory enrichment — advisory only, never blocks
+  if ((isPlan || isSummary) && result.errors.length === 0) {
+    try {
+      const llmConfig = loadLocalLlmConfig();
+      const planningDir = path.join(process.cwd(), '.planning');
+      const fileType = isPlan ? 'PLAN' : 'SUMMARY';
+      const llmResult = await classifyArtifact(llmConfig, planningDir, content, fileType, data.session_id);
+      if (llmResult && llmResult.classification) {
+        const llmNote = `Local LLM: ${fileType} classified as "${llmResult.classification}" (confidence: ${(llmResult.confidence * 100).toFixed(0)}%)${llmResult.reason ? ' — ' + llmResult.reason : ''}`;
+        result.warnings.push(llmNote);
+      }
+    } catch (_llmErr) {
+      // Never propagate LLM errors
+    }
+  }
   const eventType = isPlan ? 'plan-validated' : isVerification ? 'verification-validated' : isRoadmap ? 'roadmap-validated' : 'summary-validated';
   if (result.errors.length > 0) {

package/plugins/pbr/scripts/check-subagent-output.js CHANGED Viewed

@@ -20,6 +20,8 @@
 const fs = require('fs');
 const path = require('path');
 const { logHook } = require('./hook-logger');
+const { resolveConfig } = require('./local-llm/health');
+const { classifyError } = require('./local-llm/operations/classify-error');
 /**
  * Check if a file was modified recently (within thresholdMs).
@@ -310,7 +312,17 @@ function readStdin() {
   return {};
 }
-function main() {
+function loadLocalLlmConfig(cwd) { // Resolve the local_llm block of <cwd>/.planning/config.json via resolveConfig.
+  try {
+    const configPath = path.join(cwd, '.planning', 'config.json');
+    const parsed = JSON.parse(fs.readFileSync(configPath, 'utf8'));
+    return resolveConfig(parsed.local_llm); // local_llm is passed through as-is, including undefined when the key is absent
+  } catch (_) { // any failure while reading, parsing, or resolving falls back to resolveConfig(undefined) instead of throwing
+    return resolveConfig(undefined);
+  }
+}
+async function main() {
   const data = readStdin();
   const cwd = process.cwd();
   const planningDir = path.join(cwd, '.planning');
@@ -426,8 +438,22 @@ function main() {
       agent_type: agentType,
       warnings: skillWarnings
     });
+    // LLM error classification — advisory enrichment
+    let llmCategoryNote = '';
+    try {
+      const llmConfig = loadLocalLlmConfig(cwd);
+      const errorText = (data.tool_output || '').substring(0, 500);
+      if (errorText) {
+        const llmResult = await classifyError(llmConfig, planningDir, errorText, agentType, data.session_id);
+        if (llmResult && llmResult.category) {
+          llmCategoryNote = `\nLLM error category: ${llmResult.category} (confidence: ${(llmResult.confidence * 100).toFixed(0)}%)`;
+        }
+      }
+    } catch (_llmErr) {
+      // Never propagate
+    }
     const msg = `Warning: Agent ${agentType} completed but no ${outputSpec.description} was found.\nSkill-specific warnings:\n` +
-      skillWarnings.map(w => `- ${w}`).join('\n');
+      skillWarnings.map(w => `- ${w}`).join('\n') + llmCategoryNote;
     process.stdout.write(JSON.stringify({ additionalContext: msg }));
   } else if (genericMissing) {
     logHook('check-subagent-output', 'PostToolUse', 'warning', {
@@ -435,8 +461,22 @@ function main() {
       expected: outputSpec.description,
       found: 'none'
     });
+    // LLM error classification — advisory enrichment
+    let llmCategoryNote = '';
+    try {
+      const llmConfig = loadLocalLlmConfig(cwd);
+      const errorText = (data.tool_output || '').substring(0, 500);
+      if (errorText) {
+        const llmResult = await classifyError(llmConfig, planningDir, errorText, agentType, data.session_id);
+        if (llmResult && llmResult.category) {
+          llmCategoryNote = `\nLLM error category: ${llmResult.category} (confidence: ${(llmResult.confidence * 100).toFixed(0)}%)`;
+        }
+      }
+    } catch (_llmErr) {
+      // Never propagate
+    }
     const output = {
-      additionalContext: `[WARN] Agent ${agentType} completed but no ${outputSpec.description} was found. Likely causes: (1) agent hit an error mid-run, (2) wrong working directory. To fix: re-run the parent skill — the executor gate will block until the output is present. Check the Task() output above for error details.`
+      additionalContext: `[WARN] Agent ${agentType} completed but no ${outputSpec.description} was found. Likely causes: (1) agent hit an error mid-run, (2) wrong working directory. To fix: re-run the parent skill — the executor gate will block until the output is present. Check the Task() output above for error details.` + llmCategoryNote
     };
     process.stdout.write(JSON.stringify(output));
   } else if (skillWarnings.length > 0) {