@sienklogic/plan-build-run 2.22.2 → 2.23.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +27 -0
- package/dashboard/package.json +2 -1
- package/dashboard/src/middleware/errorHandler.js +12 -2
- package/dashboard/src/repositories/planning.repository.js +23 -1
- package/dashboard/src/routes/pages.routes.js +65 -2
- package/dashboard/src/services/local-llm-metrics.service.js +81 -0
- package/dashboard/src/services/quick.service.js +62 -0
- package/dashboard/src/views/partials/analytics-content.ejs +61 -0
- package/dashboard/src/views/partials/quick-content.ejs +40 -0
- package/dashboard/src/views/partials/quick-detail-content.ejs +29 -0
- package/dashboard/src/views/partials/sidebar.ejs +8 -0
- package/dashboard/src/views/quick-detail.ejs +5 -0
- package/dashboard/src/views/quick.ejs +5 -0
- package/package.json +1 -1
- package/plugins/copilot-pbr/agents/debugger.agent.md +15 -0
- package/plugins/copilot-pbr/agents/researcher.agent.md +20 -0
- package/plugins/copilot-pbr/agents/synthesizer.agent.md +12 -0
- package/plugins/copilot-pbr/plugin.json +1 -1
- package/plugins/copilot-pbr/references/config-reference.md +89 -0
- package/plugins/copilot-pbr/skills/health/SKILL.md +8 -1
- package/plugins/copilot-pbr/skills/help/SKILL.md +4 -4
- package/plugins/copilot-pbr/skills/milestone/SKILL.md +12 -12
- package/plugins/copilot-pbr/skills/status/SKILL.md +37 -1
- package/plugins/cursor-pbr/.cursor-plugin/plugin.json +1 -1
- package/plugins/cursor-pbr/agents/debugger.md +15 -0
- package/plugins/cursor-pbr/agents/researcher.md +20 -0
- package/plugins/cursor-pbr/agents/synthesizer.md +12 -0
- package/plugins/cursor-pbr/references/config-reference.md +89 -0
- package/plugins/cursor-pbr/skills/health/SKILL.md +8 -1
- package/plugins/cursor-pbr/skills/help/SKILL.md +4 -4
- package/plugins/cursor-pbr/skills/milestone/SKILL.md +12 -12
- package/plugins/cursor-pbr/skills/status/SKILL.md +37 -1
- package/plugins/pbr/.claude-plugin/plugin.json +1 -1
- package/plugins/pbr/agents/debugger.md +15 -0
- package/plugins/pbr/agents/researcher.md +20 -0
- package/plugins/pbr/agents/synthesizer.md +12 -0
- package/plugins/pbr/references/config-reference.md +89 -0
- package/plugins/pbr/scripts/check-config-change.js +33 -0
- package/plugins/pbr/scripts/check-plan-format.js +52 -4
- package/plugins/pbr/scripts/check-subagent-output.js +43 -3
- package/plugins/pbr/scripts/config-schema.json +48 -0
- package/plugins/pbr/scripts/local-llm/client.js +214 -0
- package/plugins/pbr/scripts/local-llm/health.js +217 -0
- package/plugins/pbr/scripts/local-llm/metrics.js +252 -0
- package/plugins/pbr/scripts/local-llm/operations/classify-artifact.js +76 -0
- package/plugins/pbr/scripts/local-llm/operations/classify-error.js +75 -0
- package/plugins/pbr/scripts/local-llm/operations/score-source.js +72 -0
- package/plugins/pbr/scripts/local-llm/operations/summarize-context.js +62 -0
- package/plugins/pbr/scripts/local-llm/operations/validate-task.js +59 -0
- package/plugins/pbr/scripts/local-llm/router.js +101 -0
- package/plugins/pbr/scripts/local-llm/shadow.js +60 -0
- package/plugins/pbr/scripts/local-llm/threshold-tuner.js +118 -0
- package/plugins/pbr/scripts/pbr-tools.js +120 -3
- package/plugins/pbr/scripts/post-write-dispatch.js +2 -2
- package/plugins/pbr/scripts/progress-tracker.js +29 -3
- package/plugins/pbr/scripts/session-cleanup.js +36 -1
- package/plugins/pbr/scripts/validate-task.js +30 -1
- package/plugins/pbr/skills/health/SKILL.md +8 -1
- package/plugins/pbr/skills/help/SKILL.md +4 -4
- package/plugins/pbr/skills/milestone/SKILL.md +12 -12
- package/plugins/pbr/skills/status/SKILL.md +38 -2
|
@@ -62,6 +62,26 @@ All claims must be attributed to a source level. Higher levels override lower le
|
|
|
62
62
|
|
|
63
63
|
**Offline Fallback**: If web tools are unavailable (air-gapped environment, MCP not configured), rely on local sources: codebase analysis via Glob/Grep, existing documentation, and README files. Assign these S3-S4 confidence levels. Do not attempt WebFetch or WebSearch — note in the output header that external sources were unavailable.
|
|
64
64
|
|
|
65
|
+
## Local LLM Source Scoring (Optional)
|
|
66
|
+
|
|
67
|
+
If local LLM offload is configured, you MAY use it to score source credibility instead of manually assigning S-levels. This is advisory — never wait on it or fail if it returns null.
|
|
68
|
+
|
|
69
|
+
Check availability first:
|
|
70
|
+
|
|
71
|
+
```bash
|
|
72
|
+
node "${CLAUDE_PLUGIN_ROOT}/scripts/pbr-tools.js" llm status 2>/dev/null
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
If `enabled: true`, score a source excerpt:
|
|
76
|
+
|
|
77
|
+
```bash
|
|
78
|
+
echo "Source URL and content excerpt" > /tmp/source-excerpt.txt
|
|
79
|
+
node "${CLAUDE_PLUGIN_ROOT}/scripts/pbr-tools.js" llm score-source "https://example.com/docs" /tmp/source-excerpt.txt 2>/dev/null
|
|
80
|
+
# Returns: {"level":"S2","confidence":0.87,"reason":"Official library documentation page"}
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
Use the returned `level` to set your source tag. If the call fails or returns `null`, assign the level manually per the hierarchy table above.
|
|
84
|
+
|
|
65
85
|
---
|
|
66
86
|
|
|
67
87
|
## Confidence Levels
|
|
@@ -100,6 +100,18 @@ conflicts: N
|
|
|
100
100
|
- **Research gaps**: Add `[RESEARCH GAP]` flag, add to Open Questions with high impact, never fabricate
|
|
101
101
|
- **Duplicates**: Consolidate into one entry, note multi-source agreement, reference all documents
|
|
102
102
|
|
|
103
|
+
## Local LLM Context Summarization (Optional)
|
|
104
|
+
|
|
105
|
+
When input research documents are large (>2000 words combined), you MAY use the local LLM to pre-summarize each document before synthesis. This reduces your own context consumption. Advisory only — if unavailable, read documents normally.
|
|
106
|
+
|
|
107
|
+
```bash
|
|
108
|
+
# Pre-summarize a large research document to ~150 words:
|
|
109
|
+
node "${CLAUDE_PLUGIN_ROOT}/scripts/pbr-tools.js" llm summarize /path/to/RESEARCH.md 150 2>/dev/null
|
|
110
|
+
# Returns: {"summary":"...plain text summary under 150 words...","latency_ms":2100,"fallback_used":false}
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
Use the returned `summary` string as your working copy of that document's findings. Still read the original for any specific version numbers, code examples, or direct quotes needed in the output.
|
|
114
|
+
|
|
103
115
|
## Anti-Patterns
|
|
104
116
|
|
|
105
117
|
### Universal Anti-Patterns
|
|
@@ -439,3 +439,92 @@ Run validation with: `node plugins/pbr/scripts/pbr-tools.js config validate`
|
|
|
439
439
|
| `tdd_mode: true` + `depth: quick` | quick depth skips verification, which conflicts with TDD's verify-first approach |
|
|
440
440
|
| `git.mode: disabled` + `atomic_commits: true` | atomic_commits has no effect when git is disabled |
|
|
441
441
|
| `git.branching: phase` + `git.mode: disabled` | Branching settings are ignored when git is disabled |
|
|
442
|
+
|
|
443
|
+
---
|
|
444
|
+
|
|
445
|
+
## local_llm
|
|
446
|
+
|
|
447
|
+
Offloads selected PBR inference tasks to a locally running Ollama instance, reducing frontier model usage and latency for fast classification calls. The key `enabled` defaults to `false`, so users without Ollama see no change — all LLM calls continue routing to Claude as normal. When enabled, PBR uses a `local_first` routing strategy: fast tasks (artifact classification, task validation) go to the local model; complex tasks (planning, execution) stay on the frontier model.
|
|
448
|
+
|
|
449
|
+
### Quick setup
|
|
450
|
+
|
|
451
|
+
1. Install Ollama:
|
|
452
|
+
- **Linux/macOS**: `curl -fsSL https://ollama.com/install.sh | sh`
|
|
453
|
+
- **Windows**: Download from [ollama.com/download](https://ollama.com/download) and run the installer
|
|
454
|
+
2. Pull the recommended model: `ollama pull qwen2.5-coder:7b`
|
|
455
|
+
3. Add to `.planning/config.json`:
|
|
456
|
+
|
|
457
|
+
```json
|
|
458
|
+
"local_llm": {
|
|
459
|
+
"enabled": true,
|
|
460
|
+
"model": "qwen2.5-coder:7b"
|
|
461
|
+
}
|
|
462
|
+
```
|
|
463
|
+
|
|
464
|
+
4. Verify connectivity: `node /path/to/plugins/pbr/scripts/pbr-tools.js llm health`
|
|
465
|
+
|
|
466
|
+
### Field reference
|
|
467
|
+
|
|
468
|
+
| Property | Type | Default | Description |
|
|
469
|
+
|----------|------|---------|-------------|
|
|
470
|
+
| `local_llm.enabled` | boolean | `false` | Enable local LLM offloading; `false` = all calls use frontier |
|
|
471
|
+
| `local_llm.provider` | string | `"ollama"` | Backend provider; only `"ollama"` is supported |
|
|
472
|
+
| `local_llm.endpoint` | string | `"http://localhost:11434"` | Ollama API base URL |
|
|
473
|
+
| `local_llm.model` | string | `"qwen2.5-coder:7b"` | Model tag to use for local inference |
|
|
474
|
+
| `local_llm.timeout_ms` | integer | `3000` | Per-request timeout in milliseconds; must be at least `500` |
|
|
475
|
+
| `local_llm.max_retries` | integer | `1` | Number of retry attempts on failure before falling back |
|
|
476
|
+
| `local_llm.fallback` | string | `"frontier"` | What to use when local LLM fails: `"frontier"` or `"skip"` |
|
|
477
|
+
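| `local_llm.routing_strategy` | string | `"local_first"` | `"local_first"` sends fast tasks to the local model; `"always_local"` routes everything to the local model. Note: `config-schema.json` currently accepts only `"local_first"` and `"frontier_first"`, so `"always_local"` fails schema validation |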
|
|
478
|
+
|
|
479
|
+
### features sub-table
|
|
480
|
+
|
|
481
|
+
Controls which PBR tasks are eligible for local LLM offloading.
|
|
482
|
+
|
|
483
|
+
| Property | Default | Description |
|
|
484
|
+
|----------|---------|-------------|
|
|
485
|
+
| `artifact_classification` | `true` | Classify artifact types (PLAN, SUMMARY, VERIFICATION) locally |
|
|
486
|
+
| `task_validation` | `true` | Validate task scope and completeness locally |
|
|
487
|
+
| `context_summarization` | `false` | Summarize context windows locally (higher token demand) |
|
|
488
|
+
| `source_scoring` | `false` | Score the credibility of research sources (S-level assignment) locally |
|
|
489
|
+
|
|
490
|
+
### advanced sub-table
|
|
491
|
+
|
|
492
|
+
| Property | Default | Description |
|
|
493
|
+
|----------|---------|-------------|
|
|
494
|
+
| `confidence_threshold` | `0.9` | Minimum confidence (0–1) for local output to be accepted; below this, falls back to frontier |
|
|
495
|
+
| `shadow_mode` | `false` | Run local LLM in parallel with frontier but discard local results — useful for tuning confidence thresholds without affecting output |
|
|
496
|
+
| `max_input_tokens` | `2000` | Truncate inputs longer than this before sending to local model |
|
|
497
|
+
| `keep_alive` | `"30m"` | How long Ollama keeps the model loaded between requests (Ollama format: `"5m"`, `"1h"`) |
|
|
498
|
+
| `num_ctx` | `4096` | Context window size passed to Ollama; **must be 4096 on Windows** (see Windows gotchas) |
|
|
499
|
+
| `disable_after_failures` | `3` | Automatically disable local LLM for the session after this many consecutive failures |
|
|
500
|
+
|
|
501
|
+
### Hardware requirements
|
|
502
|
+
|
|
503
|
+
| Tier | Hardware | Notes |
|
|
504
|
+
|------|----------|-------|
|
|
505
|
+
| Recommended | RTX 3060+ with 8 GB VRAM | Full GPU acceleration; qwen2.5-coder:7b loads entirely in VRAM |
|
|
506
|
+
| Functional | GTX 1660+ with 6 GB VRAM | GPU acceleration with slight layer offload to RAM |
|
|
507
|
+
| Marginal | CPU only, 32 GB RAM | Works but adds 5-20s latency per call; disable context-heavy features |
|
|
508
|
+
|
|
509
|
+
For GPU acceleration, ensure NVIDIA drivers are 520+ and CUDA 11.8+ is installed. AMD GPU support is available via ROCm on Linux only.
|
|
510
|
+
|
|
511
|
+
### Windows gotchas
|
|
512
|
+
|
|
513
|
+
- **Smart App Control**: May block `ollama_llama_server.exe` on first run. Allow it via Security settings or disable Smart App Control.
|
|
514
|
+
- **Windows Defender**: Add an exclusion for `%LOCALAPPDATA%\Programs\Ollama\ollama_llama_server.exe` to prevent Defender from scanning inference calls in real time.
|
|
515
|
+
- **`num_ctx` must be 4096**: Higher values cause GPU memory fragmentation on Windows and result in OOM errors mid-session. Always set `advanced.num_ctx: 4096` in your config.
|
|
516
|
+
- **Firewall**: Ollama listens on `localhost:11434` by default. If you see connection refused errors, check that Windows Firewall is not blocking loopback connections.
|
|
517
|
+
|
|
518
|
+
### Viewing metrics
|
|
519
|
+
|
|
520
|
+
After enabling local LLM, PBR logs per-call metrics to `.planning/logs/local-llm-metrics.jsonl`. Use the built-in subcommands to inspect them:
|
|
521
|
+
|
|
522
|
+
```bash
|
|
523
|
+
# Show session summary (calls routed, latency, token savings)
|
|
524
|
+
node plugins/pbr/scripts/pbr-tools.js llm metrics
|
|
525
|
+
|
|
526
|
+
# Suggest routing threshold adjustments based on recent accuracy
|
|
527
|
+
node plugins/pbr/scripts/pbr-tools.js llm adjust-thresholds
|
|
528
|
+
```
|
|
529
|
+
|
|
530
|
+
Metrics include: routing decision, model used, latency ms, confidence score, whether the frontier fallback was triggered, and estimated tokens saved.
|
|
@@ -64,6 +64,18 @@ function validateConfig(configPath) {
|
|
|
64
64
|
}
|
|
65
65
|
}
|
|
66
66
|
|
|
67
|
+
// Advisory: suggest local_llm defaults if the key is absent
|
|
68
|
+
if (!config.local_llm) {
|
|
69
|
+
warnings.push(
|
|
70
|
+
'local_llm config missing. To enable local LLM offload, add to config.json:\n' +
|
|
71
|
+
'"local_llm": {\n' +
|
|
72
|
+
' "enabled": false,\n' +
|
|
73
|
+
' "model": "qwen2.5-coder:7b",\n' +
|
|
74
|
+
' "endpoint": "http://localhost:11434"\n' +
|
|
75
|
+
'} (set enabled: true after running: ollama pull qwen2.5-coder:7b)'
|
|
76
|
+
);
|
|
77
|
+
}
|
|
78
|
+
|
|
67
79
|
// Check version
|
|
68
80
|
if (config.version && config.version < 2) {
|
|
69
81
|
warnings.push(`Config version ${config.version} is outdated — expected version 2+`);
|
|
@@ -90,6 +102,27 @@ function validateConfig(configPath) {
|
|
|
90
102
|
}
|
|
91
103
|
}
|
|
92
104
|
|
|
105
|
+
// Validate local_llm block
|
|
106
|
+
if (config.local_llm !== undefined) {
|
|
107
|
+
const llm = config.local_llm;
|
|
108
|
+
if (llm.enabled !== undefined && typeof llm.enabled !== 'boolean') {
|
|
109
|
+
warnings.push('local_llm.enabled must be a boolean');
|
|
110
|
+
}
|
|
111
|
+
if (llm.provider !== undefined && llm.provider !== 'ollama') {
|
|
112
|
+
warnings.push(`local_llm.provider "${llm.provider}" is not supported — use "ollama"`);
|
|
113
|
+
}
|
|
114
|
+
if (llm.timeout_ms !== undefined && (typeof llm.timeout_ms !== 'number' || llm.timeout_ms < 500)) {
|
|
115
|
+
warnings.push('local_llm.timeout_ms must be a number >= 500');
|
|
116
|
+
}
|
|
117
|
+
if (llm.advanced && llm.advanced.num_ctx !== undefined && llm.advanced.num_ctx !== 4096) {
|
|
118
|
+
warnings.push(`local_llm.advanced.num_ctx is ${llm.advanced.num_ctx} — strongly recommend 4096 to avoid GPU memory issues on Windows`);
|
|
119
|
+
}
|
|
120
|
+
if (llm.advanced && llm.advanced.disable_after_failures !== undefined &&
|
|
121
|
+
(typeof llm.advanced.disable_after_failures !== 'number' || llm.advanced.disable_after_failures < 1)) {
|
|
122
|
+
warnings.push('local_llm.advanced.disable_after_failures must be a number >= 1');
|
|
123
|
+
}
|
|
124
|
+
}
|
|
125
|
+
|
|
93
126
|
return warnings;
|
|
94
127
|
}
|
|
95
128
|
|
|
@@ -25,13 +25,29 @@ const path = require('path');
|
|
|
25
25
|
const { logHook } = require('./hook-logger');
|
|
26
26
|
const { logEvent } = require('./event-logger');
|
|
27
27
|
const { atomicWrite } = require('./pbr-tools');
|
|
28
|
+
const { resolveConfig } = require('./local-llm/health');
|
|
29
|
+
const { classifyArtifact } = require('./local-llm/operations/classify-artifact');
|
|
28
30
|
|
|
29
|
-
|
|
31
|
+
/**
 * Reads `.planning/config.json` under the current working directory and
 * passes its `local_llm` block through `resolveConfig`.
 *
 * Any failure along the way (missing file, invalid JSON, resolver error)
 * results in `resolveConfig(undefined)` being returned instead.
 *
 * @returns {*} whatever `resolveConfig` returns
 */
function loadLocalLlmConfig() {
  const fallback = () => resolveConfig(undefined);
  try {
    const file = path.join(process.cwd(), '.planning', 'config.json');
    const raw = fs.readFileSync(file, 'utf8');
    const { local_llm: block } = JSON.parse(raw);
    return resolveConfig(block);
  } catch (_e) {
    return fallback();
  }
}
|
|
44
|
+
|
|
45
|
+
async function main() {
|
|
30
46
|
let input = '';
|
|
31
47
|
|
|
32
48
|
process.stdin.setEncoding('utf8');
|
|
33
49
|
process.stdin.on('data', (chunk) => { input += chunk; });
|
|
34
|
-
process.stdin.on('end', () => {
|
|
50
|
+
process.stdin.on('end', async () => {
|
|
35
51
|
try {
|
|
36
52
|
const data = JSON.parse(input);
|
|
37
53
|
|
|
@@ -62,6 +78,22 @@ function main() {
|
|
|
62
78
|
? validateRoadmap(content, filePath)
|
|
63
79
|
: validateSummary(content, filePath);
|
|
64
80
|
|
|
81
|
+
// LLM advisory enrichment — advisory only, never blocks
|
|
82
|
+
if ((isPlan || isSummary) && result.errors.length === 0) {
|
|
83
|
+
try {
|
|
84
|
+
const llmConfig = loadLocalLlmConfig();
|
|
85
|
+
const planningDir = path.join(process.cwd(), '.planning');
|
|
86
|
+
const fileType = isPlan ? 'PLAN' : 'SUMMARY';
|
|
87
|
+
const llmResult = await classifyArtifact(llmConfig, planningDir, content, fileType, undefined);
|
|
88
|
+
if (llmResult && llmResult.classification) {
|
|
89
|
+
const llmNote = `Local LLM: ${fileType} classified as "${llmResult.classification}" (confidence: ${(llmResult.confidence * 100).toFixed(0)}%)${llmResult.reason ? ' — ' + llmResult.reason : ''}`;
|
|
90
|
+
result.warnings.push(llmNote);
|
|
91
|
+
}
|
|
92
|
+
} catch (_llmErr) {
|
|
93
|
+
// Never propagate LLM errors
|
|
94
|
+
}
|
|
95
|
+
}
|
|
96
|
+
|
|
65
97
|
const eventType = isPlan ? 'plan-validated' : isVerification ? 'verification-validated' : isRoadmap ? 'roadmap-validated' : 'summary-validated';
|
|
66
98
|
|
|
67
99
|
if (result.errors.length > 0) {
|
|
@@ -227,9 +259,9 @@ function validateSummary(content, _filePath) {
|
|
|
227
259
|
/**
|
|
228
260
|
* Core plan/summary check logic for use by dispatchers.
|
|
229
261
|
* @param {Object} data - Parsed hook input (tool_input, etc.)
|
|
230
|
-
* @returns {null|{output: Object}} null if pass or not applicable, result otherwise
|
|
262
|
+
* @returns {Promise<null|{output: Object}>} null if pass or not applicable, result otherwise
|
|
231
263
|
*/
|
|
232
|
-
function checkPlanWrite(data) {
|
|
264
|
+
async function checkPlanWrite(data) {
|
|
233
265
|
const filePath = data.tool_input?.file_path || data.tool_input?.path || '';
|
|
234
266
|
const basename = path.basename(filePath);
|
|
235
267
|
const isPlan = basename.endsWith('PLAN.md');
|
|
@@ -249,6 +281,22 @@ function checkPlanWrite(data) {
|
|
|
249
281
|
? validateRoadmap(content, filePath)
|
|
250
282
|
: validateSummary(content, filePath);
|
|
251
283
|
|
|
284
|
+
// LLM advisory enrichment — advisory only, never blocks
|
|
285
|
+
if ((isPlan || isSummary) && result.errors.length === 0) {
|
|
286
|
+
try {
|
|
287
|
+
const llmConfig = loadLocalLlmConfig();
|
|
288
|
+
const planningDir = path.join(process.cwd(), '.planning');
|
|
289
|
+
const fileType = isPlan ? 'PLAN' : 'SUMMARY';
|
|
290
|
+
const llmResult = await classifyArtifact(llmConfig, planningDir, content, fileType, undefined);
|
|
291
|
+
if (llmResult && llmResult.classification) {
|
|
292
|
+
const llmNote = `Local LLM: ${fileType} classified as "${llmResult.classification}" (confidence: ${(llmResult.confidence * 100).toFixed(0)}%)${llmResult.reason ? ' — ' + llmResult.reason : ''}`;
|
|
293
|
+
result.warnings.push(llmNote);
|
|
294
|
+
}
|
|
295
|
+
} catch (_llmErr) {
|
|
296
|
+
// Never propagate LLM errors
|
|
297
|
+
}
|
|
298
|
+
}
|
|
299
|
+
|
|
252
300
|
const eventType = isPlan ? 'plan-validated' : isVerification ? 'verification-validated' : isRoadmap ? 'roadmap-validated' : 'summary-validated';
|
|
253
301
|
|
|
254
302
|
if (result.errors.length > 0) {
|
|
@@ -20,6 +20,8 @@
|
|
|
20
20
|
const fs = require('fs');
|
|
21
21
|
const path = require('path');
|
|
22
22
|
const { logHook } = require('./hook-logger');
|
|
23
|
+
const { resolveConfig } = require('./local-llm/health');
|
|
24
|
+
const { classifyError } = require('./local-llm/operations/classify-error');
|
|
23
25
|
|
|
24
26
|
/**
|
|
25
27
|
* Check if a file was modified recently (within thresholdMs).
|
|
@@ -310,7 +312,17 @@ function readStdin() {
|
|
|
310
312
|
return {};
|
|
311
313
|
}
|
|
312
314
|
|
|
313
|
-
function
|
|
315
|
+
/**
 * Reads `<cwd>/.planning/config.json` and passes its `local_llm` block
 * through `resolveConfig`.
 *
 * Any failure along the way (missing file, invalid JSON, resolver error)
 * results in `resolveConfig(undefined)` being returned instead.
 *
 * @param {string} cwd - directory that contains the `.planning` folder
 * @returns {*} whatever `resolveConfig` returns
 */
function loadLocalLlmConfig(cwd) {
  const fallback = () => resolveConfig(undefined);
  try {
    const file = path.join(cwd, '.planning', 'config.json');
    const raw = fs.readFileSync(file, 'utf8');
    const { local_llm: block } = JSON.parse(raw);
    return resolveConfig(block);
  } catch (_) {
    return fallback();
  }
}
|
|
324
|
+
|
|
325
|
+
async function main() {
|
|
314
326
|
const data = readStdin();
|
|
315
327
|
const cwd = process.cwd();
|
|
316
328
|
const planningDir = path.join(cwd, '.planning');
|
|
@@ -426,8 +438,22 @@ function main() {
|
|
|
426
438
|
agent_type: agentType,
|
|
427
439
|
warnings: skillWarnings
|
|
428
440
|
});
|
|
441
|
+
// LLM error classification — advisory enrichment
|
|
442
|
+
let llmCategoryNote = '';
|
|
443
|
+
try {
|
|
444
|
+
const llmConfig = loadLocalLlmConfig(cwd);
|
|
445
|
+
const errorText = (data.tool_output || '').substring(0, 500);
|
|
446
|
+
if (errorText) {
|
|
447
|
+
const llmResult = await classifyError(llmConfig, planningDir, errorText, agentType, undefined);
|
|
448
|
+
if (llmResult && llmResult.category) {
|
|
449
|
+
llmCategoryNote = `\nLLM error category: ${llmResult.category} (confidence: ${(llmResult.confidence * 100).toFixed(0)}%)`;
|
|
450
|
+
}
|
|
451
|
+
}
|
|
452
|
+
} catch (_llmErr) {
|
|
453
|
+
// Never propagate
|
|
454
|
+
}
|
|
429
455
|
const msg = `Warning: Agent ${agentType} completed but no ${outputSpec.description} was found.\nSkill-specific warnings:\n` +
|
|
430
|
-
skillWarnings.map(w => `- ${w}`).join('\n');
|
|
456
|
+
skillWarnings.map(w => `- ${w}`).join('\n') + llmCategoryNote;
|
|
431
457
|
process.stdout.write(JSON.stringify({ additionalContext: msg }));
|
|
432
458
|
} else if (genericMissing) {
|
|
433
459
|
logHook('check-subagent-output', 'PostToolUse', 'warning', {
|
|
@@ -435,8 +461,22 @@ function main() {
|
|
|
435
461
|
expected: outputSpec.description,
|
|
436
462
|
found: 'none'
|
|
437
463
|
});
|
|
464
|
+
// LLM error classification — advisory enrichment
|
|
465
|
+
let llmCategoryNote = '';
|
|
466
|
+
try {
|
|
467
|
+
const llmConfig = loadLocalLlmConfig(cwd);
|
|
468
|
+
const errorText = (data.tool_output || '').substring(0, 500);
|
|
469
|
+
if (errorText) {
|
|
470
|
+
const llmResult = await classifyError(llmConfig, planningDir, errorText, agentType, undefined);
|
|
471
|
+
if (llmResult && llmResult.category) {
|
|
472
|
+
llmCategoryNote = `\nLLM error category: ${llmResult.category} (confidence: ${(llmResult.confidence * 100).toFixed(0)}%)`;
|
|
473
|
+
}
|
|
474
|
+
}
|
|
475
|
+
} catch (_llmErr) {
|
|
476
|
+
// Never propagate
|
|
477
|
+
}
|
|
438
478
|
const output = {
|
|
439
|
-
additionalContext: `[WARN] Agent ${agentType} completed but no ${outputSpec.description} was found. Likely causes: (1) agent hit an error mid-run, (2) wrong working directory. To fix: re-run the parent skill — the executor gate will block until the output is present. Check the Task() output above for error details.`
|
|
479
|
+
additionalContext: `[WARN] Agent ${agentType} completed but no ${outputSpec.description} was found. Likely causes: (1) agent hit an error mid-run, (2) wrong working directory. To fix: re-run the parent skill — the executor gate will block until the output is present. Check the Task() output above for error details.` + llmCategoryNote
|
|
440
480
|
};
|
|
441
481
|
process.stdout.write(JSON.stringify(output));
|
|
442
482
|
} else if (skillWarnings.length > 0) {
|
|
@@ -253,6 +253,54 @@
|
|
|
253
253
|
}
|
|
254
254
|
},
|
|
255
255
|
"additionalProperties": false
|
|
256
|
+
},
|
|
257
|
+
"local_llm": {
|
|
258
|
+
"type": "object",
|
|
259
|
+
"properties": {
|
|
260
|
+
"enabled": { "type": "boolean" },
|
|
261
|
+
"provider": { "type": "string", "enum": ["ollama"] },
|
|
262
|
+
"endpoint": { "type": "string", "format": "uri" },
|
|
263
|
+
"model": { "type": "string" },
|
|
264
|
+
"timeout_ms": { "type": "integer", "minimum": 500 },
|
|
265
|
+
"max_retries": { "type": "integer", "minimum": 0, "maximum": 3 },
|
|
266
|
+
"fallback": { "type": "string", "enum": ["frontier", "skip"] },
|
|
267
|
+
"routing_strategy": { "type": "string", "enum": ["local_first", "frontier_first"] },
|
|
268
|
+
"features": {
|
|
269
|
+
"type": "object",
|
|
270
|
+
"properties": {
|
|
271
|
+
"artifact_classification": { "type": "boolean" },
|
|
272
|
+
"task_validation": { "type": "boolean" },
|
|
273
|
+
"plan_adequacy": { "type": "boolean" },
|
|
274
|
+
"gap_detection": { "type": "boolean" },
|
|
275
|
+
"context_summarization": { "type": "boolean" },
|
|
276
|
+
"source_scoring": { "type": "boolean" }
|
|
277
|
+
},
|
|
278
|
+
"additionalProperties": false
|
|
279
|
+
},
|
|
280
|
+
"metrics": {
|
|
281
|
+
"type": "object",
|
|
282
|
+
"properties": {
|
|
283
|
+
"enabled": { "type": "boolean" },
|
|
284
|
+
"log_file": { "type": "string" },
|
|
285
|
+
"show_session_summary": { "type": "boolean" },
|
|
286
|
+
"frontier_token_rate": { "type": "number", "minimum": 0 }
|
|
287
|
+
},
|
|
288
|
+
"additionalProperties": false
|
|
289
|
+
},
|
|
290
|
+
"advanced": {
|
|
291
|
+
"type": "object",
|
|
292
|
+
"properties": {
|
|
293
|
+
"confidence_threshold": { "type": "number", "minimum": 0, "maximum": 1 },
|
|
294
|
+
"max_input_tokens": { "type": "integer", "minimum": 100 },
|
|
295
|
+
"keep_alive": { "type": "string" },
|
|
296
|
+
"num_ctx": { "type": "integer", "minimum": 512 },
|
|
297
|
+
"disable_after_failures": { "type": "integer", "minimum": 1 },
|
|
298
|
+
"shadow_mode": { "type": "boolean" }
|
|
299
|
+
},
|
|
300
|
+
"additionalProperties": false
|
|
301
|
+
}
|
|
302
|
+
},
|
|
303
|
+
"additionalProperties": false
|
|
256
304
|
}
|
|
257
305
|
},
|
|
258
306
|
"additionalProperties": false
|
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
/* global fetch, AbortSignal, performance */
|
|
2
|
+
'use strict';
|
|
3
|
+
|
|
4
|
+
// Circuit breaker: Map<operationType, { failures: number, disabled: boolean }>
|
|
5
|
+
const circuitState = new Map();
|
|
6
|
+
|
|
7
|
+
/**
 * Extracts a JSON value from model output.
 *
 * Strategies are tried in order until one parses:
 *   1. the text itself;
 *   2. the contents of the first markdown code fence (with or without a
 *      `json` language tag);
 *   3. each top-level, brace-balanced `{...}` span, scanning left to right.
 *
 * Strategy 3 stops at the first unbalanced `{` so that a fragment nested
 * inside a truncated object is never returned as if it were the answer.
 *
 * @param {string} text - raw model output
 * @returns {{ ok: true, data: any } | { ok: false, raw: string }}
 */
function tryParseJSON(text) {
  const attempt = (candidate) => {
    try {
      return { ok: true, data: JSON.parse(candidate) };
    } catch (_) {
      return null;
    }
  };

  // Attempt 1: direct parse
  const direct = attempt(text);
  if (direct) return direct;

  // Nothing to search in a non-string; report failure rather than throwing on .match().
  if (typeof text !== 'string') return { ok: false, raw: text };

  // Attempt 2: extract from markdown code block
  const fence = text.match(/```(?:json)?\s*\n?([\s\S]*?)\n?```/);
  if (fence) {
    const fenced = attempt(fence[1].trim());
    if (fenced) return fenced;
  }

  // Attempt 3: first brace-balanced object that parses
  let start = text.indexOf('{');
  while (start !== -1) {
    const end = findObjectEnd(text, start);
    if (end === -1) break;
    const embedded = attempt(text.slice(start, end + 1));
    if (embedded) return embedded;
    start = text.indexOf('{', end + 1);
  }

  return { ok: false, raw: text };
}

/**
 * Finds the index of the `}` that closes the `{` at `start`, ignoring braces
 * that appear inside double-quoted strings.
 *
 * @param {string} text
 * @param {number} start - index of an opening `{`
 * @returns {number} index of the matching `}`, or -1 if the span never closes
 */
function findObjectEnd(text, start) {
  let depth = 0;
  let inString = false;
  let escaped = false;
  for (let i = start; i < text.length; i++) {
    const ch = text[i];
    if (inString) {
      if (escaped) {
        escaped = false;
      } else if (ch === '\\') {
        escaped = true;
      } else if (ch === '"') {
        inString = false;
      }
      continue;
    }
    if (ch === '"') {
      inString = true;
    } else if (ch === '{') {
      depth += 1;
    } else if (ch === '}') {
      depth -= 1;
      if (depth === 0) return i;
    }
  }
  return -1;
}
|
|
45
|
+
|
|
46
|
+
/**
 * Classifies an error into one of five canonical types. Rules are checked
 * in order and the first match wins:
 *   ECONNREFUSED -> timeout -> http_error -> json_parse -> wrong_answer.
 *
 * @param {Error} err
 * @returns {{ type: string, message: string }}
 */
function categorizeError(err) {
  const { message } = err;
  const rules = [
    [
      'ECONNREFUSED',
      () =>
        (err.cause && err.cause.code === 'ECONNREFUSED') ||
        (message && message.includes('ECONNREFUSED'))
    ],
    ['timeout', () => err.name === 'TimeoutError' || err.name === 'AbortError'],
    ['http_error', () => message && message.startsWith('HTTP ')],
    ['json_parse', () => err instanceof SyntaxError]
  ];

  const hit = rules.find(([, matches]) => matches());
  return { type: hit ? hit[0] : 'wrong_answer', message };
}
|
|
69
|
+
|
|
70
|
+
/**
 * Reports whether the circuit for an operation type is open, i.e. whether
 * calls for that operation should be skipped.
 *
 * An operation with no recorded state is never disabled.
 *
 * @param {string} operationType
 * @param {number} maxFailures - failure count at which the circuit opens
 * @returns {boolean}
 */
function isDisabled(operationType, maxFailures) {
  const state = circuitState.get(operationType);
  return state ? state.disabled || state.failures >= maxFailures : false;
}
|
|
81
|
+
|
|
82
|
+
/**
 * Increments the failure counter for an operation type and marks its circuit
 * as disabled once the counter reaches `maxFailures`.
 *
 * @param {string} operationType
 * @param {number} maxFailures - failure count at which the circuit opens
 */
function recordFailure(operationType, maxFailures) {
  let state = circuitState.get(operationType);
  if (!state) {
    state = { failures: 0, disabled: false };
  }

  state.failures += 1;
  const tripped = state.failures >= maxFailures;
  if (tripped) {
    state.disabled = true;
  }

  circuitState.set(operationType, state);
}
|
|
95
|
+
|
|
96
|
+
/**
 * Forgets all recorded circuit-breaker state for an operation type.
 * Does nothing if no state was recorded.
 *
 * @param {string} operationType
 */
function resetCircuit(operationType) {
  if (circuitState.has(operationType)) {
    circuitState.delete(operationType);
  }
}
|
|
103
|
+
|
|
104
|
+
/**
|
|
105
|
+
* Sends a chat completion request to a local LLM endpoint with retry and circuit-breaker logic.
|
|
106
|
+
*
|
|
107
|
+
* @param {object} config - local_llm config block (resolved)
|
|
108
|
+
* @param {string} prompt - user message to send
|
|
109
|
+
* @param {string} operationType - operation identifier for circuit breaker tracking
|
|
110
|
+
* @param {object} [options={}] - optional parameters
|
|
111
|
+
* @param {boolean} [options.logprobs] - if true, request logprobs from the API
|
|
112
|
+
* @returns {Promise<{ content: string, latency_ms: number, tokens: number, logprobsData: Array<{token: string, logprob: number}>|null }>}
|
|
113
|
+
*/
|
|
114
|
+
async function complete(config, prompt, operationType, options = {}) {
|
|
115
|
+
const endpoint = config.endpoint || 'http://localhost:11434';
|
|
116
|
+
const model = config.model || 'qwen2.5-coder:7b';
|
|
117
|
+
const timeoutMs = config.timeout_ms || 3000;
|
|
118
|
+
const maxRetries = config.max_retries != null ? config.max_retries : 1;
|
|
119
|
+
const numCtx = (config.advanced && config.advanced.num_ctx) || 4096;
|
|
120
|
+
const keepAlive = (config.advanced && config.advanced.keep_alive) || '30m';
|
|
121
|
+
const maxFailures = (config.advanced && config.advanced.disable_after_failures) || 3;
|
|
122
|
+
|
|
123
|
+
if (isDisabled(operationType, maxFailures)) {
|
|
124
|
+
const err = new Error('Circuit open for operation: ' + operationType);
|
|
125
|
+
err.type = 'circuit_open';
|
|
126
|
+
throw err;
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
const bodyObj = {
|
|
130
|
+
model,
|
|
131
|
+
messages: [
|
|
132
|
+
{
|
|
133
|
+
role: 'system',
|
|
134
|
+
content:
|
|
135
|
+
'You are a precise classification assistant. Always respond with valid JSON only. No explanations outside the JSON.'
|
|
136
|
+
},
|
|
137
|
+
{ role: 'user', content: prompt }
|
|
138
|
+
],
|
|
139
|
+
response_format: { type: 'json_object' },
|
|
140
|
+
temperature: 0.1,
|
|
141
|
+
max_tokens: 200,
|
|
142
|
+
keep_alive: keepAlive,
|
|
143
|
+
num_ctx: numCtx
|
|
144
|
+
};
|
|
145
|
+
if (options.logprobs === true) {
|
|
146
|
+
bodyObj.logprobs = true;
|
|
147
|
+
bodyObj.top_logprobs = 3;
|
|
148
|
+
}
|
|
149
|
+
const body = JSON.stringify(bodyObj);
|
|
150
|
+
|
|
151
|
+
const url = endpoint + '/v1/chat/completions';
|
|
152
|
+
const totalAttempts = maxRetries + 1;
|
|
153
|
+
|
|
154
|
+
let lastErr;
|
|
155
|
+
for (let attempt = 0; attempt < totalAttempts; attempt++) {
|
|
156
|
+
const start = performance.now();
|
|
157
|
+
try {
|
|
158
|
+
const res = await fetch(url, {
|
|
159
|
+
method: 'POST',
|
|
160
|
+
headers: { 'Content-Type': 'application/json' },
|
|
161
|
+
body,
|
|
162
|
+
signal: AbortSignal.timeout(timeoutMs)
|
|
163
|
+
});
|
|
164
|
+
|
|
165
|
+
if (!res.ok) {
|
|
166
|
+
const errText = await res.text().catch(() => '');
|
|
167
|
+
throw new Error('HTTP ' + res.status + ': ' + errText);
|
|
168
|
+
}
|
|
169
|
+
|
|
170
|
+
const json = await res.json();
|
|
171
|
+
const content = json.choices[0].message.content;
|
|
172
|
+
const completionTokens = (json.usage && json.usage.completion_tokens) || 0;
|
|
173
|
+
const latency_ms = performance.now() - start;
|
|
174
|
+
const logprobsData = (options.logprobs && json.choices[0].logprobs)
|
|
175
|
+
? json.choices[0].logprobs.content
|
|
176
|
+
: null;
|
|
177
|
+
|
|
178
|
+
return { content, latency_ms, tokens: completionTokens, logprobsData };
|
|
179
|
+
} catch (err) {
|
|
180
|
+
lastErr = err;
|
|
181
|
+
const isConnRefused =
|
|
182
|
+
(err.cause && err.cause.code === 'ECONNREFUSED') ||
|
|
183
|
+
(err.message && err.message.includes('ECONNREFUSED'));
|
|
184
|
+
|
|
185
|
+
if (isConnRefused) {
|
|
186
|
+
// Server not running — no point retrying
|
|
187
|
+
recordFailure(operationType, maxFailures);
|
|
188
|
+
throw err;
|
|
189
|
+
}
|
|
190
|
+
|
|
191
|
+
const isTimeout = err.name === 'TimeoutError' || err.name === 'AbortError';
|
|
192
|
+
const isHttpError = err.message && err.message.startsWith('HTTP ');
|
|
193
|
+
|
|
194
|
+
if ((isTimeout || isHttpError) && attempt < totalAttempts - 1) {
|
|
195
|
+
// Retry
|
|
196
|
+
continue;
|
|
197
|
+
}
|
|
198
|
+
|
|
199
|
+
// Final attempt or non-retryable error
|
|
200
|
+
if (attempt === totalAttempts - 1) {
|
|
201
|
+
recordFailure(operationType, maxFailures);
|
|
202
|
+
} else {
|
|
203
|
+
recordFailure(operationType, maxFailures);
|
|
204
|
+
}
|
|
205
|
+
throw err;
|
|
206
|
+
}
|
|
207
|
+
}
|
|
208
|
+
|
|
209
|
+
// Should not reach here, but guard anyway
|
|
210
|
+
recordFailure(operationType, maxFailures);
|
|
211
|
+
throw lastErr;
|
|
212
|
+
}
|
|
213
|
+
|
|
214
|
+
module.exports = { tryParseJSON, categorizeError, isDisabled, recordFailure, resetCircuit, complete };
|