@sienklogic/plan-build-run 2.22.2 → 2.23.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. package/CHANGELOG.md +27 -0
  2. package/dashboard/package.json +2 -1
  3. package/dashboard/src/middleware/errorHandler.js +12 -2
  4. package/dashboard/src/repositories/planning.repository.js +23 -1
  5. package/dashboard/src/routes/pages.routes.js +65 -2
  6. package/dashboard/src/services/local-llm-metrics.service.js +81 -0
  7. package/dashboard/src/services/quick.service.js +62 -0
  8. package/dashboard/src/views/partials/analytics-content.ejs +61 -0
  9. package/dashboard/src/views/partials/quick-content.ejs +40 -0
  10. package/dashboard/src/views/partials/quick-detail-content.ejs +29 -0
  11. package/dashboard/src/views/partials/sidebar.ejs +8 -0
  12. package/dashboard/src/views/quick-detail.ejs +5 -0
  13. package/dashboard/src/views/quick.ejs +5 -0
  14. package/package.json +1 -1
  15. package/plugins/copilot-pbr/agents/debugger.agent.md +15 -0
  16. package/plugins/copilot-pbr/agents/researcher.agent.md +20 -0
  17. package/plugins/copilot-pbr/agents/synthesizer.agent.md +12 -0
  18. package/plugins/copilot-pbr/plugin.json +1 -1
  19. package/plugins/copilot-pbr/references/config-reference.md +89 -0
  20. package/plugins/copilot-pbr/skills/health/SKILL.md +8 -1
  21. package/plugins/copilot-pbr/skills/help/SKILL.md +4 -4
  22. package/plugins/copilot-pbr/skills/milestone/SKILL.md +12 -12
  23. package/plugins/copilot-pbr/skills/status/SKILL.md +37 -1
  24. package/plugins/cursor-pbr/.cursor-plugin/plugin.json +1 -1
  25. package/plugins/cursor-pbr/agents/debugger.md +15 -0
  26. package/plugins/cursor-pbr/agents/researcher.md +20 -0
  27. package/plugins/cursor-pbr/agents/synthesizer.md +12 -0
  28. package/plugins/cursor-pbr/references/config-reference.md +89 -0
  29. package/plugins/cursor-pbr/skills/health/SKILL.md +8 -1
  30. package/plugins/cursor-pbr/skills/help/SKILL.md +4 -4
  31. package/plugins/cursor-pbr/skills/milestone/SKILL.md +12 -12
  32. package/plugins/cursor-pbr/skills/status/SKILL.md +37 -1
  33. package/plugins/pbr/.claude-plugin/plugin.json +1 -1
  34. package/plugins/pbr/agents/debugger.md +15 -0
  35. package/plugins/pbr/agents/researcher.md +20 -0
  36. package/plugins/pbr/agents/synthesizer.md +12 -0
  37. package/plugins/pbr/references/config-reference.md +89 -0
  38. package/plugins/pbr/scripts/check-config-change.js +33 -0
  39. package/plugins/pbr/scripts/check-plan-format.js +52 -4
  40. package/plugins/pbr/scripts/check-subagent-output.js +43 -3
  41. package/plugins/pbr/scripts/config-schema.json +48 -0
  42. package/plugins/pbr/scripts/local-llm/client.js +214 -0
  43. package/plugins/pbr/scripts/local-llm/health.js +217 -0
  44. package/plugins/pbr/scripts/local-llm/metrics.js +252 -0
  45. package/plugins/pbr/scripts/local-llm/operations/classify-artifact.js +76 -0
  46. package/plugins/pbr/scripts/local-llm/operations/classify-error.js +75 -0
  47. package/plugins/pbr/scripts/local-llm/operations/score-source.js +72 -0
  48. package/plugins/pbr/scripts/local-llm/operations/summarize-context.js +62 -0
  49. package/plugins/pbr/scripts/local-llm/operations/validate-task.js +59 -0
  50. package/plugins/pbr/scripts/local-llm/router.js +101 -0
  51. package/plugins/pbr/scripts/local-llm/shadow.js +60 -0
  52. package/plugins/pbr/scripts/local-llm/threshold-tuner.js +118 -0
  53. package/plugins/pbr/scripts/pbr-tools.js +120 -3
  54. package/plugins/pbr/scripts/post-write-dispatch.js +2 -2
  55. package/plugins/pbr/scripts/progress-tracker.js +29 -3
  56. package/plugins/pbr/scripts/session-cleanup.js +36 -1
  57. package/plugins/pbr/scripts/validate-task.js +30 -1
  58. package/plugins/pbr/skills/health/SKILL.md +8 -1
  59. package/plugins/pbr/skills/help/SKILL.md +4 -4
  60. package/plugins/pbr/skills/milestone/SKILL.md +12 -12
  61. package/plugins/pbr/skills/status/SKILL.md +38 -2
@@ -62,6 +62,26 @@ All claims must be attributed to a source level. Higher levels override lower le
62
62
 
63
63
  **Offline Fallback**: If web tools are unavailable (air-gapped environment, MCP not configured), rely on local sources: codebase analysis via Glob/Grep, existing documentation, and README files. Assign these S3-S4 confidence levels. Do not attempt WebFetch or WebSearch — note in the output header that external sources were unavailable.
64
64
 
65
+ ## Local LLM Source Scoring (Optional)
66
+
67
+ If local LLM offload is configured, you MAY use it to score source credibility instead of manually assigning S-levels. This is advisory — never wait on it or fail if it returns null.
68
+
69
+ Check availability first:
70
+
71
+ ```bash
72
+ node "${CLAUDE_PLUGIN_ROOT}/scripts/pbr-tools.js" llm status 2>/dev/null
73
+ ```
74
+
75
+ If `enabled: true`, score a source excerpt:
76
+
77
+ ```bash
78
+ echo "Source URL and content excerpt" > /tmp/source-excerpt.txt
79
+ node "${CLAUDE_PLUGIN_ROOT}/scripts/pbr-tools.js" llm score-source "https://example.com/docs" /tmp/source-excerpt.txt 2>/dev/null
80
+ # Returns: {"level":"S2","confidence":0.87,"reason":"Official library documentation page"}
81
+ ```
82
+
83
+ Use the returned `level` to set your source tag. If the call fails or returns `null`, assign the level manually per the hierarchy table above.
84
+
65
85
  ---
66
86
 
67
87
  ## Confidence Levels
@@ -100,6 +100,18 @@ conflicts: N
100
100
  - **Research gaps**: Add `[RESEARCH GAP]` flag, add to Open Questions with high impact, never fabricate
101
101
  - **Duplicates**: Consolidate into one entry, note multi-source agreement, reference all documents
102
102
 
103
+ ## Local LLM Context Summarization (Optional)
104
+
105
+ When input research documents are large (>2000 words combined), you MAY use the local LLM to pre-summarize each document before synthesis. This reduces your own context consumption. Advisory only — if unavailable, read documents normally.
106
+
107
+ ```bash
108
+ # Pre-summarize a large research document to ~150 words:
109
+ node "${CLAUDE_PLUGIN_ROOT}/scripts/pbr-tools.js" llm summarize /path/to/RESEARCH.md 150 2>/dev/null
110
+ # Returns: {"summary":"...plain text summary under 150 words...","latency_ms":2100,"fallback_used":false}
111
+ ```
112
+
113
+ Use the returned `summary` string as your working copy of that document's findings. Still read the original for any specific version numbers, code examples, or direct quotes needed in the output.
114
+
103
115
  ## Anti-Patterns
104
116
 
105
117
  ### Universal Anti-Patterns
@@ -439,3 +439,92 @@ Run validation with: `node plugins/pbr/scripts/pbr-tools.js config validate`
439
439
  | `tdd_mode: true` + `depth: quick` | quick depth skips verification, which conflicts with TDD's verify-first approach |
440
440
  | `git.mode: disabled` + `atomic_commits: true` | atomic_commits has no effect when git is disabled |
441
441
  | `git.branching: phase` + `git.mode: disabled` | Branching settings are ignored when git is disabled |
442
+
443
+ ---
444
+
445
+ ## local_llm
446
+
447
+ Offloads selected PBR inference tasks to a locally running Ollama instance, reducing frontier model usage and latency for fast classification calls. The key `enabled` defaults to `false`, so users without Ollama see no change — all LLM calls continue routing to Claude as normal. When enabled, PBR uses a `local_first` routing strategy: fast tasks (artifact classification, task validation) go to the local model; complex tasks (planning, execution) stay on the frontier model.
448
+
449
+ ### Quick setup
450
+
451
+ 1. Install Ollama:
452
+ - **Linux/macOS**: `curl -fsSL https://ollama.com/install.sh | sh`
453
+ - **Windows**: Download from [ollama.com/download](https://ollama.com/download) and run the installer
454
+ 2. Pull the recommended model: `ollama pull qwen2.5-coder:7b`
455
+ 3. Add to `.planning/config.json`:
456
+
457
+ ```json
458
+ "local_llm": {
459
+ "enabled": true,
460
+ "model": "qwen2.5-coder:7b"
461
+ }
462
+ ```
463
+
464
+ 4. Verify connectivity: `node /path/to/plugins/pbr/scripts/pbr-tools.js llm health`
465
+
466
+ ### Field reference
467
+
468
+ | Property | Type | Default | Description |
469
+ |----------|------|---------|-------------|
470
+ | `local_llm.enabled` | boolean | `false` | Enable local LLM offloading; `false` = all calls use frontier |
471
+ | `local_llm.provider` | string | `"ollama"` | Backend provider; only `"ollama"` is supported |
472
+ | `local_llm.endpoint` | string | `"http://localhost:11434"` | Ollama API base URL |
473
+ | `local_llm.model` | string | `"qwen2.5-coder:7b"` | Model tag to use for local inference |
474
+ | `local_llm.timeout_ms` | integer | `3000` | Per-request timeout in milliseconds; >= 500 |
475
+ | `local_llm.max_retries` | integer | `1` | Number of retry attempts on failure before falling back |
476
+ | `local_llm.fallback` | string | `"frontier"` | What to use when local LLM fails: `"frontier"` or `"skip"` |
477
+ | `local_llm.routing_strategy` | string | `"local_first"` | `"local_first"` sends fast tasks local; `"always_local"` routes everything |
478
+
479
+ ### features sub-table
480
+
481
+ Controls which PBR tasks are eligible for local LLM offloading.
482
+
483
+ | Property | Default | Description |
484
+ |----------|---------|-------------|
485
+ | `artifact_classification` | `true` | Classify artifact types (PLAN, SUMMARY, VERIFICATION) locally |
486
+ | `task_validation` | `true` | Validate task scope and completeness locally |
487
+ | `context_summarization` | `false` | Summarize context windows locally (higher token demand) |
488
+ | `source_scoring` | `false` | Score research source credibility (assigns S-levels) locally |
489
+
490
+ ### advanced sub-table
491
+
492
+ | Property | Default | Description |
493
+ |----------|---------|-------------|
494
+ | `confidence_threshold` | `0.9` | Minimum confidence (0–1) for local output to be accepted; below this, falls back to frontier |
495
+ | `shadow_mode` | `false` | Run local LLM in parallel with frontier but discard local results — useful for tuning confidence thresholds without affecting output |
496
+ | `max_input_tokens` | `2000` | Truncate inputs longer than this before sending to local model |
497
+ | `keep_alive` | `"30m"` | How long Ollama keeps the model loaded between requests (Ollama format: `"5m"`, `"1h"`) |
498
+ | `num_ctx` | `4096` | Context window size passed to Ollama; **must be 4096 on Windows** (see Windows gotchas) |
499
+ | `disable_after_failures` | `3` | Automatically disable local LLM for the session after this many consecutive failures |
500
+
501
+ ### Hardware requirements
502
+
503
+ | Tier | Hardware | Notes |
504
+ |------|----------|-------|
505
+ | Recommended | RTX 3060+ with 8 GB VRAM | Full GPU acceleration; qwen2.5-coder:7b loads entirely in VRAM |
506
+ | Functional | GTX 1660+ with 6 GB VRAM | GPU acceleration with slight layer offload to RAM |
507
+ | Marginal | CPU only, 32 GB RAM | Works but adds 5-20s latency per call; disable context-heavy features |
508
+
509
+ For GPU acceleration, ensure NVIDIA drivers are 520+ and CUDA 11.8+ is installed. AMD GPU support is available via ROCm on Linux only.
510
+
511
+ ### Windows gotchas
512
+
513
+ - **Smart App Control**: May block `ollama_llama_server.exe` on first run. Allow it via Security settings or disable Smart App Control.
514
+ - **Windows Defender**: Add an exclusion for `%LOCALAPPDATA%\Programs\Ollama\ollama_llama_server.exe` to prevent Defender from scanning inference calls in real time.
515
+ - **`num_ctx` must be 4096**: Higher values cause GPU memory fragmentation on Windows and result in OOM errors mid-session. Always set `advanced.num_ctx: 4096` in your config.
516
+ - **Firewall**: Ollama listens on `localhost:11434` by default. If you see connection refused errors, check that Windows Firewall is not blocking loopback connections.
517
+
518
+ ### Viewing metrics
519
+
520
+ After enabling local LLM, PBR logs per-call metrics to `.planning/logs/local-llm-metrics.jsonl`. Use the built-in subcommands to inspect them:
521
+
522
+ ```bash
523
+ # Show session summary (calls routed, latency, token savings)
524
+ node plugins/pbr/scripts/pbr-tools.js llm metrics
525
+
526
+ # Suggest routing threshold adjustments based on recent accuracy
527
+ node plugins/pbr/scripts/pbr-tools.js llm adjust-thresholds
528
+ ```
529
+
530
+ Metrics include: routing decision, model used, latency ms, confidence score, whether the frontier fallback was triggered, and estimated tokens saved.
@@ -64,6 +64,18 @@ function validateConfig(configPath) {
64
64
  }
65
65
  }
66
66
 
67
+ // Advisory: suggest local_llm defaults if the key is absent
68
+ if (!config.local_llm) {
69
+ warnings.push(
70
+ 'local_llm config missing. To enable local LLM offload, add to config.json:\n' +
71
+ '"local_llm": {\n' +
72
+ ' "enabled": false,\n' +
73
+ ' "model": "qwen2.5-coder:7b",\n' +
74
+ ' "endpoint": "http://localhost:11434"\n' +
75
+ '} (set enabled: true after running: ollama pull qwen2.5-coder:7b)'
76
+ );
77
+ }
78
+
67
79
  // Check version
68
80
  if (config.version && config.version < 2) {
69
81
  warnings.push(`Config version ${config.version} is outdated — expected version 2+`);
@@ -90,6 +102,27 @@ function validateConfig(configPath) {
90
102
  }
91
103
  }
92
104
 
105
+ // Validate local_llm block
106
+ if (config.local_llm !== undefined) {
107
+ const llm = config.local_llm;
108
+ if (llm.enabled !== undefined && typeof llm.enabled !== 'boolean') {
109
+ warnings.push('local_llm.enabled must be a boolean');
110
+ }
111
+ if (llm.provider !== undefined && llm.provider !== 'ollama') {
112
+ warnings.push(`local_llm.provider "${llm.provider}" is not supported — use "ollama"`);
113
+ }
114
+ if (llm.timeout_ms !== undefined && (typeof llm.timeout_ms !== 'number' || llm.timeout_ms < 500)) {
115
+ warnings.push('local_llm.timeout_ms must be a number >= 500');
116
+ }
117
+ if (llm.advanced && llm.advanced.num_ctx !== undefined && llm.advanced.num_ctx !== 4096) {
118
+ warnings.push(`local_llm.advanced.num_ctx is ${llm.advanced.num_ctx} — strongly recommend 4096 to avoid GPU memory issues on Windows`);
119
+ }
120
+ if (llm.advanced && llm.advanced.disable_after_failures !== undefined &&
121
+ (typeof llm.advanced.disable_after_failures !== 'number' || llm.advanced.disable_after_failures < 1)) {
122
+ warnings.push('local_llm.advanced.disable_after_failures must be a number >= 1');
123
+ }
124
+ }
125
+
93
126
  return warnings;
94
127
  }
95
128
 
@@ -25,13 +25,29 @@ const path = require('path');
25
25
  const { logHook } = require('./hook-logger');
26
26
  const { logEvent } = require('./event-logger');
27
27
  const { atomicWrite } = require('./pbr-tools');
28
+ const { resolveConfig } = require('./local-llm/health');
29
+ const { classifyArtifact } = require('./local-llm/operations/classify-artifact');
28
30
 
29
- function main() {
31
+ /**
32
+ * Load and resolve the local_llm config block from .planning/config.json.
33
+ * Returns a resolved config (always safe to use — disabled by default on error).
34
+ */
35
+ function loadLocalLlmConfig() {
36
+ try {
37
+ const configPath = path.join(process.cwd(), '.planning', 'config.json');
38
+ const parsed = JSON.parse(fs.readFileSync(configPath, 'utf8'));
39
+ return resolveConfig(parsed.local_llm);
40
+ } catch (_e) {
41
+ return resolveConfig(undefined);
42
+ }
43
+ }
44
+
45
+ async function main() {
30
46
  let input = '';
31
47
 
32
48
  process.stdin.setEncoding('utf8');
33
49
  process.stdin.on('data', (chunk) => { input += chunk; });
34
- process.stdin.on('end', () => {
50
+ process.stdin.on('end', async () => {
35
51
  try {
36
52
  const data = JSON.parse(input);
37
53
 
@@ -62,6 +78,22 @@ function main() {
62
78
  ? validateRoadmap(content, filePath)
63
79
  : validateSummary(content, filePath);
64
80
 
81
+ // LLM advisory enrichment — advisory only, never blocks
82
+ if ((isPlan || isSummary) && result.errors.length === 0) {
83
+ try {
84
+ const llmConfig = loadLocalLlmConfig();
85
+ const planningDir = path.join(process.cwd(), '.planning');
86
+ const fileType = isPlan ? 'PLAN' : 'SUMMARY';
87
+ const llmResult = await classifyArtifact(llmConfig, planningDir, content, fileType, undefined);
88
+ if (llmResult && llmResult.classification) {
89
+ const llmNote = `Local LLM: ${fileType} classified as "${llmResult.classification}" (confidence: ${(llmResult.confidence * 100).toFixed(0)}%)${llmResult.reason ? ' — ' + llmResult.reason : ''}`;
90
+ result.warnings.push(llmNote);
91
+ }
92
+ } catch (_llmErr) {
93
+ // Never propagate LLM errors
94
+ }
95
+ }
96
+
65
97
  const eventType = isPlan ? 'plan-validated' : isVerification ? 'verification-validated' : isRoadmap ? 'roadmap-validated' : 'summary-validated';
66
98
 
67
99
  if (result.errors.length > 0) {
@@ -227,9 +259,9 @@ function validateSummary(content, _filePath) {
227
259
  /**
228
260
  * Core plan/summary check logic for use by dispatchers.
229
261
  * @param {Object} data - Parsed hook input (tool_input, etc.)
230
- * @returns {null|{output: Object}} null if pass or not applicable, result otherwise
262
+ * @returns {Promise<null|{output: Object}>} null if pass or not applicable, result otherwise
231
263
  */
232
- function checkPlanWrite(data) {
264
+ async function checkPlanWrite(data) {
233
265
  const filePath = data.tool_input?.file_path || data.tool_input?.path || '';
234
266
  const basename = path.basename(filePath);
235
267
  const isPlan = basename.endsWith('PLAN.md');
@@ -249,6 +281,22 @@ function checkPlanWrite(data) {
249
281
  ? validateRoadmap(content, filePath)
250
282
  : validateSummary(content, filePath);
251
283
 
284
+ // LLM advisory enrichment — advisory only, never blocks
285
+ if ((isPlan || isSummary) && result.errors.length === 0) {
286
+ try {
287
+ const llmConfig = loadLocalLlmConfig();
288
+ const planningDir = path.join(process.cwd(), '.planning');
289
+ const fileType = isPlan ? 'PLAN' : 'SUMMARY';
290
+ const llmResult = await classifyArtifact(llmConfig, planningDir, content, fileType, undefined);
291
+ if (llmResult && llmResult.classification) {
292
+ const llmNote = `Local LLM: ${fileType} classified as "${llmResult.classification}" (confidence: ${(llmResult.confidence * 100).toFixed(0)}%)${llmResult.reason ? ' — ' + llmResult.reason : ''}`;
293
+ result.warnings.push(llmNote);
294
+ }
295
+ } catch (_llmErr) {
296
+ // Never propagate LLM errors
297
+ }
298
+ }
299
+
252
300
  const eventType = isPlan ? 'plan-validated' : isVerification ? 'verification-validated' : isRoadmap ? 'roadmap-validated' : 'summary-validated';
253
301
 
254
302
  if (result.errors.length > 0) {
@@ -20,6 +20,8 @@
20
20
  const fs = require('fs');
21
21
  const path = require('path');
22
22
  const { logHook } = require('./hook-logger');
23
+ const { resolveConfig } = require('./local-llm/health');
24
+ const { classifyError } = require('./local-llm/operations/classify-error');
23
25
 
24
26
  /**
25
27
  * Check if a file was modified recently (within thresholdMs).
@@ -310,7 +312,17 @@ function readStdin() {
310
312
  return {};
311
313
  }
312
314
 
313
- function main() {
315
+ function loadLocalLlmConfig(cwd) {
316
+ try {
317
+ const configPath = path.join(cwd, '.planning', 'config.json');
318
+ const parsed = JSON.parse(fs.readFileSync(configPath, 'utf8'));
319
+ return resolveConfig(parsed.local_llm);
320
+ } catch (_) {
321
+ return resolveConfig(undefined);
322
+ }
323
+ }
324
+
325
+ async function main() {
314
326
  const data = readStdin();
315
327
  const cwd = process.cwd();
316
328
  const planningDir = path.join(cwd, '.planning');
@@ -426,8 +438,22 @@ function main() {
426
438
  agent_type: agentType,
427
439
  warnings: skillWarnings
428
440
  });
441
+ // LLM error classification — advisory enrichment
442
+ let llmCategoryNote = '';
443
+ try {
444
+ const llmConfig = loadLocalLlmConfig(cwd);
445
+ const errorText = (data.tool_output || '').substring(0, 500);
446
+ if (errorText) {
447
+ const llmResult = await classifyError(llmConfig, planningDir, errorText, agentType, undefined);
448
+ if (llmResult && llmResult.category) {
449
+ llmCategoryNote = `\nLLM error category: ${llmResult.category} (confidence: ${(llmResult.confidence * 100).toFixed(0)}%)`;
450
+ }
451
+ }
452
+ } catch (_llmErr) {
453
+ // Never propagate
454
+ }
429
455
  const msg = `Warning: Agent ${agentType} completed but no ${outputSpec.description} was found.\nSkill-specific warnings:\n` +
430
- skillWarnings.map(w => `- ${w}`).join('\n');
456
+ skillWarnings.map(w => `- ${w}`).join('\n') + llmCategoryNote;
431
457
  process.stdout.write(JSON.stringify({ additionalContext: msg }));
432
458
  } else if (genericMissing) {
433
459
  logHook('check-subagent-output', 'PostToolUse', 'warning', {
@@ -435,8 +461,22 @@ function main() {
435
461
  expected: outputSpec.description,
436
462
  found: 'none'
437
463
  });
464
+ // LLM error classification — advisory enrichment
465
+ let llmCategoryNote = '';
466
+ try {
467
+ const llmConfig = loadLocalLlmConfig(cwd);
468
+ const errorText = (data.tool_output || '').substring(0, 500);
469
+ if (errorText) {
470
+ const llmResult = await classifyError(llmConfig, planningDir, errorText, agentType, undefined);
471
+ if (llmResult && llmResult.category) {
472
+ llmCategoryNote = `\nLLM error category: ${llmResult.category} (confidence: ${(llmResult.confidence * 100).toFixed(0)}%)`;
473
+ }
474
+ }
475
+ } catch (_llmErr) {
476
+ // Never propagate
477
+ }
438
478
  const output = {
439
- additionalContext: `[WARN] Agent ${agentType} completed but no ${outputSpec.description} was found. Likely causes: (1) agent hit an error mid-run, (2) wrong working directory. To fix: re-run the parent skill — the executor gate will block until the output is present. Check the Task() output above for error details.`
479
+ additionalContext: `[WARN] Agent ${agentType} completed but no ${outputSpec.description} was found. Likely causes: (1) agent hit an error mid-run, (2) wrong working directory. To fix: re-run the parent skill — the executor gate will block until the output is present. Check the Task() output above for error details.` + llmCategoryNote
440
480
  };
441
481
  process.stdout.write(JSON.stringify(output));
442
482
  } else if (skillWarnings.length > 0) {
@@ -253,6 +253,54 @@
253
253
  }
254
254
  },
255
255
  "additionalProperties": false
256
+ },
257
+ "local_llm": {
258
+ "type": "object",
259
+ "properties": {
260
+ "enabled": { "type": "boolean" },
261
+ "provider": { "type": "string", "enum": ["ollama"] },
262
+ "endpoint": { "type": "string", "format": "uri" },
263
+ "model": { "type": "string" },
264
+ "timeout_ms": { "type": "integer", "minimum": 500 },
265
+ "max_retries": { "type": "integer", "minimum": 0, "maximum": 3 },
266
+ "fallback": { "type": "string", "enum": ["frontier", "skip"] },
267
+ "routing_strategy": { "type": "string", "enum": ["local_first", "always_local"] },
268
+ "features": {
269
+ "type": "object",
270
+ "properties": {
271
+ "artifact_classification": { "type": "boolean" },
272
+ "task_validation": { "type": "boolean" },
273
+ "plan_adequacy": { "type": "boolean" },
274
+ "gap_detection": { "type": "boolean" },
275
+ "context_summarization": { "type": "boolean" },
276
+ "source_scoring": { "type": "boolean" }
277
+ },
278
+ "additionalProperties": false
279
+ },
280
+ "metrics": {
281
+ "type": "object",
282
+ "properties": {
283
+ "enabled": { "type": "boolean" },
284
+ "log_file": { "type": "string" },
285
+ "show_session_summary": { "type": "boolean" },
286
+ "frontier_token_rate": { "type": "number", "minimum": 0 }
287
+ },
288
+ "additionalProperties": false
289
+ },
290
+ "advanced": {
291
+ "type": "object",
292
+ "properties": {
293
+ "confidence_threshold": { "type": "number", "minimum": 0, "maximum": 1 },
294
+ "max_input_tokens": { "type": "integer", "minimum": 100 },
295
+ "keep_alive": { "type": "string" },
296
+ "num_ctx": { "type": "integer", "minimum": 512 },
297
+ "disable_after_failures": { "type": "integer", "minimum": 1 },
298
+ "shadow_mode": { "type": "boolean" }
299
+ },
300
+ "additionalProperties": false
301
+ }
302
+ },
303
+ "additionalProperties": false
256
304
  }
257
305
  },
258
306
  "additionalProperties": false
@@ -0,0 +1,214 @@
1
+ /* global fetch, AbortSignal, performance */
2
+ 'use strict';
3
+
4
+ // Circuit breaker: Map<operationType, { failures: number, disabled: boolean }>
5
+ const circuitState = new Map();
6
+
7
/**
 * Parse JSON that may arrive raw, fenced in a markdown code block, or embedded
 * in surrounding prose. Extraction strategies are tried in order: direct parse,
 * fenced ```json block, then the first {...} span in the text.
 * @param {string} text - candidate JSON text (e.g. a model response)
 * @returns {{ ok: true, data: any } | { ok: false, raw: string }} parsed data,
 *   or the untouched input text when no strategy yields valid JSON
 */
function tryParseJSON(text) {
  const candidates = [text];

  const fenced = text.match(/```(?:json)?\s*\n?([\s\S]*?)\n?```/);
  if (fenced) {
    candidates.push(fenced[1].trim());
  }

  const braceSpan = text.match(/\{[\s\S]*\}/);
  if (braceSpan) {
    candidates.push(braceSpan[0]);
  }

  for (const candidate of candidates) {
    try {
      return { ok: true, data: JSON.parse(candidate) };
    } catch (_) {
      // not valid JSON — try the next extraction strategy
    }
  }

  return { ok: false, raw: text };
}
45
+
46
/**
 * Map an arbitrary error onto one of five canonical failure types used by the
 * metrics and circuit-breaker layers.
 * @param {Error} err
 * @returns {{ type: string, message: string }} canonical type plus the original message
 */
function categorizeError(err) {
  const message = err.message;
  const connectionRefused =
    (err.cause && err.cause.code === 'ECONNREFUSED') ||
    (message && message.includes('ECONNREFUSED'));

  let type = 'wrong_answer';
  if (connectionRefused) {
    type = 'ECONNREFUSED';
  } else if (err.name === 'TimeoutError' || err.name === 'AbortError') {
    type = 'timeout';
  } else if (message && message.startsWith('HTTP ')) {
    type = 'http_error';
  } else if (err instanceof SyntaxError) {
    type = 'json_parse';
  }
  return { type, message };
}
69
+
70
/**
 * Whether the circuit for an operation type is open (the call should be skipped).
 * @param {string} operationType
 * @param {number} maxFailures - failure count at which the circuit opens
 * @returns {boolean} true when the operation was explicitly disabled or has
 *   accumulated at least maxFailures recorded failures
 */
function isDisabled(operationType, maxFailures) {
  const entry = circuitState.get(operationType);
  if (entry === undefined) {
    return false;
  }
  if (entry.disabled) {
    return true;
  }
  return entry.failures >= maxFailures;
}
81
+
82
/**
 * Record one failure for an operation type; once the count reaches maxFailures
 * the circuit is marked disabled for the remainder of the process.
 * @param {string} operationType
 * @param {number} maxFailures
 */
function recordFailure(operationType, maxFailures) {
  let entry = circuitState.get(operationType);
  if (!entry) {
    entry = { failures: 0, disabled: false };
  }
  entry.failures += 1;
  // Once tripped the breaker stays open; resetCircuit() is the only way back.
  entry.disabled = entry.disabled || entry.failures >= maxFailures;
  circuitState.set(operationType, entry);
}
95
+
96
/**
 * Clear all circuit-breaker state for an operation type, re-enabling it.
 * @param {string} operationType
 */
function resetCircuit(operationType) {
  circuitState.delete(operationType);
}
103
+
104
/**
 * Sends a chat completion request to a local LLM endpoint with retry and circuit-breaker logic.
 *
 * Retry policy: timeouts and HTTP-status errors are retried up to `max_retries`
 * additional times; ECONNREFUSED (server not running) fails immediately. Any
 * terminal failure is recorded against the operation's circuit breaker, and
 * once the breaker is open subsequent calls throw an error with
 * `err.type === 'circuit_open'` without touching the network.
 *
 * @param {object} config - local_llm config block (resolved)
 * @param {string} prompt - user message to send
 * @param {string} operationType - operation identifier for circuit breaker tracking
 * @param {object} [options={}] - optional parameters
 * @param {boolean} [options.logprobs] - if true, request logprobs from the API
 * @returns {Promise<{ content: string, latency_ms: number, tokens: number, logprobsData: Array<{token: string, logprob: number}>|null }>}
 * @throws {Error} circuit-open, connection-refused, timeout, HTTP, or exhausted-retry errors
 */
async function complete(config, prompt, operationType, options = {}) {
  const endpoint = config.endpoint || 'http://localhost:11434';
  const model = config.model || 'qwen2.5-coder:7b';
  const timeoutMs = config.timeout_ms || 3000;
  const maxRetries = config.max_retries != null ? config.max_retries : 1;
  const numCtx = (config.advanced && config.advanced.num_ctx) || 4096;
  const keepAlive = (config.advanced && config.advanced.keep_alive) || '30m';
  const maxFailures = (config.advanced && config.advanced.disable_after_failures) || 3;

  if (isDisabled(operationType, maxFailures)) {
    const err = new Error('Circuit open for operation: ' + operationType);
    err.type = 'circuit_open';
    throw err;
  }

  const bodyObj = {
    model,
    messages: [
      {
        role: 'system',
        content:
          'You are a precise classification assistant. Always respond with valid JSON only. No explanations outside the JSON.'
      },
      { role: 'user', content: prompt }
    ],
    response_format: { type: 'json_object' },
    temperature: 0.1,
    max_tokens: 200,
    // NOTE(review): keep_alive/num_ctx are Ollama-native options; confirm the
    // OpenAI-compat /v1 endpoint honors them at the top level (vs. /api/chat "options").
    keep_alive: keepAlive,
    num_ctx: numCtx
  };
  if (options.logprobs === true) {
    bodyObj.logprobs = true;
    bodyObj.top_logprobs = 3;
  }
  const body = JSON.stringify(bodyObj);

  const url = endpoint + '/v1/chat/completions';
  const totalAttempts = maxRetries + 1;

  let lastErr;
  for (let attempt = 0; attempt < totalAttempts; attempt++) {
    const start = performance.now();
    try {
      const res = await fetch(url, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body,
        signal: AbortSignal.timeout(timeoutMs)
      });

      if (!res.ok) {
        const errText = await res.text().catch(() => '');
        throw new Error('HTTP ' + res.status + ': ' + errText);
      }

      const json = await res.json();
      const content = json.choices[0].message.content;
      const completionTokens = (json.usage && json.usage.completion_tokens) || 0;
      const latency_ms = performance.now() - start;
      const logprobsData = (options.logprobs && json.choices[0].logprobs)
        ? json.choices[0].logprobs.content
        : null;

      return { content, latency_ms, tokens: completionTokens, logprobsData };
    } catch (err) {
      lastErr = err;
      const isConnRefused =
        (err.cause && err.cause.code === 'ECONNREFUSED') ||
        (err.message && err.message.includes('ECONNREFUSED'));

      if (isConnRefused) {
        // Server not running — no point retrying
        recordFailure(operationType, maxFailures);
        throw err;
      }

      const isTimeout = err.name === 'TimeoutError' || err.name === 'AbortError';
      const isHttpError = err.message && err.message.startsWith('HTTP ');

      if ((isTimeout || isHttpError) && attempt < totalAttempts - 1) {
        // Retryable error with attempts remaining
        continue;
      }

      // Final attempt or non-retryable error: record once and propagate.
      // (Collapses a previous if/else whose branches were identical.)
      recordFailure(operationType, maxFailures);
      throw err;
    }
  }

  // Unreachable: the loop always returns or throws. Guard defensively.
  recordFailure(operationType, maxFailures);
  throw lastErr;
}
213
+
214
+ module.exports = { tryParseJSON, categorizeError, isDisabled, recordFailure, resetCircuit, complete };