@sienklogic/plan-build-run 2.22.2 → 2.23.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. package/CHANGELOG.md +27 -0
  2. package/dashboard/package.json +2 -1
  3. package/dashboard/src/middleware/errorHandler.js +12 -2
  4. package/dashboard/src/repositories/planning.repository.js +23 -1
  5. package/dashboard/src/routes/pages.routes.js +65 -2
  6. package/dashboard/src/services/local-llm-metrics.service.js +81 -0
  7. package/dashboard/src/services/quick.service.js +62 -0
  8. package/dashboard/src/views/partials/analytics-content.ejs +61 -0
  9. package/dashboard/src/views/partials/quick-content.ejs +40 -0
  10. package/dashboard/src/views/partials/quick-detail-content.ejs +29 -0
  11. package/dashboard/src/views/partials/sidebar.ejs +8 -0
  12. package/dashboard/src/views/quick-detail.ejs +5 -0
  13. package/dashboard/src/views/quick.ejs +5 -0
  14. package/package.json +1 -1
  15. package/plugins/copilot-pbr/agents/debugger.agent.md +15 -0
  16. package/plugins/copilot-pbr/agents/researcher.agent.md +20 -0
  17. package/plugins/copilot-pbr/agents/synthesizer.agent.md +12 -0
  18. package/plugins/copilot-pbr/plugin.json +1 -1
  19. package/plugins/copilot-pbr/references/config-reference.md +89 -0
  20. package/plugins/copilot-pbr/skills/health/SKILL.md +8 -1
  21. package/plugins/copilot-pbr/skills/help/SKILL.md +4 -4
  22. package/plugins/copilot-pbr/skills/milestone/SKILL.md +12 -12
  23. package/plugins/copilot-pbr/skills/status/SKILL.md +37 -1
  24. package/plugins/cursor-pbr/.cursor-plugin/plugin.json +1 -1
  25. package/plugins/cursor-pbr/agents/debugger.md +15 -0
  26. package/plugins/cursor-pbr/agents/researcher.md +20 -0
  27. package/plugins/cursor-pbr/agents/synthesizer.md +12 -0
  28. package/plugins/cursor-pbr/references/config-reference.md +89 -0
  29. package/plugins/cursor-pbr/skills/health/SKILL.md +8 -1
  30. package/plugins/cursor-pbr/skills/help/SKILL.md +4 -4
  31. package/plugins/cursor-pbr/skills/milestone/SKILL.md +12 -12
  32. package/plugins/cursor-pbr/skills/status/SKILL.md +37 -1
  33. package/plugins/pbr/.claude-plugin/plugin.json +1 -1
  34. package/plugins/pbr/agents/debugger.md +15 -0
  35. package/plugins/pbr/agents/researcher.md +20 -0
  36. package/plugins/pbr/agents/synthesizer.md +12 -0
  37. package/plugins/pbr/references/config-reference.md +89 -0
  38. package/plugins/pbr/scripts/check-config-change.js +33 -0
  39. package/plugins/pbr/scripts/check-plan-format.js +52 -4
  40. package/plugins/pbr/scripts/check-subagent-output.js +43 -3
  41. package/plugins/pbr/scripts/config-schema.json +48 -0
  42. package/plugins/pbr/scripts/local-llm/client.js +214 -0
  43. package/plugins/pbr/scripts/local-llm/health.js +217 -0
  44. package/plugins/pbr/scripts/local-llm/metrics.js +252 -0
  45. package/plugins/pbr/scripts/local-llm/operations/classify-artifact.js +76 -0
  46. package/plugins/pbr/scripts/local-llm/operations/classify-error.js +75 -0
  47. package/plugins/pbr/scripts/local-llm/operations/score-source.js +72 -0
  48. package/plugins/pbr/scripts/local-llm/operations/summarize-context.js +62 -0
  49. package/plugins/pbr/scripts/local-llm/operations/validate-task.js +59 -0
  50. package/plugins/pbr/scripts/local-llm/router.js +101 -0
  51. package/plugins/pbr/scripts/local-llm/shadow.js +60 -0
  52. package/plugins/pbr/scripts/local-llm/threshold-tuner.js +118 -0
  53. package/plugins/pbr/scripts/pbr-tools.js +120 -3
  54. package/plugins/pbr/scripts/post-write-dispatch.js +2 -2
  55. package/plugins/pbr/scripts/progress-tracker.js +29 -3
  56. package/plugins/pbr/scripts/session-cleanup.js +36 -1
  57. package/plugins/pbr/scripts/validate-task.js +30 -1
  58. package/plugins/pbr/skills/health/SKILL.md +8 -1
  59. package/plugins/pbr/skills/help/SKILL.md +4 -4
  60. package/plugins/pbr/skills/milestone/SKILL.md +12 -12
  61. package/plugins/pbr/skills/status/SKILL.md +38 -2
@@ -0,0 +1,217 @@
1
+ /* global fetch, AbortSignal */
2
+ 'use strict';
3
+
4
+ const WARMUP_TIMEOUT_MS = 120000;
5
+
6
+ /**
7
+ * Merges the raw local_llm config block with defaults.
8
+ * @param {object|undefined} rawConfig
9
+ * @returns {object} Fully-defaulted local_llm config
10
+ */
11
+ function resolveConfig(rawConfig) {
12
+ return {
13
+ enabled: rawConfig != null && rawConfig.enabled != null ? rawConfig.enabled : false,
14
+ provider: (rawConfig && rawConfig.provider) || 'ollama',
15
+ endpoint: (rawConfig && rawConfig.endpoint) || 'http://localhost:11434',
16
+ model: (rawConfig && rawConfig.model) || 'qwen2.5-coder:7b',
17
+ timeout_ms: (rawConfig && rawConfig.timeout_ms) || 3000,
18
+ max_retries: rawConfig != null && rawConfig.max_retries != null ? rawConfig.max_retries : 1,
19
+ fallback: (rawConfig && rawConfig.fallback) || 'frontier',
20
+ routing_strategy: (rawConfig && rawConfig.routing_strategy) || 'local_first',
21
+ features: Object.assign(
22
+ {
23
+ artifact_classification: true,
24
+ task_validation: true,
25
+ plan_adequacy: false,
26
+ gap_detection: false,
27
+ context_summarization: false,
28
+ source_scoring: false
29
+ },
30
+ (rawConfig && rawConfig.features) || {}
31
+ ),
32
+ metrics: Object.assign(
33
+ {
34
+ enabled: true,
35
+ log_file: '.planning/logs/local-llm-metrics.jsonl',
36
+ show_session_summary: true,
37
+ frontier_token_rate: 3.0
38
+ },
39
+ (rawConfig && rawConfig.metrics) || {}
40
+ ),
41
+ advanced: Object.assign(
42
+ {
43
+ confidence_threshold: 0.9,
44
+ max_input_tokens: 2000,
45
+ keep_alive: '30m',
46
+ num_ctx: 4096,
47
+ disable_after_failures: 3,
48
+ shadow_mode: false
49
+ },
50
+ (rawConfig && rawConfig.advanced) || {}
51
+ )
52
+ };
53
+ }
54
+
55
/**
 * Checks availability of the configured Ollama instance and model.
 * Always resolves — never rejects.
 *
 * Four probes, in order: (1) server reachable, (2) version lookup
 * (non-fatal), (3) configured model present, (4) a tiny completion to
 * detect GPU failure and determine warm/cold state.
 *
 * @param {object} config - resolved config from resolveConfig()
 * @returns {Promise<object>} Structured health status:
 *   { available, warm?, reason, detail?, model, version }
 */
async function checkHealth(config) {
  try {
    if (!config.enabled) {
      return { available: false, reason: 'disabled', model: null, version: null };
    }

    // Short timeout for cheap probes; a longer one for calls that may
    // need the model loaded.
    const timeoutShort = 3000;
    const timeoutModel = 5000;

    // Step 1 — Check server reachable. Ollama's root route returns a
    // banner containing the string "Ollama".
    try {
      const res = await fetch(config.endpoint + '/', {
        signal: AbortSignal.timeout(timeoutShort)
      });
      const body = await res.text().catch(() => '');
      if (!body.includes('Ollama')) {
        // Something answered on this port, but it is not Ollama.
        return {
          available: false,
          reason: 'not_running',
          detail: 'Ollama is not running. Start with: ollama serve',
          model: null,
          version: null
        };
      }
    } catch (err) {
      const isConnRefused =
        (err.cause && err.cause.code === 'ECONNREFUSED') ||
        (err.message && err.message.includes('ECONNREFUSED'));
      const isTimeout = err.name === 'TimeoutError' || err.name === 'AbortError';
      if (isConnRefused || isTimeout) {
        return {
          available: false,
          reason: 'not_running',
          detail: 'Ollama is not running. Start with: ollama serve',
          model: null,
          version: null
        };
      }
      // Unexpected failure — surfaced via the outer catch as unknown_error.
      throw err;
    }

    // Step 2 — Check version (non-fatal; version stays null on any failure)
    let version = null;
    try {
      const res = await fetch(config.endpoint + '/api/version', {
        signal: AbortSignal.timeout(timeoutShort)
      });
      const data = await res.json();
      version = data.version || null;
    } catch (_) {
      version = null;
    }

    // Step 3 — Check model available via the OpenAI-compatible /v1/models
    // listing. Matches on the base model name (text before the ":tag").
    try {
      const res = await fetch(config.endpoint + '/v1/models', {
        signal: AbortSignal.timeout(timeoutModel)
      });
      const data = await res.json();
      const modelList = (data.data || []).map((m) => m.id || '');
      const baseModel = config.model.split(':')[0];
      const found = modelList.some((m) => m.startsWith(baseModel));
      if (!found) {
        return {
          available: false,
          reason: 'model_missing',
          detail: 'Run: ollama pull ' + config.model,
          model: null,
          version
        };
      }
    } catch (_err) {
      // A failed model listing is treated the same as a missing model.
      return {
        available: false,
        reason: 'model_missing',
        detail: 'Run: ollama pull ' + config.model,
        model: null,
        version
      };
    }

    // Step 4 — Detect GPU error (sleep/wake CUDA bug) with a minimal
    // completion request.
    // NOTE(review): num_ctx at the top level is not a standard OpenAI
    // chat-completions field — presumably ignored by the compat endpoint;
    // confirm against the Ollama OpenAI-compatibility docs.
    let warm = false;
    try {
      const res = await fetch(config.endpoint + '/v1/chat/completions', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({
          model: config.model,
          messages: [{ role: 'user', content: '{"status":"ok"}' }],
          max_tokens: 10,
          num_ctx: 512
        }),
        signal: AbortSignal.timeout(timeoutModel)
      });

      if (!res.ok) {
        const errBody = await res.text().catch(() => '');
        if (res.status === 500 && (errBody.includes('GPU') || errBody.includes('CUDA'))) {
          return {
            available: false,
            reason: 'gpu_error',
            detail: 'GPU error detected. Restart Ollama: ollama serve',
            model: config.model,
            version
          };
        }
        // Non-GPU HTTP error — treat as available but cold
        warm = false;
      } else {
        warm = true;
      }
    } catch (err) {
      const isTimeout = err.name === 'TimeoutError' || err.name === 'AbortError';
      if (isTimeout) {
        warm = false; // cold start in progress — skip, don't block
      } else {
        warm = false;
      }
    }

    return { available: true, warm, reason: 'ok', model: config.model, version };
  } catch (_err) {
    // Catch-all keeps the "always resolves" contract.
    return {
      available: false,
      reason: 'unknown_error',
      detail: _err.message,
      model: null,
      version: null
    };
  }
}
193
+
194
/**
 * Fire-and-forget warm-up request. Callers should NOT await this.
 * Sends a minimal completion so the model gets loaded and pinned for
 * the configured keep_alive window. Every error is swallowed.
 *
 * @param {object} config - resolved config from resolveConfig()
 */
async function warmUp(config) {
  const payload = {
    model: config.model,
    messages: [{ role: 'user', content: '{"status":"ready"}' }],
    max_tokens: 10,
    num_ctx: 512,
    keep_alive: config.advanced.keep_alive
  };

  try {
    await fetch(config.endpoint + '/v1/chat/completions', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(payload),
      signal: AbortSignal.timeout(WARMUP_TIMEOUT_MS)
    });
  } catch (_) {
    // Swallow all errors silently — fire and forget
  }
}
216
+
217
+ module.exports = { resolveConfig, checkHealth, warmUp };
@@ -0,0 +1,252 @@
1
+ 'use strict';
2
+
3
+ const fs = require('fs');
4
+ const path = require('path');
5
+
6
const MAX_ENTRIES = 200;

/**
 * Appends a metric entry to the JSONL log file, then trims the log so it
 * never holds more than MAX_ENTRIES entries. All errors are swallowed
 * silently — metrics must never crash hooks.
 *
 * @param {string} planningDir - path to the .planning directory
 * @param {object} entry - metric entry (session_id, timestamp, operation,
 *   model, latency_ms, tokens_used_local, tokens_saved_frontier, result,
 *   fallback_used, confidence)
 */
function logMetric(planningDir, entry) {
  try {
    const dir = path.join(planningDir, 'logs');
    const file = path.join(dir, 'local-llm-metrics.jsonl');

    fs.mkdirSync(dir, { recursive: true });
    fs.appendFileSync(file, JSON.stringify(entry) + '\n', 'utf8');

    // Keep only the newest MAX_ENTRIES non-empty lines.
    try {
      const lines = fs
        .readFileSync(file, 'utf8')
        .split(/\r?\n/)
        .filter((line) => line.trim() !== '');
      if (lines.length > MAX_ENTRIES) {
        fs.writeFileSync(file, lines.slice(-MAX_ENTRIES).join('\n') + '\n', 'utf8');
      }
    } catch (_) {
      // Rotation failure is non-fatal
    }
  } catch (_) {
    // Swallow all errors silently
  }
}
49
+
50
/**
 * Reads metric entries from the JSONL log that occurred at or after
 * sessionStartTime. Malformed lines and entries without a usable
 * timestamp are skipped. Returns [] when the log is missing/unreadable.
 *
 * @param {string} planningDir - path to the .planning directory
 * @param {string|Date} sessionStartTime - ISO string or Date
 * @returns {object[]} Array of matching metric entry objects
 */
function readSessionMetrics(planningDir, sessionStartTime) {
  try {
    const file = path.join(planningDir, 'logs', 'local-llm-metrics.jsonl');
    const raw = fs.readFileSync(file, 'utf8');
    const cutoff = new Date(sessionStartTime).getTime();

    const matched = [];
    for (const line of raw.split(/\r?\n/)) {
      if (line.trim() === '') continue;
      let entry;
      try {
        entry = JSON.parse(line);
      } catch (_) {
        continue; // skip malformed JSON lines
      }
      let inWindow;
      try {
        // NaN comparisons are false, so missing timestamps drop out here.
        inWindow = new Date(entry.timestamp).getTime() >= cutoff;
      } catch (_) {
        inWindow = false;
      }
      if (inWindow) matched.push(entry);
    }
    return matched;
  } catch (_) {
    return [];
  }
}
85
+
86
/**
 * Summarizes an array of metric entries into aggregate totals.
 *
 * @param {object[]} entries
 * @param {number} [frontierTokenRate=3.0] - cost per million tokens in USD
 * @returns {{ total_calls: number, fallback_count: number, avg_latency_ms: number, tokens_saved: number, cost_saved_usd: number }}
 */
function summarizeMetrics(entries, frontierTokenRate) {
  if (!entries || entries.length === 0) {
    return {
      total_calls: 0,
      fallback_count: 0,
      avg_latency_ms: 0,
      tokens_saved: 0,
      cost_saved_usd: 0
    };
  }

  const rate = frontierTokenRate ?? 3.0;

  // Single pass over the entries instead of three separate reductions.
  let fallbacks = 0;
  let latencySum = 0;
  let tokensSaved = 0;
  for (const entry of entries) {
    if (entry.fallback_used) fallbacks += 1;
    latencySum += entry.latency_ms || 0;
    tokensSaved += entry.tokens_saved_frontier || 0;
  }

  return {
    total_calls: entries.length,
    fallback_count: fallbacks,
    avg_latency_ms: latencySum / entries.length,
    tokens_saved: tokensSaved,
    cost_saved_usd: tokensSaved * (rate / 1_000_000)
  };
}
114
+
115
/**
 * Computes lifetime aggregate metrics by reading every entry in the JSONL
 * log — no date filter. Adds a by_operation breakdown keyed by operation.
 * Returns the zero-shape on any failure (missing log, unreadable file).
 *
 * @param {string} planningDir - path to the .planning directory
 * @param {number} [frontierTokenRate=3.0] - cost per million tokens in USD
 * @returns {{ total_calls: number, fallback_count: number, avg_latency_ms: number, tokens_saved: number, cost_saved_usd: number, by_operation: object }}
 */
function computeLifetimeMetrics(planningDir, frontierTokenRate) {
  const zero = {
    total_calls: 0,
    fallback_count: 0,
    avg_latency_ms: 0,
    tokens_saved: 0,
    cost_saved_usd: 0,
    by_operation: {}
  };

  try {
    let raw;
    try {
      raw = fs.readFileSync(path.join(planningDir, 'logs', 'local-llm-metrics.jsonl'), 'utf8');
    } catch (_) {
      return zero;
    }

    // Parse the JSONL, skipping blank and malformed lines.
    const entries = [];
    for (const line of raw.split(/\r?\n/)) {
      if (line.trim() === '') continue;
      try {
        const value = JSON.parse(line);
        if (value !== null) entries.push(value);
      } catch (_) {
        // skip malformed line
      }
    }
    if (entries.length === 0) return zero;

    const rate = frontierTokenRate ?? 3.0;
    let fallbacks = 0;
    let latencySum = 0;
    let tokensSaved = 0;
    const by_operation = {};

    // Single pass accumulating both the totals and the per-operation buckets.
    for (const e of entries) {
      const saved = e.tokens_saved_frontier || 0;
      if (e.fallback_used) fallbacks += 1;
      latencySum += e.latency_ms || 0;
      tokensSaved += saved;

      const op = e.operation || 'unknown';
      if (!by_operation[op]) {
        by_operation[op] = { calls: 0, fallbacks: 0, tokens_saved: 0 };
      }
      const bucket = by_operation[op];
      bucket.calls += 1;
      if (e.fallback_used) bucket.fallbacks += 1;
      bucket.tokens_saved += saved;
    }

    return {
      total_calls: entries.length,
      fallback_count: fallbacks,
      avg_latency_ms: latencySum / entries.length,
      tokens_saved: tokensSaved,
      cost_saved_usd: tokensSaved * (rate / 1_000_000),
      by_operation
    };
  } catch (_) {
    return zero;
  }
}
180
+
181
/**
 * Formats a metrics aggregate (output of summarizeMetrics) into a
 * human-readable one-liner for end-of-session display.
 *
 * @param {object} summary - output of summarizeMetrics()
 * @param {string} [model] - optional model name appended in brackets
 * @returns {string}
 */
function formatSessionSummary(summary, model) {
  if (!summary || summary.total_calls === 0) {
    return 'Local LLM: no calls this session';
  }

  const cost = summary.cost_saved_usd > 0 ? ` ($${summary.cost_saved_usd.toFixed(2)})` : '';
  const fallbacks = summary.fallback_count > 0 ? `, ${summary.fallback_count} fallback(s)` : '';
  const tag = model ? ` [${model}]` : '';
  const avg = Math.round(summary.avg_latency_ms);

  return `Local LLM: ${summary.total_calls} calls, ~${summary.tokens_saved} frontier tokens saved${cost}, avg ${avg}ms${fallbacks}${tag}`;
}
214
+
215
/**
 * Appends a shadow comparison entry to the shadow JSONL log file and
 * trims it to the newest MAX_ENTRIES entries. All errors are swallowed
 * silently.
 *
 * @param {string} planningDir - path to the .planning directory
 * @param {object} entry - shadow comparison entry (timestamp, operation,
 *   session_id, agrees, local_result, frontier_result)
 */
function logAgreement(planningDir, entry) {
  try {
    const dir = path.join(planningDir, 'logs');
    const file = path.join(dir, 'local-llm-shadow.jsonl');

    fs.mkdirSync(dir, { recursive: true });
    fs.appendFileSync(file, JSON.stringify(entry) + '\n', 'utf8');

    // Trim to the newest MAX_ENTRIES non-empty lines.
    try {
      const lines = fs
        .readFileSync(file, 'utf8')
        .split(/\r?\n/)
        .filter((line) => line.trim() !== '');
      if (lines.length > MAX_ENTRIES) {
        fs.writeFileSync(file, lines.slice(-MAX_ENTRIES).join('\n') + '\n', 'utf8');
      }
    } catch (_) {
      // Rotation failure is non-fatal
    }
  } catch (_) {
    // Swallow all errors silently
  }
}
251
+
252
+ module.exports = { logMetric, readSessionMetrics, summarizeMetrics, computeLifetimeMetrics, formatSessionSummary, logAgreement };
@@ -0,0 +1,76 @@
1
+ 'use strict';
2
+
3
+ const { complete, tryParseJSON, isDisabled } = require('../client');
4
+ const { logMetric } = require('../metrics');
5
+ const { route } = require('../router');
6
+
7
/**
 * Classifies a PLAN.md or SUMMARY.md artifact using the local LLM.
 *
 * Returns null on any failure path — feature disabled, circuit breaker
 * open, unknown fileType, routing failure, unparseable JSON, or an
 * out-of-vocabulary classification — so the caller can fall back to the
 * frontier model. On success the result is also recorded via logMetric.
 *
 * @param {object} config - resolved local_llm config block
 * @param {string} planningDir - path to the .planning directory
 * @param {string} content - file content to classify
 * @param {string} fileType - 'PLAN' or 'SUMMARY'
 * @param {string} [sessionId] - optional session identifier for metrics
 * @returns {Promise<{ classification: string, confidence: number, reason: string, latency_ms: number, fallback_used: boolean }|null>}
 */
async function classifyArtifact(config, planningDir, content, fileType, sessionId) {
  if (!config.enabled || !config.features.artifact_classification) return null;
  // Circuit breaker: skip entirely once this operation has failed too often.
  if (isDisabled('artifact-classification', config.advanced.disable_after_failures)) return null;

  // Truncate the input using a ~4 chars/token heuristic.
  // NOTE(review): the || 1024 fallback differs from resolveConfig's
  // max_input_tokens default of 2000 — confirm which is intended.
  const maxChars = (config.advanced.max_input_tokens || 1024) * 4;
  const truncatedContent = content.length > maxChars ? content.slice(0, maxChars) : content;

  let prompt;
  if (fileType === 'PLAN') {
    prompt =
      'Classify this PLAN.md as stub, partial, or complete. A stub has placeholder tasks or missing required XML elements. A partial has some tasks filled but action/verify/done are vague. A complete has all tasks with specific steps, executable verify commands, and observable done conditions. Respond with JSON: {"classification": "stub"|"partial"|"complete", "confidence": 0.0-1.0, "reason": "one sentence"}\n\nContent:\n' +
      truncatedContent;
  } else if (fileType === 'SUMMARY') {
    prompt =
      'Classify this SUMMARY.md as substantive or thin. Substantive means it has specific artifact paths, commit hashes, and observable outcomes. Thin means vague or placeholder content. Respond with JSON: {"classification": "substantive"|"thin", "confidence": 0.0-1.0, "reason": "one sentence"}\n\nContent:\n' +
      truncatedContent;
  } else {
    // Unknown artifact type — nothing to classify.
    return null;
  }

  try {
    const result = await route(config, prompt, 'artifact-classification', (logprobs) =>
      complete(config, prompt, 'artifact-classification', { logprobs })
    );
    if (result === null) return null;
    const parsed = tryParseJSON(result.content);
    if (!parsed.ok) return null;

    // Reject any classification outside the allowed vocabulary for the type.
    const validPlanClassifications = ['stub', 'partial', 'complete'];
    const validSummaryClassifications = ['substantive', 'thin'];
    const validValues = fileType === 'PLAN' ? validPlanClassifications : validSummaryClassifications;
    if (!parsed.data.classification || !validValues.includes(parsed.data.classification)) return null;

    const metricEntry = {
      session_id: sessionId || 'unknown',
      timestamp: new Date().toISOString(),
      operation: 'artifact-classification',
      model: config.model,
      latency_ms: result.latency_ms,
      tokens_used_local: result.tokens,
      // Fixed estimate of frontier tokens this local call replaced —
      // TODO confirm calibration of the 420 figure.
      tokens_saved_frontier: 420,
      result: parsed.data.classification,
      fallback_used: false,
      confidence: parsed.data.confidence || 0.9
    };
    logMetric(planningDir, metricEntry);

    return {
      classification: parsed.data.classification,
      confidence: parsed.data.confidence || 0.9,
      reason: parsed.data.reason || '',
      latency_ms: result.latency_ms,
      fallback_used: false
    };
  } catch (_) {
    // Any unexpected error degrades to the frontier fallback.
    return null;
  }
}
+
76
+ module.exports = { classifyArtifact };
@@ -0,0 +1,75 @@
1
+ 'use strict';
2
+
3
+ const { complete, tryParseJSON, isDisabled } = require('../client');
4
+ const { logMetric } = require('../metrics');
5
+ const { route } = require('../router');
6
+
7
// Closed vocabulary of error categories; anything else maps to 'unknown'.
const ERROR_CATEGORIES = [
  'connection_refused',
  'timeout',
  'missing_output',
  'wrong_output_format',
  'permission_error',
  'unknown'
];

/**
 * Classifies an agent error into one of 6 categories using the local LLM.
 *
 * Returns null on any failure (disabled, circuit breaker open, routing or
 * JSON-parse failure) so the caller can fall back. An out-of-vocabulary
 * category from the model is coerced to 'unknown' rather than rejected.
 *
 * @param {object} config - resolved local_llm config block
 * @param {string} planningDir - path to the .planning directory
 * @param {string} errorText - the error message or stack trace
 * @param {string} [agentType] - the agent type that produced the error
 * @param {string} [sessionId] - optional session identifier for metrics
 * @returns {Promise<{ category: string, confidence: number, latency_ms: number, fallback_used: boolean }|null>}
 */
async function classifyError(config, planningDir, errorText, agentType, sessionId) {
  if (!config.enabled) return null;
  // Circuit breaker: skip once this operation has failed too often.
  if (isDisabled('error-classification', config.advanced.disable_after_failures)) return null;

  // Error text is capped at 500 chars — category is usually evident early.
  const truncatedError = errorText.length > 500 ? errorText.slice(0, 500) : errorText;

  const prompt =
    'Classify this agent error into one category. Categories: connection_refused (network/ECONNREFUSED), timeout (operation timed out), missing_output (expected file/artifact not found), wrong_output_format (output exists but malformed), permission_error (filesystem/permission issue), unknown (none of the above). Respond with JSON: {"category": "<one of the 6>", "confidence": 0.0-1.0}\n\nAgent: ' +
    (agentType || 'unknown') +
    '\nError: ' +
    truncatedError;

  try {
    const result = await route(config, prompt, 'error-classification', (logprobs) =>
      complete(config, prompt, 'error-classification', { logprobs })
    );
    if (result === null) return null;
    const parsed = tryParseJSON(result.content);
    if (!parsed.ok) return null;

    // Coerce anything outside the closed vocabulary to 'unknown'.
    const category = ERROR_CATEGORIES.includes(parsed.data.category)
      ? parsed.data.category
      : 'unknown';

    const metricEntry = {
      session_id: sessionId || 'unknown',
      timestamp: new Date().toISOString(),
      operation: 'error-classification',
      model: config.model,
      latency_ms: result.latency_ms,
      tokens_used_local: result.tokens,
      // Fixed estimate of frontier tokens saved — TODO confirm calibration.
      tokens_saved_frontier: 120,
      result: category,
      fallback_used: false,
      confidence: parsed.data.confidence || 0.9
    };
    logMetric(planningDir, metricEntry);

    return {
      category,
      confidence: parsed.data.confidence || 0.9,
      latency_ms: result.latency_ms,
      fallback_used: false
    };
  } catch (_) {
    // Any unexpected error degrades to the frontier fallback.
    return null;
  }
}
74
+
75
+ module.exports = { classifyError, ERROR_CATEGORIES };
@@ -0,0 +1,72 @@
1
+ 'use strict';
2
+
3
+ const { complete, tryParseJSON, isDisabled } = require('../client');
4
+ const { logMetric } = require('../metrics');
5
+ const { route } = require('../router');
6
+
7
// Closed S0-S6 credibility scale; out-of-vocabulary levels coerce to 'S6'.
const SOURCE_LEVELS = ['S0', 'S1', 'S2', 'S3', 'S4', 'S5', 'S6'];

/**
 * Scores a research source on the S0-S6 credibility scale using the local LLM.
 *
 * S0=local prior research, S1=live MCP docs, S2=official docs, S3=official GitHub,
 * S4=verified WebSearch (2+ sources), S5=unverified WebSearch, S6=training knowledge.
 *
 * Returns null on any failure (disabled, feature flag off, circuit breaker
 * open, routing or JSON-parse failure) so the caller can fall back. A level
 * outside S0-S6 is coerced to 'S6' (least credible) rather than rejected.
 *
 * @param {object} config - resolved local_llm config block
 * @param {string} planningDir - path to the .planning directory
 * @param {string} sourceText - text content from the source
 * @param {string} sourceUrl - URL or identifier for the source
 * @param {string} [sessionId] - optional session identifier for metrics
 * @returns {Promise<{ level: string, confidence: number, reason: string, latency_ms: number, fallback_used: boolean }|null>}
 */
async function scoreSource(config, planningDir, sourceText, sourceUrl, sessionId) {
  if (!config.enabled) return null;
  // Unlike classifyError, this operation is gated behind its own feature flag.
  if (!config.features || !config.features.source_scoring) return null;
  // Circuit breaker: skip once this operation has failed too often.
  if (isDisabled('source-scoring', config.advanced.disable_after_failures)) return null;

  // Truncate the excerpt using a ~4 chars/token heuristic.
  // NOTE(review): the || 1024 fallback differs from resolveConfig's
  // max_input_tokens default of 2000 — confirm which is intended.
  const maxChars = (config.advanced.max_input_tokens || 1024) * 4;
  const truncated = sourceText.length > maxChars ? sourceText.slice(0, maxChars) : sourceText;

  const prompt =
    'Score this research source on the S0-S6 credibility scale. S0=local prior research, S1=live MCP docs, S2=official docs, S3=official GitHub, S4=verified WebSearch (2+ sources), S5=unverified WebSearch, S6=training knowledge. Respond with JSON: {"level": "S0"-"S6", "confidence": 0.0-1.0, "reason": "one sentence"}\n\nURL: ' +
    sourceUrl +
    '\nContent excerpt:\n' +
    truncated;

  try {
    const result = await route(config, prompt, 'source-scoring', (logprobs) =>
      complete(config, prompt, 'source-scoring', { logprobs })
    );
    if (result === null) return null;
    const parsed = tryParseJSON(result.content);
    if (!parsed.ok) return null;

    // Coerce anything outside the scale to the least-credible level.
    const level = SOURCE_LEVELS.includes(parsed.data.level) ? parsed.data.level : 'S6';

    const metricEntry = {
      session_id: sessionId || 'unknown',
      timestamp: new Date().toISOString(),
      operation: 'source-scoring',
      model: config.model,
      latency_ms: result.latency_ms,
      tokens_used_local: result.tokens,
      // Fixed estimate of frontier tokens saved — TODO confirm calibration.
      tokens_saved_frontier: 80,
      result: level,
      fallback_used: false,
      confidence: parsed.data.confidence || 0.9
    };
    logMetric(planningDir, metricEntry);

    return {
      level,
      confidence: parsed.data.confidence || 0.9,
      reason: parsed.data.reason || '',
      latency_ms: result.latency_ms,
      fallback_used: false
    };
  } catch (_) {
    // Any unexpected error degrades to the frontier fallback.
    return null;
  }
}
+ }
71
+
72
+ module.exports = { scoreSource, SOURCE_LEVELS };