@sienklogic/plan-build-run 2.22.2 → 2.23.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +27 -0
- package/dashboard/package.json +2 -1
- package/dashboard/src/middleware/errorHandler.js +12 -2
- package/dashboard/src/repositories/planning.repository.js +23 -1
- package/dashboard/src/routes/pages.routes.js +65 -2
- package/dashboard/src/services/local-llm-metrics.service.js +81 -0
- package/dashboard/src/services/quick.service.js +62 -0
- package/dashboard/src/views/partials/analytics-content.ejs +61 -0
- package/dashboard/src/views/partials/quick-content.ejs +40 -0
- package/dashboard/src/views/partials/quick-detail-content.ejs +29 -0
- package/dashboard/src/views/partials/sidebar.ejs +8 -0
- package/dashboard/src/views/quick-detail.ejs +5 -0
- package/dashboard/src/views/quick.ejs +5 -0
- package/package.json +1 -1
- package/plugins/copilot-pbr/agents/debugger.agent.md +15 -0
- package/plugins/copilot-pbr/agents/researcher.agent.md +20 -0
- package/plugins/copilot-pbr/agents/synthesizer.agent.md +12 -0
- package/plugins/copilot-pbr/plugin.json +1 -1
- package/plugins/copilot-pbr/references/config-reference.md +89 -0
- package/plugins/copilot-pbr/skills/health/SKILL.md +8 -1
- package/plugins/copilot-pbr/skills/help/SKILL.md +4 -4
- package/plugins/copilot-pbr/skills/milestone/SKILL.md +12 -12
- package/plugins/copilot-pbr/skills/status/SKILL.md +37 -1
- package/plugins/cursor-pbr/.cursor-plugin/plugin.json +1 -1
- package/plugins/cursor-pbr/agents/debugger.md +15 -0
- package/plugins/cursor-pbr/agents/researcher.md +20 -0
- package/plugins/cursor-pbr/agents/synthesizer.md +12 -0
- package/plugins/cursor-pbr/references/config-reference.md +89 -0
- package/plugins/cursor-pbr/skills/health/SKILL.md +8 -1
- package/plugins/cursor-pbr/skills/help/SKILL.md +4 -4
- package/plugins/cursor-pbr/skills/milestone/SKILL.md +12 -12
- package/plugins/cursor-pbr/skills/status/SKILL.md +37 -1
- package/plugins/pbr/.claude-plugin/plugin.json +1 -1
- package/plugins/pbr/agents/debugger.md +15 -0
- package/plugins/pbr/agents/researcher.md +20 -0
- package/plugins/pbr/agents/synthesizer.md +12 -0
- package/plugins/pbr/references/config-reference.md +89 -0
- package/plugins/pbr/scripts/check-config-change.js +33 -0
- package/plugins/pbr/scripts/check-plan-format.js +52 -4
- package/plugins/pbr/scripts/check-subagent-output.js +43 -3
- package/plugins/pbr/scripts/config-schema.json +48 -0
- package/plugins/pbr/scripts/local-llm/client.js +214 -0
- package/plugins/pbr/scripts/local-llm/health.js +217 -0
- package/plugins/pbr/scripts/local-llm/metrics.js +252 -0
- package/plugins/pbr/scripts/local-llm/operations/classify-artifact.js +76 -0
- package/plugins/pbr/scripts/local-llm/operations/classify-error.js +75 -0
- package/plugins/pbr/scripts/local-llm/operations/score-source.js +72 -0
- package/plugins/pbr/scripts/local-llm/operations/summarize-context.js +62 -0
- package/plugins/pbr/scripts/local-llm/operations/validate-task.js +59 -0
- package/plugins/pbr/scripts/local-llm/router.js +101 -0
- package/plugins/pbr/scripts/local-llm/shadow.js +60 -0
- package/plugins/pbr/scripts/local-llm/threshold-tuner.js +118 -0
- package/plugins/pbr/scripts/pbr-tools.js +120 -3
- package/plugins/pbr/scripts/post-write-dispatch.js +2 -2
- package/plugins/pbr/scripts/progress-tracker.js +29 -3
- package/plugins/pbr/scripts/session-cleanup.js +36 -1
- package/plugins/pbr/scripts/validate-task.js +30 -1
- package/plugins/pbr/skills/health/SKILL.md +8 -1
- package/plugins/pbr/skills/help/SKILL.md +4 -4
- package/plugins/pbr/skills/milestone/SKILL.md +12 -12
- package/plugins/pbr/skills/status/SKILL.md +38 -2
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
/* global fetch, AbortSignal */
|
|
2
|
+
'use strict';
|
|
3
|
+
|
|
4
|
+
const WARMUP_TIMEOUT_MS = 120000;
|
|
5
|
+
|
|
6
|
+
/**
|
|
7
|
+
* Merges the raw local_llm config block with defaults.
|
|
8
|
+
* @param {object|undefined} rawConfig
|
|
9
|
+
* @returns {object} Fully-defaulted local_llm config
|
|
10
|
+
*/
|
|
11
|
+
/**
 * Builds a fully-defaulted local_llm configuration object from a raw config
 * block. Top-level scalars fall back to hard-coded defaults; the nested
 * `features`, `metrics`, and `advanced` sections are merged so that any
 * caller-supplied key wins over its default.
 *
 * @param {object|undefined} rawConfig - raw local_llm config block, may be absent
 * @returns {object} Fully-defaulted local_llm config
 */
function resolveConfig(rawConfig) {
  const defaultFeatures = {
    artifact_classification: true,
    task_validation: true,
    plan_adequacy: false,
    gap_detection: false,
    context_summarization: false,
    source_scoring: false
  };
  const defaultMetrics = {
    enabled: true,
    log_file: '.planning/logs/local-llm-metrics.jsonl',
    show_session_summary: true,
    frontier_token_rate: 3.0
  };
  const defaultAdvanced = {
    confidence_threshold: 0.9,
    max_input_tokens: 2000,
    keep_alive: '30m',
    num_ctx: 4096,
    disable_after_failures: 3,
    shadow_mode: false
  };

  return {
    // `??` keeps explicit `false`/`0` values; `||` intentionally treats any
    // falsy value as "use the default" for the string/number fields below,
    // matching the original guard-chain semantics.
    enabled: rawConfig?.enabled ?? false,
    provider: rawConfig?.provider || 'ollama',
    endpoint: rawConfig?.endpoint || 'http://localhost:11434',
    model: rawConfig?.model || 'qwen2.5-coder:7b',
    timeout_ms: rawConfig?.timeout_ms || 3000,
    max_retries: rawConfig?.max_retries ?? 1,
    fallback: rawConfig?.fallback || 'frontier',
    routing_strategy: rawConfig?.routing_strategy || 'local_first',
    features: { ...defaultFeatures, ...(rawConfig?.features || {}) },
    metrics: { ...defaultMetrics, ...(rawConfig?.metrics || {}) },
    advanced: { ...defaultAdvanced, ...(rawConfig?.advanced || {}) }
  };
}
|
|
54
|
+
|
|
55
|
+
/**
|
|
56
|
+
* Checks availability of the configured Ollama instance and model.
|
|
57
|
+
* Always resolves — never rejects.
|
|
58
|
+
* @param {object} config - resolved config from resolveConfig()
|
|
59
|
+
* @returns {Promise<object>} Structured health status
|
|
60
|
+
*/
|
|
61
|
+
/**
 * Checks availability of the configured Ollama instance and model.
 * Runs four probes in order: server reachable (root endpoint body is
 * expected to mention 'Ollama'), version lookup (non-fatal), model listed,
 * and a tiny completion to detect GPU failures.
 * Always resolves — never rejects.
 *
 * @param {object} config - resolved config from resolveConfig()
 * @returns {Promise<object>} Structured health status
 *   { available, reason, model, version } plus optional detail/warm fields.
 */
async function checkHealth(config) {
  try {
    if (!config.enabled) {
      return { available: false, reason: 'disabled', model: null, version: null };
    }

    const timeoutShort = 3000;
    const timeoutModel = 5000;

    // Step 1 — Check server reachable
    try {
      const res = await fetch(config.endpoint + '/', {
        signal: AbortSignal.timeout(timeoutShort)
      });
      const body = await res.text().catch(() => '');
      if (!body.includes('Ollama')) {
        return {
          available: false,
          reason: 'not_running',
          detail: 'Ollama is not running. Start with: ollama serve',
          model: null,
          version: null
        };
      }
    } catch (err) {
      const isConnRefused =
        (err.cause && err.cause.code === 'ECONNREFUSED') ||
        (err.message && err.message.includes('ECONNREFUSED'));
      const isTimeout = err.name === 'TimeoutError' || err.name === 'AbortError';
      if (isConnRefused || isTimeout) {
        return {
          available: false,
          reason: 'not_running',
          detail: 'Ollama is not running. Start with: ollama serve',
          model: null,
          version: null
        };
      }
      throw err; // unexpected failure — let the outer catch report it
    }

    // Step 2 — Check version (non-fatal; health is still ok without it)
    let version = null;
    try {
      const res = await fetch(config.endpoint + '/api/version', {
        signal: AbortSignal.timeout(timeoutShort)
      });
      const data = await res.json();
      version = data.version || null;
    } catch (_) {
      version = null;
    }

    // Step 3 — Check model available via the OpenAI-compatible model list
    try {
      const res = await fetch(config.endpoint + '/v1/models', {
        signal: AbortSignal.timeout(timeoutModel)
      });
      const data = await res.json();
      const modelList = (data.data || []).map((m) => m.id || '');
      // Match on the base name so tag variants (e.g. ":7b") still count.
      const baseModel = config.model.split(':')[0];
      const found = modelList.some((m) => m.startsWith(baseModel));
      if (!found) {
        return {
          available: false,
          reason: 'model_missing',
          detail: 'Run: ollama pull ' + config.model,
          model: null,
          version
        };
      }
    } catch (_err) {
      // A failed model listing is reported the same as an absent model.
      return {
        available: false,
        reason: 'model_missing',
        detail: 'Run: ollama pull ' + config.model,
        model: null,
        version
      };
    }

    // Step 4 — Detect GPU error (sleep/wake CUDA bug) with a tiny completion
    let warm = false;
    try {
      const res = await fetch(config.endpoint + '/v1/chat/completions', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({
          model: config.model,
          messages: [{ role: 'user', content: '{"status":"ok"}' }],
          max_tokens: 10,
          num_ctx: 512
        }),
        signal: AbortSignal.timeout(timeoutModel)
      });

      if (!res.ok) {
        const errBody = await res.text().catch(() => '');
        if (res.status === 500 && (errBody.includes('GPU') || errBody.includes('CUDA'))) {
          return {
            available: false,
            reason: 'gpu_error',
            detail: 'GPU error detected. Restart Ollama: ollama serve',
            model: config.model,
            version
          };
        }
        // Non-GPU HTTP error — treat as available but cold
        warm = false;
      } else {
        warm = true;
      }
    } catch (_) {
      // A timeout here usually means a cold start is still in progress; any
      // other failure is equally non-fatal. Either way, report cold and move
      // on. (The original had a redundant if/else assigning warm=false in
      // both branches — collapsed.)
      warm = false;
    }

    return { available: true, warm, reason: 'ok', model: config.model, version };
  } catch (err) {
    return {
      available: false,
      reason: 'unknown_error',
      detail: err.message,
      model: null,
      version: null
    };
  }
}
|
|
193
|
+
|
|
194
|
+
/**
|
|
195
|
+
* Fire-and-forget warm-up request. Callers should NOT await this.
|
|
196
|
+
* @param {object} config - resolved config from resolveConfig()
|
|
197
|
+
*/
|
|
198
|
+
/**
 * Fire-and-forget warm-up request. Sends a tiny completion with the
 * configured keep_alive value so the model is loaded ahead of real use.
 * Callers should NOT await this. All errors are swallowed silently.
 *
 * @param {object} config - resolved config from resolveConfig()
 */
async function warmUp(config) {
  const payload = {
    model: config.model,
    messages: [{ role: 'user', content: '{"status":"ready"}' }],
    max_tokens: 10,
    num_ctx: 512,
    keep_alive: config.advanced.keep_alive
  };
  try {
    await fetch(config.endpoint + '/v1/chat/completions', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(payload),
      signal: AbortSignal.timeout(WARMUP_TIMEOUT_MS)
    });
  } catch (_) {
    // Swallow all errors silently — fire and forget
  }
}
|
|
216
|
+
|
|
217
|
+
module.exports = { resolveConfig, checkHealth, warmUp };
|
|
@@ -0,0 +1,252 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const fs = require('fs');
|
|
4
|
+
const path = require('path');
|
|
5
|
+
|
|
6
|
+
// Maximum number of lines kept in any rotated JSONL log.
const MAX_ENTRIES = 200;

/**
 * Appends a metric entry to the JSONL log file at
 * <planningDir>/logs/local-llm-metrics.jsonl, then trims the log so only
 * the most recent MAX_ENTRIES lines remain.
 * Swallows all errors silently — metrics must never crash hooks.
 *
 * @param {string} planningDir - path to the .planning directory
 * @param {object} entry - metric entry object
 * @param {string} entry.session_id
 * @param {string} entry.timestamp
 * @param {string} entry.operation
 * @param {string} entry.model
 * @param {number} entry.latency_ms
 * @param {number} entry.tokens_used_local
 * @param {number} entry.tokens_saved_frontier
 * @param {string} entry.result
 * @param {boolean} entry.fallback_used
 * @param {number} entry.confidence
 */
function logMetric(planningDir, entry) {
  try {
    const logsDir = path.join(planningDir, 'logs');
    const logFile = path.join(logsDir, 'local-llm-metrics.jsonl');

    fs.mkdirSync(logsDir, { recursive: true });
    fs.appendFileSync(logFile, `${JSON.stringify(entry)}\n`, 'utf8');

    // Rotate: re-read the file and keep only the newest MAX_ENTRIES lines.
    try {
      const lines = fs
        .readFileSync(logFile, 'utf8')
        .split(/\r?\n/)
        .filter((line) => line.trim() !== '');
      if (lines.length > MAX_ENTRIES) {
        fs.writeFileSync(logFile, `${lines.slice(-MAX_ENTRIES).join('\n')}\n`, 'utf8');
      }
    } catch (_) {
      // Rotation failure is non-fatal
    }
  } catch (_) {
    // Swallow all errors silently
  }
}
|
|
49
|
+
|
|
50
|
+
/**
|
|
51
|
+
* Reads metric entries from the JSONL log that occurred at or after sessionStartTime.
|
|
52
|
+
*
|
|
53
|
+
* @param {string} planningDir - path to the .planning directory
|
|
54
|
+
* @param {string|Date} sessionStartTime - ISO string or Date
|
|
55
|
+
* @returns {object[]} Array of matching metric entry objects
|
|
56
|
+
*/
|
|
57
|
+
/**
 * Reads metric entries from the JSONL log whose timestamps fall at or after
 * sessionStartTime. Unparseable lines and entries with unusable timestamps
 * are skipped; any read failure yields an empty array.
 *
 * @param {string} planningDir - path to the .planning directory
 * @param {string|Date} sessionStartTime - ISO string or Date
 * @returns {object[]} Array of matching metric entry objects
 */
function readSessionMetrics(planningDir, sessionStartTime) {
  try {
    const logFile = path.join(planningDir, 'logs', 'local-llm-metrics.jsonl');
    const raw = fs.readFileSync(logFile, 'utf8');
    const startMs = new Date(sessionStartTime).getTime();

    const matches = [];
    for (const line of raw.split(/\r?\n/)) {
      if (line.trim() === '') continue;

      let parsed;
      try {
        parsed = JSON.parse(line);
      } catch (_) {
        continue; // skip corrupt lines
      }
      if (parsed === null) continue;

      try {
        // Invalid timestamps produce NaN, which fails the comparison.
        if (new Date(parsed.timestamp).getTime() >= startMs) {
          matches.push(parsed);
        }
      } catch (_) {
        // Unusable timestamp — exclude the entry
      }
    }
    return matches;
  } catch (_) {
    return [];
  }
}
|
|
85
|
+
|
|
86
|
+
/**
|
|
87
|
+
* Summarizes an array of metric entries.
|
|
88
|
+
*
|
|
89
|
+
* @param {object[]} entries
|
|
90
|
+
* @param {number} [frontierTokenRate=3.0] - cost per million tokens in USD
|
|
91
|
+
* @returns {{ total_calls: number, fallback_count: number, avg_latency_ms: number, tokens_saved: number, cost_saved_usd: number }}
|
|
92
|
+
*/
|
|
93
|
+
/**
 * Summarizes an array of metric entries into aggregate totals.
 *
 * @param {object[]} entries
 * @param {number} [frontierTokenRate=3.0] - cost per million tokens in USD
 * @returns {{ total_calls: number, fallback_count: number, avg_latency_ms: number, tokens_saved: number, cost_saved_usd: number }}
 */
function summarizeMetrics(entries, frontierTokenRate) {
  if (!entries || entries.length === 0) {
    return {
      total_calls: 0,
      fallback_count: 0,
      avg_latency_ms: 0,
      tokens_saved: 0,
      cost_saved_usd: 0
    };
  }

  const rate = frontierTokenRate ?? 3.0;
  let fallback_count = 0;
  let latencySum = 0;
  let tokens_saved = 0;

  // Single pass over the entries instead of repeated filter/reduce scans.
  for (const e of entries) {
    if (e.fallback_used) fallback_count += 1;
    latencySum += e.latency_ms || 0;
    tokens_saved += e.tokens_saved_frontier || 0;
  }

  const total_calls = entries.length;
  return {
    total_calls,
    fallback_count,
    avg_latency_ms: latencySum / total_calls,
    tokens_saved,
    // rate is USD per million frontier tokens.
    cost_saved_usd: tokens_saved * (rate / 1_000_000)
  };
}
|
|
114
|
+
|
|
115
|
+
/**
|
|
116
|
+
* Computes lifetime aggregate metrics by reading all entries from the JSONL log.
|
|
117
|
+
* No date filter — reads everything. Adds a by_operation breakdown keyed by operation.
|
|
118
|
+
*
|
|
119
|
+
* @param {string} planningDir - path to the .planning directory
|
|
120
|
+
* @param {number} [frontierTokenRate=3.0] - cost per million tokens in USD
|
|
121
|
+
* @returns {{ total_calls: number, fallback_count: number, avg_latency_ms: number, tokens_saved: number, cost_saved_usd: number, by_operation: object }}
|
|
122
|
+
*/
|
|
123
|
+
/**
 * Computes lifetime aggregate metrics by reading every entry in the JSONL
 * log — no date filter. Also builds a by_operation breakdown keyed by each
 * entry's operation name. Any failure returns the all-zero aggregate.
 *
 * @param {string} planningDir - path to the .planning directory
 * @param {number} [frontierTokenRate=3.0] - cost per million tokens in USD
 * @returns {{ total_calls: number, fallback_count: number, avg_latency_ms: number, tokens_saved: number, cost_saved_usd: number, by_operation: object }}
 */
function computeLifetimeMetrics(planningDir, frontierTokenRate) {
  const zero = {
    total_calls: 0,
    fallback_count: 0,
    avg_latency_ms: 0,
    tokens_saved: 0,
    cost_saved_usd: 0,
    by_operation: {}
  };

  try {
    const logFile = path.join(planningDir, 'logs', 'local-llm-metrics.jsonl');
    let raw;
    try {
      raw = fs.readFileSync(logFile, 'utf8');
    } catch (_) {
      return zero; // no log yet — nothing recorded
    }

    // Parse each non-empty line, skipping corrupt or null entries.
    const entries = [];
    for (const line of raw.split(/\r?\n/)) {
      if (line.trim() === '') continue;
      try {
        const parsed = JSON.parse(line);
        if (parsed !== null) entries.push(parsed);
      } catch (_) {
        // Skip corrupt lines
      }
    }
    if (entries.length === 0) return zero;

    const rate = frontierTokenRate ?? 3.0;
    let fallback_count = 0;
    let latencySum = 0;
    let tokens_saved = 0;
    const by_operation = {};

    // Single pass builds both the totals and the per-operation breakdown.
    for (const e of entries) {
      const saved = e.tokens_saved_frontier || 0;
      if (e.fallback_used) fallback_count += 1;
      latencySum += e.latency_ms || 0;
      tokens_saved += saved;

      const op = e.operation || 'unknown';
      if (!by_operation[op]) {
        by_operation[op] = { calls: 0, fallbacks: 0, tokens_saved: 0 };
      }
      by_operation[op].calls += 1;
      if (e.fallback_used) by_operation[op].fallbacks += 1;
      by_operation[op].tokens_saved += saved;
    }

    return {
      total_calls: entries.length,
      fallback_count,
      avg_latency_ms: latencySum / entries.length,
      tokens_saved,
      cost_saved_usd: tokens_saved * (rate / 1_000_000),
      by_operation
    };
  } catch (_) {
    return zero;
  }
}
|
|
180
|
+
|
|
181
|
+
/**
|
|
182
|
+
* Formats a metrics aggregate (output of summarizeMetrics) into a human-readable one-liner.
|
|
183
|
+
*
|
|
184
|
+
* @param {object} summary - output of summarizeMetrics()
|
|
185
|
+
* @param {string} [model] - optional model name
|
|
186
|
+
* @returns {string}
|
|
187
|
+
*/
|
|
188
|
+
/**
 * Formats a metrics aggregate (output of summarizeMetrics) into a
 * human-readable one-liner.
 *
 * @param {object} summary - output of summarizeMetrics()
 * @param {string} [model] - optional model name, appended in brackets
 * @returns {string}
 */
function formatSessionSummary(summary, model) {
  if (!summary || summary.total_calls === 0) {
    return 'Local LLM: no calls this session';
  }

  const { total_calls, fallback_count, avg_latency_ms, tokens_saved, cost_saved_usd } = summary;

  // Optional suffixes are empty strings when their value is absent/zero.
  const costStr = cost_saved_usd > 0 ? ` ($${cost_saved_usd.toFixed(2)})` : '';
  const fallbackStr = fallback_count > 0 ? `, ${fallback_count} fallback(s)` : '';
  const modelStr = model ? ` [${model}]` : '';
  const avgMs = Math.round(avg_latency_ms);

  return `Local LLM: ${total_calls} calls, ~${tokens_saved} frontier tokens saved${costStr}, avg ${avgMs}ms${fallbackStr}${modelStr}`;
}
|
|
214
|
+
|
|
215
|
+
/**
|
|
216
|
+
* Appends a shadow comparison entry to the shadow JSONL log file.
|
|
217
|
+
* Rotates to keep only the last 200 entries. Swallows all errors silently.
|
|
218
|
+
*
|
|
219
|
+
* @param {string} planningDir - path to the .planning directory
|
|
220
|
+
* @param {object} entry - shadow comparison entry object
|
|
221
|
+
* @param {string} entry.timestamp
|
|
222
|
+
* @param {string} entry.operation
|
|
223
|
+
* @param {string} entry.session_id
|
|
224
|
+
* @param {boolean} entry.agrees
|
|
225
|
+
* @param {string|null} entry.local_result
|
|
226
|
+
* @param {string} entry.frontier_result
|
|
227
|
+
*/
|
|
228
|
+
/**
 * Appends a shadow comparison entry to the shadow JSONL log at
 * <planningDir>/logs/local-llm-shadow.jsonl, then trims the log so only the
 * most recent MAX_ENTRIES lines remain.
 * Swallows all errors silently — shadow logging must never crash hooks.
 *
 * @param {string} planningDir - path to the .planning directory
 * @param {object} entry - shadow comparison entry object
 * @param {string} entry.timestamp
 * @param {string} entry.operation
 * @param {string} entry.session_id
 * @param {boolean} entry.agrees
 * @param {string|null} entry.local_result
 * @param {string} entry.frontier_result
 */
function logAgreement(planningDir, entry) {
  try {
    const logsDir = path.join(planningDir, 'logs');
    const logFile = path.join(logsDir, 'local-llm-shadow.jsonl');

    fs.mkdirSync(logsDir, { recursive: true });
    fs.appendFileSync(logFile, `${JSON.stringify(entry)}\n`, 'utf8');

    // Rotate: re-read the file and keep only the newest MAX_ENTRIES lines.
    try {
      const lines = fs
        .readFileSync(logFile, 'utf8')
        .split(/\r?\n/)
        .filter((line) => line.trim() !== '');
      if (lines.length > MAX_ENTRIES) {
        fs.writeFileSync(logFile, `${lines.slice(-MAX_ENTRIES).join('\n')}\n`, 'utf8');
      }
    } catch (_) {
      // Rotation failure is non-fatal
    }
  } catch (_) {
    // Swallow all errors silently
  }
}
|
|
251
|
+
|
|
252
|
+
module.exports = { logMetric, readSessionMetrics, summarizeMetrics, computeLifetimeMetrics, formatSessionSummary, logAgreement };
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const { complete, tryParseJSON, isDisabled } = require('../client');
|
|
4
|
+
const { logMetric } = require('../metrics');
|
|
5
|
+
const { route } = require('../router');
|
|
6
|
+
|
|
7
|
+
/**
|
|
8
|
+
* Classifies a PLAN.md or SUMMARY.md artifact using the local LLM.
|
|
9
|
+
*
|
|
10
|
+
* @param {object} config - resolved local_llm config block
|
|
11
|
+
* @param {string} planningDir - path to the .planning directory
|
|
12
|
+
* @param {string} content - file content to classify
|
|
13
|
+
* @param {string} fileType - 'PLAN' or 'SUMMARY'
|
|
14
|
+
* @param {string} [sessionId] - optional session identifier for metrics
|
|
15
|
+
* @returns {Promise<{ classification: string, confidence: number, reason: string, latency_ms: number, fallback_used: boolean }|null>}
|
|
16
|
+
*/
|
|
17
|
+
/**
 * Classifies a PLAN.md or SUMMARY.md artifact using the local LLM.
 * Returns null whenever the local path is unavailable (feature disabled,
 * circuit breaker tripped, unknown file type, or unusable model output) so
 * callers can fall back to the frontier model.
 *
 * @param {object} config - resolved local_llm config block
 * @param {string} planningDir - path to the .planning directory
 * @param {string} content - file content to classify
 * @param {string} fileType - 'PLAN' or 'SUMMARY'
 * @param {string} [sessionId] - optional session identifier for metrics
 * @returns {Promise<{ classification: string, confidence: number, reason: string, latency_ms: number, fallback_used: boolean }|null>}
 */
async function classifyArtifact(config, planningDir, content, fileType, sessionId) {
  // Guard `config.features` defensively (consistent with scoreSource) so a
  // raw, un-defaulted config block cannot crash the hook with a TypeError.
  if (!config.enabled || !config.features || !config.features.artifact_classification) return null;
  if (isDisabled('artifact-classification', config.advanced.disable_after_failures)) return null;

  // ~4 chars per token heuristic to cap prompt size.
  const maxChars = (config.advanced.max_input_tokens || 1024) * 4;
  const truncatedContent = content.length > maxChars ? content.slice(0, maxChars) : content;

  let prompt;
  if (fileType === 'PLAN') {
    prompt =
      'Classify this PLAN.md as stub, partial, or complete. A stub has placeholder tasks or missing required XML elements. A partial has some tasks filled but action/verify/done are vague. A complete has all tasks with specific steps, executable verify commands, and observable done conditions. Respond with JSON: {"classification": "stub"|"partial"|"complete", "confidence": 0.0-1.0, "reason": "one sentence"}\n\nContent:\n' +
      truncatedContent;
  } else if (fileType === 'SUMMARY') {
    prompt =
      'Classify this SUMMARY.md as substantive or thin. Substantive means it has specific artifact paths, commit hashes, and observable outcomes. Thin means vague or placeholder content. Respond with JSON: {"classification": "substantive"|"thin", "confidence": 0.0-1.0, "reason": "one sentence"}\n\nContent:\n' +
      truncatedContent;
  } else {
    return null;
  }

  try {
    const result = await route(config, prompt, 'artifact-classification', (logprobs) =>
      complete(config, prompt, 'artifact-classification', { logprobs })
    );
    if (result === null) return null;
    const parsed = tryParseJSON(result.content);
    if (!parsed.ok) return null;

    const validValues =
      fileType === 'PLAN' ? ['stub', 'partial', 'complete'] : ['substantive', 'thin'];
    if (!parsed.data.classification || !validValues.includes(parsed.data.classification)) return null;

    // `??` (not `||`) so a reported confidence of 0 is preserved rather than
    // being silently inflated to the 0.9 default.
    const confidence = parsed.data.confidence ?? 0.9;

    logMetric(planningDir, {
      session_id: sessionId || 'unknown',
      timestamp: new Date().toISOString(),
      operation: 'artifact-classification',
      model: config.model,
      latency_ms: result.latency_ms,
      tokens_used_local: result.tokens,
      tokens_saved_frontier: 420, // fixed estimate of avoided frontier tokens
      result: parsed.data.classification,
      fallback_used: false,
      confidence
    });

    return {
      classification: parsed.data.classification,
      confidence,
      reason: parsed.data.reason || '',
      latency_ms: result.latency_ms,
      fallback_used: false
    };
  } catch (_) {
    return null;
  }
}
|
|
75
|
+
|
|
76
|
+
module.exports = { classifyArtifact };
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const { complete, tryParseJSON, isDisabled } = require('../client');
|
|
4
|
+
const { logMetric } = require('../metrics');
|
|
5
|
+
const { route } = require('../router');
|
|
6
|
+
|
|
7
|
+
// The six categories an agent error can be classified into. Exported so
// callers can validate/iterate the set.
const ERROR_CATEGORIES = [
  'connection_refused',
  'timeout',
  'missing_output',
  'wrong_output_format',
  'permission_error',
  'unknown'
];

/**
 * Classifies an agent error into one of the 6 ERROR_CATEGORIES using the
 * local LLM. Returns null when the local path is unavailable (disabled,
 * circuit breaker tripped, or unusable model output); an unrecognized
 * category from the model is coerced to 'unknown'.
 *
 * @param {object} config - resolved local_llm config block
 * @param {string} planningDir - path to the .planning directory
 * @param {string} errorText - the error message or stack trace
 * @param {string} [agentType] - the agent type that produced the error
 * @param {string} [sessionId] - optional session identifier for metrics
 * @returns {Promise<{ category: string, confidence: number, latency_ms: number, fallback_used: boolean }|null>}
 */
async function classifyError(config, planningDir, errorText, agentType, sessionId) {
  if (!config.enabled) return null;
  if (isDisabled('error-classification', config.advanced.disable_after_failures)) return null;

  // Cap the error excerpt at 500 characters to keep the prompt small.
  const excerpt = errorText.length > 500 ? errorText.slice(0, 500) : errorText;

  const prompt =
    'Classify this agent error into one category. Categories: connection_refused (network/ECONNREFUSED), timeout (operation timed out), missing_output (expected file/artifact not found), wrong_output_format (output exists but malformed), permission_error (filesystem/permission issue), unknown (none of the above). Respond with JSON: {"category": "<one of the 6>", "confidence": 0.0-1.0}\n\nAgent: ' +
    (agentType || 'unknown') +
    '\nError: ' +
    excerpt;

  try {
    const result = await route(config, prompt, 'error-classification', (logprobs) =>
      complete(config, prompt, 'error-classification', { logprobs })
    );
    if (result === null) return null;

    const parsed = tryParseJSON(result.content);
    if (!parsed.ok) return null;

    const { category: rawCategory, confidence: rawConfidence } = parsed.data;
    const category = ERROR_CATEGORIES.includes(rawCategory) ? rawCategory : 'unknown';
    const confidence = rawConfidence || 0.9;

    logMetric(planningDir, {
      session_id: sessionId || 'unknown',
      timestamp: new Date().toISOString(),
      operation: 'error-classification',
      model: config.model,
      latency_ms: result.latency_ms,
      tokens_used_local: result.tokens,
      tokens_saved_frontier: 120, // fixed estimate of avoided frontier tokens
      result: category,
      fallback_used: false,
      confidence
    });

    return { category, confidence, latency_ms: result.latency_ms, fallback_used: false };
  } catch (_) {
    return null;
  }
}
|
|
74
|
+
|
|
75
|
+
module.exports = { classifyError, ERROR_CATEGORIES };
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const { complete, tryParseJSON, isDisabled } = require('../client');
|
|
4
|
+
const { logMetric } = require('../metrics');
|
|
5
|
+
const { route } = require('../router');
|
|
6
|
+
|
|
7
|
+
// The seven credibility levels on the S0-S6 scale, best (S0) to worst (S6).
const SOURCE_LEVELS = ['S0', 'S1', 'S2', 'S3', 'S4', 'S5', 'S6'];

/**
 * Scores a research source on the S0-S6 credibility scale using the local
 * LLM. S0=local prior research, S1=live MCP docs, S2=official docs,
 * S3=official GitHub, S4=verified WebSearch (2+ sources), S5=unverified
 * WebSearch, S6=training knowledge.
 * Returns null when the local path is unavailable (disabled, feature off,
 * circuit breaker tripped, or unusable model output); an unrecognized level
 * from the model is coerced to 'S6'.
 *
 * @param {object} config - resolved local_llm config block
 * @param {string} planningDir - path to the .planning directory
 * @param {string} sourceText - text content from the source
 * @param {string} sourceUrl - URL or identifier for the source
 * @param {string} [sessionId] - optional session identifier for metrics
 * @returns {Promise<{ level: string, confidence: number, reason: string, latency_ms: number, fallback_used: boolean }|null>}
 */
async function scoreSource(config, planningDir, sourceText, sourceUrl, sessionId) {
  if (!config.enabled) return null;
  if (!config.features || !config.features.source_scoring) return null;
  if (isDisabled('source-scoring', config.advanced.disable_after_failures)) return null;

  // ~4 chars per token heuristic to cap prompt size.
  const maxChars = (config.advanced.max_input_tokens || 1024) * 4;
  const excerpt = sourceText.length > maxChars ? sourceText.slice(0, maxChars) : sourceText;

  const prompt =
    'Score this research source on the S0-S6 credibility scale. S0=local prior research, S1=live MCP docs, S2=official docs, S3=official GitHub, S4=verified WebSearch (2+ sources), S5=unverified WebSearch, S6=training knowledge. Respond with JSON: {"level": "S0"-"S6", "confidence": 0.0-1.0, "reason": "one sentence"}\n\nURL: ' +
    sourceUrl +
    '\nContent excerpt:\n' +
    excerpt;

  try {
    const result = await route(config, prompt, 'source-scoring', (logprobs) =>
      complete(config, prompt, 'source-scoring', { logprobs })
    );
    if (result === null) return null;

    const parsed = tryParseJSON(result.content);
    if (!parsed.ok) return null;

    const level = SOURCE_LEVELS.includes(parsed.data.level) ? parsed.data.level : 'S6';
    const confidence = parsed.data.confidence || 0.9;

    logMetric(planningDir, {
      session_id: sessionId || 'unknown',
      timestamp: new Date().toISOString(),
      operation: 'source-scoring',
      model: config.model,
      latency_ms: result.latency_ms,
      tokens_used_local: result.tokens,
      tokens_saved_frontier: 80, // fixed estimate of avoided frontier tokens
      result: level,
      fallback_used: false,
      confidence
    });

    return {
      level,
      confidence,
      reason: parsed.data.reason || '',
      latency_ms: result.latency_ms,
      fallback_used: false
    };
  } catch (_) {
    return null;
  }
}
|
|
71
|
+
|
|
72
|
+
module.exports = { scoreSource, SOURCE_LEVELS };
|