@sienklogic/plan-build-run 2.22.1 → 2.23.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +34 -0
- package/dashboard/package.json +2 -1
- package/dashboard/src/middleware/errorHandler.js +12 -2
- package/dashboard/src/repositories/planning.repository.js +23 -1
- package/dashboard/src/routes/pages.routes.js +65 -2
- package/dashboard/src/services/local-llm-metrics.service.js +81 -0
- package/dashboard/src/services/quick.service.js +62 -0
- package/dashboard/src/views/partials/analytics-content.ejs +61 -0
- package/dashboard/src/views/partials/quick-content.ejs +40 -0
- package/dashboard/src/views/partials/quick-detail-content.ejs +29 -0
- package/dashboard/src/views/partials/sidebar.ejs +8 -0
- package/dashboard/src/views/quick-detail.ejs +5 -0
- package/dashboard/src/views/quick.ejs +5 -0
- package/package.json +1 -1
- package/plugins/copilot-pbr/agents/debugger.agent.md +15 -0
- package/plugins/copilot-pbr/agents/researcher.agent.md +20 -0
- package/plugins/copilot-pbr/agents/synthesizer.agent.md +12 -0
- package/plugins/copilot-pbr/plugin.json +1 -1
- package/plugins/copilot-pbr/references/config-reference.md +89 -0
- package/plugins/copilot-pbr/skills/continue/SKILL.md +1 -1
- package/plugins/copilot-pbr/skills/health/SKILL.md +8 -1
- package/plugins/copilot-pbr/skills/help/SKILL.md +17 -4
- package/plugins/copilot-pbr/skills/milestone/SKILL.md +13 -13
- package/plugins/copilot-pbr/skills/quick/SKILL.md +4 -1
- package/plugins/copilot-pbr/skills/setup/SKILL.md +17 -6
- package/plugins/copilot-pbr/skills/status/SKILL.md +37 -1
- package/plugins/cursor-pbr/.cursor-plugin/plugin.json +1 -1
- package/plugins/cursor-pbr/agents/debugger.md +15 -0
- package/plugins/cursor-pbr/agents/researcher.md +20 -0
- package/plugins/cursor-pbr/agents/synthesizer.md +12 -0
- package/plugins/cursor-pbr/references/config-reference.md +89 -0
- package/plugins/cursor-pbr/skills/continue/SKILL.md +1 -1
- package/plugins/cursor-pbr/skills/health/SKILL.md +8 -1
- package/plugins/cursor-pbr/skills/help/SKILL.md +17 -4
- package/plugins/cursor-pbr/skills/milestone/SKILL.md +13 -13
- package/plugins/cursor-pbr/skills/quick/SKILL.md +4 -1
- package/plugins/cursor-pbr/skills/setup/SKILL.md +17 -6
- package/plugins/cursor-pbr/skills/status/SKILL.md +37 -1
- package/plugins/pbr/.claude-plugin/plugin.json +1 -1
- package/plugins/pbr/agents/debugger.md +15 -0
- package/plugins/pbr/agents/researcher.md +20 -0
- package/plugins/pbr/agents/synthesizer.md +12 -0
- package/plugins/pbr/references/config-reference.md +89 -0
- package/plugins/pbr/scripts/check-config-change.js +33 -0
- package/plugins/pbr/scripts/check-plan-format.js +52 -4
- package/plugins/pbr/scripts/check-subagent-output.js +43 -3
- package/plugins/pbr/scripts/config-schema.json +48 -0
- package/plugins/pbr/scripts/local-llm/client.js +214 -0
- package/plugins/pbr/scripts/local-llm/health.js +217 -0
- package/plugins/pbr/scripts/local-llm/metrics.js +252 -0
- package/plugins/pbr/scripts/local-llm/operations/classify-artifact.js +76 -0
- package/plugins/pbr/scripts/local-llm/operations/classify-error.js +75 -0
- package/plugins/pbr/scripts/local-llm/operations/score-source.js +72 -0
- package/plugins/pbr/scripts/local-llm/operations/summarize-context.js +62 -0
- package/plugins/pbr/scripts/local-llm/operations/validate-task.js +59 -0
- package/plugins/pbr/scripts/local-llm/router.js +101 -0
- package/plugins/pbr/scripts/local-llm/shadow.js +60 -0
- package/plugins/pbr/scripts/local-llm/threshold-tuner.js +118 -0
- package/plugins/pbr/scripts/pbr-tools.js +120 -3
- package/plugins/pbr/scripts/post-write-dispatch.js +2 -2
- package/plugins/pbr/scripts/progress-tracker.js +29 -3
- package/plugins/pbr/scripts/session-cleanup.js +36 -1
- package/plugins/pbr/scripts/validate-task.js +30 -1
- package/plugins/pbr/skills/continue/SKILL.md +1 -1
- package/plugins/pbr/skills/health/SKILL.md +8 -1
- package/plugins/pbr/skills/help/SKILL.md +17 -4
- package/plugins/pbr/skills/milestone/SKILL.md +13 -13
- package/plugins/pbr/skills/quick/SKILL.md +4 -1
- package/plugins/pbr/skills/setup/SKILL.md +17 -6
- package/plugins/pbr/skills/status/SKILL.md +38 -2
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
/* global fetch, AbortSignal, performance */
|
|
2
|
+
'use strict';
|
|
3
|
+
|
|
4
|
+
// Circuit breaker: Map<operationType, { failures: number, disabled: boolean }>
// Module-level mutable state shared by isDisabled/recordFailure/resetCircuit;
// it persists for the lifetime of the process (per-process, not persisted).
const circuitState = new Map();
|
|
6
|
+
|
|
7
|
+
/**
 * Extracts a JSON value from model output that may be raw JSON, fenced in a
 * markdown code block, or embedded inside surrounding prose.
 * Candidates are tried in order: the raw text, the fenced block contents,
 * then the widest {...} span.
 * @param {string} text
 * @returns {{ ok: true, data: any } | { ok: false, raw: string }}
 */
function tryParseJSON(text) {
  const candidates = [text];

  // Contents of a fenced ```json ... ``` block, if present.
  const fenced = text.match(/```(?:json)?\s*\n?([\s\S]*?)\n?```/);
  if (fenced) {
    candidates.push(fenced[1].trim());
  }

  // The widest {...} span, if present.
  const braced = text.match(/\{[\s\S]*\}/);
  if (braced) {
    candidates.push(braced[0]);
  }

  for (const candidate of candidates) {
    try {
      return { ok: true, data: JSON.parse(candidate) };
    } catch (_err) {
      // Not parseable in this form — try the next candidate.
    }
  }

  return { ok: false, raw: text };
}
|
|
45
|
+
|
|
46
|
+
/**
 * Maps an error to one of 5 canonical types:
 * ECONNREFUSED, timeout, http_error, json_parse, or wrong_answer (fallback).
 * @param {Error} err
 * @returns {{ type: string, message: string }}
 */
function categorizeError(err) {
  const message = err.message;
  const connectionRefused =
    (err.cause && err.cause.code === 'ECONNREFUSED') ||
    (message && message.includes('ECONNREFUSED'));

  let type = 'wrong_answer'; // default when no more specific category matches
  if (connectionRefused) {
    type = 'ECONNREFUSED';
  } else if (err.name === 'TimeoutError' || err.name === 'AbortError') {
    type = 'timeout';
  } else if (message && message.startsWith('HTTP ')) {
    type = 'http_error';
  } else if (err instanceof SyntaxError) {
    type = 'json_parse';
  }

  return { type, message };
}
|
|
69
|
+
|
|
70
|
+
/**
 * Returns true when the circuit for an operation is open (calls should be skipped),
 * i.e. it was explicitly disabled or its failure count reached maxFailures.
 * @param {string} operationType
 * @param {number} maxFailures
 * @returns {boolean}
 */
function isDisabled(operationType, maxFailures) {
  if (!circuitState.has(operationType)) {
    return false;
  }
  const { disabled, failures } = circuitState.get(operationType);
  return disabled || failures >= maxFailures;
}
|
|
81
|
+
|
|
82
|
+
/**
 * Records one failure for an operation and opens the circuit (sets disabled)
 * once the failure count reaches maxFailures. The flag is never cleared here;
 * use resetCircuit() to re-enable an operation.
 * @param {string} operationType
 * @param {number} maxFailures
 */
function recordFailure(operationType, maxFailures) {
  let state = circuitState.get(operationType);
  if (!state) {
    state = { failures: 0, disabled: false };
    circuitState.set(operationType, state);
  }
  state.failures += 1;
  if (state.failures >= maxFailures) {
    state.disabled = true;
  }
}
|
|
95
|
+
|
|
96
|
+
/**
 * Clears all circuit-breaker state for an operation, re-enabling it.
 * Safe to call for operations that have no recorded failures.
 * @param {string} operationType
 */
function resetCircuit(operationType) {
  if (circuitState.has(operationType)) {
    circuitState.delete(operationType);
  }
}
|
|
103
|
+
|
|
104
|
+
/**
 * Sends a chat completion request to a local LLM endpoint with retry and
 * circuit-breaker logic.
 *
 * Error policy:
 *   - ECONNREFUSED: server is down — record a failure and throw immediately
 *     (retrying cannot help).
 *   - Timeout / HTTP error: retried until totalAttempts is exhausted; a
 *     failure is recorded once when giving up.
 *   - Any other error (e.g. malformed response body): record a failure and
 *     throw without retrying.
 *
 * @param {object} config - local_llm config block (resolved)
 * @param {string} prompt - user message to send
 * @param {string} operationType - operation identifier for circuit breaker tracking
 * @param {object} [options={}] - optional parameters
 * @param {boolean} [options.logprobs] - if true, request logprobs from the API
 * @returns {Promise<{ content: string, latency_ms: number, tokens: number, logprobsData: Array<{token: string, logprob: number}>|null }>}
 * @throws {Error} with `type: 'circuit_open'` when the circuit is open, or the
 *   underlying fetch/HTTP/parse error otherwise.
 */
async function complete(config, prompt, operationType, options = {}) {
  const endpoint = config.endpoint || 'http://localhost:11434';
  const model = config.model || 'qwen2.5-coder:7b';
  const timeoutMs = config.timeout_ms || 3000;
  const maxRetries = config.max_retries != null ? config.max_retries : 1;
  const numCtx = (config.advanced && config.advanced.num_ctx) || 4096;
  const keepAlive = (config.advanced && config.advanced.keep_alive) || '30m';
  const maxFailures = (config.advanced && config.advanced.disable_after_failures) || 3;

  if (isDisabled(operationType, maxFailures)) {
    const err = new Error('Circuit open for operation: ' + operationType);
    err.type = 'circuit_open';
    throw err;
  }

  const bodyObj = {
    model,
    messages: [
      {
        role: 'system',
        content:
          'You are a precise classification assistant. Always respond with valid JSON only. No explanations outside the JSON.'
      },
      { role: 'user', content: prompt }
    ],
    response_format: { type: 'json_object' },
    temperature: 0.1,
    max_tokens: 200,
    keep_alive: keepAlive,
    num_ctx: numCtx
  };
  if (options.logprobs === true) {
    bodyObj.logprobs = true;
    bodyObj.top_logprobs = 3;
  }
  const body = JSON.stringify(bodyObj);

  const url = endpoint + '/v1/chat/completions';
  const totalAttempts = maxRetries + 1;

  let lastErr;
  for (let attempt = 0; attempt < totalAttempts; attempt++) {
    const start = performance.now();
    try {
      const res = await fetch(url, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body,
        signal: AbortSignal.timeout(timeoutMs)
      });

      if (!res.ok) {
        const errText = await res.text().catch(() => '');
        throw new Error('HTTP ' + res.status + ': ' + errText);
      }

      const json = await res.json();
      const content = json.choices[0].message.content;
      const completionTokens = (json.usage && json.usage.completion_tokens) || 0;
      const latency_ms = performance.now() - start;
      const logprobsData = (options.logprobs && json.choices[0].logprobs)
        ? json.choices[0].logprobs.content
        : null;

      return { content, latency_ms, tokens: completionTokens, logprobsData };
    } catch (err) {
      lastErr = err;
      const isConnRefused =
        (err.cause && err.cause.code === 'ECONNREFUSED') ||
        (err.message && err.message.includes('ECONNREFUSED'));

      if (isConnRefused) {
        // Server not running — no point retrying.
        recordFailure(operationType, maxFailures);
        throw err;
      }

      const isTimeout = err.name === 'TimeoutError' || err.name === 'AbortError';
      const isHttpError = err.message && err.message.startsWith('HTTP ');

      if ((isTimeout || isHttpError) && attempt < totalAttempts - 1) {
        continue; // retryable and attempts remain
      }

      // Non-retryable error, or retries exhausted.
      // (The original code branched on `attempt === totalAttempts - 1` with
      // two identical recordFailure calls — collapsed into one.)
      recordFailure(operationType, maxFailures);
      throw err;
    }
  }

  // Should not reach here, but guard anyway.
  recordFailure(operationType, maxFailures);
  throw lastErr;
}
|
|
213
|
+
|
|
214
|
+
module.exports = { tryParseJSON, categorizeError, isDisabled, recordFailure, resetCircuit, complete };
|
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
/* global fetch, AbortSignal */
|
|
2
|
+
'use strict';
|
|
3
|
+
|
|
4
|
+
// Generous budget for the warm-up request (2 minutes); presumably sized for a
// cold model load — warmUp() swallows timeouts, so this never blocks callers.
const WARMUP_TIMEOUT_MS = 120000;
|
|
5
|
+
|
|
6
|
+
/**
 * Merges the raw local_llm config block with defaults.
 * User-supplied keys win over defaults; nested blocks (features, metrics,
 * advanced) are merged shallowly, key by key.
 * @param {object|undefined} rawConfig
 * @returns {object} Fully-defaulted local_llm config
 */
function resolveConfig(rawConfig) {
  const raw = rawConfig || {};

  const defaultFeatures = {
    artifact_classification: true,
    task_validation: true,
    plan_adequacy: false,
    gap_detection: false,
    context_summarization: false,
    source_scoring: false
  };

  const defaultMetrics = {
    enabled: true,
    log_file: '.planning/logs/local-llm-metrics.jsonl',
    show_session_summary: true,
    frontier_token_rate: 3.0
  };

  const defaultAdvanced = {
    confidence_threshold: 0.9,
    max_input_tokens: 2000,
    keep_alive: '30m',
    num_ctx: 4096,
    disable_after_failures: 3,
    shadow_mode: false
  };

  return {
    enabled: raw.enabled != null ? raw.enabled : false,
    provider: raw.provider || 'ollama',
    endpoint: raw.endpoint || 'http://localhost:11434',
    model: raw.model || 'qwen2.5-coder:7b',
    timeout_ms: raw.timeout_ms || 3000,
    max_retries: raw.max_retries != null ? raw.max_retries : 1,
    fallback: raw.fallback || 'frontier',
    routing_strategy: raw.routing_strategy || 'local_first',
    features: Object.assign({}, defaultFeatures, raw.features || {}),
    metrics: Object.assign({}, defaultMetrics, raw.metrics || {}),
    advanced: Object.assign({}, defaultAdvanced, raw.advanced || {})
  };
}
|
|
54
|
+
|
|
55
|
+
/**
 * Checks availability of the configured Ollama instance and model.
 * Always resolves — never rejects.
 *
 * Probes, in order:
 *   1. root endpoint   — is the server running? (looks for the "Ollama" banner)
 *   2. /api/version    — server version (best-effort; failure is non-fatal)
 *   3. /v1/models      — is the configured model pulled?
 *   4. tiny completion — detects GPU failures (sleep/wake CUDA bug) and
 *                        whether the model is already loaded ("warm")
 *
 * @param {object} config - resolved config from resolveConfig()
 * @returns {Promise<object>} Structured health status:
 *   { available, warm?, reason, detail?, model, version }
 */
async function checkHealth(config) {
  try {
    if (!config.enabled) {
      return { available: false, reason: 'disabled', model: null, version: null };
    }

    const timeoutShort = 3000;
    const timeoutModel = 5000;

    const notRunning = {
      available: false,
      reason: 'not_running',
      detail: 'Ollama is not running. Start with: ollama serve',
      model: null,
      version: null
    };

    // Step 1 — Check server reachable (root endpoint returns an Ollama banner).
    try {
      const res = await fetch(config.endpoint + '/', {
        signal: AbortSignal.timeout(timeoutShort)
      });
      const body = await res.text().catch(() => '');
      if (!body.includes('Ollama')) {
        return notRunning;
      }
    } catch (err) {
      const isConnRefused =
        (err.cause && err.cause.code === 'ECONNREFUSED') ||
        (err.message && err.message.includes('ECONNREFUSED'));
      const isTimeout = err.name === 'TimeoutError' || err.name === 'AbortError';
      if (isConnRefused || isTimeout) {
        return notRunning;
      }
      throw err; // unexpected — handled by the outer catch
    }

    // Step 2 — Check version (best-effort; non-fatal on any failure).
    let version = null;
    try {
      const res = await fetch(config.endpoint + '/api/version', {
        signal: AbortSignal.timeout(timeoutShort)
      });
      const data = await res.json();
      version = data.version || null;
    } catch (_) {
      version = null;
    }

    // Step 3 — Check model available. Tag-insensitive prefix match:
    // "qwen2.5-coder:7b" matches any listed "qwen2.5-coder*" id.
    try {
      const res = await fetch(config.endpoint + '/v1/models', {
        signal: AbortSignal.timeout(timeoutModel)
      });
      const data = await res.json();
      const modelList = (data.data || []).map((m) => m.id || '');
      const baseModel = config.model.split(':')[0];
      const found = modelList.some((m) => m.startsWith(baseModel));
      if (!found) {
        return {
          available: false,
          reason: 'model_missing',
          detail: 'Run: ollama pull ' + config.model,
          model: null,
          version
        };
      }
    } catch (_err) {
      return {
        available: false,
        reason: 'model_missing',
        detail: 'Run: ollama pull ' + config.model,
        model: null,
        version
      };
    }

    // Step 4 — Tiny completion to detect GPU errors (sleep/wake CUDA bug)
    // and determine whether the model is already loaded ("warm").
    let warm = false;
    try {
      const res = await fetch(config.endpoint + '/v1/chat/completions', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({
          model: config.model,
          messages: [{ role: 'user', content: '{"status":"ok"}' }],
          max_tokens: 10,
          num_ctx: 512
        }),
        signal: AbortSignal.timeout(timeoutModel)
      });

      if (!res.ok) {
        const errBody = await res.text().catch(() => '');
        if (res.status === 500 && (errBody.includes('GPU') || errBody.includes('CUDA'))) {
          return {
            available: false,
            reason: 'gpu_error',
            detail: 'GPU error detected. Restart Ollama: ollama serve',
            model: config.model,
            version
          };
        }
        // Non-GPU HTTP error — treat as available but cold (warm stays false).
      } else {
        warm = true;
      }
    } catch (_err) {
      // A timeout here usually means a cold start is in progress; any other
      // error is likewise treated as "cold" rather than failing the check.
      // (The original branched on isTimeout with two identical assignments —
      // collapsed.)
      warm = false;
    }

    return { available: true, warm, reason: 'ok', model: config.model, version };
  } catch (_err) {
    return {
      available: false,
      reason: 'unknown_error',
      detail: _err.message,
      model: null,
      version: null
    };
  }
}
|
|
193
|
+
|
|
194
|
+
/**
 * Fire-and-forget warm-up request. Callers should NOT await this.
 * Sends a tiny completion so the model stays loaded (keep_alive); all
 * failures — network, timeout, bad config — are swallowed on purpose.
 * @param {object} config - resolved config from resolveConfig()
 */
async function warmUp(config) {
  try {
    const payload = JSON.stringify({
      model: config.model,
      messages: [{ role: 'user', content: '{"status":"ready"}' }],
      max_tokens: 10,
      num_ctx: 512,
      keep_alive: config.advanced.keep_alive
    });
    const requestInit = {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: payload,
      signal: AbortSignal.timeout(WARMUP_TIMEOUT_MS)
    };
    await fetch(config.endpoint + '/v1/chat/completions', requestInit);
  } catch (_err) {
    // Best-effort only — ignore every failure.
  }
}
|
|
216
|
+
|
|
217
|
+
module.exports = { resolveConfig, checkHealth, warmUp };
|
|
@@ -0,0 +1,252 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const fs = require('fs');
|
|
4
|
+
const path = require('path');
|
|
5
|
+
|
|
6
|
+
// Maximum number of JSONL lines kept per log file; older entries are dropped
// during rotation in logMetric/logAgreement.
const MAX_ENTRIES = 200;
|
|
7
|
+
|
|
8
|
+
/**
 * Appends a metric entry to the JSONL log file
 * (.planning/logs/local-llm-metrics.jsonl), creating the logs directory if
 * needed, then rotates the file down to the newest MAX_ENTRIES lines.
 * Swallows all errors silently — metrics must never crash hooks.
 *
 * @param {string} planningDir - path to the .planning directory
 * @param {object} entry - metric entry (session_id, timestamp, operation,
 *   model, latency_ms, tokens_used_local, tokens_saved_frontier, result,
 *   fallback_used, confidence)
 */
function logMetric(planningDir, entry) {
  try {
    const logsDir = path.join(planningDir, 'logs');
    fs.mkdirSync(logsDir, { recursive: true });

    const logFile = path.join(logsDir, 'local-llm-metrics.jsonl');
    fs.appendFileSync(logFile, JSON.stringify(entry) + '\n', 'utf8');

    // Best-effort rotation: keep only the newest MAX_ENTRIES lines.
    try {
      const lines = fs
        .readFileSync(logFile, 'utf8')
        .split(/\r?\n/)
        .filter((line) => line.trim() !== '');
      if (lines.length > MAX_ENTRIES) {
        fs.writeFileSync(logFile, lines.slice(-MAX_ENTRIES).join('\n') + '\n', 'utf8');
      }
    } catch (_err) {
      // Rotation failure is non-fatal.
    }
  } catch (_err) {
    // Metrics are best-effort; never propagate.
  }
}
|
|
49
|
+
|
|
50
|
+
/**
 * Reads metric entries from the JSONL log that occurred at or after
 * sessionStartTime. Malformed lines and entries with unusable timestamps are
 * skipped; any file-level failure (e.g. missing log) yields an empty array.
 *
 * @param {string} planningDir - path to the .planning directory
 * @param {string|Date} sessionStartTime - ISO string or Date
 * @returns {object[]} Array of matching metric entry objects
 */
function readSessionMetrics(planningDir, sessionStartTime) {
  try {
    const logFile = path.join(planningDir, 'logs', 'local-llm-metrics.jsonl');
    const contents = fs.readFileSync(logFile, 'utf8');
    const startMs = new Date(sessionStartTime).getTime();

    const results = [];
    for (const line of contents.split(/\r?\n/)) {
      if (line.trim() === '') continue;

      let parsed;
      try {
        parsed = JSON.parse(line);
      } catch (_err) {
        continue; // skip malformed lines
      }
      if (parsed === null) continue;

      try {
        if (new Date(parsed.timestamp).getTime() >= startMs) {
          results.push(parsed);
        }
      } catch (_err) {
        // Unusable timestamp — exclude the entry.
      }
    }
    return results;
  } catch (_err) {
    return [];
  }
}
|
|
85
|
+
|
|
86
|
+
/**
 * Summarizes an array of metric entries in a single pass.
 *
 * @param {object[]} entries
 * @param {number} [frontierTokenRate=3.0] - frontier cost per million tokens (USD)
 * @returns {{ total_calls: number, fallback_count: number, avg_latency_ms: number, tokens_saved: number, cost_saved_usd: number }}
 */
function summarizeMetrics(entries, frontierTokenRate) {
  if (!entries || entries.length === 0) {
    return {
      total_calls: 0,
      fallback_count: 0,
      avg_latency_ms: 0,
      tokens_saved: 0,
      cost_saved_usd: 0
    };
  }

  const rate = frontierTokenRate != null ? frontierTokenRate : 3.0;

  let fallback_count = 0;
  let totalLatency = 0;
  let tokens_saved = 0;
  for (const entry of entries) {
    if (entry.fallback_used) fallback_count += 1;
    totalLatency += entry.latency_ms || 0;
    tokens_saved += entry.tokens_saved_frontier || 0;
  }

  const total_calls = entries.length;
  return {
    total_calls,
    fallback_count,
    avg_latency_ms: totalLatency / total_calls,
    tokens_saved,
    cost_saved_usd: tokens_saved * (rate / 1_000_000)
  };
}
|
|
114
|
+
|
|
115
|
+
/**
 * Computes lifetime aggregate metrics by reading every entry from the JSONL
 * log (no date filter). Adds a by_operation breakdown keyed by operation.
 * Returns an all-zero aggregate when the log is missing, empty, or unreadable.
 *
 * @param {string} planningDir - path to the .planning directory
 * @param {number} [frontierTokenRate=3.0] - frontier cost per million tokens (USD)
 * @returns {{ total_calls: number, fallback_count: number, avg_latency_ms: number, tokens_saved: number, cost_saved_usd: number, by_operation: object }}
 */
function computeLifetimeMetrics(planningDir, frontierTokenRate) {
  const zero = {
    total_calls: 0,
    fallback_count: 0,
    avg_latency_ms: 0,
    tokens_saved: 0,
    cost_saved_usd: 0,
    by_operation: {}
  };

  try {
    const logFile = path.join(planningDir, 'logs', 'local-llm-metrics.jsonl');
    let contents;
    try {
      contents = fs.readFileSync(logFile, 'utf8');
    } catch (_err) {
      return zero; // no log yet
    }

    const entries = [];
    for (const line of contents.split(/\r?\n/)) {
      if (line.trim() === '') continue;
      let parsed;
      try {
        parsed = JSON.parse(line);
      } catch (_err) {
        continue; // skip malformed lines
      }
      if (parsed !== null) entries.push(parsed);
    }
    if (entries.length === 0) return zero;

    const rate = frontierTokenRate != null ? frontierTokenRate : 3.0;
    const by_operation = {};
    let fallback_count = 0;
    let totalLatency = 0;
    let tokens_saved = 0;

    for (const entry of entries) {
      const fellBack = Boolean(entry.fallback_used);
      const saved = entry.tokens_saved_frontier || 0;

      if (fellBack) fallback_count += 1;
      totalLatency += entry.latency_ms || 0;
      tokens_saved += saved;

      const op = entry.operation || 'unknown';
      if (!by_operation[op]) {
        by_operation[op] = { calls: 0, fallbacks: 0, tokens_saved: 0 };
      }
      const bucket = by_operation[op];
      bucket.calls += 1;
      if (fellBack) bucket.fallbacks += 1;
      bucket.tokens_saved += saved;
    }

    const total_calls = entries.length;
    return {
      total_calls,
      fallback_count,
      avg_latency_ms: totalLatency / total_calls,
      tokens_saved,
      cost_saved_usd: tokens_saved * (rate / 1_000_000),
      by_operation
    };
  } catch (_err) {
    return zero;
  }
}
|
|
180
|
+
|
|
181
|
+
/**
 * Formats a metrics aggregate (output of summarizeMetrics) into a
 * human-readable one-liner. Cost, fallback, and model segments are included
 * only when they carry information.
 *
 * @param {object} summary - output of summarizeMetrics()
 * @param {string} [model] - optional model name
 * @returns {string}
 */
function formatSessionSummary(summary, model) {
  if (!summary || summary.total_calls === 0) {
    return 'Local LLM: no calls this session';
  }

  const { total_calls, fallback_count, avg_latency_ms, tokens_saved, cost_saved_usd } = summary;

  const cost = cost_saved_usd > 0 ? ` ($${cost_saved_usd.toFixed(2)})` : '';
  const fallbacks = fallback_count > 0 ? `, ${fallback_count} fallback(s)` : '';
  const modelTag = model ? ` [${model}]` : '';
  const avgMs = Math.round(avg_latency_ms);

  return (
    `Local LLM: ${total_calls} calls, ~${tokens_saved} frontier tokens saved` +
    `${cost}, avg ${avgMs}ms${fallbacks}${modelTag}`
  );
}
|
|
214
|
+
|
|
215
|
+
/**
 * Appends a shadow comparison entry to the shadow JSONL log file
 * (.planning/logs/local-llm-shadow.jsonl), creating the logs directory if
 * needed, then rotates the file down to the newest MAX_ENTRIES lines.
 * Swallows all errors silently.
 *
 * @param {string} planningDir - path to the .planning directory
 * @param {object} entry - shadow comparison entry (timestamp, operation,
 *   session_id, agrees, local_result, frontier_result)
 */
function logAgreement(planningDir, entry) {
  try {
    const logsDir = path.join(planningDir, 'logs');
    fs.mkdirSync(logsDir, { recursive: true });

    const logFile = path.join(logsDir, 'local-llm-shadow.jsonl');
    fs.appendFileSync(logFile, JSON.stringify(entry) + '\n', 'utf8');

    // Best-effort rotation: keep only the newest MAX_ENTRIES lines.
    try {
      const lines = fs
        .readFileSync(logFile, 'utf8')
        .split(/\r?\n/)
        .filter((line) => line.trim() !== '');
      if (lines.length > MAX_ENTRIES) {
        fs.writeFileSync(logFile, lines.slice(-MAX_ENTRIES).join('\n') + '\n', 'utf8');
      }
    } catch (_err) {
      // Rotation failure is non-fatal.
    }
  } catch (_err) {
    // Shadow logging is best-effort; never propagate.
  }
}
|
|
251
|
+
|
|
252
|
+
module.exports = { logMetric, readSessionMetrics, summarizeMetrics, computeLifetimeMetrics, formatSessionSummary, logAgreement };
|