@sienklogic/plan-build-run 2.22.2 → 2.24.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +42 -0
- package/dashboard/package.json +3 -2
- package/dashboard/src/middleware/errorHandler.js +12 -2
- package/dashboard/src/repositories/planning.repository.js +24 -12
- package/dashboard/src/routes/pages.routes.js +182 -4
- package/dashboard/src/server.js +4 -0
- package/dashboard/src/services/audit.service.js +42 -0
- package/dashboard/src/services/dashboard.service.js +1 -12
- package/dashboard/src/services/local-llm-metrics.service.js +81 -0
- package/dashboard/src/services/quick.service.js +62 -0
- package/dashboard/src/services/roadmap.service.js +1 -11
- package/dashboard/src/utils/strip-bom.js +8 -0
- package/dashboard/src/views/audit-detail.ejs +5 -0
- package/dashboard/src/views/audits.ejs +5 -0
- package/dashboard/src/views/partials/analytics-content.ejs +61 -0
- package/dashboard/src/views/partials/audit-detail-content.ejs +12 -0
- package/dashboard/src/views/partials/audits-content.ejs +34 -0
- package/dashboard/src/views/partials/quick-content.ejs +40 -0
- package/dashboard/src/views/partials/quick-detail-content.ejs +29 -0
- package/dashboard/src/views/partials/sidebar.ejs +16 -0
- package/dashboard/src/views/partials/todos-content.ejs +13 -3
- package/dashboard/src/views/quick-detail.ejs +5 -0
- package/dashboard/src/views/quick.ejs +5 -0
- package/package.json +1 -1
- package/plugins/copilot-pbr/agents/debugger.agent.md +15 -0
- package/plugins/copilot-pbr/agents/integration-checker.agent.md +9 -2
- package/plugins/copilot-pbr/agents/planner.agent.md +19 -0
- package/plugins/copilot-pbr/agents/researcher.agent.md +20 -0
- package/plugins/copilot-pbr/agents/synthesizer.agent.md +12 -0
- package/plugins/copilot-pbr/agents/verifier.agent.md +22 -2
- package/plugins/copilot-pbr/plugin.json +1 -1
- package/plugins/copilot-pbr/references/config-reference.md +89 -0
- package/plugins/copilot-pbr/references/plan-format.md +22 -0
- package/plugins/copilot-pbr/skills/health/SKILL.md +8 -1
- package/plugins/copilot-pbr/skills/help/SKILL.md +4 -4
- package/plugins/copilot-pbr/skills/milestone/SKILL.md +12 -12
- package/plugins/copilot-pbr/skills/status/SKILL.md +37 -1
- package/plugins/copilot-pbr/templates/INTEGRATION-REPORT.md.tmpl +18 -2
- package/plugins/copilot-pbr/templates/VERIFICATION-DETAIL.md.tmpl +2 -1
- package/plugins/cursor-pbr/.cursor-plugin/plugin.json +1 -1
- package/plugins/cursor-pbr/agents/debugger.md +15 -0
- package/plugins/cursor-pbr/agents/integration-checker.md +9 -2
- package/plugins/cursor-pbr/agents/planner.md +19 -0
- package/plugins/cursor-pbr/agents/researcher.md +20 -0
- package/plugins/cursor-pbr/agents/synthesizer.md +12 -0
- package/plugins/cursor-pbr/agents/verifier.md +22 -2
- package/plugins/cursor-pbr/references/config-reference.md +89 -0
- package/plugins/cursor-pbr/references/plan-format.md +22 -0
- package/plugins/cursor-pbr/skills/health/SKILL.md +8 -1
- package/plugins/cursor-pbr/skills/help/SKILL.md +4 -4
- package/plugins/cursor-pbr/skills/milestone/SKILL.md +12 -12
- package/plugins/cursor-pbr/skills/status/SKILL.md +37 -1
- package/plugins/cursor-pbr/templates/INTEGRATION-REPORT.md.tmpl +18 -2
- package/plugins/cursor-pbr/templates/VERIFICATION-DETAIL.md.tmpl +2 -1
- package/plugins/pbr/.claude-plugin/plugin.json +1 -1
- package/plugins/pbr/agents/debugger.md +15 -0
- package/plugins/pbr/agents/integration-checker.md +9 -2
- package/plugins/pbr/agents/planner.md +19 -0
- package/plugins/pbr/agents/researcher.md +20 -0
- package/plugins/pbr/agents/synthesizer.md +12 -0
- package/plugins/pbr/agents/verifier.md +22 -2
- package/plugins/pbr/references/config-reference.md +89 -0
- package/plugins/pbr/references/plan-format.md +22 -0
- package/plugins/pbr/scripts/check-config-change.js +33 -0
- package/plugins/pbr/scripts/check-plan-format.js +52 -4
- package/plugins/pbr/scripts/check-subagent-output.js +43 -3
- package/plugins/pbr/scripts/config-schema.json +48 -0
- package/plugins/pbr/scripts/local-llm/client.js +214 -0
- package/plugins/pbr/scripts/local-llm/health.js +217 -0
- package/plugins/pbr/scripts/local-llm/metrics.js +252 -0
- package/plugins/pbr/scripts/local-llm/operations/classify-artifact.js +76 -0
- package/plugins/pbr/scripts/local-llm/operations/classify-error.js +75 -0
- package/plugins/pbr/scripts/local-llm/operations/score-source.js +72 -0
- package/plugins/pbr/scripts/local-llm/operations/summarize-context.js +62 -0
- package/plugins/pbr/scripts/local-llm/operations/validate-task.js +59 -0
- package/plugins/pbr/scripts/local-llm/router.js +101 -0
- package/plugins/pbr/scripts/local-llm/shadow.js +60 -0
- package/plugins/pbr/scripts/local-llm/threshold-tuner.js +118 -0
- package/plugins/pbr/scripts/pbr-tools.js +120 -3
- package/plugins/pbr/scripts/post-write-dispatch.js +2 -2
- package/plugins/pbr/scripts/progress-tracker.js +29 -3
- package/plugins/pbr/scripts/session-cleanup.js +36 -1
- package/plugins/pbr/scripts/validate-task.js +30 -1
- package/plugins/pbr/skills/health/SKILL.md +8 -1
- package/plugins/pbr/skills/help/SKILL.md +4 -4
- package/plugins/pbr/skills/milestone/SKILL.md +12 -12
- package/plugins/pbr/skills/status/SKILL.md +38 -2
- package/plugins/pbr/templates/INTEGRATION-REPORT.md.tmpl +18 -2
- package/plugins/pbr/templates/VERIFICATION-DETAIL.md.tmpl +2 -1
- package/dashboard/src/views/coming-soon.ejs +0 -11
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const { runShadow } = require('./shadow');
|
|
4
|
+
|
|
5
|
+
// Complexity score above which the default `local_first` strategy in route() skips the local model.
const COMPLEXITY_HIGH_THRESHOLD = 0.65;
|
|
6
|
+
|
|
7
|
+
/**
 * Scores the complexity of a prompt using a weighted surface heuristic.
 * Returns a value in [0, 1] where higher means more complex.
 *
 * Each signal is normalised to [0, 1] (saturating) and then weighted:
 *   - word count, saturating at 500 words ............... 0.25
 *   - fenced code blocks (pairs of ```), saturating at 3 . 0.20
 *   - constraint keywords, saturating at 5 .............. 0.20
 *   - reasoning keywords, saturating at 3 ............... 0.20
 *   - mentions json / schema / yaml / frontmatter ....... 0.15
 *
 * @param {string} prompt - prompt text; must be a string (string methods are called on it)
 * @returns {number}
 */
function scoreComplexity(prompt) {
  // Count runs of non-whitespace instead of splitting on whitespace: split()
  // yields one "word" for an empty prompt and an extra empty token for every
  // leading or trailing run of whitespace.
  const words = (prompt.match(/\S+/g) || []).length;
  // Two fence markers make one block, so an unterminated fence counts as half.
  const codeBlocks = (prompt.match(/```/g) || []).length / 2;
  const constraints = (prompt.match(/\b(must|should|exactly|only|never|always)\b/gi) || []).length;
  const reasoning = (prompt.match(/\b(why|explain|compare|analyze|reason|evaluate)\b/gi) || []).length;
  const structuredOut = /\b(json|schema|yaml|frontmatter)\b/i.test(prompt) ? 1 : 0;

  const score =
    Math.min(words / 500, 1.0) * 0.25 +
    Math.min(codeBlocks / 3, 1.0) * 0.20 +
    Math.min(constraints / 5, 1.0) * 0.20 +
    Math.min(reasoning / 3, 1.0) * 0.20 +
    structuredOut * 0.15;

  // The weights sum to 1, but floating-point addition may land a hair above it.
  return Math.min(score, 1);
}
|
|
26
|
+
|
|
27
|
+
/**
 * Extracts a confidence score from logprobs data returned by the local LLM.
 *
 * The score is exp(mean token logprob), i.e. the geometric mean of the token
 * probabilities, clamped to [0, 1].
 *
 * Returns null when no usable score can be computed: the input is missing,
 * not an array, empty, or contains an entry without a numeric `logprob`.
 * Returning null (rather than NaN) matters because NaN compares false against
 * every threshold and would slip past a `confidence < threshold` check.
 *
 * @param {Array<{token: string, logprob: number}>|null|undefined} logprobsData
 * @returns {number|null}
 */
function extractConfidence(logprobsData) {
  if (!Array.isArray(logprobsData) || logprobsData.length === 0) return null;

  let sum = 0;
  for (const entry of logprobsData) {
    const logprob = entry ? entry.logprob : undefined;
    if (typeof logprob !== 'number' || Number.isNaN(logprob)) return null;
    sum += logprob;
  }

  const avgLogprob = sum / logprobsData.length;
  // Infinity + -Infinity still produces NaN even when every entry is a number.
  if (Number.isNaN(avgLogprob)) return null;

  return Math.min(1, Math.max(0, Math.exp(avgLogprob)));
}
|
|
40
|
+
|
|
41
|
+
/**
 * Routes a prompt through local LLM or signals caller to use frontier model.
 * Returns the local LLM result if local is suitable, or null if caller should
 * fall back to frontier. Never throws — all errors return null.
 *
 * Strategies (config.routing_strategy, default 'local_first'):
 *   - 'quality_first': local only when complexity < 0.3; no confidence check.
 *   - 'balanced':      local when complexity <= 0.45 and confidence >= 0.75.
 *   - 'local_first':   local when complexity <= COMPLEXITY_HIGH_THRESHOLD and
 *                      confidence >= config.advanced.confidence_threshold (default 0.9).
 *
 * @param {object} config - local_llm config block with routing_strategy and advanced settings
 * @param {string} prompt - the prompt being routed
 * @param {string} operationType - operation identifier
 * @param {function(boolean): Promise<{content: string, logprobsData: Array|null}>} callLocalFn
 *   Async function accepting a logprobs boolean, returns the local LLM result object.
 * @param {string} [planningDir] - path to .planning directory; when provided enables shadow mode
 * @param {Function} [frontierResultFn] - async function that calls the frontier model;
 *   NOTE: parameter inversion vs shadow.js — here LOCAL has already run (it's the primary result)
 *   and FRONTIER is the shadow. We pass frontierResultFn as shadow.js arg 4 (localResultFn slot)
 *   so shadow.js calls it, and result.content as arg 5 (frontierResult slot, the committed result).
 * @returns {Promise<{content: string, logprobsData: Array|null}|null>}
 */
async function route(config, prompt, operationType, callLocalFn, planningDir, frontierResultFn) {
  const QUALITY_FIRST_MAX_COMPLEXITY = 0.3;
  const BALANCED_MAX_COMPLEXITY = 0.45;
  const BALANCED_MIN_CONFIDENCE = 0.75;
  const DEFAULT_CONFIDENCE_THRESHOLD = 0.9;

  // Starts the optional shadow comparison for an accepted local result.
  // Shadowing is advisory: a failure to start it must not turn an accepted
  // result into a frontier fallback, so its errors are contained here.
  const startShadow = (result) => {
    if (result === null || !planningDir || !frontierResultFn) return;
    // Read outside the inner try so a malformed (undefined) result still
    // reaches route's own catch and is reported as null.
    const committedContent = result.content;
    try {
      runShadow(config, planningDir, operationType, frontierResultFn, committedContent);
    } catch (_) {
      // Shadow comparison is best-effort only.
    }
  };

  try {
    const routingStrategy = (config && config.routing_strategy) || 'local_first';
    const confidenceThreshold =
      (config && config.advanced && config.advanced.confidence_threshold) || DEFAULT_CONFIDENCE_THRESHOLD;
    const score = scoreComplexity(prompt);

    if (routingStrategy === 'quality_first') {
      // Only trivially simple prompts go local; logprobs are not requested.
      if (score >= QUALITY_FIRST_MAX_COMPLEXITY) return null;
      const result = await callLocalFn(false);
      startShadow(result);
      return result;
    }

    // 'balanced' and the default 'local_first' share one shape and differ
    // only in their complexity ceiling and confidence floor.
    const isBalanced = routingStrategy === 'balanced';
    const maxComplexity = isBalanced ? BALANCED_MAX_COMPLEXITY : COMPLEXITY_HIGH_THRESHOLD;
    const minConfidence = isBalanced ? BALANCED_MIN_CONFIDENCE : confidenceThreshold;

    if (score > maxComplexity) return null;
    const result = await callLocalFn(true);
    const confidence = extractConfidence(result && result.logprobsData);
    // NaN compares false against any threshold, so reject it explicitly
    // instead of letting an unscored result through.
    if (confidence === null || Number.isNaN(confidence) || confidence < minConfidence) return null;
    startShadow(result);
    return result;
  } catch (_) {
    return null;
  }
}
|
|
99
|
+
|
|
100
|
+
module.exports = { route, scoreComplexity, extractConfidence };
|
|
101
|
+
module.exports.COMPLEXITY_HIGH_THRESHOLD = COMPLEXITY_HIGH_THRESHOLD;
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const { logAgreement } = require('./metrics');
|
|
4
|
+
|
|
5
|
+
/**
 * Fire-and-forget shadow comparison.
 * When shadow_mode is enabled, runs localResultFn() in the background and
 * logs agreement/disagreement with frontierResult — but ALWAYS returns frontierResult.
 *
 * Both sides are compared as trimmed strings; non-string values are compared
 * by their JSON serialisation. A shadow call that throws, or either side
 * having no JSON representation, is logged as a disagreement.
 *
 * Never throws: a missing config is treated as "shadow off", and any error
 * raised while comparing or logging is swallowed.
 *
 * @param {object} config - resolved config from resolveConfig()
 * @param {string} planningDir - path to the .planning directory
 * @param {string} operationType - e.g. 'artifact_classification'
 * @param {Function} localResultFn - async function that returns the local LLM result
 * @param {*} frontierResult - the result already returned to the caller (never changed)
 * @param {string} [sessionId] - current session identifier
 * @returns {*} frontierResult — unchanged
 */
function runShadow(config, planningDir, operationType, localResultFn, frontierResult, sessionId) {
  // Shadow off, LLM disabled, or no config at all — return immediately
  if (!config || !config.advanced || !config.advanced.shadow_mode) {
    return frontierResult;
  }
  if (!config.enabled) {
    return frontierResult;
  }

  // Renders a value for comparison; null when it has no JSON representation
  // (undefined, functions, circular structures).
  const toComparable = (value) => {
    if (typeof value === 'string') return value;
    try {
      const json = JSON.stringify(value);
      return typeof json === 'string' ? json : null;
    } catch (_) {
      return null;
    }
  };

  // Fire-and-forget: never propagates errors, never affects frontierResult.
  // Promise.resolve().then defers localResultFn() so the caller is not delayed.
  Promise.resolve()
    .then(async () => {
      let localValue;
      try {
        localValue = toComparable(await localResultFn());
      } catch (_) {
        // Local call failed — log as disagreement
        localValue = null;
      }

      const frontierStr = toComparable(frontierResult);
      const localStr = localValue !== null ? localValue.trim() : null;
      const agrees =
        localStr !== null && frontierStr !== null && localStr === frontierStr.trim();

      logAgreement(planningDir, {
        timestamp: new Date().toISOString(),
        operation: operationType,
        session_id: sessionId || 'unknown',
        agrees,
        local_result: localStr,
        frontier_result: frontierStr
      });
    })
    .catch(() => {
      // Swallow all errors — shadow must never throw
    });

  return frontierResult;
}
|
|
59
|
+
|
|
60
|
+
module.exports = { runShadow };
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const fs = require('fs');
|
|
4
|
+
const path = require('path');
|
|
5
|
+
|
|
6
|
+
// --- Constants ---
|
|
7
|
+
|
|
8
|
+
/** Minimum shadow log entries per operation before suggesting an adjustment */
const MIN_SAMPLES = 20;

/** Step size for each threshold adjustment */
const ADJUST_STEP = 0.05;

/** Clamp floor for suggested threshold */
const THRESHOLD_MIN = 0.5;

/** Clamp ceiling for suggested threshold */
const THRESHOLD_MAX = 0.99;

/**
 * Failure rate above which local LLM is considered too unreliable.
 * Suggests raising the confidence_threshold so fewer calls are routed locally.
 */
const HIGH_FAILURE_RATE = 0.20;

/**
 * Failure rate below which local LLM is considered very reliable.
 * Suggests lowering the confidence_threshold so more calls are routed locally.
 */
const LOW_FAILURE_RATE = 0.05;

/**
 * Reads the shadow agreement log and returns advisory threshold adjustments
 * per operation type.
 *
 * The log is `<planningDir>/logs/local-llm-shadow.jsonl`, one JSON object per
 * line. Blank lines, unparseable lines, non-object values and entries without
 * an `operation` are ignored. An entry counts as an agreement only when its
 * `agrees` field is strictly `true`.
 *
 * Only emits a suggestion for an operation when it has >= MIN_SAMPLES entries.
 * All suggestions are ±ADJUST_STEP clamped to [THRESHOLD_MIN, THRESHOLD_MAX].
 * Never writes to config — purely advisory. Never throws: any failure
 * (including an unreadable log) yields an empty array.
 *
 * @param {string} planningDir - Absolute path to the .planning directory
 * @param {number} currentThreshold - Current confidence_threshold from config
 * @returns {Array<{operation: string, current: number, suggested: number, sample_count: number, agreement_rate: number}>}
 */
function computeThresholdAdjustments(planningDir, currentThreshold) {
  try {
    const shadowLogPath = path.join(planningDir, 'logs', 'local-llm-shadow.jsonl');

    if (!fs.existsSync(shadowLogPath)) {
      return [];
    }

    // Tally per operation in a Map. Operation names come from the log file, so
    // a plain object would resolve names like "__proto__" or "constructor" to
    // inherited objects and write the counters onto Object.prototype / Object.
    const groups = new Map();
    for (const line of fs.readFileSync(shadowLogPath, 'utf8').split('\n')) {
      if (line.trim().length === 0) continue;

      let entry;
      try {
        entry = JSON.parse(line);
      } catch (_e) {
        // Skip malformed lines
        continue;
      }
      if (!entry || typeof entry !== 'object' || !entry.operation) continue;

      // Key by string so grouping matches what a property name would have been.
      const operation = String(entry.operation);
      let stats = groups.get(operation);
      if (!stats) {
        stats = { count: 0, agrees: 0 };
        groups.set(operation, stats);
      }
      stats.count += 1;
      if (entry.agrees === true) {
        stats.agrees += 1;
      }
    }

    // Build suggestions for operations with enough samples
    const suggestions = [];
    for (const [operation, stats] of groups) {
      if (stats.count < MIN_SAMPLES) continue;

      const agreementRate = stats.agrees / stats.count;
      const failureRate = 1 - agreementRate;

      let suggested;
      if (failureRate > HIGH_FAILURE_RATE) {
        // Local is too unreliable — raise threshold (fewer local calls)
        suggested = Math.min(THRESHOLD_MAX, currentThreshold + ADJUST_STEP);
      } else if (failureRate < LOW_FAILURE_RATE) {
        // Local is very reliable — lower threshold (more local calls)
        suggested = Math.max(THRESHOLD_MIN, currentThreshold - ADJUST_STEP);
      } else {
        // Within acceptable range — no change
        suggested = currentThreshold;
      }

      suggestions.push({
        operation,
        current: currentThreshold,
        suggested,
        sample_count: stats.count,
        agreement_rate: agreementRate
      });
    }

    return suggestions;
  } catch (_e) {
    // Never throws
    return [];
  }
}
|
|
117
|
+
|
|
118
|
+
module.exports = { computeThresholdAdjustments };
|
|
@@ -20,10 +20,18 @@
|
|
|
20
20
|
* roadmap update-plans <phase> <complete> <total> — Update phase plans in ROADMAP.md
|
|
21
21
|
* history append <type> <title> [body] — Append record to HISTORY.md
|
|
22
22
|
* history load — Load all HISTORY.md records as JSON
|
|
23
|
+
* llm metrics [--session <ISO>] — Lifetime or session-scoped LLM usage metrics
|
|
23
24
|
*/
|
|
24
25
|
|
|
25
26
|
const fs = require('fs');
|
|
26
27
|
const path = require('path');
|
|
28
|
+
const { resolveConfig, checkHealth } = require('./local-llm/health');
|
|
29
|
+
const { classifyArtifact } = require('./local-llm/operations/classify-artifact');
|
|
30
|
+
const { scoreSource } = require('./local-llm/operations/score-source');
|
|
31
|
+
const { classifyError } = require('./local-llm/operations/classify-error');
|
|
32
|
+
const { summarizeContext } = require('./local-llm/operations/summarize-context');
|
|
33
|
+
const { readSessionMetrics, summarizeMetrics, computeLifetimeMetrics } = require('./local-llm/metrics');
|
|
34
|
+
const { computeThresholdAdjustments } = require('./local-llm/threshold-tuner');
|
|
27
35
|
|
|
28
36
|
const cwd = process.cwd();
|
|
29
37
|
const planningDir = path.join(cwd, '.planning');
|
|
@@ -207,7 +215,7 @@ function resolveDepthProfile(config) {
|
|
|
207
215
|
return { depth, profile };
|
|
208
216
|
}
|
|
209
217
|
|
|
210
|
-
function main() {
|
|
218
|
+
async function main() {
|
|
211
219
|
const args = process.argv.slice(2);
|
|
212
220
|
const command = args[0];
|
|
213
221
|
const subcommand = args[1];
|
|
@@ -292,8 +300,117 @@ function main() {
|
|
|
292
300
|
const { logEvent } = require('./event-logger');
|
|
293
301
|
logEvent(category, event, details);
|
|
294
302
|
output({ logged: true, category, event });
|
|
303
|
+
} else if (command === 'llm' && subcommand === 'health') {
|
|
304
|
+
let rawConfig = {};
|
|
305
|
+
try { rawConfig = configLoad(planningDir) || {}; } catch (_e) { /* use defaults */ }
|
|
306
|
+
const llmConfig = resolveConfig(rawConfig.local_llm);
|
|
307
|
+
const health = await checkHealth(llmConfig);
|
|
308
|
+
output(health);
|
|
309
|
+
} else if (command === 'llm' && subcommand === 'status') {
|
|
310
|
+
let rawConfig = {};
|
|
311
|
+
try { rawConfig = configLoad(planningDir) || {}; } catch (_e) { /* use defaults */ }
|
|
312
|
+
const llmConfig = resolveConfig(rawConfig.local_llm);
|
|
313
|
+
output({
|
|
314
|
+
enabled: llmConfig.enabled,
|
|
315
|
+
model: llmConfig.model,
|
|
316
|
+
endpoint: llmConfig.endpoint,
|
|
317
|
+
features: llmConfig.features,
|
|
318
|
+
metrics_file: path.join(planningDir, 'logs', 'local-llm-metrics.jsonl'),
|
|
319
|
+
timeout_ms: llmConfig.timeout_ms,
|
|
320
|
+
disable_after_failures: llmConfig.advanced.disable_after_failures
|
|
321
|
+
});
|
|
322
|
+
} else if (command === 'llm' && subcommand === 'classify') {
|
|
323
|
+
const fileType = args[2];
|
|
324
|
+
const filePath = args[3];
|
|
325
|
+
if (!fileType || !filePath) {
|
|
326
|
+
error('Usage: pbr-tools.js llm classify <PLAN|SUMMARY> <filepath>');
|
|
327
|
+
}
|
|
328
|
+
const upperType = fileType.toUpperCase();
|
|
329
|
+
if (upperType !== 'PLAN' && upperType !== 'SUMMARY') {
|
|
330
|
+
error('llm classify: fileType must be PLAN or SUMMARY');
|
|
331
|
+
}
|
|
332
|
+
let content = '';
|
|
333
|
+
try {
|
|
334
|
+
content = fs.readFileSync(filePath, 'utf8');
|
|
335
|
+
} catch (_e) {
|
|
336
|
+
error('llm classify: cannot read file: ' + filePath);
|
|
337
|
+
}
|
|
338
|
+
let rawConfig = {};
|
|
339
|
+
try { rawConfig = configLoad(planningDir) || {}; } catch (_e) { /* use defaults */ }
|
|
340
|
+
const llmConfig = resolveConfig(rawConfig.local_llm);
|
|
341
|
+
const result = await classifyArtifact(llmConfig, planningDir, content, upperType, undefined);
|
|
342
|
+
output(result || { classification: null, reason: 'LLM disabled or unavailable' });
|
|
343
|
+
} else if (command === 'llm' && subcommand === 'score-source') {
|
|
344
|
+
const sourceUrl = args[2];
|
|
345
|
+
const filePath = args[3];
|
|
346
|
+
if (!sourceUrl || !filePath) {
|
|
347
|
+
error('Usage: pbr-tools.js llm score-source <url> <file-path>');
|
|
348
|
+
}
|
|
349
|
+
if (!fs.existsSync(filePath)) {
|
|
350
|
+
error('File not found: ' + filePath);
|
|
351
|
+
}
|
|
352
|
+
const content = fs.readFileSync(filePath, 'utf8');
|
|
353
|
+
let rawConfig = {};
|
|
354
|
+
try { rawConfig = configLoad(planningDir) || {}; } catch (_e) { /* use defaults */ }
|
|
355
|
+
const llmConfig = resolveConfig(rawConfig.local_llm);
|
|
356
|
+
const result = await scoreSource(llmConfig, planningDir, content, sourceUrl, undefined);
|
|
357
|
+
output(result || { level: null, reason: 'LLM disabled or unavailable' });
|
|
358
|
+
} else if (command === 'llm' && subcommand === 'classify-error') {
|
|
359
|
+
const filePath = args[2];
|
|
360
|
+
const agentType = args[3] || 'unknown';
|
|
361
|
+
if (!filePath) {
|
|
362
|
+
error('Usage: pbr-tools.js llm classify-error <file-path> [agent-type]');
|
|
363
|
+
}
|
|
364
|
+
if (!fs.existsSync(filePath)) {
|
|
365
|
+
error('File not found: ' + filePath);
|
|
366
|
+
}
|
|
367
|
+
const errorText = fs.readFileSync(filePath, 'utf8');
|
|
368
|
+
let rawConfig = {};
|
|
369
|
+
try { rawConfig = configLoad(planningDir) || {}; } catch (_e) { /* use defaults */ }
|
|
370
|
+
const llmConfig = resolveConfig(rawConfig.local_llm);
|
|
371
|
+
const result = await classifyError(llmConfig, planningDir, errorText, agentType, undefined);
|
|
372
|
+
output(result || { category: null, reason: 'LLM disabled or unavailable' });
|
|
373
|
+
} else if (command === 'llm' && subcommand === 'summarize') {
|
|
374
|
+
const filePath = args[2];
|
|
375
|
+
const maxWords = args[3] ? parseInt(args[3], 10) : undefined;
|
|
376
|
+
if (!filePath) {
|
|
377
|
+
error('Usage: pbr-tools.js llm summarize <file-path> [max-words]');
|
|
378
|
+
}
|
|
379
|
+
if (!fs.existsSync(filePath)) {
|
|
380
|
+
error('File not found: ' + filePath);
|
|
381
|
+
}
|
|
382
|
+
const contextText = fs.readFileSync(filePath, 'utf8');
|
|
383
|
+
let rawConfig = {};
|
|
384
|
+
try { rawConfig = configLoad(planningDir) || {}; } catch (_e) { /* use defaults */ }
|
|
385
|
+
const llmConfig = resolveConfig(rawConfig.local_llm);
|
|
386
|
+
const result = await summarizeContext(llmConfig, planningDir, contextText, maxWords, undefined);
|
|
387
|
+
output(result || { summary: null, reason: 'LLM disabled or unavailable' });
|
|
388
|
+
} else if (command === 'llm' && subcommand === 'metrics') {
|
|
389
|
+
const sessionFlag = args[2]; // '--session'
|
|
390
|
+
const sessionStart = args[3]; // ISO timestamp
|
|
391
|
+
let rawConfig = {};
|
|
392
|
+
try { rawConfig = configLoad(planningDir) || {}; } catch (_e) { /* defaults */ }
|
|
393
|
+
const rate = rawConfig.local_llm && rawConfig.local_llm.metrics && rawConfig.local_llm.metrics.frontier_token_rate
|
|
394
|
+
? rawConfig.local_llm.metrics.frontier_token_rate : 3.0;
|
|
395
|
+
if (sessionFlag === '--session' && sessionStart) {
|
|
396
|
+
const entries = readSessionMetrics(planningDir, sessionStart);
|
|
397
|
+
const summary = summarizeMetrics(entries, rate);
|
|
398
|
+
output({ scope: 'session', session_start: sessionStart, ...summary });
|
|
399
|
+
} else {
|
|
400
|
+
const lifetime = computeLifetimeMetrics(planningDir, rate);
|
|
401
|
+
output({ scope: 'lifetime', ...lifetime });
|
|
402
|
+
}
|
|
403
|
+
} else if (command === 'llm' && subcommand === 'adjust-thresholds') {
|
|
404
|
+
let rawConfig = {};
|
|
405
|
+
try { rawConfig = configLoad(planningDir) || {}; } catch (_e) { /* use defaults */ }
|
|
406
|
+
const llmConfig = resolveConfig(rawConfig.local_llm);
|
|
407
|
+
const currentThreshold = llmConfig.advanced.confidence_threshold;
|
|
408
|
+
const suggestions = computeThresholdAdjustments(planningDir, currentThreshold);
|
|
409
|
+
output(suggestions.length > 0
|
|
410
|
+
? { suggestions }
|
|
411
|
+
: { suggestions: [], message: 'Not enough shadow samples yet (need >= 20 per operation)' });
|
|
295
412
|
} else {
|
|
296
|
-
error(`Unknown command: ${args.join(' ')}\nCommands: state load|check-progress|update, config validate, plan-index, frontmatter, must-haves, phase-info, roadmap update-status|update-plans, history append|load, event`);
|
|
413
|
+
error(`Unknown command: ${args.join(' ')}\nCommands: state load|check-progress|update, config validate, plan-index, frontmatter, must-haves, phase-info, roadmap update-status|update-plans, history append|load, event, llm health|status|classify|score-source|classify-error|summarize|metrics [--session <ISO>]|adjust-thresholds`);
|
|
297
414
|
}
|
|
298
415
|
} catch (e) {
|
|
299
416
|
error(e.message);
|
|
@@ -1374,5 +1491,5 @@ function atomicWrite(filePath, content) {
|
|
|
1374
1491
|
}
|
|
1375
1492
|
}
|
|
1376
1493
|
|
|
1377
|
-
if (require.main === module || process.argv[1] === __filename) { main(); }
|
|
1494
|
+
if (require.main === module || process.argv[1] === __filename) { main().catch(err => { process.stderr.write(err.message + '\n'); process.exit(1); }); }
|
|
1378
1495
|
module.exports = { parseStateMd, parseRoadmapMd, parseYamlFrontmatter, parseMustHaves, countMustHaves, stateLoad, stateCheckProgress, configLoad, configClearCache, configValidate, lockedFileUpdate, planIndex, determinePhaseStatus, findFiles, atomicWrite, tailLines, frontmatter, mustHavesCollect, phaseInfo, stateUpdate, roadmapUpdateStatus, roadmapUpdatePlans, updateLegacyStateField, updateFrontmatterField, updateTableRow, findRoadmapRow, resolveDepthProfile, DEPTH_PROFILE_DEFAULTS, historyAppend, historyLoad, VALID_STATUS_TRANSITIONS, validateStatusTransition };
|
|
@@ -68,7 +68,7 @@ function main() {
|
|
|
68
68
|
|
|
69
69
|
process.stdin.setEncoding('utf8');
|
|
70
70
|
process.stdin.on('data', (chunk) => { input += chunk; });
|
|
71
|
-
process.stdin.on('end', () => {
|
|
71
|
+
process.stdin.on('end', async () => {
|
|
72
72
|
try {
|
|
73
73
|
const data = JSON.parse(input);
|
|
74
74
|
|
|
@@ -76,7 +76,7 @@ function main() {
|
|
|
76
76
|
// Note: SUMMARY files intentionally trigger BOTH this check AND the state-sync
|
|
77
77
|
// check below. The plan format check validates frontmatter structure, while
|
|
78
78
|
// state-sync auto-updates ROADMAP.md and STATE.md tracking fields.
|
|
79
|
-
const planResult = checkPlanWrite(data);
|
|
79
|
+
const planResult = await checkPlanWrite(data);
|
|
80
80
|
if (planResult) {
|
|
81
81
|
process.stdout.write(JSON.stringify(planResult.output));
|
|
82
82
|
process.exit(0);
|
|
@@ -15,8 +15,9 @@ const { execSync } = require('child_process');
|
|
|
15
15
|
const { logHook } = require('./hook-logger');
|
|
16
16
|
const { logEvent } = require('./event-logger');
|
|
17
17
|
const { configLoad } = require('./pbr-tools');
|
|
18
|
+
const { resolveConfig, checkHealth, warmUp } = require('./local-llm/health');
|
|
18
19
|
|
|
19
|
-
function main() {
|
|
20
|
+
async function main() {
|
|
20
21
|
const cwd = process.cwd();
|
|
21
22
|
const planningDir = path.join(cwd, '.planning');
|
|
22
23
|
const stateFile = path.join(planningDir, 'STATE.md');
|
|
@@ -38,9 +39,34 @@ function main() {
|
|
|
38
39
|
tryLaunchDashboard(config.dashboard.port || 3000, planningDir, cwd);
|
|
39
40
|
}
|
|
40
41
|
|
|
42
|
+
// Write session-start timestamp for local-llm metrics correlation
|
|
43
|
+
const sessionStartFile = path.join(planningDir, '.session-start');
|
|
44
|
+
try {
|
|
45
|
+
fs.writeFileSync(sessionStartFile, new Date().toISOString(), 'utf8');
|
|
46
|
+
} catch (_e) { /* non-fatal */ }
|
|
47
|
+
|
|
48
|
+
// Local LLM health check (advisory only — never blocks SessionStart)
|
|
49
|
+
let llmContext = '';
|
|
50
|
+
try {
|
|
51
|
+
const rawLlmConfig = config && config.local_llm;
|
|
52
|
+
const llmConfig = resolveConfig(rawLlmConfig);
|
|
53
|
+
if (llmConfig.enabled) {
|
|
54
|
+
const health = await checkHealth(llmConfig);
|
|
55
|
+
if (health.available) {
|
|
56
|
+
llmContext = `\nLocal LLM: ${llmConfig.model} (${health.warm ? 'warm' : 'cold start'})`;
|
|
57
|
+
if (!health.warm) {
|
|
58
|
+
// Fire warm-up without awaiting — 23s cold start must not block hook
|
|
59
|
+
warmUp(llmConfig);
|
|
60
|
+
}
|
|
61
|
+
} else if (health.reason !== 'disabled') {
|
|
62
|
+
llmContext = `\nLocal LLM: unavailable — ${health.detail || health.reason}`;
|
|
63
|
+
}
|
|
64
|
+
}
|
|
65
|
+
} catch (_e) { /* graceful degradation — never surface to user */ }
|
|
66
|
+
|
|
41
67
|
if (context) {
|
|
42
68
|
const output = {
|
|
43
|
-
additionalContext: context
|
|
69
|
+
additionalContext: context + llmContext
|
|
44
70
|
};
|
|
45
71
|
process.stdout.write(JSON.stringify(output));
|
|
46
72
|
logHook('progress-tracker', 'SessionStart', 'injected', { hasState: true });
|
|
@@ -373,4 +399,4 @@ function tryLaunchDashboard(port, _planningDir, projectDir) {
|
|
|
373
399
|
// Exported for testing
|
|
374
400
|
module.exports = { getHookHealthSummary, FAILURE_DECISIONS, HOOK_HEALTH_MAX_ENTRIES, tryLaunchDashboard };
|
|
375
401
|
|
|
376
|
-
main();
|
|
402
|
+
main().catch(() => {});
|
|
@@ -20,7 +20,8 @@
|
|
|
20
20
|
const fs = require('fs');
|
|
21
21
|
const path = require('path');
|
|
22
22
|
const { logHook } = require('./hook-logger');
|
|
23
|
-
const { tailLines } = require('./pbr-tools');
|
|
23
|
+
const { tailLines, configLoad } = require('./pbr-tools');
|
|
24
|
+
const { readSessionMetrics, summarizeMetrics, formatSessionSummary } = require('./local-llm/metrics');
|
|
24
25
|
|
|
25
26
|
function readStdin() {
|
|
26
27
|
try {
|
|
@@ -238,6 +239,36 @@ function main() {
|
|
|
238
239
|
// Write session history log
|
|
239
240
|
writeSessionHistory(planningDir, data);
|
|
240
241
|
|
|
242
|
+
// Local LLM metrics summary (SessionEnd — sync reads only, never throws)
|
|
243
|
+
let llmAdditionalContext = null;
|
|
244
|
+
try {
|
|
245
|
+
const sessionStartFile = path.join(planningDir, '.session-start');
|
|
246
|
+
if (fs.existsSync(sessionStartFile)) {
|
|
247
|
+
const sessionStartTime = fs.readFileSync(sessionStartFile, 'utf8').trim();
|
|
248
|
+
const entries = readSessionMetrics(planningDir, sessionStartTime);
|
|
249
|
+
if (entries.length > 0) {
|
|
250
|
+
const summary = summarizeMetrics(entries);
|
|
251
|
+
logHook('session-cleanup', 'SessionEnd', 'llm-metrics', {
|
|
252
|
+
total_calls: summary.total_calls,
|
|
253
|
+
fallback_count: summary.fallback_count,
|
|
254
|
+
avg_latency_ms: summary.avg_latency_ms,
|
|
255
|
+
tokens_saved: summary.tokens_saved,
|
|
256
|
+
cost_saved_usd: summary.cost_saved_usd
|
|
257
|
+
});
|
|
258
|
+
if (summary.total_calls > 0) {
|
|
259
|
+
let modelName = null;
|
|
260
|
+
try {
|
|
261
|
+
const rawConfig = configLoad(planningDir) || {};
|
|
262
|
+
modelName = (rawConfig.local_llm && rawConfig.local_llm.model) || null;
|
|
263
|
+
} catch (_e) { /* config read failure is non-fatal */ }
|
|
264
|
+
llmAdditionalContext = formatSessionSummary(summary, modelName);
|
|
265
|
+
}
|
|
266
|
+
}
|
|
267
|
+
// Clean up session-start file
|
|
268
|
+
try { fs.unlinkSync(sessionStartFile); } catch (_e) { /* non-fatal */ }
|
|
269
|
+
}
|
|
270
|
+
} catch (_e) { /* metrics never crash the hook */ }
|
|
271
|
+
|
|
241
272
|
const decision = cleaned.length > 0 ? 'cleaned' : 'nothing';
|
|
242
273
|
logHook('session-cleanup', 'SessionEnd', decision, {
|
|
243
274
|
reason: data.reason || null,
|
|
@@ -246,6 +277,10 @@ function main() {
|
|
|
246
277
|
orphaned_progress_files: orphans.length > 0 ? orphans : undefined
|
|
247
278
|
});
|
|
248
279
|
|
|
280
|
+
if (llmAdditionalContext) {
|
|
281
|
+
process.stdout.write(JSON.stringify({ additionalContext: llmAdditionalContext }) + '\n');
|
|
282
|
+
}
|
|
283
|
+
|
|
249
284
|
process.exit(0);
|
|
250
285
|
}
|
|
251
286
|
|
|
@@ -20,6 +20,23 @@
|
|
|
20
20
|
const fs = require('fs');
|
|
21
21
|
const path = require('path');
|
|
22
22
|
const { logHook } = require('./hook-logger');
|
|
23
|
+
const { resolveConfig } = require('./local-llm/health');
|
|
24
|
+
const { validateTask: llmValidateTask } = require('./local-llm/operations/validate-task');
|
|
25
|
+
|
|
26
|
+
/**
 * Load and resolve the local_llm config block from .planning/config.json.
 *
 * Never throws: if the file is missing, unreadable, or not valid JSON, the
 * result is whatever resolveConfig(undefined) yields (per the original note,
 * a config that is safe to use and disabled by default).
 *
 * A leading UTF-8 byte-order mark is removed before parsing. readFileSync
 * with 'utf8' keeps the BOM and JSON.parse rejects it, so without this step
 * an otherwise valid config saved by a BOM-writing editor would be silently
 * treated as absent.
 *
 * @param {string} cwd - working directory to resolve .planning/config.json from
 * @returns {*} the value returned by resolveConfig for the parsed local_llm block
 */
function loadLocalLlmConfig(cwd) {
  try {
    const configPath = path.join(cwd, '.planning', 'config.json');
    const rawText = fs.readFileSync(configPath, 'utf8');
    // Drop a leading U+FEFF so BOM-prefixed files parse like any other JSON.
    const jsonText = rawText.charCodeAt(0) === 0xFEFF ? rawText.slice(1) : rawText;
    const parsed = JSON.parse(jsonText);
    return resolveConfig(parsed.local_llm);
  } catch (_e) {
    // Any read/parse failure degrades to the default-resolved config.
    return resolveConfig(undefined);
  }
}
|
|
23
40
|
|
|
24
41
|
const KNOWN_AGENTS = [
|
|
25
42
|
'researcher',
|
|
@@ -687,7 +704,7 @@ function main() {
|
|
|
687
704
|
|
|
688
705
|
process.stdin.setEncoding('utf8');
|
|
689
706
|
process.stdin.on('data', (chunk) => { input += chunk; });
|
|
690
|
-
process.stdin.on('end', () => {
|
|
707
|
+
process.stdin.on('end', async () => {
|
|
691
708
|
try {
|
|
692
709
|
const data = JSON.parse(input);
|
|
693
710
|
|
|
@@ -784,6 +801,18 @@ function main() {
|
|
|
784
801
|
const activeSkillWarning = checkActiveSkillIntegrity(data);
|
|
785
802
|
if (activeSkillWarning) warnings.push(activeSkillWarning);
|
|
786
803
|
|
|
804
|
+
// LLM task coherence check — advisory only
|
|
805
|
+
try {
|
|
806
|
+
const llmConfig = loadLocalLlmConfig(process.cwd());
|
|
807
|
+
const planningDir = path.join(process.cwd(), '.planning');
|
|
808
|
+
const llmResult = await llmValidateTask(llmConfig, planningDir, data.tool_input || {}, data.session_id);
|
|
809
|
+
if (llmResult && !llmResult.coherent) {
|
|
810
|
+
warnings.push('LLM task coherence advisory: ' + (llmResult.issue || 'Task description may not match intended operation.') + ' (confidence: ' + (llmResult.confidence * 100).toFixed(0) + '%)');
|
|
811
|
+
}
|
|
812
|
+
} catch (_llmErr) {
|
|
813
|
+
// Never propagate LLM errors
|
|
814
|
+
}
|
|
815
|
+
|
|
787
816
|
if (warnings.length > 0) {
|
|
788
817
|
for (const warning of warnings) {
|
|
789
818
|
logHook('validate-task', 'PreToolUse', 'warn', { warning });
|
|
@@ -130,7 +130,7 @@ Read `.planning/config.json` and check for fields referenced by skills:
|
|
|
130
130
|
- PASS: All expected fields present with correct types
|
|
131
131
|
- WARN (missing fields): Report each missing field and which skill uses it — "Run `/pbr:config` to set all options."
|
|
132
132
|
|
|
133
|
-
### Check 10: Orphaned Crash Recovery Files
|
|
133
|
+
### Check 10: Orphaned Crash Recovery & Lock Files
|
|
134
134
|
|
|
135
135
|
The executor creates `.PROGRESS-{plan_id}` files as crash recovery breadcrumbs during builds and deletes them after `SUMMARY.md` is written. Similarly, `.checkpoint-manifest.json` files track checkpoint state during execution. If the executor crashes mid-build, these files remain and could confuse future runs.
|
|
136
136
|
|
|
@@ -150,6 +150,13 @@ Glob for `.planning/phases/**/.PROGRESS-*` and `.planning/phases/**/.checkpoint-
|
|
|
150
150
|
```
|
|
151
151
|
Fix suggestion: "Checkpoint manifests are leftover from interrupted builds. Safe to delete if no `/pbr:build` is currently running. Remove with `rm <path>`."
|
|
152
152
|
|
|
153
|
+
Also check for `.planning/.active-skill`:
|
|
154
|
+
|
|
155
|
+
- If the file does not exist: no action needed (PASS for this sub-check)
|
|
156
|
+
- If the file exists, check its age by comparing the file modification time to the current time:
|
|
157
|
+
- If older than 1 hour: WARN with fix suggestion: "Stale .active-skill lock file detected (set {age} ago). No PBR skill appears to be running. Safe to delete with `rm .planning/.active-skill`."
|
|
158
|
+
- If younger than 1 hour: INFO: "Active skill lock exists ({content}). A PBR skill may be running."
|
|
159
|
+
|
|
153
160
|
---
|
|
154
161
|
|
|
155
162
|
## Auto-Fix for Common Corruption Patterns
|
|
@@ -214,10 +214,10 @@ The `features.team_discussions` config flag (and `/pbr:build --team`) enables **
|
|
|
214
214
|
║ ▶ NEXT UP ║
|
|
215
215
|
╚══════════════════════════════════════════════════════════════╝
|
|
216
216
|
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
217
|
+
- `/pbr:begin` — start a new project
|
|
218
|
+
- `/pbr:status` — check current project status
|
|
219
|
+
- `/pbr:config` — configure workflow settings
|
|
220
|
+
- `/pbr:help <command>` — detailed help for a specific command
|
|
221
221
|
|
|
222
222
|
|
|
223
223
|
```
|