@massu/core 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +71 -0
- package/README.md +2 -2
- package/dist/hooks/cost-tracker.js +149 -11527
- package/dist/hooks/post-edit-context.js +127 -11493
- package/dist/hooks/post-tool-use.js +169 -11550
- package/dist/hooks/pre-compact.js +149 -11530
- package/dist/hooks/pre-delete-check.js +144 -11523
- package/dist/hooks/quality-event.js +149 -11527
- package/dist/hooks/session-end.js +188 -11570
- package/dist/hooks/session-start.js +159 -11534
- package/dist/hooks/user-prompt.js +149 -11530
- package/package.json +14 -19
- package/src/adr-generator.ts +292 -0
- package/src/analytics.ts +373 -0
- package/src/audit-trail.ts +450 -0
- package/src/backfill-sessions.ts +180 -0
- package/src/cli.ts +105 -0
- package/src/cloud-sync.ts +190 -0
- package/src/commands/doctor.ts +300 -0
- package/src/commands/init.ts +395 -0
- package/src/commands/install-hooks.ts +26 -0
- package/src/config.ts +357 -0
- package/src/cost-tracker.ts +355 -0
- package/src/db.ts +233 -0
- package/src/dependency-scorer.ts +337 -0
- package/src/docs-map.json +100 -0
- package/src/docs-tools.ts +517 -0
- package/src/domains.ts +181 -0
- package/src/hooks/cost-tracker.ts +66 -0
- package/src/hooks/intent-suggester.ts +131 -0
- package/src/hooks/post-edit-context.ts +91 -0
- package/src/hooks/post-tool-use.ts +175 -0
- package/src/hooks/pre-compact.ts +146 -0
- package/src/hooks/pre-delete-check.ts +153 -0
- package/src/hooks/quality-event.ts +127 -0
- package/src/hooks/security-gate.ts +121 -0
- package/src/hooks/session-end.ts +467 -0
- package/src/hooks/session-start.ts +210 -0
- package/src/hooks/user-prompt.ts +91 -0
- package/src/import-resolver.ts +224 -0
- package/src/memory-db.ts +1376 -0
- package/src/memory-tools.ts +391 -0
- package/src/middleware-tree.ts +70 -0
- package/src/observability-tools.ts +343 -0
- package/src/observation-extractor.ts +411 -0
- package/src/page-deps.ts +283 -0
- package/src/prompt-analyzer.ts +332 -0
- package/src/regression-detector.ts +319 -0
- package/src/rules.ts +57 -0
- package/src/schema-mapper.ts +232 -0
- package/src/security-scorer.ts +405 -0
- package/src/security-utils.ts +133 -0
- package/src/sentinel-db.ts +578 -0
- package/src/sentinel-scanner.ts +405 -0
- package/src/sentinel-tools.ts +512 -0
- package/src/sentinel-types.ts +140 -0
- package/src/server.ts +189 -0
- package/src/session-archiver.ts +112 -0
- package/src/session-state-generator.ts +174 -0
- package/src/team-knowledge.ts +407 -0
- package/src/tools.ts +847 -0
- package/src/transcript-parser.ts +458 -0
- package/src/trpc-index.ts +214 -0
- package/src/validate-features-runner.ts +106 -0
- package/src/validation-engine.ts +358 -0
- package/dist/cli.js +0 -7890
- package/dist/server.js +0 -7008
|
@@ -0,0 +1,332 @@
|
|
|
1
|
+
// Copyright (c) 2026 Massu. All rights reserved.
|
|
2
|
+
// Licensed under BSL 1.1 - see LICENSE file for details.
|
|
3
|
+
|
|
4
|
+
import type Database from 'better-sqlite3';
|
|
5
|
+
import type { ToolDefinition, ToolResult } from './tools.ts';
|
|
6
|
+
import { createHash } from 'crypto';
|
|
7
|
+
import { getConfig } from './config.ts';
|
|
8
|
+
import { escapeRegex, redactSensitiveContent } from './security-utils.ts';
|
|
9
|
+
|
|
10
|
+
// ============================================================
|
|
11
|
+
// Prompt Effectiveness Analysis
|
|
12
|
+
// ============================================================
|
|
13
|
+
|
|
14
|
+
/**
 * Build the externally visible tool name: the configured tool prefix,
 * an underscore, then `baseName`.
 */
function p(baseName: string): string {
  const { toolPrefix } = getConfig();
  return `${toolPrefix}_${baseName}`;
}
|
|
18
|
+
|
|
19
|
+
/** Default success/failure indicators. Can be overridden via config.analytics.prompts */
|
|
20
|
+
// Fallback phrases that mark a follow-up as a success signal; detectOutcome uses them
// when config.analytics.prompts.success_indicators is not set.
const DEFAULT_SUCCESS_INDICATORS = ['committed', 'approved', 'looks good', 'perfect', 'great', 'thanks'];
|
|
21
|
+
// NOTE(review): nothing in the visible code of this module reads this list — detectOutcome
// relies on its own inline correction regex. Confirm whether it should be wired in or removed.
const DEFAULT_FAILURE_INDICATORS = ['revert', 'wrong', "that's not", 'undo', 'incorrect'];
|
|
22
|
+
// Case-insensitive, word-bounded phrases; a follow-up prompt matching any of them makes
// detectOutcome mark the prompt as 'abandoned' and stop scanning further follow-ups.
const DEFAULT_ABANDON_PATTERNS = /\b(nevermind|forget it|skip|let's move on|different|instead)\b/i;
|
|
23
|
+
|
|
24
|
+
/**
 * Classify a prompt as 'bugfix', 'refactor', 'question', 'command' or 'feature'.
 *
 * Rules are tried in order and the first match wins. Keyword rules run against
 * the lower-cased text; the slash-command rule looks at the trimmed original
 * text. Anything that matches no rule is reported as 'feature'.
 *
 * @param promptText Raw prompt text.
 * @returns The category label.
 */
export function categorizePrompt(promptText: string): string {
  const lowered = promptText.toLowerCase();

  const rules: Array<{ label: string; matches: () => boolean }> = [
    { label: 'bugfix', matches: () => /\b(fix|bug|error|broken|issue|crash|fail)\b/.test(lowered) },
    { label: 'refactor', matches: () => /\b(refactor|rename|move|extract|cleanup|reorganize)\b/.test(lowered) },
    { label: 'question', matches: () => /\b(what|how|why|where|when|explain|describe|tell me)\b/.test(lowered) },
    { label: 'command', matches: () => /^\/\w+/.test(promptText.trim()) },
  ];

  const firstHit = rules.find(rule => rule.matches());

  // Explicit feature keywords (add/create/implement/...) and the fallback both
  // produce 'feature', so no dedicated rule is needed for them.
  return firstHit ? firstHit.label : 'feature';
}
|
|
38
|
+
|
|
39
|
+
/**
 * Produce a short fingerprint of a prompt for deduplication/comparison.
 *
 * The text is lower-cased, every whitespace run is collapsed to one space and
 * the ends are trimmed before hashing, so prompts differing only in case or
 * spacing share a fingerprint.
 *
 * @returns The first 16 hex characters of the SHA-256 digest.
 */
export function hashPrompt(promptText: string): string {
  const canonical = promptText.toLowerCase().replace(/\s+/g, ' ').trim();
  const hasher = createHash('sha256');
  hasher.update(canonical);
  const fullDigest = hasher.digest('hex');
  return fullDigest.slice(0, 16);
}
|
|
47
|
+
|
|
48
|
+
/**
 * Heuristically infer how a prompt turned out from what followed it.
 *
 * Evaluation order:
 *  1. Follow-up prompts are scanned in order. Each one matching the correction
 *     regex increments `correctionsNeeded`; the first one matching
 *     DEFAULT_ABANDON_PATTERNS marks the prompt 'abandoned' and stops the scan.
 *  2. Any assistant response shorter than 200 characters that mentions
 *     error/failed/cannot/unable to sets the outcome to 'failure'
 *     (this also replaces an earlier 'abandoned').
 *  3. Unless the outcome is still 'abandoned': one or more corrections yield
 *     'partial'; otherwise a follow-up matching a success indicator yields
 *     'success'. With no signal at all the outcome stays at its current value
 *     ('success' by default, or 'failure' from step 2).
 *
 * Success indicators come from config.analytics.prompts.success_indicators,
 * falling back to DEFAULT_SUCCESS_INDICATORS.
 *
 * @param followUpPrompts User prompts that came after the analyzed prompt.
 * @param assistantResponses Assistant replies to inspect for failure signals.
 * @returns The outcome label, the number of corrective follow-ups seen before
 *          the scan stopped, and the total number of follow-ups supplied.
 */
export function detectOutcome(
  followUpPrompts: string[],
  assistantResponses: string[]
): { outcome: string; correctionsNeeded: number; followUpCount: number } {
  const correctionPatterns = /\b(no|wrong|that's not|fix this|try again|revert|undo|incorrect|not what)\b/i;
  const failureSignal = /\b(error|failed|cannot|unable to)\b/i;

  const configuredIndicators =
    getConfig().analytics?.prompts?.success_indicators ?? DEFAULT_SUCCESS_INDICATORS;

  // Config-provided indicators are escaped so they are matched literally.
  // Blank entries are dropped: an empty alternative (or an empty list) would
  // build a pattern like \b()\b that matches nearly any text and would report
  // every prompt as a success.
  const escapedIndicators = configuredIndicators
    .filter(indicator => typeof indicator === 'string' && indicator.trim().length > 0)
    .map(escapeRegex);
  const successRegex =
    escapedIndicators.length > 0
      ? new RegExp(`\\b(${escapedIndicators.join('|')})\\b`, 'i')
      : null;

  let outcome = 'success';
  let correctionsNeeded = 0;

  for (const prompt of followUpPrompts) {
    if (correctionPatterns.test(prompt)) {
      correctionsNeeded++;
    }
    if (DEFAULT_ABANDON_PATTERNS.test(prompt)) {
      outcome = 'abandoned';
      break;
    }
  }

  // Only short replies count as failure signals; longer ones are ignored here.
  for (const response of assistantResponses) {
    if (response.length < 200 && failureSignal.test(response)) {
      outcome = 'failure';
    }
  }

  if (outcome !== 'abandoned') {
    if (correctionsNeeded > 0) {
      // Any number of corrections (one or many) is reported as 'partial'.
      outcome = 'partial';
    } else if (successRegex !== null) {
      for (const prompt of followUpPrompts) {
        if (successRegex.test(prompt)) {
          outcome = 'success';
          break;
        }
      }
    }
  }

  return {
    outcome,
    correctionsNeeded,
    followUpCount: followUpPrompts.length,
  };
}
|
|
107
|
+
|
|
108
|
+
/**
 * Analyze the prompts recorded for a session and store one `prompt_outcomes`
 * row per prompt that has not been analyzed yet.
 *
 * For each prompt (in `prompt_number` order) the next three prompts are used
 * as follow-up context for detectOutcome. A prompt whose hash already exists
 * for this session is skipped. Stored text is limited to the first 2000
 * characters and passed through redactSensitiveContent.
 *
 * @param db Database containing `user_prompts` and `prompt_outcomes`.
 * @param sessionId Session whose prompts should be analyzed.
 * @returns Number of rows inserted by this call.
 */
export function analyzeSessionPrompts(db: Database.Database, sessionId: string): number {
  const prompts = db.prepare(
    'SELECT prompt_text, prompt_number FROM user_prompts WHERE session_id = ? ORDER BY prompt_number ASC'
  ).all(sessionId) as Array<{ prompt_text: string; prompt_number: number }>;

  if (prompts.length === 0) return 0;

  // Compile both statements once instead of once per prompt.
  const findExisting = db.prepare(
    'SELECT id FROM prompt_outcomes WHERE session_id = ? AND prompt_hash = ?'
  );
  const insertOutcome = db.prepare(`
    INSERT INTO prompt_outcomes
      (session_id, prompt_hash, prompt_text, prompt_category, word_count, outcome,
       corrections_needed, follow_up_prompts)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?)
  `);

  let stored = 0;

  prompts.forEach((prompt, index) => {
    const hash = hashPrompt(prompt.prompt_text);

    // Skip early: nothing below is needed for a prompt that is already stored.
    if (findExisting.get(sessionId, hash)) return;

    const followUps = prompts.slice(index + 1, index + 4).map(next => next.prompt_text);
    const category = categorizePrompt(prompt.prompt_text);
    const { outcome, correctionsNeeded, followUpCount } = detectOutcome(followUps, []);

    // Trim first so leading/trailing whitespace does not add phantom words,
    // and so an empty prompt counts as 0 words rather than 1.
    const trimmed = prompt.prompt_text.trim();
    const wordCount = trimmed === '' ? 0 : trimmed.split(/\s+/).length;

    // Redact sensitive content (API keys, emails, tokens, paths) before storage.
    const redactedText = redactSensitiveContent(prompt.prompt_text.slice(0, 2000));

    insertOutcome.run(
      sessionId, hash, redactedText, category,
      wordCount, outcome,
      correctionsNeeded, followUpCount
    );
    stored++;
  });

  return stored;
}
|
|
151
|
+
|
|
152
|
+
// ============================================================
|
|
153
|
+
// MCP Tool Definitions & Handlers
|
|
154
|
+
// ============================================================
|
|
155
|
+
|
|
156
|
+
/**
 * Describe the prompt-analytics tools exposed by this module.
 *
 * Tool names carry the configured prefix (via `p`), so they are computed on
 * every call rather than cached.
 *
 * @returns Definitions for the effectiveness report and the suggestions tool, in that order.
 */
export function getPromptToolDefinitions(): ToolDefinition[] {
  const effectiveness: ToolDefinition = {
    name: p('prompt_effectiveness'),
    description: 'Prompt effectiveness statistics by category. Shows success rates, average corrections needed, and best-performing prompt patterns.',
    inputSchema: {
      type: 'object',
      properties: {
        category: {
          type: 'string',
          description: 'Filter by category: feature, bugfix, refactor, question, command',
        },
        days: { type: 'number', description: 'Days to look back (default: 30)' },
      },
      required: [],
    },
  };

  const suggestions: ToolDefinition = {
    name: p('prompt_suggestions'),
    description: 'Suggest improvements for a prompt based on past outcomes. Finds similar prompts ranked by success rate.',
    inputSchema: {
      type: 'object',
      properties: {
        prompt: { type: 'string', description: 'The prompt text to analyze' },
      },
      required: ['prompt'],
    },
  };

  return [effectiveness, suggestions];
}
|
|
186
|
+
|
|
187
|
+
// Un-prefixed names of the tools this module handles; isPromptTool checks membership here.
const PROMPT_BASE_NAMES = new Set(['prompt_effectiveness', 'prompt_suggestions']);
|
|
188
|
+
|
|
189
|
+
/**
 * Report whether `name` refers to one of this module's prompt tools.
 * The name may be given with or without the configured tool prefix.
 */
export function isPromptTool(name: string): boolean {
  const prefix = getConfig().toolPrefix + '_';
  if (name.startsWith(prefix)) {
    return PROMPT_BASE_NAMES.has(name.slice(prefix.length));
  }
  return PROMPT_BASE_NAMES.has(name);
}
|
|
194
|
+
|
|
195
|
+
/**
 * Dispatch a prompt-tool invocation to its handler.
 *
 * The configured prefix is stripped from `name` when present. Unknown names
 * and any error thrown by a handler are returned as text results (with a usage
 * hint in the error case) instead of being propagated to the caller.
 *
 * @param name Tool name, with or without the configured prefix.
 * @param args Raw tool arguments.
 * @param memoryDb Database handed through to the handlers.
 */
export function handlePromptToolCall(
  name: string,
  args: Record<string, unknown>,
  memoryDb: Database.Database
): ToolResult {
  try {
    const prefix = getConfig().toolPrefix + '_';
    const baseName = name.startsWith(prefix) ? name.slice(prefix.length) : name;

    if (baseName === 'prompt_effectiveness') {
      return handleEffectiveness(args, memoryDb);
    }
    if (baseName === 'prompt_suggestions') {
      return handleSuggestions(args, memoryDb);
    }
    return text(`Unknown prompt tool: ${name}`);
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    const usage = `${p('prompt_effectiveness')} { days: 30 }, ${p('prompt_suggestions')} { prompt: "..." }`;
    return text(`Error in ${name}: ${reason}\n\nUsage: ${usage}`);
  }
}
|
|
216
|
+
|
|
217
|
+
/**
 * Render a per-category effectiveness table from `prompt_outcomes`.
 *
 * @param args `category` (optional string filter) and `days` (look-back
 *             window; defaults to 30 when missing, non-numeric, non-finite or
 *             not positive).
 * @param db Database containing `prompt_outcomes`.
 * @returns A markdown table, or a hint message when no rows fall in the window.
 */
function handleEffectiveness(args: Record<string, unknown>, db: Database.Database): ToolResult {
  const category =
    typeof args.category === 'string' && args.category !== '' ? args.category : undefined;

  // The value is interpolated into a SQLite date modifier ("-N days"); an
  // invalid modifier makes datetime() return NULL and silently empties the
  // report, so anything that is not a positive finite number falls back to 30.
  const requestedDays = typeof args.days === 'string' ? Number(args.days) : args.days;
  const days =
    typeof requestedDays === 'number' && Number.isFinite(requestedDays) && requestedDays > 0
      ? requestedDays
      : 30;

  let sql = `
    SELECT prompt_category,
           COUNT(*) as total,
           SUM(CASE WHEN outcome = 'success' THEN 1 ELSE 0 END) as successes,
           SUM(CASE WHEN outcome = 'partial' THEN 1 ELSE 0 END) as partials,
           SUM(CASE WHEN outcome = 'failure' THEN 1 ELSE 0 END) as failures,
           SUM(CASE WHEN outcome = 'abandoned' THEN 1 ELSE 0 END) as abandoned,
           AVG(corrections_needed) as avg_corrections,
           AVG(word_count) as avg_word_count
    FROM prompt_outcomes
    WHERE created_at >= datetime('now', ?)
  `;
  const params: (string | number)[] = [`-${days} days`];

  if (category) {
    sql += ' AND prompt_category = ?';
    params.push(category);
  }

  sql += ' GROUP BY prompt_category ORDER BY total DESC';

  const rows = db.prepare(sql).all(...params) as Array<Record<string, unknown>>;

  if (rows.length === 0) {
    return text(`No prompt outcomes found in the last ${days} days. Prompt analysis runs automatically at session end. Try a longer time range: ${p('prompt_effectiveness')} { days: 90 }, or use ${p('prompt_suggestions')} { prompt: "your text" } to analyze a prompt directly.`);
  }

  // AVG() yields NULL when every input in the group is NULL; treat any
  // non-numeric aggregate as 0 so formatting cannot throw.
  const asNumber = (value: unknown): number =>
    typeof value === 'number' && Number.isFinite(value) ? value : 0;

  const lines = [
    `## Prompt Effectiveness (${days} days)`,
    '',
    '| Category | Total | Success % | Partial | Failed | Abandoned | Avg Corrections | Avg Words |',
    '|----------|-------|-----------|---------|--------|-----------|-----------------|-----------|',
  ];

  for (const row of rows) {
    const total = asNumber(row.total);
    const successRate = total > 0 ? Math.round((asNumber(row.successes) / total) * 100) : 0;
    lines.push(
      `| ${row.prompt_category} | ${total} | ${successRate}% | ${asNumber(row.partials)} | ${asNumber(row.failures)} | ${asNumber(row.abandoned)} | ${asNumber(row.avg_corrections).toFixed(1)} | ${Math.round(asNumber(row.avg_word_count))} |`
    );
  }

  return text(lines.join('\n'));
}
|
|
265
|
+
|
|
266
|
+
/**
 * Analyze a prompt and suggest improvements based on stored outcomes.
 *
 * The prompt is categorized, then up to five successful prompts of the same
 * category with the closest word count are listed, followed by the category's
 * overall success rate and average corrections.
 *
 * @param args Must contain a non-blank string `prompt`; otherwise a usage
 *             message is returned.
 * @param db Database containing `prompt_outcomes`.
 */
function handleSuggestions(args: Record<string, unknown>, db: Database.Database): ToolResult {
  const prompt = typeof args.prompt === 'string' ? args.prompt : '';
  const trimmedPrompt = prompt.trim();
  if (trimmedPrompt === '') {
    return text(`Usage: ${p('prompt_suggestions')} { prompt: "your prompt text here" } - Analyzes a prompt and suggests improvements based on past outcomes.`);
  }

  const category = categorizePrompt(prompt);
  // Count on the trimmed text so surrounding whitespace adds no phantom words.
  const wordCount = trimmedPrompt.split(/\s+/).length;

  // Find successful prompts in the same category with similar length.
  const similar = db.prepare(`
    SELECT prompt_text, outcome, corrections_needed, word_count
    FROM prompt_outcomes
    WHERE prompt_category = ? AND outcome = 'success'
    ORDER BY ABS(word_count - ?) ASC
    LIMIT 5
  `).all(category, wordCount) as Array<{
    prompt_text: string;
    outcome: string;
    corrections_needed: number;
    word_count: number;
  }>;

  const lines = [
    `## Prompt Analysis`,
    `Category: ${category}`,
    `Word count: ${wordCount}`,
    '',
  ];

  // Suggestions based on patterns.
  if (wordCount < 10) {
    lines.push('**Suggestion**: Short prompts often need follow-up corrections. Consider adding more context about:');
    lines.push('- Expected behavior or output');
    lines.push('- Specific files or components to modify');
    lines.push('- Constraints or patterns to follow');
    lines.push('');
  }

  if (similar.length > 0) {
    lines.push('### Successful Similar Prompts');
    for (const s of similar) {
      // Only mark the excerpt with an ellipsis when text was actually cut off.
      const excerpt =
        s.prompt_text.length > 150 ? `${s.prompt_text.slice(0, 150)}...` : s.prompt_text;
      lines.push(`- [${s.word_count} words] ${excerpt}`);
    }
  } else {
    lines.push('No similar successful prompts found in this category.');
  }

  // Category-specific stats. SUM()/AVG() return NULL when they have no
  // non-NULL input, hence the nullable aggregate types.
  const stats = db.prepare(`
    SELECT COUNT(*) as total,
           SUM(CASE WHEN outcome = 'success' THEN 1 ELSE 0 END) as successes,
           AVG(corrections_needed) as avg_corrections
    FROM prompt_outcomes WHERE prompt_category = ?
  `).get(category) as
    | { total: number; successes: number | null; avg_corrections: number | null }
    | undefined;

  if (stats && stats.total > 0) {
    lines.push('');
    lines.push(`### Category Stats: ${category}`);
    lines.push(`- Success rate: ${Math.round(((stats.successes ?? 0) / stats.total) * 100)}%`);
    lines.push(`- Avg corrections needed: ${(stats.avg_corrections ?? 0).toFixed(1)}`);
  }

  return text(lines.join('\n'));
}
|
|
329
|
+
|
|
330
|
+
/** Wrap a plain string as a tool result holding a single text item. */
function text(content: string): ToolResult {
  const result: ToolResult = {
    content: [{ type: 'text', text: content }],
  };
  return result;
}
|
|
@@ -0,0 +1,319 @@
|
|
|
1
|
+
// Copyright (c) 2026 Massu. All rights reserved.
|
|
2
|
+
// Licensed under BSL 1.1 - see LICENSE file for details.
|
|
3
|
+
|
|
4
|
+
import type Database from 'better-sqlite3';
|
|
5
|
+
import type { ToolDefinition, ToolResult } from './tools.ts';
|
|
6
|
+
import { getConfig } from './config.ts';
|
|
7
|
+
|
|
8
|
+
// ============================================================
|
|
9
|
+
// Regression Detection
|
|
10
|
+
// ============================================================
|
|
11
|
+
|
|
12
|
+
/**
 * Build the externally visible tool name: the configured tool prefix,
 * an underscore, then `baseName`.
 */
function p(baseName: string): string {
  const { toolPrefix } = getConfig();
  return `${toolPrefix}_${baseName}`;
}
|
|
16
|
+
|
|
17
|
+
/**
 * Fallback score cut-offs used when `regression.health_thresholds` is not
 * configured: scores at or above `healthy` are shown as OK, scores at or above
 * `warning` as WARN, and anything lower as CRIT.
 */
const DEFAULT_HEALTH_THRESHOLDS = { healthy: 80, warning: 50 };
|
|
22
|
+
|
|
23
|
+
/**
 * Resolve the health thresholds, taking each value from
 * `regression.health_thresholds` in the config when present and from
 * DEFAULT_HEALTH_THRESHOLDS otherwise (each field falls back independently).
 */
function getHealthThresholds(): { healthy: number; warning: number } {
  const overrides = getConfig().regression?.health_thresholds;
  const healthy = overrides?.healthy ?? DEFAULT_HEALTH_THRESHOLDS.healthy;
  const warning = overrides?.warning ?? DEFAULT_HEALTH_THRESHOLDS.warning;
  return { healthy, warning };
}
|
|
33
|
+
|
|
34
|
+
/**
 * Parse a timestamp into epoch milliseconds.
 *
 * Zone-less `YYYY-MM-DD HH:MM:SS` values (the shape SQLite's datetime('now')
 * produces, which is UTC) are read as UTC. Everything else is handed to
 * `Date` unchanged. Returns NaN for unparseable input.
 */
function parseHealthTimestamp(value: string): number {
  const sqliteUtc = /^(\d{4}-\d{2}-\d{2}) (\d{2}:\d{2}:\d{2}(?:\.\d+)?)$/.exec(value.trim());
  if (sqliteUtc) {
    return Date.parse(`${sqliteUtc[1]}T${sqliteUtc[2]}Z`);
  }
  return new Date(value).getTime();
}

/**
 * Calculate a feature health score from test failures and modification/test gaps.
 * 0 = critical, 100 = healthy.
 *
 * Penalties, applied to a starting score of 100:
 *  - 10 per failing test, capped at 40;
 *  - 5 per modification since the last test, capped at 30;
 *  - when modified after the last test: 2 per day of gap (rounded down), capped at 20;
 *  - when modified but never tested: a flat 30.
 *
 * @param testsPassing Number of passing tests (accepted for the call signature;
 *                     it does not influence the score).
 * @param testsFailing Number of failing tests.
 * @param modificationsSinceTest Modifications recorded since the last test run.
 * @param lastTested Timestamp of the last test run, or null if never tested.
 * @param lastModified Timestamp of the last modification, or null if unknown.
 * @returns Score clamped to a minimum of 0.
 */
export function calculateHealthScore(
  testsPassing: number,
  testsFailing: number,
  modificationsSinceTest: number,
  lastTested: string | null,
  lastModified: string | null
): number {
  let score = 100;

  // Test failures.
  if (testsFailing > 0) {
    score -= Math.min(40, testsFailing * 10);
  }

  // Modifications since last test.
  if (modificationsSinceTest > 0) {
    score -= Math.min(30, modificationsSinceTest * 5);
  }

  // Time gap between modification and test.
  if (lastModified && lastTested) {
    // Callers mix ISO strings ("...Z") with SQLite datetime strings; both must
    // be interpreted in UTC or the gap is skewed by the host's UTC offset.
    const modDate = parseHealthTimestamp(lastModified);
    const testDate = parseHealthTimestamp(lastTested);
    // NaN (unparseable input) compares false, so no penalty is applied then.
    if (modDate > testDate) {
      const daysSinceTest = (modDate - testDate) / (1000 * 60 * 60 * 24);
      score -= Math.min(20, Math.floor(daysSinceTest * 2));
    }
  } else if (lastModified && !lastTested) {
    // Modified but never tested.
    score -= 30;
  }

  return Math.max(0, score);
}
|
|
72
|
+
|
|
73
|
+
/**
 * Record that a feature was modified and refresh its health score.
 *
 * An existing `feature_health` row has its modification counter incremented
 * and `last_modified` set to now. A missing row is created with one
 * modification and no test data. In both cases the stored score comes from
 * calculateHealthScore, so a brand-new row is scored by the same formula as
 * every later update.
 *
 * @param db Database containing `feature_health`.
 * @param featureKey Key of the modified feature.
 */
export function trackModification(
  db: Database.Database,
  featureKey: string
): void {
  const existing = db.prepare(
    'SELECT * FROM feature_health WHERE feature_key = ?'
  ).get(featureKey) as Record<string, unknown> | undefined;

  const modifiedAt = new Date().toISOString();

  if (!existing) {
    // First sighting: one modification, never tested, no test counts.
    const initialScore = calculateHealthScore(0, 0, 1, null, modifiedAt);
    db.prepare(`
      INSERT INTO feature_health
        (feature_key, last_modified, modifications_since_test, health_score, tests_passing, tests_failing)
      VALUES (?, datetime('now'), 1, ?, 0, 0)
    `).run(featureKey, initialScore);
    return;
  }

  const priorModifications = (existing.modifications_since_test as number) ?? 0;
  const updatedScore = calculateHealthScore(
    (existing.tests_passing as number) ?? 0,
    (existing.tests_failing as number) ?? 0,
    priorModifications + 1,
    (existing.last_tested as string | null | undefined) ?? null,
    modifiedAt
  );

  // COALESCE keeps the SQL increment in step with the `?? 0` used for the
  // score above: a NULL counter becomes 1 instead of staying NULL.
  db.prepare(`
    UPDATE feature_health
    SET last_modified = datetime('now'),
        modifications_since_test = COALESCE(modifications_since_test, 0) + 1,
        health_score = ?
    WHERE feature_key = ?
  `).run(updatedScore, featureKey);
}
|
|
109
|
+
|
|
110
|
+
/**
 * Record a test run for a feature.
 *
 * Upserts the `feature_health` row: sets `last_tested` to now, stores the
 * pass/fail counts, the pass rate (`test_coverage_pct`) and a recomputed health
 * score, and resets `modifications_since_test` to 0.
 *
 * @param db Database containing `feature_health` (the upsert relies on a
 *           uniqueness constraint on `feature_key`).
 * @param featureKey Key of the tested feature.
 * @param passing Number of passing tests.
 * @param failing Number of failing tests.
 */
export function recordTestResult(
  db: Database.Database,
  featureKey: string,
  passing: number,
  failing: number
): void {
  const existing = db.prepare(
    'SELECT * FROM feature_health WHERE feature_key = ?'
  ).get(featureKey) as Record<string, unknown> | undefined;

  const lastModified = (existing?.last_modified as string | null | undefined) ?? null;
  const healthScore = calculateHealthScore(
    passing,
    failing,
    0,
    new Date().toISOString(),
    lastModified
  );

  // Share of passing tests in percent; 0 when nothing passed (also avoids 0/0).
  const passRate = passing > 0 ? (passing / (passing + failing)) * 100 : 0;

  // `excluded.*` refers to the row that would have been inserted, so the
  // update branch writes exactly the same values as the insert branch —
  // including test_coverage_pct, which would otherwise keep its first value.
  db.prepare(`
    INSERT INTO feature_health
      (feature_key, last_tested, test_coverage_pct, health_score, tests_passing, tests_failing, modifications_since_test)
    VALUES (?, datetime('now'), ?, ?, ?, ?, 0)
    ON CONFLICT(feature_key) DO UPDATE SET
      last_tested = excluded.last_tested,
      test_coverage_pct = excluded.test_coverage_pct,
      health_score = excluded.health_score,
      tests_passing = excluded.tests_passing,
      tests_failing = excluded.tests_failing,
      modifications_since_test = 0
  `).run(featureKey, passRate, healthScore, passing, failing);
}
|
|
141
|
+
|
|
142
|
+
/**
 * Collect human-readable alerts for a `feature_health` row.
 *
 * Alerts, in output order: failing tests (count > 0), more than three
 * modifications since the last test, and "Never tested" when the row has a
 * modification time but no test time.
 */
function buildAlerts(feature: Record<string, unknown>): string[] {
  const failing = feature.tests_failing;
  const untestedMods = feature.modifications_since_test;
  const neverTested = !feature.last_tested && Boolean(feature.last_modified);

  const candidates: Array<string | null> = [
    (failing as number) > 0 ? `${failing} tests failing` : null,
    (untestedMods as number) > 3 ? `${untestedMods} modifications since last test` : null,
    neverTested ? 'Never tested' : null,
  ];

  return candidates.filter((message): message is string => message !== null);
}
|
|
160
|
+
|
|
161
|
+
// ============================================================
|
|
162
|
+
// MCP Tool Definitions & Handlers
|
|
163
|
+
// ============================================================
|
|
164
|
+
|
|
165
|
+
/**
 * Describe the regression-detection tools exposed by this module.
 *
 * Tool names carry the configured prefix (via `p`), so they are computed on
 * every call rather than cached.
 *
 * @returns Definitions for the feature-health dashboard and the regression-risk check, in that order.
 */
export function getRegressionToolDefinitions(): ToolDefinition[] {
  return [
    {
      name: p('feature_health'),
      description: 'Feature health dashboard. Shows health scores, modification/test gaps, and alerts for registered features.',
      inputSchema: {
        type: 'object',
        properties: {
          // The handler filters on the "healthy" threshold, so the description
          // names that threshold rather than the lower "warning" one.
          unhealthy_only: { type: 'boolean', description: 'Show only features with health below the healthy threshold (default: false)' },
        },
        required: [],
      },
    },
    {
      name: p('regression_risk'),
      description: 'Check if recent changes risk regression. Shows affected features, test coverage status, and risk assessment.',
      inputSchema: {
        type: 'object',
        properties: {},
        required: [],
      },
    },
  ];
}
|
|
189
|
+
|
|
190
|
+
// Un-prefixed names of the tools this module handles; isRegressionTool checks membership here.
const REGRESSION_BASE_NAMES = new Set(['feature_health', 'regression_risk']);
|
|
191
|
+
|
|
192
|
+
/**
 * Report whether `name` refers to one of this module's regression tools.
 * The name may be given with or without the configured tool prefix.
 */
export function isRegressionTool(name: string): boolean {
  const prefix = getConfig().toolPrefix + '_';
  if (name.startsWith(prefix)) {
    return REGRESSION_BASE_NAMES.has(name.slice(prefix.length));
  }
  return REGRESSION_BASE_NAMES.has(name);
}
|
|
197
|
+
|
|
198
|
+
/**
 * Dispatch a regression-tool invocation to its handler.
 *
 * The configured prefix is stripped from `name` when present. Unknown names
 * and any error thrown by a handler are returned as text results (with a usage
 * hint in the error case) instead of being propagated to the caller.
 *
 * @param name Tool name, with or without the configured prefix.
 * @param args Raw tool arguments.
 * @param memoryDb Database handed through to the handlers.
 */
export function handleRegressionToolCall(
  name: string,
  args: Record<string, unknown>,
  memoryDb: Database.Database
): ToolResult {
  try {
    const prefix = getConfig().toolPrefix + '_';
    const baseName = name.startsWith(prefix) ? name.slice(prefix.length) : name;

    if (baseName === 'feature_health') {
      return handleFeatureHealth(args, memoryDb);
    }
    if (baseName === 'regression_risk') {
      return handleRegressionCheck(args, memoryDb);
    }
    return text(`Unknown regression tool: ${name}`);
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    const usage = `${p('feature_health')} { unhealthy_only: true }, ${p('regression_risk')} {}`;
    return text(`Error in ${name}: ${reason}\n\nUsage: ${usage}`);
  }
}
|
|
219
|
+
|
|
220
|
+
/**
 * Render the feature health dashboard as a markdown table, lowest score first.
 *
 * When `args.unhealthy_only` is truthy only rows scoring below the "healthy"
 * threshold are listed. Each row shows the score with an OK/WARN/CRIT tag,
 * pass/fail counts, modifications since the last test and any alerts.
 * An explanatory message is returned when there is nothing to list.
 */
function handleFeatureHealth(args: Record<string, unknown>, db: Database.Database): ToolResult {
  const unhealthyOnly = args.unhealthy_only as boolean | undefined;
  const thresholds = getHealthThresholds();

  const whereClause = unhealthyOnly ? ' WHERE health_score < ?' : '';
  const bindings: (string | number)[] = unhealthyOnly ? [thresholds.healthy] : [];
  const query = 'SELECT * FROM feature_health' + whereClause + ' ORDER BY health_score ASC';

  const features = db.prepare(query).all(...bindings) as Array<Record<string, unknown>>;

  if (features.length === 0) {
    if (unhealthyOnly) {
      return text(`No unhealthy features found (threshold: ${thresholds.healthy}). All tracked features are currently healthy. Use ${p('feature_health')} {} without filters to see all features.`);
    }
    return text(`No feature health data available yet. Feature health is tracked automatically when files in registered features are modified and tested. Try: ${p('regression_risk')} {} to check for untested modifications.`);
  }

  // Map a score onto its status tag using the resolved thresholds.
  const statusOf = (score: number): string => {
    if (score >= thresholds.healthy) return 'OK';
    if (score >= thresholds.warning) return 'WARN';
    return 'CRIT';
  };

  const header = [
    `## Feature Health Dashboard`,
    `Features tracked: ${features.length}`,
    '',
    '| Feature | Health | Tests P/F | Mods Since Test | Alerts |',
    '|---------|--------|-----------|-----------------|--------|',
  ];

  const tableRows = features.map(f => {
    const alerts = buildAlerts(f);
    const healthScore = f.health_score as number;
    const healthIndicator = statusOf(healthScore);
    return `| ${f.feature_key} | ${healthScore} [${healthIndicator}] | ${f.tests_passing ?? 0}/${f.tests_failing ?? 0} | ${f.modifications_since_test ?? 0} | ${alerts.join('; ') || '-'} |`;
  });

  return text([...header, ...tableRows].join('\n'));
}
|
|
265
|
+
|
|
266
|
+
/**
 * Summarize regression risk for features modified since their last test run.
 *
 * Features with `modifications_since_test > 0` are grouped by health score:
 * below the warning threshold is high risk, between warning and healthy is
 * medium risk, and at or above healthy is low risk. Within each group the
 * query order (most untested modifications first) is kept.
 */
function handleRegressionCheck(_args: Record<string, unknown>, db: Database.Database): ToolResult {
  const thresholds = getHealthThresholds();

  const recentlyModified = db.prepare(`
    SELECT feature_key, health_score, modifications_since_test, tests_failing, last_modified, last_tested
    FROM feature_health
    WHERE modifications_since_test > 0
    ORDER BY modifications_since_test DESC
  `).all() as Array<Record<string, unknown>>;

  if (recentlyModified.length === 0) {
    return text(`No features have been modified since their last test run. Low regression risk. Use ${p('feature_health')} {} to see the full feature health dashboard.`);
  }

  type FeatureRow = Record<string, unknown>;
  const scoreOf = (row: FeatureRow): number => row.health_score as number;

  // Three independent predicates (not an if/else chain) so that a row whose
  // score satisfies none of the comparisons lands in no bucket.
  const highRisk = recentlyModified.filter(row => scoreOf(row) < thresholds.warning);
  const mediumRisk = recentlyModified.filter(
    row => scoreOf(row) >= thresholds.warning && scoreOf(row) < thresholds.healthy
  );
  const lowRisk = recentlyModified.filter(row => scoreOf(row) >= thresholds.healthy);

  const report: string[] = [
    `## Regression Risk Assessment`,
    `Features with untested modifications: ${recentlyModified.length}`,
    `High risk: ${highRisk.length} | Medium: ${mediumRisk.length} | Low: ${lowRisk.length}`,
    '',
  ];

  if (highRisk.length > 0) {
    report.push(
      '### HIGH RISK (test immediately)',
      ...highRisk.map(f => `- **${f.feature_key}** (health: ${f.health_score}, ${f.modifications_since_test} untested modifications)`),
      ''
    );
  }

  if (mediumRisk.length > 0) {
    report.push(
      '### Medium Risk',
      ...mediumRisk.map(f => `- ${f.feature_key} (health: ${f.health_score}, ${f.modifications_since_test} untested modifications)`),
      ''
    );
  }

  if (lowRisk.length > 0) {
    report.push(
      '### Low Risk',
      ...lowRisk.map(f => `- ${f.feature_key} (health: ${f.health_score})`)
    );
  }

  return text(report.join('\n'));
}
|
|
316
|
+
|
|
317
|
+
/** Wrap a plain string as a tool result holding a single text item. */
function text(content: string): ToolResult {
  const result: ToolResult = {
    content: [{ type: 'text', text: content }],
  };
  return result;
}
|