@rigour-labs/core 4.3.6 → 5.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +46 -10
- package/dist/gates/base.d.ts +3 -0
- package/dist/gates/checkpoint.d.ts +23 -8
- package/dist/gates/checkpoint.js +109 -45
- package/dist/gates/checkpoint.test.js +6 -3
- package/dist/gates/dependency.d.ts +39 -0
- package/dist/gates/dependency.js +212 -5
- package/dist/gates/duplication-drift.d.ts +101 -6
- package/dist/gates/duplication-drift.js +427 -33
- package/dist/gates/logic-drift.d.ts +70 -0
- package/dist/gates/logic-drift.js +280 -0
- package/dist/gates/runner.js +29 -1
- package/dist/gates/style-drift.d.ts +53 -0
- package/dist/gates/style-drift.js +305 -0
- package/dist/index.d.ts +4 -0
- package/dist/index.js +4 -0
- package/dist/services/adaptive-thresholds.d.ts +54 -10
- package/dist/services/adaptive-thresholds.js +161 -35
- package/dist/services/adaptive-thresholds.test.js +24 -20
- package/dist/services/filesystem-cache.d.ts +50 -0
- package/dist/services/filesystem-cache.js +124 -0
- package/dist/services/temporal-drift.d.ts +101 -0
- package/dist/services/temporal-drift.js +386 -0
- package/dist/templates/universal-config.js +17 -0
- package/dist/types/index.d.ts +196 -0
- package/dist/types/index.js +19 -0
- package/dist/utils/scanner.d.ts +6 -1
- package/dist/utils/scanner.js +8 -1
- package/package.json +6 -6
|
@@ -1,21 +1,59 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Adaptive Thresholds Service
|
|
2
|
+
* Adaptive Thresholds Service (v2)
|
|
3
3
|
*
|
|
4
4
|
* Dynamically adjusts quality gate thresholds based on:
|
|
5
5
|
* - Project maturity (age, commit count, file count)
|
|
6
|
-
* - Historical failure rates
|
|
6
|
+
* - Historical failure rates with Z-score anomaly detection
|
|
7
7
|
* - Complexity tier (hobby/startup/enterprise)
|
|
8
|
-
* -
|
|
8
|
+
* - Per-provenance trend analysis (ai-drift, structural, security separate)
|
|
9
9
|
*
|
|
10
|
-
*
|
|
11
|
-
*
|
|
10
|
+
* v2 upgrades:
|
|
11
|
+
* - Z-score replaces naive delta comparison for trend detection
|
|
12
|
+
* - Per-provenance failure tracking (AI drift vs structural vs security)
|
|
13
|
+
* - Statistical anomaly detection normalizes across project sizes
|
|
12
14
|
*
|
|
13
|
-
* @since v2.14.0
|
|
15
|
+
* @since v2.14.0 (original)
|
|
16
|
+
* @since v5.0.0 (Z-score + provenance-aware trends)
|
|
14
17
|
*/
|
|
15
18
|
import * as fs from 'fs';
|
|
16
19
|
import * as path from 'path';
|
|
17
20
|
import { Logger } from '../utils/logger.js';
|
|
18
21
|
let cachedHistory = null;
|
|
22
|
+
// ─── Statistical Utilities ──────────────────────────────────────────
|
|
23
|
+
/**
 * Summarise a list of numbers by its arithmetic mean and its population
 * standard deviation (the variance is divided by the element count, not
 * by count - 1).
 *
 * @param values - Numbers to summarise.
 * @returns `{ mean, std }`; both are 0 when `values` is empty.
 */
function meanAndStd(values) {
    const count = values.length;
    if (count === 0) {
        return { mean: 0, std: 0 };
    }
    let total = 0;
    for (let i = 0; i < count; i++) {
        total += values[i];
    }
    const mean = total / count;
    // Second pass: accumulate squared distances from the mean.
    let squaredDeviations = 0;
    for (let i = 0; i < count; i++) {
        squaredDeviations += (values[i] - mean) ** 2;
    }
    return { mean, std: Math.sqrt(squaredDeviations / count) };
}
|
|
34
|
+
/**
 * Express how far `value` lies from `mean`, measured in standard deviations
 * and rounded to two decimal places.
 *
 * Reading the result for failure counts:
 *   above  2.0 → statistically abnormal HIGH (degrading)
 *   below -2.0 → statistically abnormal LOW (improving)
 *
 * @param value - Observation to score.
 * @param mean - Mean of the reference population.
 * @param std - Standard deviation of the reference population.
 * @returns The rounded Z-score, or 0 when `std` is 0 (all reference values
 *          identical), which avoids a division by zero.
 */
function zScore(value, mean, std) {
    if (std !== 0) {
        const deviations = (value - mean) / std;
        return Math.round(deviations * 100) / 100;
    }
    return 0;
}
|
|
45
|
+
/**
 * Map a Z-score of failure counts onto a trend label.
 * A positive score means more failures than the baseline, so a score above
 * the 2.0 cut-off is 'degrading' and one below -2.0 is 'improving'.
 * Anything in between (including exactly ±2.0) is 'stable'.
 *
 * @param z - Z-score to classify.
 * @returns 'degrading', 'improving' or 'stable'.
 */
function trendFromZScore(z) {
    const ABNORMAL = 2.0;
    return z > ABNORMAL
        ? 'degrading'
        : z < -ABNORMAL
            ? 'improving'
            : 'stable';
}
|
|
56
|
+
// ─── History Persistence ────────────────────────────────────────────
|
|
19
57
|
/**
|
|
20
58
|
* Load failure history from disk
|
|
21
59
|
*/
|
|
@@ -47,16 +85,19 @@ function saveHistory(cwd, history) {
|
|
|
47
85
|
fs.writeFileSync(historyPath, JSON.stringify(history, null, 2));
|
|
48
86
|
cachedHistory = history;
|
|
49
87
|
}
|
|
88
|
+
// ─── Public API ─────────────────────────────────────────────────────
|
|
50
89
|
/**
|
|
51
|
-
* Record a gate run for historical tracking
|
|
90
|
+
* Record a gate run for historical tracking.
|
|
91
|
+
* v5: accepts optional per-provenance breakdown.
|
|
52
92
|
*/
|
|
53
|
-
export function recordGateRun(cwd, passedGates, failedGates, totalFailures) {
|
|
93
|
+
export function recordGateRun(cwd, passedGates, failedGates, totalFailures, provenance) {
|
|
54
94
|
const history = loadHistory(cwd);
|
|
55
95
|
history.runs.push({
|
|
56
96
|
timestamp: new Date().toISOString(),
|
|
57
97
|
passedGates,
|
|
58
98
|
failedGates,
|
|
59
99
|
totalFailures,
|
|
100
|
+
provenance,
|
|
60
101
|
});
|
|
61
102
|
// Keep last 100 runs
|
|
62
103
|
if (history.runs.length > 100) {
|
|
@@ -66,24 +107,82 @@ export function recordGateRun(cwd, passedGates, failedGates, totalFailures) {
|
|
|
66
107
|
saveHistory(cwd, history);
|
|
67
108
|
}
|
|
68
109
|
/**
 * Classify the overall quality trend from the recorded gate runs using a
 * Z-score (v5).
 *
 * Steps:
 *   1. Load the run history for `cwd`.
 *   2. Split it into two non-overlapping windows: the five most recent runs,
 *      and every run recorded before them (the baseline).
 *   3. Compute the mean and standard deviation of the baseline failure counts.
 *   4. Score the average failure count of the recent window against that
 *      baseline: Z = (recent_avg - baseline_mean) / baseline_std.
 *
 *   Z >  2.0 → statistically abnormal spike → 'degrading'
 *   Z < -2.0 → statistically abnormal drop  → 'improving'
 *
 * Because the score is relative to the baseline's own spread, the same
 * absolute change can be noise for a project that routinely has many
 * failures and a clear signal for one that routinely has few.
 *
 * NOTE(review): when every baseline run has the same failure count the
 * standard deviation is 0 and the Z-score helper returns 0, so the result is
 * 'stable' regardless of what the recent runs look like.
 *
 * @param cwd - Project directory whose history is analysed.
 * @returns 'improving', 'stable' or 'degrading'; always 'stable' when fewer
 *          than ten runs are recorded.
 */
export function getQualityTrend(cwd) {
    const RECENT_WINDOW = 5;
    const MIN_BASELINE = 5;
    const { runs } = loadHistory(cwd);
    // Not enough data for both a baseline and a separate recent window.
    if (runs.length < RECENT_WINDOW + MIN_BASELINE) {
        return 'stable';
    }
    // Keep the windows disjoint: if the recent runs were also part of the
    // baseline the Z-score would be compressed (bounded at ~1.73 for a
    // 5-of-20 overlap) and could never cross the 2.0 threshold.
    const splitAt = runs.length - RECENT_WINDOW;
    const baselineRuns = runs.slice(0, splitAt);
    const recentRuns = runs.slice(splitAt);
    const baselineStats = meanAndStd(baselineRuns.map(run => run.totalFailures));
    let recentTotal = 0;
    for (const run of recentRuns) {
        recentTotal += run.totalFailures;
    }
    const recentAvg = recentTotal / recentRuns.length;
    return trendFromZScore(zScore(recentAvg, baselineStats.mean, baselineStats.std));
}
|
|
144
|
+
/**
 * Per-provenance trend analysis (v5).
 *
 * Runs the same recent-versus-baseline Z-score comparison as the overall
 * trend, but once for each provenance counter (`aiDriftFailures`,
 * `structuralFailures`, `securityFailures`), so that a change in AI-drift
 * failures can be told apart from a change in structural or security
 * failures.
 *
 * Only runs that carry a `provenance` record are considered; legacy runs
 * without one are ignored. With fewer than ten such runs every category is
 * reported as 'stable' with a Z-score of 0.
 *
 * @param cwd - Project directory whose history is analysed.
 * @returns Trend label and Z-score for each of the three categories.
 */
export function getProvenanceTrends(cwd) {
    const RECENT_WINDOW = 5;
    const MIN_BASELINE = 5;
    // Runs recorded before v5 have no provenance breakdown.
    const provenanceRuns = loadHistory(cwd).runs.filter(run => run.provenance);
    if (provenanceRuns.length < RECENT_WINDOW + MIN_BASELINE) {
        return {
            aiDrift: 'stable', structural: 'stable', security: 'stable',
            aiDriftZScore: 0, structuralZScore: 0, securityZScore: 0,
        };
    }
    // Disjoint windows: last five runs versus everything before them.
    const splitAt = provenanceRuns.length - RECENT_WINDOW;
    const baselineRuns = provenanceRuns.slice(0, splitAt);
    const recentRuns = provenanceRuns.slice(splitAt);
    // Score one provenance counter of the recent window against the baseline.
    const analyse = (field) => {
        const stats = meanAndStd(baselineRuns.map(run => run.provenance[field]));
        let recentTotal = 0;
        for (const run of recentRuns) {
            recentTotal += run.provenance[field];
        }
        const z = zScore(recentTotal / recentRuns.length, stats.mean, stats.std);
        return { trend: trendFromZScore(z), z: Math.round(z * 100) / 100 };
    };
    const ai = analyse('aiDriftFailures');
    const structural = analyse('structuralFailures');
    const security = analyse('securityFailures');
    return {
        aiDrift: ai.trend,
        structural: structural.trend,
        security: security.trend,
        aiDriftZScore: ai.z,
        structuralZScore: structural.z,
        securityZScore: security.z,
    };
}
|
|
88
187
|
/**
|
|
89
188
|
* Detect project complexity tier based on metrics
|
|
@@ -101,7 +200,8 @@ export function detectComplexityTier(metrics) {
|
|
|
101
200
|
return 'hobby';
|
|
102
201
|
}
|
|
103
202
|
/**
|
|
104
|
-
* Calculate adaptive thresholds based on project state
|
|
203
|
+
* Calculate adaptive thresholds based on project state.
|
|
204
|
+
* v5: Uses Z-score trending and per-provenance analysis.
|
|
105
205
|
*/
|
|
106
206
|
export function calculateAdaptiveThresholds(cwd, metrics, config = {}) {
|
|
107
207
|
const reasoning = [];
|
|
@@ -109,9 +209,20 @@ export function calculateAdaptiveThresholds(cwd, metrics, config = {}) {
|
|
|
109
209
|
const tier = config.forced_tier ??
|
|
110
210
|
(config.auto_detect_tier !== false ? detectComplexityTier(metrics) : 'startup');
|
|
111
211
|
reasoning.push(`Complexity tier: ${tier} (files: ${metrics.fileCount})`);
|
|
112
|
-
// Get trend
|
|
212
|
+
// Get overall trend (Z-score based)
|
|
113
213
|
const trend = getQualityTrend(cwd);
|
|
114
|
-
reasoning.push(`Quality trend: ${trend}`);
|
|
214
|
+
reasoning.push(`Quality trend: ${trend} (Z-score analysis)`);
|
|
215
|
+
// Get per-provenance trends
|
|
216
|
+
const provenanceTrends = getProvenanceTrends(cwd);
|
|
217
|
+
if (provenanceTrends.aiDrift !== 'stable') {
|
|
218
|
+
reasoning.push(`AI drift trend: ${provenanceTrends.aiDrift} (Z=${provenanceTrends.aiDriftZScore})`);
|
|
219
|
+
}
|
|
220
|
+
if (provenanceTrends.structural !== 'stable') {
|
|
221
|
+
reasoning.push(`Structural trend: ${provenanceTrends.structural} (Z=${provenanceTrends.structuralZScore})`);
|
|
222
|
+
}
|
|
223
|
+
if (provenanceTrends.security !== 'stable') {
|
|
224
|
+
reasoning.push(`Security trend: ${provenanceTrends.security} (Z=${provenanceTrends.securityZScore})`);
|
|
225
|
+
}
|
|
115
226
|
// Base thresholds
|
|
116
227
|
let coverageThreshold = config.base_coverage_threshold ?? 80;
|
|
117
228
|
let qualityThreshold = config.base_quality_threshold ?? 80;
|
|
@@ -120,15 +231,13 @@ export function calculateAdaptiveThresholds(cwd, metrics, config = {}) {
|
|
|
120
231
|
// Adjust by tier
|
|
121
232
|
switch (tier) {
|
|
122
233
|
case 'hobby':
|
|
123
|
-
// Lenient for small/new projects
|
|
124
234
|
coverageThreshold = Math.max(50, coverageThreshold - 30);
|
|
125
235
|
qualityThreshold = Math.max(60, qualityThreshold - 20);
|
|
126
|
-
securityBlockLevel = 'critical';
|
|
236
|
+
securityBlockLevel = 'critical';
|
|
127
237
|
leniencyFactor = 0.8;
|
|
128
238
|
reasoning.push('Hobby tier: relaxed thresholds, only critical security blocks');
|
|
129
239
|
break;
|
|
130
240
|
case 'startup':
|
|
131
|
-
// Moderate strictness
|
|
132
241
|
coverageThreshold = Math.max(60, coverageThreshold - 15);
|
|
133
242
|
qualityThreshold = Math.max(70, qualityThreshold - 10);
|
|
134
243
|
securityBlockLevel = 'high';
|
|
@@ -136,7 +245,6 @@ export function calculateAdaptiveThresholds(cwd, metrics, config = {}) {
|
|
|
136
245
|
reasoning.push('Startup tier: moderate thresholds, high+ security blocks');
|
|
137
246
|
break;
|
|
138
247
|
case 'enterprise':
|
|
139
|
-
// Strict standards
|
|
140
248
|
coverageThreshold = coverageThreshold;
|
|
141
249
|
qualityThreshold = qualityThreshold;
|
|
142
250
|
securityBlockLevel = 'medium';
|
|
@@ -144,30 +252,40 @@ export function calculateAdaptiveThresholds(cwd, metrics, config = {}) {
|
|
|
144
252
|
reasoning.push('Enterprise tier: strict thresholds, medium+ security blocks');
|
|
145
253
|
break;
|
|
146
254
|
}
|
|
147
|
-
// Adjust by trend
|
|
255
|
+
// Adjust by overall trend
|
|
148
256
|
if (trend === 'improving') {
|
|
149
|
-
// Reward improvement with slightly relaxed thresholds
|
|
150
257
|
coverageThreshold = Math.max(50, coverageThreshold - 5);
|
|
151
258
|
qualityThreshold = Math.max(60, qualityThreshold - 5);
|
|
152
259
|
leniencyFactor = Math.min(1, leniencyFactor + 0.1);
|
|
153
260
|
reasoning.push('Improving trend: bonus threshold relaxation (+5%)');
|
|
154
261
|
}
|
|
155
262
|
else if (trend === 'degrading') {
|
|
156
|
-
// Tighten thresholds to encourage recovery
|
|
157
263
|
coverageThreshold = Math.min(95, coverageThreshold + 5);
|
|
158
264
|
qualityThreshold = Math.min(95, qualityThreshold + 5);
|
|
159
265
|
leniencyFactor = Math.max(0, leniencyFactor - 0.1);
|
|
160
266
|
reasoning.push('Degrading trend: tightened thresholds (-5%)');
|
|
161
267
|
}
|
|
268
|
+
// v5: Per-provenance adjustments
|
|
269
|
+
// If AI drift is degrading but structural is stable, tighten AI-specific gates
|
|
270
|
+
if (provenanceTrends.aiDrift === 'degrading' && provenanceTrends.structural !== 'degrading') {
|
|
271
|
+
leniencyFactor = Math.max(0, leniencyFactor - 0.15);
|
|
272
|
+
reasoning.push('AI drift degrading while structural stable: AI is the problem, tightening AI gates');
|
|
273
|
+
}
|
|
274
|
+
// If security is degrading, escalate security block level
|
|
275
|
+
if (provenanceTrends.security === 'degrading') {
|
|
276
|
+
if (securityBlockLevel === 'critical')
|
|
277
|
+
securityBlockLevel = 'high';
|
|
278
|
+
else if (securityBlockLevel === 'high')
|
|
279
|
+
securityBlockLevel = 'medium';
|
|
280
|
+
reasoning.push(`Security trend degrading: escalated block level to ${securityBlockLevel}+`);
|
|
281
|
+
}
|
|
162
282
|
// Recent failure rate adjustment
|
|
163
283
|
if (metrics.recentFailureRate !== undefined) {
|
|
164
284
|
if (metrics.recentFailureRate > 50) {
|
|
165
|
-
// High failure rate - be more lenient to avoid discouragement
|
|
166
285
|
leniencyFactor = Math.min(1, leniencyFactor + 0.2);
|
|
167
286
|
reasoning.push(`High failure rate (${metrics.recentFailureRate.toFixed(0)}%): increased leniency`);
|
|
168
287
|
}
|
|
169
288
|
else if (metrics.recentFailureRate < 10) {
|
|
170
|
-
// Low failure rate - team is mature, can handle stricter gates
|
|
171
289
|
leniencyFactor = Math.max(0, leniencyFactor - 0.1);
|
|
172
290
|
reasoning.push(`Low failure rate (${metrics.recentFailureRate.toFixed(0)}%): stricter enforcement`);
|
|
173
291
|
}
|
|
@@ -180,6 +298,7 @@ export function calculateAdaptiveThresholds(cwd, metrics, config = {}) {
|
|
|
180
298
|
securityBlockLevel,
|
|
181
299
|
leniencyFactor: Math.round(leniencyFactor * 100) / 100,
|
|
182
300
|
reasoning,
|
|
301
|
+
provenanceTrends,
|
|
183
302
|
};
|
|
184
303
|
}
|
|
185
304
|
/**
|
|
@@ -196,9 +315,16 @@ export function clearAdaptiveHistory(cwd) {
|
|
|
196
315
|
* Get summary of adaptive thresholds for logging
|
|
197
316
|
*/
|
|
198
317
|
export function getAdaptiveSummary(adjustments) {
    // One-line headline: tier, the three thresholds, and the overall trend.
    const headline = `[${adjustments.tier.toUpperCase()}] ` +
        `Coverage: ${adjustments.coverageThreshold}%, ` +
        `Quality: ${adjustments.qualityThreshold}%, ` +
        `Security: ${adjustments.securityBlockLevel}+, ` +
        `Trend: ${adjustments.trend}`;
    const pt = adjustments.provenanceTrends;
    if (!pt) {
        return headline;
    }
    // The provenance breakdown is only appended when at least one category
    // has left the 'stable' state.
    const allStable = pt.aiDrift === 'stable' && pt.structural === 'stable' && pt.security === 'stable';
    if (allStable) {
        return headline;
    }
    return headline + ` | AI:${pt.aiDrift}(Z=${pt.aiDriftZScore}) Struct:${pt.structural}(Z=${pt.structuralZScore}) Sec:${pt.security}(Z=${pt.securityZScore})`;
}
|
|
@@ -69,23 +69,25 @@ describe('AdaptiveThresholds', () => {
|
|
|
69
69
|
expect(trend).toBe('stable');
|
|
70
70
|
});
|
|
71
71
|
it('should detect improving trend', () => {
|
|
72
|
-
//
|
|
73
|
-
for (let i = 0; i <
|
|
74
|
-
recordGateRun(testDir, 3, 5,
|
|
72
|
+
// Baseline: 15 runs with high failures (with variance for valid std dev)
|
|
73
|
+
for (let i = 0; i < 15; i++) {
|
|
74
|
+
recordGateRun(testDir, 3, 5, 15 + (i % 5) * 3); // 15,18,21,24,27 repeating
|
|
75
75
|
}
|
|
76
|
-
|
|
77
|
-
|
|
76
|
+
// Recent: 5 runs with very low failures (clear improvement)
|
|
77
|
+
for (let i = 0; i < 5; i++) {
|
|
78
|
+
recordGateRun(testDir, 7, 0, 1);
|
|
78
79
|
}
|
|
79
80
|
const trend = getQualityTrend(testDir);
|
|
80
81
|
expect(trend).toBe('improving');
|
|
81
82
|
});
|
|
82
83
|
it('should detect degrading trend', () => {
|
|
83
|
-
//
|
|
84
|
-
for (let i = 0; i <
|
|
85
|
-
recordGateRun(testDir, 7, 1, 3);
|
|
84
|
+
// Baseline: 15 runs with low failures (with variance for valid std dev)
|
|
85
|
+
for (let i = 0; i < 15; i++) {
|
|
86
|
+
recordGateRun(testDir, 7, 1, 1 + (i % 3)); // 1,2,3 repeating
|
|
86
87
|
}
|
|
87
|
-
|
|
88
|
-
|
|
88
|
+
// Recent: 5 runs with high failures (clear degradation)
|
|
89
|
+
for (let i = 0; i < 5; i++) {
|
|
90
|
+
recordGateRun(testDir, 3, 5, 25);
|
|
89
91
|
}
|
|
90
92
|
const trend = getQualityTrend(testDir);
|
|
91
93
|
expect(trend).toBe('degrading');
|
|
@@ -93,24 +95,26 @@ describe('AdaptiveThresholds', () => {
|
|
|
93
95
|
});
|
|
94
96
|
describe('trend-based adjustments', () => {
|
|
95
97
|
it('should relax thresholds for improving trend', () => {
|
|
96
|
-
//
|
|
97
|
-
for (let i = 0; i <
|
|
98
|
-
recordGateRun(testDir, 3, 5,
|
|
98
|
+
// Baseline: 15 runs with high failures (with variance)
|
|
99
|
+
for (let i = 0; i < 15; i++) {
|
|
100
|
+
recordGateRun(testDir, 3, 5, 15 + (i % 5) * 3);
|
|
99
101
|
}
|
|
100
|
-
|
|
101
|
-
|
|
102
|
+
// Recent: 5 runs with very low failures
|
|
103
|
+
for (let i = 0; i < 5; i++) {
|
|
104
|
+
recordGateRun(testDir, 7, 0, 1);
|
|
102
105
|
}
|
|
103
106
|
const adjustments = calculateAdaptiveThresholds(testDir, { fileCount: 100 });
|
|
104
107
|
expect(adjustments.trend).toBe('improving');
|
|
105
108
|
expect(adjustments.reasoning.some(r => r.includes('bonus'))).toBe(true);
|
|
106
109
|
});
|
|
107
110
|
it('should tighten thresholds for degrading trend', () => {
|
|
108
|
-
//
|
|
109
|
-
for (let i = 0; i <
|
|
110
|
-
recordGateRun(testDir, 7, 1, 3);
|
|
111
|
+
// Baseline: 15 runs with low failures (with variance)
|
|
112
|
+
for (let i = 0; i < 15; i++) {
|
|
113
|
+
recordGateRun(testDir, 7, 1, 1 + (i % 3));
|
|
111
114
|
}
|
|
112
|
-
|
|
113
|
-
|
|
115
|
+
// Recent: 5 runs with high failures
|
|
116
|
+
for (let i = 0; i < 5; i++) {
|
|
117
|
+
recordGateRun(testDir, 3, 5, 25);
|
|
114
118
|
}
|
|
115
119
|
const adjustments = calculateAdaptiveThresholds(testDir, { fileCount: 100 });
|
|
116
120
|
expect(adjustments.trend).toBe('degrading');
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* FileSystemCache — Shared file content cache across gates
|
|
3
|
+
*
|
|
4
|
+
* Solves the memory problem: each gate was independently loading ALL files
|
|
5
|
+
* into a Map<string, string>. With 25+ gates, a 5000-file repo would
|
|
6
|
+
* allocate ~500MB+ (50MB per gate × 10 gates reading files).
|
|
7
|
+
*
|
|
8
|
+
* Now: one cache per scan, all gates share it. LRU eviction keeps
|
|
9
|
+
* memory bounded. Hit rate >70% by the 2nd gate.
|
|
10
|
+
*
|
|
11
|
+
* @since v5.0.0
|
|
12
|
+
*/
|
|
13
|
+
/** Counters describing a cache's activity and current footprint. */
export interface CacheStats {
    /** Number of lookups answered from the cache. */
    hits: number;
    /** Number of lookups that were not in the cache. */
    misses: number;
    /** Number of entries removed to make room for new ones. */
    evictions: number;
    /** Number of entries currently held. */
    entries: number;
    /** Estimated size of the held contents, in bytes. */
    estimatedSizeBytes: number;
}
|
|
20
|
+
/**
 * File content cache with entry-count and size limits and
 * least-recently-used eviction.
 */
export declare class FileSystemCache {
    private readonly maxEntries;
    private readonly maxSizeBytes;
    private cache;
    private accessOrder;
    private stats;
    /**
     * @param maxEntries - Optional upper bound on the number of cached files.
     * @param maxSizeBytes - Optional upper bound on the estimated cached size in bytes.
     */
    constructor(maxEntries?: number, maxSizeBytes?: number);
    /**
     * Return the content of one file, reading it from disk when it is not cached.
     *
     * @param absolutePath - Absolute path of the file.
     */
    getFile(absolutePath: string): Promise<string>;
    /**
     * Return the contents of several files; the result has the same type as
     * `FileScanner.readFiles()`.
     *
     * @param cwd - Base directory for the paths in `files`.
     * @param files - Paths of the files to read.
     */
    getFiles(cwd: string, files: string[]): Promise<Map<string, string>>;
    /**
     * Drop one file from the cache, or the whole cache when no path is given.
     *
     * @param absolutePath - Absolute path of the entry to drop.
     */
    invalidate(absolutePath?: string): void;
    /** Cache statistics for monitoring. */
    getStats(): CacheStats;
    /** Hit rate as a percentage (0-100). */
    get hitRate(): number;
    private put;
    private touch;
    private evictLRU;
}
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* FileSystemCache — Shared file content cache across gates
|
|
3
|
+
*
|
|
4
|
+
* Solves the memory problem: each gate was independently loading ALL files
|
|
5
|
+
* into a Map<string, string>. With 25+ gates, a 5000-file repo would
|
|
6
|
+
* allocate ~500MB+ (50MB per gate × 10 gates reading files).
|
|
7
|
+
*
|
|
8
|
+
* Now: one cache per scan, all gates share it. LRU eviction keeps
|
|
9
|
+
* memory bounded. Hit rate >70% by the 2nd gate.
|
|
10
|
+
*
|
|
11
|
+
* @since v5.0.0
|
|
12
|
+
*/
|
|
13
|
+
import fs from 'fs-extra';
|
|
14
|
+
import path from 'path';
|
|
15
|
+
/**
 * Shared, bounded cache of file contents keyed by absolute path.
 *
 * Entries are evicted least-recently-used first once either limit is hit:
 * `maxEntries` (number of files) or `maxSizeBytes` (estimated at two bytes
 * per UTF-16 code unit of content).
 */
export class FileSystemCache {
    maxEntries;
    maxSizeBytes;
    /** path → file content */
    cache = new Map();
    /** Cached paths ordered from least to most recently used. */
    accessOrder = [];
    stats = { hits: 0, misses: 0, evictions: 0, entries: 0, estimatedSizeBytes: 0 };
    /**
     * @param maxEntries - Maximum number of cached files (default 2000).
     * @param maxSizeBytes - Maximum estimated size of cached content (default 200MB).
     */
    constructor(maxEntries = 2000, maxSizeBytes = 200 * 1024 * 1024) {
        this.maxEntries = maxEntries;
        this.maxSizeBytes = maxSizeBytes;
    }
    /**
     * Get a single file's content. Reads from disk on cache miss.
     *
     * @param absolutePath - Absolute path of the file.
     * @returns The file content decoded as UTF-8.
     * @throws Whatever the underlying read rejects with (e.g. missing file).
     */
    async getFile(absolutePath) {
        const cached = this.cache.get(absolutePath);
        // Compare against undefined: an empty file ('') is a valid cached value.
        if (cached !== undefined) {
            this.stats.hits++;
            this.touch(absolutePath);
            return cached;
        }
        this.stats.misses++;
        const content = await fs.readFile(absolutePath, 'utf-8');
        this.put(absolutePath, content);
        return content;
    }
    /**
     * Get multiple files. Compatible with FileScanner.readFiles() return type.
     *
     * @param cwd - Directory that relative entries of `files` are resolved against.
     * @param files - Relative or absolute paths; forward slashes are accepted.
     * @returns Map from each path exactly as given in `files` to its content.
     *          Files that cannot be read are left out of the map.
     */
    async getFiles(cwd, files) {
        const contents = new Map();
        for (const file of files) {
            const normalizedFile = file.replace(/\//g, path.sep);
            const filePath = path.isAbsolute(normalizedFile) ? normalizedFile : path.join(cwd, normalizedFile);
            try {
                contents.set(file, await this.getFile(filePath));
            }
            catch {
                // Deliberate best-effort: unreadable files are skipped
                // (same behavior as the original scanner).
            }
        }
        return contents;
    }
    /**
     * Invalidate a specific file or the entire cache.
     *
     * @param absolutePath - Entry to drop; when omitted (or empty) everything is dropped.
     */
    invalidate(absolutePath) {
        if (absolutePath) {
            const content = this.cache.get(absolutePath);
            // `!== undefined`, not truthiness: a cached empty file must be removable too.
            if (content !== undefined) {
                this.stats.estimatedSizeBytes -= content.length * 2; // UTF-16
                this.cache.delete(absolutePath);
                this.accessOrder = this.accessOrder.filter(k => k !== absolutePath);
                this.stats.entries = this.cache.size;
            }
        }
        else {
            this.cache.clear();
            this.accessOrder = [];
            this.stats.entries = 0;
            this.stats.estimatedSizeBytes = 0;
        }
    }
    /**
     * Get cache statistics for monitoring.
     *
     * @returns A copy of the counters; mutating it does not affect the cache.
     */
    getStats() {
        return { ...this.stats };
    }
    /**
     * Hit rate as a percentage (0-100), rounded to the nearest integer.
     * Returns 0 before any lookup has happened.
     */
    get hitRate() {
        const total = this.stats.hits + this.stats.misses;
        return total === 0 ? 0 : Math.round((this.stats.hits / total) * 100);
    }
    // ─── Internal LRU Logic ─────────────────────────────────────────
    /**
     * Store `content` under `key` as the most recently used entry, evicting
     * older entries as needed to respect both limits.
     */
    put(key, content) {
        const sizeBytes = content.length * 2; // UTF-16 estimate
        // Two overlapping getFile() calls for the same path both miss and both
        // end up here. Replace the existing entry instead of stacking a second
        // copy, otherwise the size estimate and the access order drift.
        const previous = this.cache.get(key);
        if (previous !== undefined) {
            this.stats.estimatedSizeBytes -= previous.length * 2;
            this.cache.delete(key);
            this.accessOrder = this.accessOrder.filter(k => k !== key);
        }
        // Content that can never fit is not cached: evicting every other entry
        // for it would still leave the cache over its limit.
        if (this.maxEntries <= 0 || sizeBytes > this.maxSizeBytes) {
            this.stats.entries = this.cache.size;
            return;
        }
        // Evict if over limits
        while ((this.cache.size >= this.maxEntries || this.stats.estimatedSizeBytes + sizeBytes > this.maxSizeBytes) &&
            this.accessOrder.length > 0) {
            this.evictLRU();
        }
        this.cache.set(key, content);
        this.accessOrder.push(key);
        this.stats.entries = this.cache.size;
        this.stats.estimatedSizeBytes += sizeBytes;
    }
    /** Mark `key` as the most recently used entry. */
    touch(key) {
        const idx = this.accessOrder.indexOf(key);
        if (idx !== -1) {
            this.accessOrder.splice(idx, 1);
            this.accessOrder.push(key);
        }
    }
    /** Remove the least recently used entry, if there is one. */
    evictLRU() {
        const oldest = this.accessOrder.shift();
        if (oldest === undefined) {
            return;
        }
        const content = this.cache.get(oldest);
        if (content !== undefined) {
            this.stats.estimatedSizeBytes -= content.length * 2;
            this.cache.delete(oldest);
            this.stats.evictions++;
        }
        this.stats.entries = this.cache.size;
    }
}
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Temporal Drift Engine (v5)
|
|
3
|
+
*
|
|
4
|
+
* The "bank statement" for code quality.
|
|
5
|
+
* Reads from SQLite scan history and computes:
|
|
6
|
+
*
|
|
7
|
+
* 1. Cross-session temporal trends — how is quality changing over weeks/months?
|
|
8
|
+
* 2. Per-provenance EWMA streams — is AI getting worse? Structural? Security?
|
|
9
|
+
* 3. Anomaly detection — is today's scan statistically unusual?
|
|
10
|
+
*
|
|
11
|
+
* This is Rigour's core differentiator:
|
|
12
|
+
* No other tool can tell a CTO "your AI contributions are degrading
|
|
13
|
+
* your codebase at 3x the rate of human contributions."
|
|
14
|
+
*
|
|
15
|
+
* Data source: ~/.rigour/rigour.db (scans + findings tables)
|
|
16
|
+
* All computation is read-only — no writes to DB.
|
|
17
|
+
*
|
|
18
|
+
* @since v5.0.0
|
|
19
|
+
*/
|
|
20
|
+
/** Direction in which a tracked quality signal is moving. */
export type DriftDirection =
    | 'improving'
    | 'stable'
    | 'degrading';
|
|
21
|
+
/** One observation in a time series, together with its smoothed value. */
export interface TrendPoint {
    /** When the observation was taken (numeric timestamp; unit not stated here — TODO confirm). */
    timestamp: number;
    /** Raw observed value. */
    value: number;
    /** EWMA value of the series at this point. */
    ewma: number;
}
|
|
27
|
+
/** EWMA stream for a single provenance category. */
export interface ProvenanceStream {
    /** Trend label for this stream. */
    direction: DriftDirection;
    /** Z-score associated with this stream. */
    zScore: number;
    /** EWMA time series, oldest point first. */
    series: Array<TrendPoint>;
    /** Current EWMA value. */
    currentEWMA: number;
    /** Average over the full history. */
    historicalAvg: number;
}
|
|
38
|
+
/** Per-month rollup backing the "3 months trend" view. */
export interface MonthlyBucket {
    /** Month identifier (string format not stated here — TODO confirm). */
    month: string;
    /** Average score for the month. */
    avgScore: number;
    /** Average AI-health value for the month. */
    avgAiHealth: number;
    /** Average structural value for the month. */
    avgStructural: number;
    /** Number of scans in the month. */
    scanCount: number;
    /** Failure total for the month. */
    totalFailures: number;
    /** Per-provenance figures for the month. */
    provenanceBreakdown: {
        aiDrift: number;
        structural: number;
        security: number;
    };
}
|
|
52
|
+
/** Per-week rollup for a more granular view than the monthly buckets. */
export interface WeeklyBucket {
    /** Start of the week (string format not stated here — TODO confirm). */
    weekStart: string;
    /** Average score for the week. */
    avgScore: number;
    /** Average AI-health value for the week. */
    avgAiHealth: number;
    /** Number of scans in the week. */
    scanCount: number;
}
|
|
59
|
+
/** Full temporal drift report for one project. */
export interface TemporalDriftReport {
    /** Repository the report describes. */
    repo: string;
    /** Number of scans covered by the report. */
    totalScans: number;
    /** Time span covered, in days. */
    timeSpanDays: number;
    /** Overall quality trend direction. */
    overallDirection: DriftDirection;
    /** Z-score behind the overall direction. */
    overallZScore: number;
    /** Per-provenance EWMA streams, plus the overall stream. */
    streams: {
        aiDrift: ProvenanceStream;
        structural: ProvenanceStream;
        security: ProvenanceStream;
        overall: ProvenanceStream;
    };
    /** Monthly rollups for the executive dashboard. */
    monthly: Array<MonthlyBucket>;
    /** Weekly rollups for the team dashboard. */
    weekly: Array<WeeklyBucket>;
    /** Whether the latest scan is statistically unusual. */
    latestScanAnomaly: {
        isAnomaly: boolean;
        direction: 'spike' | 'dip' | 'normal';
        zScore: number;
        message: string;
    };
    /** Human-readable narrative summarising the report. */
    narrative: string;
}
|
|
88
|
+
/**
 * Build the complete temporal drift report for a project.
 *
 * Data comes from SQLite: the scans table (scores over time) and the
 * findings table (provenance counts). The report contains EWMA streams,
 * Z-scores, monthly and weekly rollups, and a narrative.
 *
 * @param cwd - Project root path (used to derive the repo name).
 * @param maxScans - Maximum number of scans to analyze (default 200).
 * @returns The report, or `null` (the conditions for `null` are not visible
 *          in this declaration — TODO confirm against the implementation).
 */
export declare function generateTemporalDriftReport(
    cwd: string,
    maxScans?: number,
): TemporalDriftReport | null;
|
|
98
|
+
/**
 * Render a drift report as a formatted summary string for CLI/MCP output.
 *
 * @param report - Report to summarise.
 * @returns The formatted summary text.
 */
export declare function formatDriftSummary(
    report: TemporalDriftReport,
): string;
|