@rigour-labs/core 2.21.1 → 2.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,277 @@
1
+ /**
2
+ * Context Window Artifacts Gate
3
+ *
4
+ * Detects quality degradation patterns within a single file that emerge
5
+ * when AI loses context mid-generation. The telltale sign: clean,
6
+ * well-structured code at the top of a file that gradually degrades
7
+ * toward the bottom.
8
+ *
9
+ * Detection signals:
10
+ * 1. Comment density drops sharply (top half vs bottom half)
11
+ * 2. Function complexity increases toward end of file
12
+ * 3. Variable naming quality degrades (shorter names, more single-letter vars)
13
+ * 4. Error handling becomes sparser toward the bottom
14
+ * 5. Empty blocks and TODO/FIXME/HACK/XXX markers accumulate toward the bottom
15
+ *
16
+ * @since v2.16.0
17
+ */
18
+
19
+ import { Gate, GateContext } from './base.js';
20
+ import { Failure } from '../types/index.js';
21
+ import { FileScanner } from '../utils/scanner.js';
22
+ import { Logger } from '../utils/logger.js';
23
+ import fs from 'fs-extra';
24
+ import path from 'path';
25
+
26
/**
 * Result of comparing the top and bottom halves of one source file.
 */
interface FileQualityMetrics {
    /** Path of the analyzed file, as passed to the analyzer. */
    file: string;
    /** Number of lines in the whole file (split on `\n`). */
    totalLines: number;
    /** Measurements taken over the lines before the midpoint. */
    topHalf: HalfMetrics;
    /** Measurements taken over the lines from the midpoint onward. */
    bottomHalf: HalfMetrics;
    /** Accumulated signal weight in the range 0-1; higher means more degradation. */
    degradationScore: number;
    /** Human-readable description of every signal that fired. */
    signals: string[];
}
34
+
35
/**
 * Heuristic quality measurements for one half of a file.
 */
interface HalfMetrics {
    /** Comment lines divided by code lines (0 when there are no code lines). */
    commentDensity: number;
    /** Mean number of lines per detected function body (0 when none detected). */
    avgFunctionLength: number;
    /** Count of `const`/`let`/`var` declarations whose name is a single lowercase letter. */
    singleCharVarCount: number;
    /** Number of `try {` blocks divided by the number of detected functions (at least 1). */
    errorHandlingDensity: number;
    /** Count of brace pairs that contain only whitespace. */
    emptyBlockCount: number;
    /**
     * Count of TODO / FIXME / HACK / XXX markers (case-insensitive). The match
     * is made against the raw text, so markers outside comments count too.
     */
    todoCount: number;
    /** Mean length of names declared via `const`/`let`/`var`/`function` (0 when none). */
    avgIdentifierLength: number;
}
44
+
45
/**
 * User-facing options for the context-window-artifacts gate. Every field is
 * optional; the gate constructor fills in the defaults noted below.
 */
export interface ContextWindowArtifactsConfig {
    /** Turns the gate on or off. Default: true. */
    enabled?: boolean;
    /** Files with fewer lines than this are not analyzed. Default: 100. */
    min_file_lines?: number;
    /** 0-1; a file is flagged when its degradation score is >= this value. Default: 0.4. */
    degradation_threshold?: number;
    /** Minimum number of fired signals required to flag a file. Default: 2. */
    signals_required?: number;
}
51
+
52
/**
 * Gate that flags files whose bottom half is measurably lower quality than
 * their top half.
 *
 * Each candidate file is split at its midpoint, both halves are measured with
 * the same line/regex heuristics, and every metric that degrades past its
 * threshold contributes a human-readable signal plus a fixed weight to a 0-1
 * degradation score. A file is reported when it has at least
 * `signals_required` signals and its score is >= `degradation_threshold`.
 */
export class ContextWindowArtifactsGate extends Gate {
    /**
     * Line patterns treated as the start of a function:
     *  1. `function name` declarations at column 0,
     *  2. `const name = (...) =>` / `const name = x =>` at column 0,
     *  3. indented `name(...) {` method definitions.
     *
     * Pattern 3 explicitly rejects control-flow keywords; without that guard
     * `if (...) {`, `for (...) {` and similar lines are measured as functions.
     */
    private static readonly FUNCTION_START_PATTERNS: readonly RegExp[] = [
        /^(?:export\s+)?(?:async\s+)?function\s+\w+/,
        /^(?:export\s+)?(?:const|let|var)\s+\w+\s*=\s*(?:async\s+)?(?:\([^)]*\)|\w+)\s*=>/,
        /^\s+(?:async\s+)?(?!(?:if|for|while|switch|catch|with)\b)\w+\s*\([^)]*\)\s*\{/,
    ];

    private config: Required<ContextWindowArtifactsConfig>;

    /**
     * @param config Optional overrides; missing fields fall back to
     *               enabled=true, min_file_lines=100, degradation_threshold=0.4,
     *               signals_required=2.
     */
    constructor(config: ContextWindowArtifactsConfig = {}) {
        super('context-window-artifacts', 'Context Window Artifact Detection');
        this.config = {
            enabled: config.enabled ?? true,
            min_file_lines: config.min_file_lines ?? 100,
            degradation_threshold: config.degradation_threshold ?? 0.4,
            signals_required: config.signals_required ?? 2,
        };
    }

    /**
     * Scans JS/TS/Python sources under `context.cwd` and returns one failure
     * per file that shows top-to-bottom degradation.
     *
     * @param context Gate context; `cwd` is the scan root and `ignore` adds
     *                extra ignore globs.
     * @returns The collected failures (empty when the gate is disabled).
     */
    async run(context: GateContext): Promise<Failure[]> {
        if (!this.config.enabled) return [];

        const failures: Failure[] = [];

        const files = await FileScanner.findFiles({
            cwd: context.cwd,
            patterns: ['**/*.{ts,js,tsx,jsx,py}'],
            ignore: [...(context.ignore || []), '**/node_modules/**', '**/dist/**', '**/*.test.*', '**/*.spec.*', '**/*.min.*'],
        });

        Logger.info(`Context Window Artifacts: Scanning ${files.length} files`);

        for (const file of files) {
            try {
                const content = await fs.readFile(path.join(context.cwd, file), 'utf-8');

                // Short files do not have enough material in each half to compare.
                if (content.split('\n').length < this.config.min_file_lines) continue;

                const metrics = this.analyzeFile(content, file);
                if (!metrics) continue;
                if (metrics.signals.length < this.config.signals_required) continue;
                if (metrics.degradationScore < this.config.degradation_threshold) continue;

                const signalList = metrics.signals.map(s => `  • ${s}`).join('\n');
                const midpoint = Math.floor(metrics.totalLines / 2);

                failures.push(this.createFailure(
                    `Context window artifact detected in ${file} (${metrics.totalLines} lines, degradation: ${(metrics.degradationScore * 100).toFixed(0)}%):\n${signalList}`,
                    [file],
                    `This file shows quality degradation from top to bottom, a pattern typical of AI context window exhaustion. Consider refactoring the bottom half or splitting the file. The quality drop begins around line ${midpoint}.`,
                    'Context Window Artifacts',
                    midpoint,
                    undefined,
                    'high'
                ));
            } catch (e: unknown) {
                // Best-effort scan: one unreadable file must not abort the gate,
                // but the skip is reported instead of being silently dropped.
                const reason = e instanceof Error ? e.message : String(e);
                Logger.info(`Context Window Artifacts: Skipping ${file}: ${reason}`);
            }
        }

        return failures;
    }

    /**
     * Splits `content` at its line midpoint, measures both halves and derives
     * the fired signals and the degradation score.
     *
     * @param content Full text of the file.
     * @param file    Path used only to label the result.
     * @returns The metrics for the file; this implementation never returns null.
     */
    private analyzeFile(content: string, file: string): FileQualityMetrics | null {
        const lines = content.split('\n');
        const midpoint = Math.floor(lines.length / 2);

        const topMetrics = this.measureHalf(lines.slice(0, midpoint).join('\n'));
        const bottomMetrics = this.measureHalf(lines.slice(midpoint).join('\n'));

        const signals: string[] = [];
        let degradationScore = 0;

        // Signal 1: comment density falls to less than half of the top's density.
        if (topMetrics.commentDensity > 0) {
            const commentRatio = bottomMetrics.commentDensity / topMetrics.commentDensity;
            if (commentRatio < 0.5) {
                signals.push(`Comment density drops ${((1 - commentRatio) * 100).toFixed(0)}% in bottom half`);
                degradationScore += 0.25;
            }
        }

        // Signal 2: functions are on average more than 1.5x longer in the bottom half.
        if (topMetrics.avgFunctionLength > 0 && bottomMetrics.avgFunctionLength > 0) {
            const lengthRatio = bottomMetrics.avgFunctionLength / topMetrics.avgFunctionLength;
            if (lengthRatio > 1.5) {
                signals.push(`Average function length ${lengthRatio.toFixed(1)}x longer in bottom half`);
                degradationScore += 0.2;
            }
        }

        // Signal 3: at least 3 single-letter variables and more than double the top's count.
        if (bottomMetrics.singleCharVarCount > topMetrics.singleCharVarCount * 2 &&
            bottomMetrics.singleCharVarCount >= 3) {
            signals.push(`${bottomMetrics.singleCharVarCount} single-char variables in bottom half vs ${topMetrics.singleCharVarCount} in top`);
            degradationScore += 0.2;
        }

        // Signal 3b: declared names are on average more than 30% shorter.
        if (topMetrics.avgIdentifierLength > 0 && bottomMetrics.avgIdentifierLength > 0) {
            const nameRatio = bottomMetrics.avgIdentifierLength / topMetrics.avgIdentifierLength;
            if (nameRatio < 0.7) {
                signals.push(`Identifier names ${((1 - nameRatio) * 100).toFixed(0)}% shorter in bottom half`);
                degradationScore += 0.15;
            }
        }

        // Signal 4: try-blocks per function fall below 30% of the top's rate.
        if (topMetrics.errorHandlingDensity > 0) {
            const errorRatio = bottomMetrics.errorHandlingDensity / topMetrics.errorHandlingDensity;
            if (errorRatio < 0.3) {
                signals.push(`Error handling ${((1 - errorRatio) * 100).toFixed(0)}% less frequent in bottom half`);
                degradationScore += 0.2;
            }
        }

        // Signal 5: the bottom half has at least 3 more empty blocks than the top.
        if (bottomMetrics.emptyBlockCount > topMetrics.emptyBlockCount + 2) {
            signals.push(`${bottomMetrics.emptyBlockCount} empty blocks in bottom half vs ${topMetrics.emptyBlockCount} in top`);
            degradationScore += 0.15;
        }

        // Signal 6: the bottom half has at least 2 more TODO-style markers than the top.
        if (bottomMetrics.todoCount > topMetrics.todoCount + 1) {
            signals.push(`${bottomMetrics.todoCount} TODO/FIXME/HACK in bottom half vs ${topMetrics.todoCount} in top`);
            degradationScore += 0.1;
        }

        // Round to two decimals so floating-point drift in the sum of weights
        // cannot push a score just below a configured threshold, then cap at 1.
        degradationScore = Math.min(1.0, Math.round(degradationScore * 100) / 100);

        return {
            file,
            totalLines: lines.length,
            topHalf: topMetrics,
            bottomHalf: bottomMetrics,
            degradationScore,
            signals,
        };
    }

    /**
     * Computes the heuristic metrics for one half of a file.
     *
     * @param content Text of the half being measured.
     * @returns The measurements for that text.
     */
    private measureHalf(content: string): HalfMetrics {
        const trimmedLines = content.split('\n').map(line => line.trim());

        // A line is a comment when it starts with a JS/Python comment marker or
        // with `*` (continuation of a block comment). The same predicate is used
        // for both counts so `/*` / `/**` lines are not counted as code as well.
        const isComment = (line: string): boolean =>
            line.startsWith('//') || line.startsWith('#') || line.startsWith('*') || line.startsWith('/*');

        const commentLineCount = trimmedLines.filter(isComment).length;
        const codeLineCount = trimmedLines.filter(line => line !== '' && !isComment(line)).length;
        const commentDensity = codeLineCount > 0 ? commentLineCount / codeLineCount : 0;

        const funcLengths = this.measureFunctionLengths(content);
        const avgFunctionLength = funcLengths.length > 0
            ? funcLengths.reduce((a, b) => a + b, 0) / funcLengths.length
            : 0;

        // Single-letter declarations, excluding the variable declared in a
        // `for (` / `for await (` header, where short counters are conventional.
        const singleCharVarCount =
            (content.match(/(?<!\bfor\s*(?:await\s*)?\(\s*)\b(?:const|let|var)\s+[a-z]\b/g) || []).length;

        // try-blocks per detected function; the divisor is at least 1 so a half
        // without detected functions still yields a finite value.
        const tryCount = (content.match(/\btry\s*\{/g) || []).length;
        const errorHandlingDensity = tryCount / Math.max(1, funcLengths.length);

        const emptyBlockCount = (content.match(/\{\s*\}/g) || []).length;
        const todoCount = (content.match(/\b(TODO|FIXME|HACK|XXX)\b/gi) || []).length;

        // Names introduced by const/let/var/function; the name is the last
        // whitespace-separated token of each match.
        const declarations = content.match(/\b(?:const|let|var|function)\s+([a-zA-Z_$][a-zA-Z0-9_$]*)/g) || [];
        const nameLengths = declarations.map(decl => {
            const parts = decl.split(/\s+/);
            return (parts[parts.length - 1] ?? '').length;
        });
        const avgIdentifierLength = nameLengths.length > 0
            ? nameLengths.reduce((sum, len) => sum + len, 0) / nameLengths.length
            : 0;

        return {
            commentDensity,
            avgFunctionLength,
            singleCharVarCount,
            errorHandlingDensity,
            emptyBlockCount,
            todoCount,
            avgIdentifierLength,
        };
    }

    /**
     * Finds function starts line by line and measures each body by counting
     * braces from the start line until the depth returns to zero.
     *
     * Braces inside strings and comments are counted as well, so the result is
     * an approximation.
     *
     * @param content Text to scan.
     * @returns One body length (in lines) per detected function.
     */
    private measureFunctionLengths(content: string): number[] {
        const lines = content.split('\n');
        const lengths: number[] = [];

        for (let i = 0; i < lines.length; i++) {
            const startLine = lines[i] ?? '';
            if (!ContextWindowArtifactsGate.FUNCTION_START_PATTERNS.some(p => p.test(startLine))) continue;

            let braceDepth = 0;
            let started = false;
            let bodyLines = 0;

            for (let j = i; j < lines.length; j++) {
                const line = lines[j] ?? '';
                for (const ch of line) {
                    if (ch === '{') { braceDepth++; started = true; }
                    // Closing braces before the body opens belong to other code.
                    else if (ch === '}' && started) braceDepth--;
                }

                if (!started) {
                    // The statement ended before any block opened (for example an
                    // expression-bodied arrow function): there is no body to
                    // measure, and scanning on would measure an unrelated block.
                    if (line.trimEnd().endsWith(';')) break;
                    continue;
                }

                bodyLines++;
                if (braceDepth <= 0) break;
            }

            if (bodyLines > 0) lengths.push(bodyLines);
        }

        return lengths;
    }
}
@@ -92,7 +92,8 @@ export class ContextGate extends Gate {
92
92
  failures.push(this.createFailure(
93
93
  `Context Drift: Redundant variation '${accessedVar}' detected in ${file}.`,
94
94
  [file],
95
- `The project already uses '${anchor.id}' as a standard anchor. Avoid inventing variations like '${deviation}'. Reuse the existing anchor or align with established project patterns.`
95
+ `The project already uses '${anchor.id}' as a standard anchor. Avoid inventing variations like '${deviation}'. Reuse the existing anchor or align with established project patterns.`,
96
+ undefined, undefined, undefined, 'high'
96
97
  ));
97
98
  }
98
99
  }
@@ -194,7 +195,8 @@ export class ContextGate extends Gate {
194
195
  `Cross-file naming inconsistency: ${type} names use ${casing} in ${count} places (dominant is ${dominant})`,
195
196
  uniqueFiles,
196
197
  `Standardize ${type} naming to ${dominant}. Found ${casing} in: ${uniqueFiles.join(', ')}`,
197
- 'Naming Convention Drift'
198
+ 'Naming Convention Drift',
199
+ undefined, undefined, 'high'
198
200
  ));
199
201
  }
200
202
  }
@@ -236,7 +238,8 @@ export class ContextGate extends Gate {
236
238
  `Cross-file import inconsistency: ${mixedFiles.length} files mix relative and absolute imports`,
237
239
  mixedFiles.slice(0, 5),
238
240
  'Standardize import style across the codebase. Use either relative (./foo) or path aliases (@/foo) consistently.',
239
- 'Import Pattern Drift'
241
+ 'Import Pattern Drift',
242
+ undefined, undefined, 'high'
240
243
  ));
241
244
  }
242
245
  }
@@ -0,0 +1,231 @@
1
+ /**
2
+ * Duplication Drift Gate
3
+ *
4
+ * Detects when AI generates near-identical functions across files because
5
+ * it doesn't remember what it already wrote. This is an AI-specific failure
6
+ * mode — humans reuse via copy-paste (same file), AI re-invents (cross-file).
7
+ *
8
+ * Detection strategy:
9
+ * 1. Extract all function bodies (normalized: strip whitespace, comments)
10
+ * 2. Compare function signatures + body hashes across files
11
+ * 3. Flag functions whose normalized bodies are identical (same hash) in different files
12
+ *
13
+ * @since v2.16.0
14
+ */
15
+
16
+ import { Gate, GateContext } from './base.js';
17
+ import { Failure } from '../types/index.js';
18
+ import { FileScanner } from '../utils/scanner.js';
19
+ import { Logger } from '../utils/logger.js';
20
+ import crypto from 'crypto';
21
+ import path from 'path';
22
+
23
/**
 * One extracted function together with the fingerprint of its body.
 */
interface FunctionSignature {
    /** Function or method name captured from the declaration line. */
    name: string;
    /** File the function was found in. */
    file: string;
    /** 1-based line number of the declaration. */
    line: number;
    /** Number of comma-separated entries in the captured parameter list. */
    paramCount: number;
    /** Hex digest of `normalized`. */
    bodyHash: string;
    /** Number of source lines in the extracted body. */
    bodyLength: number;
    /** Body text after comment stripping, whitespace collapsing and quote normalization. */
    normalized: string;
}
32
+
33
/**
 * User-facing options for the duplication-drift gate. Every field is
 * optional; the gate constructor fills in the defaults noted below.
 */
export interface DuplicationDriftConfig {
    /** Turns the gate on or off. Default: true. */
    enabled?: boolean;
    /**
     * Intended similarity cut-off in the range 0-1. Default: 0.8.
     * NOTE(review): the gate stores this value, but its duplicate grouping
     * matches on identical body hashes only and does not read it - confirm
     * whether fuzzy matching is planned.
     */
    similarity_threshold?: number;
    /** Functions whose extracted body has fewer lines than this are ignored. Default: 5. */
    min_body_lines?: number;
}
38
+
39
/**
 * Gate that reports functions whose normalized bodies are identical in two or
 * more different files.
 *
 * Function bodies are extracted with line-based heuristics (brace counting for
 * JS/TS, indentation for Python), normalized (comments stripped, whitespace
 * collapsed, quote characters unified) and hashed; functions sharing a hash
 * across at least two files form a duplicate group.
 */
export class DuplicationDriftGate extends Gate {
    /**
     * Declaration patterns for JS/TS, each capturing the name in group 1:
     *  1. `function name(params)` at column 0 (params in group 2),
     *  2. `const name = (params) =>` or `const name = param =>` at column 0
     *     (params in group 2 or 3),
     *  3. indented `name(params) {` method definitions (params in group 2).
     *
     * Pattern 3 rejects control-flow keywords; without that guard, blocks such
     * as `if (...) {` are extracted as functions named `if`.
     */
    private static readonly JS_FUNCTION_PATTERNS: readonly RegExp[] = [
        /^(?:export\s+)?(?:async\s+)?function\s+(\w+)\s*\(([^)]*)\)/,
        /^(?:export\s+)?(?:const|let|var)\s+(\w+)\s*=\s*(?:async\s+)?(?:\(([^)]*)\)|(\w+))\s*=>/,
        /^\s+(?:async\s+)?(?!(?:if|for|while|switch|catch|with)\b)(\w+)\s*\(([^)]*)\)\s*\{/,
    ];

    private config: Required<DuplicationDriftConfig>;

    /**
     * @param config Optional overrides; missing fields fall back to
     *               enabled=true, similarity_threshold=0.8, min_body_lines=5.
     */
    constructor(config: DuplicationDriftConfig = {}) {
        super('duplication-drift', 'AI Duplication Drift Detection');
        this.config = {
            enabled: config.enabled ?? true,
            similarity_threshold: config.similarity_threshold ?? 0.8,
            min_body_lines: config.min_body_lines ?? 5,
        };
    }

    /**
     * Scans JS/TS/Python sources under `context.cwd`, extracts their functions
     * and returns one failure per group of cross-file duplicates.
     *
     * @param context Gate context; `cwd` is the scan root and `ignore` adds
     *                extra ignore globs.
     * @returns The collected failures (empty when the gate is disabled).
     */
    async run(context: GateContext): Promise<Failure[]> {
        if (!this.config.enabled) return [];

        const failures: Failure[] = [];
        const functions: FunctionSignature[] = [];

        const files = await FileScanner.findFiles({
            cwd: context.cwd,
            patterns: ['**/*.{ts,js,tsx,jsx,py}'],
            ignore: [...(context.ignore || []), '**/node_modules/**', '**/dist/**', '**/*.test.*', '**/*.spec.*'],
        });

        Logger.info(`Duplication Drift: Scanning ${files.length} files`);

        // Resolved once, outside the loop. Node's built-in module is used because
        // named exports of a CommonJS package are not guaranteed to be visible
        // through a dynamic import when this file is loaded as an ES module.
        const { readFile } = await import('fs/promises');

        for (const file of files) {
            try {
                const content = await readFile(path.join(context.cwd, file), 'utf-8');
                const ext = path.extname(file);

                if (['.ts', '.js', '.tsx', '.jsx'].includes(ext)) {
                    this.extractJSFunctions(content, file, functions);
                } else if (ext === '.py') {
                    this.extractPyFunctions(content, file, functions);
                }
            } catch (e: unknown) {
                // Best-effort scan: report the skipped file instead of hiding it.
                const reason = e instanceof Error ? e.message : String(e);
                Logger.info(`Duplication Drift: Skipping ${file}: ${reason}`);
            }
        }

        for (const group of this.findDuplicateGroups(functions)) {
            const first = group[0];
            if (!first) continue;

            const groupFiles = [...new Set(group.map(f => f.file))];
            const locations = group.map(f => `${f.file}:${f.line} (${f.name})`).join(', ');

            failures.push(this.createFailure(
                `AI Duplication Drift: Function '${first.name}' has ${group.length} near-identical copies across files`,
                groupFiles,
                `Found duplicate implementations at: ${locations}. Extract to a shared module and import.`,
                'Duplication Drift',
                first.line,
                undefined,
                'high'
            ));
        }

        return failures;
    }

    /**
     * Appends every JS/TS function found in `content` whose body has at least
     * `min_body_lines` lines to `functions`.
     *
     * @param content   Full text of the file.
     * @param file      Path recorded on each signature.
     * @param functions Output list; mutated in place.
     */
    private extractJSFunctions(content: string, file: string, functions: FunctionSignature[]) {
        const lines = content.split('\n');

        for (let i = 0; i < lines.length; i++) {
            const line = lines[i] ?? '';

            for (const pattern of DuplicationDriftGate.JS_FUNCTION_PATTERNS) {
                const match = line.match(pattern);
                if (!match) continue;

                const name = match[1] ?? '';
                const params = match[2] ?? match[3] ?? '';
                const body = this.extractFunctionBody(lines, i);

                if (body.length >= this.config.min_body_lines) {
                    const normalized = this.normalizeBody(body.join('\n'), 'js');
                    functions.push({
                        name,
                        file,
                        line: i + 1,
                        paramCount: this.countParams(params),
                        bodyHash: this.hash(normalized),
                        bodyLength: body.length,
                        normalized,
                    });
                }
                break; // first matching pattern wins for this line
            }
        }
    }

    /**
     * Appends every Python `def` found in `content` whose body has at least
     * `min_body_lines` lines to `functions`. A body is the run of following
     * lines that are blank or indented deeper than the `def` line.
     *
     * @param content   Full text of the file.
     * @param file      Path recorded on each signature.
     * @param functions Output list; mutated in place.
     */
    private extractPyFunctions(content: string, file: string, functions: FunctionSignature[]) {
        const lines = content.split('\n');
        const indentOf = (text: string): number => text.length - text.trimStart().length;

        for (let i = 0; i < lines.length; i++) {
            const header = lines[i] ?? '';
            const match = header.match(/^(?:\s*)(?:async\s+)?def\s+(\w+)\s*\(([^)]*)\)/);
            if (!match) continue;

            const name = match[1] ?? '';
            const params = match[2] ?? '';
            const indent = indentOf(header);

            const body: string[] = [];
            for (let j = i + 1; j < lines.length; j++) {
                const candidate = lines[j] ?? '';
                if (candidate.trim() !== '' && indentOf(candidate) <= indent) break;
                body.push(candidate);
            }

            // Blank lines separating this function from the next definition are
            // not part of the body and must not count toward min_body_lines.
            while (body.length > 0 && (body[body.length - 1] ?? '').trim() === '') {
                body.pop();
            }

            if (body.length >= this.config.min_body_lines) {
                const normalized = this.normalizeBody(body.join('\n'), 'py');
                functions.push({
                    name,
                    file,
                    line: i + 1,
                    paramCount: this.countParams(params),
                    bodyHash: this.hash(normalized),
                    bodyLength: body.length,
                    normalized,
                });
            }
        }
    }

    /**
     * Counts the non-empty, comma-separated entries of a parameter list.
     *
     * @param params Raw text between the parentheses (or a bare identifier).
     * @returns 0 for an empty/whitespace-only list, otherwise the entry count.
     */
    private countParams(params: string): number {
        return params.split(',').filter(p => p.trim() !== '').length;
    }

    /**
     * Collects the lines of a brace-delimited body, starting at `startIndex`
     * and ending on the line where the brace depth returns to zero.
     *
     * Braces inside strings and comments are counted as well, so the result is
     * an approximation.
     *
     * @param lines      All lines of the file.
     * @param startIndex Index of the declaration line.
     * @returns The body lines from the first line containing `{` through the
     *          closing line, or an empty array when the statement ends before
     *          any block opens.
     */
    private extractFunctionBody(lines: string[], startIndex: number): string[] {
        let braceDepth = 0;
        let started = false;
        const body: string[] = [];

        for (let i = startIndex; i < lines.length; i++) {
            const line = lines[i] ?? '';
            for (const ch of line) {
                if (ch === '{') { braceDepth++; started = true; }
                // Closing braces before the body opens belong to other code.
                else if (ch === '}' && started) braceDepth--;
            }

            if (!started) {
                // Expression-bodied arrow functions end with `;` and have no
                // block; scanning on would capture the next unrelated block.
                if (line.trimEnd().endsWith(';')) break;
                continue;
            }

            body.push(line);
            if (braceDepth <= 0) break;
        }

        return body;
    }

    /**
     * Reduces a body to a canonical form so that formatting, comments and
     * quote style do not affect the hash.
     *
     * Comment stripping is language-specific: `//` is floor division in Python
     * and `#` can start a private field or appear inside strings in JS/TS, so
     * applying both syntaxes to every file would delete real code.
     *
     * @param body     Raw body text.
     * @param language Comment syntax to strip; defaults to JS/TS.
     * @returns The normalized text.
     */
    private normalizeBody(body: string, language: 'js' | 'py' = 'js'): string {
        const withoutComments = language === 'py'
            ? body.replace(/#.*/g, '')
            : body
                .replace(/\/\/.*/g, '')             // single-line comments
                .replace(/\/\*[\s\S]*?\*\//g, '');  // block comments

        return withoutComments
            .replace(/\s+/g, ' ')      // collapse whitespace
            .replace(/['"`]/g, '"')    // normalize quotes
            .trim();
    }

    /**
     * @param text Text to fingerprint.
     * @returns Hex MD5 digest, used here only as a grouping key.
     */
    private hash(text: string): string {
        return crypto.createHash('md5').update(text).digest('hex');
    }

    /**
     * Groups functions by body hash and keeps the groups that have at least
     * two members spread over at least two different files.
     *
     * @param functions All extracted signatures.
     * @returns The duplicate groups, in first-seen hash order.
     */
    private findDuplicateGroups(functions: FunctionSignature[]): FunctionSignature[][] {
        const byHash = new Map<string, FunctionSignature[]>();

        for (const fn of functions) {
            const bucket = byHash.get(fn.bodyHash);
            if (bucket) {
                bucket.push(fn);
            } else {
                byHash.set(fn.bodyHash, [fn]);
            }
        }

        return [...byHash.values()].filter(
            group => group.length >= 2 && new Set(group.map(f => f.file)).size >= 2
        );
    }
}
package/src/gates/file.ts CHANGED
@@ -32,7 +32,11 @@ export class FileGate extends Gate {
32
32
  this.createFailure(
33
33
  `The following files exceed the maximum limit of ${this.config.maxLines} lines:`,
34
34
  violations,
35
- 'Break these files into smaller, more modular components to improve maintainability (SOLID - Single Responsibility Principle).'
35
+ 'Break these files into smaller, more modular components to improve maintainability (SOLID - Single Responsibility Principle).',
36
+ undefined,
37
+ undefined,
38
+ undefined,
39
+ 'low'
36
40
  ),
37
41
  ];
38
42
  }