@rigour-labs/core 5.0.1 → 5.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141) hide show
  1. package/README.md +9 -1
  2. package/dist/gates/agent-team.d.ts +0 -1
  3. package/dist/gates/agent-team.js +0 -1
  4. package/dist/gates/checkpoint.d.ts +0 -2
  5. package/dist/gates/checkpoint.js +0 -2
  6. package/dist/gates/context-window-artifacts.d.ts +6 -2
  7. package/dist/gates/context-window-artifacts.js +107 -31
  8. package/dist/gates/deep-analysis.d.ts +2 -0
  9. package/dist/gates/deep-analysis.js +41 -11
  10. package/dist/gates/dependency.d.ts +0 -2
  11. package/dist/gates/dependency.js +23 -5
  12. package/dist/gates/deprecated-apis.d.ts +0 -2
  13. package/dist/gates/deprecated-apis.js +33 -20
  14. package/dist/gates/duplication-drift/index.d.ts +61 -0
  15. package/dist/gates/duplication-drift/index.js +240 -0
  16. package/dist/gates/duplication-drift/similarity.d.ts +68 -0
  17. package/dist/gates/duplication-drift/similarity.js +177 -0
  18. package/dist/gates/duplication-drift/tokenizer.d.ts +55 -0
  19. package/dist/gates/duplication-drift/tokenizer.js +195 -0
  20. package/dist/gates/frontend-secret-exposure.d.ts +0 -3
  21. package/dist/gates/frontend-secret-exposure.js +1 -114
  22. package/dist/gates/frontend-secret-patterns.d.ts +33 -0
  23. package/dist/gates/frontend-secret-patterns.js +119 -0
  24. package/dist/gates/{hallucinated-imports.d.ts → hallucinated-imports/index.d.ts} +2 -29
  25. package/dist/gates/hallucinated-imports/index.js +174 -0
  26. package/dist/gates/hallucinated-imports/js-resolver.d.ts +45 -0
  27. package/dist/gates/hallucinated-imports/js-resolver.js +320 -0
  28. package/dist/gates/hallucinated-imports/manifest-discovery.d.ts +28 -0
  29. package/dist/gates/hallucinated-imports/manifest-discovery.js +114 -0
  30. package/dist/gates/hallucinated-imports/python-resolver.d.ts +24 -0
  31. package/dist/gates/hallucinated-imports/python-resolver.js +306 -0
  32. package/dist/gates/hallucinated-imports-lang.d.ts +2 -2
  33. package/dist/gates/hallucinated-imports-lang.js +269 -34
  34. package/dist/gates/hallucinated-imports.test.js +1 -2
  35. package/dist/gates/inconsistent-error-handling.d.ts +0 -5
  36. package/dist/gates/inconsistent-error-handling.js +15 -144
  37. package/dist/gates/language-adapters/csharp-adapter.d.ts +16 -0
  38. package/dist/gates/language-adapters/csharp-adapter.js +211 -0
  39. package/dist/gates/language-adapters/go-adapter.d.ts +26 -0
  40. package/dist/gates/language-adapters/go-adapter.js +195 -0
  41. package/dist/gates/language-adapters/index.d.ts +15 -0
  42. package/dist/gates/language-adapters/index.js +16 -0
  43. package/dist/gates/language-adapters/java-adapter.d.ts +16 -0
  44. package/dist/gates/language-adapters/java-adapter.js +237 -0
  45. package/dist/gates/language-adapters/js-adapter.d.ts +26 -0
  46. package/dist/gates/language-adapters/js-adapter.js +279 -0
  47. package/dist/gates/language-adapters/python-adapter.d.ts +25 -0
  48. package/dist/gates/language-adapters/python-adapter.js +183 -0
  49. package/dist/gates/language-adapters/registry.d.ts +26 -0
  50. package/dist/gates/language-adapters/registry.js +65 -0
  51. package/dist/gates/language-adapters/ruby-adapter.d.ts +25 -0
  52. package/dist/gates/language-adapters/ruby-adapter.js +217 -0
  53. package/dist/gates/language-adapters/rust-adapter.d.ts +27 -0
  54. package/dist/gates/language-adapters/rust-adapter.js +235 -0
  55. package/dist/gates/language-adapters/types.d.ts +60 -0
  56. package/dist/gates/language-adapters/types.js +22 -0
  57. package/dist/gates/logic-drift-extractors.d.ts +15 -0
  58. package/dist/gates/logic-drift-extractors.js +34 -0
  59. package/dist/gates/logic-drift.d.ts +0 -30
  60. package/dist/gates/logic-drift.js +39 -129
  61. package/dist/gates/phantom-apis.d.ts +0 -2
  62. package/dist/gates/phantom-apis.js +49 -20
  63. package/dist/gates/promise-safety.d.ts +0 -1
  64. package/dist/gates/promise-safety.js +14 -2
  65. package/dist/gates/runner.js +52 -23
  66. package/dist/gates/runner.test.js +1 -1
  67. package/dist/gates/security-patterns-data.d.ts +14 -0
  68. package/dist/gates/security-patterns-data.js +235 -0
  69. package/dist/gates/security-patterns.d.ts +17 -3
  70. package/dist/gates/security-patterns.js +80 -211
  71. package/dist/gates/side-effect-analysis/categorizer.d.ts +32 -0
  72. package/dist/gates/side-effect-analysis/categorizer.js +83 -0
  73. package/dist/gates/{side-effect-analysis.d.ts → side-effect-analysis/index.d.ts} +3 -5
  74. package/dist/gates/{side-effect-analysis.js → side-effect-analysis/index.js} +33 -45
  75. package/dist/gates/side-effect-analysis/scope-tracker.d.ts +37 -0
  76. package/dist/gates/side-effect-analysis/scope-tracker.js +40 -0
  77. package/dist/gates/side-effect-helpers/index.d.ts +4 -0
  78. package/dist/gates/side-effect-helpers/index.js +4 -0
  79. package/dist/gates/side-effect-helpers/pattern-detection.d.ts +123 -0
  80. package/dist/gates/{side-effect-helpers.js → side-effect-helpers/pattern-detection.js} +22 -468
  81. package/dist/gates/side-effect-helpers/resource-tracking.d.ts +80 -0
  82. package/dist/gates/side-effect-helpers/resource-tracking.js +281 -0
  83. package/dist/gates/side-effect-helpers/scope-analysis.d.ts +21 -0
  84. package/dist/gates/side-effect-helpers/scope-analysis.js +146 -0
  85. package/dist/gates/side-effect-helpers/types.d.ts +38 -0
  86. package/dist/gates/side-effect-helpers/types.js +41 -0
  87. package/dist/gates/side-effect-rules.d.ts +0 -1
  88. package/dist/gates/side-effect-rules.js +0 -1
  89. package/dist/gates/style-drift-rules.d.ts +86 -0
  90. package/dist/gates/style-drift-rules.js +103 -0
  91. package/dist/gates/style-drift.d.ts +7 -16
  92. package/dist/gates/style-drift.js +101 -119
  93. package/dist/gates/test-quality-matchers.d.ts +53 -0
  94. package/dist/gates/test-quality-matchers.js +86 -0
  95. package/dist/gates/test-quality.d.ts +0 -3
  96. package/dist/gates/test-quality.js +47 -44
  97. package/dist/hooks/checker.d.ts +0 -1
  98. package/dist/hooks/checker.js +0 -2
  99. package/dist/hooks/dlp-templates.d.ts +0 -1
  100. package/dist/hooks/dlp-templates.js +0 -4
  101. package/dist/hooks/index.d.ts +0 -2
  102. package/dist/hooks/index.js +0 -2
  103. package/dist/hooks/input-validator.d.ts +0 -1
  104. package/dist/hooks/input-validator.js +0 -1
  105. package/dist/hooks/input-validator.test.js +0 -1
  106. package/dist/hooks/standalone-checker.d.ts +0 -1
  107. package/dist/hooks/standalone-checker.js +0 -1
  108. package/dist/hooks/standalone-dlp-checker.d.ts +0 -1
  109. package/dist/hooks/standalone-dlp-checker.js +0 -1
  110. package/dist/hooks/templates.d.ts +0 -1
  111. package/dist/hooks/templates.js +0 -1
  112. package/dist/hooks/types.d.ts +0 -1
  113. package/dist/hooks/types.js +0 -1
  114. package/dist/index.d.ts +1 -1
  115. package/dist/index.js +1 -1
  116. package/dist/inference/index.js +1 -1
  117. package/dist/services/adaptive-thresholds.d.ts +0 -2
  118. package/dist/services/adaptive-thresholds.js +0 -2
  119. package/dist/services/filesystem-cache.d.ts +0 -1
  120. package/dist/services/filesystem-cache.js +0 -1
  121. package/dist/services/score-history.d.ts +0 -1
  122. package/dist/services/score-history.js +0 -1
  123. package/dist/services/temporal-drift.d.ts +1 -2
  124. package/dist/services/temporal-drift.js +7 -8
  125. package/dist/storage/db.d.ts +23 -7
  126. package/dist/storage/db.js +116 -55
  127. package/dist/storage/findings.d.ts +4 -3
  128. package/dist/storage/findings.js +13 -20
  129. package/dist/storage/local-memory.d.ts +4 -4
  130. package/dist/storage/local-memory.js +20 -22
  131. package/dist/storage/patterns.d.ts +5 -5
  132. package/dist/storage/patterns.js +20 -26
  133. package/dist/storage/scans.d.ts +6 -6
  134. package/dist/storage/scans.js +12 -21
  135. package/dist/types/index.d.ts +1 -0
  136. package/dist/utils/scanner.js +1 -1
  137. package/package.json +7 -8
  138. package/dist/gates/duplication-drift.d.ts +0 -128
  139. package/dist/gates/duplication-drift.js +0 -585
  140. package/dist/gates/hallucinated-imports.js +0 -641
  141. package/dist/gates/side-effect-helpers.d.ts +0 -260
package/README.md CHANGED
@@ -28,6 +28,14 @@ The core library powering [Rigour](https://rigour.run) — 27+ quality gates, fi
28
28
 
29
29
  **Agent Governance:** Multi-agent scope isolation, EWMA-based checkpoint supervision, context drift, retry loop breaker, memory & skills governance with DLP scanning.
30
30
 
31
+ ### Real-Time Hook Engine
32
+
33
+ Sub-200ms per-file-write checker with 5 fast gates (governance, hallucinated imports, promise safety, security patterns, file size). Generates native hook configs for Claude Code, Cursor, Cline, and Windsurf.
34
+
35
+ ### AI Agent DLP (Data Loss Prevention)
36
+
37
+ 29 credential patterns with anti-evasion hardening (unicode normalization, zero-width char removal, bidi control stripping, Shannon entropy detection >4.5 bits). Compliance-mapped to SOC2-CC6.1, HIPAA-164.312, PCI-DSS-3.4/3.5/6.5, OWASP-A2, CWE-798.
38
+
31
39
  ### Five-Signal Deep Analysis Pipeline
32
40
 
33
41
  Rigour's deep analysis is not a wrapper around a generic LLM. The model operates within a cage of deterministic facts:
@@ -46,7 +54,7 @@ Key capabilities: per-provenance EWMA streams (alpha=0.3), Z-score anomaly detec
46
54
 
47
55
  ### Multi-Language Support
48
56
 
49
- All gates support: TypeScript, JavaScript, Python, Go, Ruby, and C#/.NET.
57
+ Hallucinated import detection supports 8 languages with stdlib whitelists and dependency manifest parsing: TypeScript, JavaScript, Python, Go, Ruby, C#/.NET, Rust, Java, and Kotlin. Core structural gates support all languages via AST analysis.
50
58
 
51
59
  ### Two-Score System
52
60
 
@@ -9,7 +9,6 @@
9
9
  * - Task scope violations
10
10
  * - Handoff context loss
11
11
  *
12
- * @since v2.14.0
13
12
  */
14
13
  import { Gate, GateContext } from './base.js';
15
14
  import { Failure, Provenance } from '../types/index.js';
@@ -9,7 +9,6 @@
9
9
  * - Task scope violations
10
10
  * - Handoff context loss
11
11
  *
12
- * @since v2.14.0
13
12
  */
14
13
  import { Gate } from './base.js';
15
14
  import { Logger } from '../utils/logger.js';
@@ -18,8 +18,6 @@
18
18
  * - EWMA: one bad score dampened by history, persistent drops amplified
19
19
  * - α=0.3: ~70% weight on history, 30% on new data → noise-resistant
20
20
  *
21
- * @since v2.14.0 (original, linear regression)
22
- * @since v5.0.0 (EWMA drift detection)
23
21
  */
24
22
  import { Gate, GateContext } from './base.js';
25
23
  import { Failure, Provenance } from '../types/index.js';
@@ -18,8 +18,6 @@
18
18
  * - EWMA: one bad score dampened by history, persistent drops amplified
19
19
  * - α=0.3: ~70% weight on history, 30% on new data → noise-resistant
20
20
  *
21
- * @since v2.14.0 (original, linear regression)
22
- * @since v5.0.0 (EWMA drift detection)
23
21
  */
24
22
  import { Gate } from './base.js';
25
23
  import { Logger } from '../utils/logger.js';
@@ -13,7 +13,6 @@
13
13
  * 4. Error handling becomes sparser toward the bottom
14
14
  * 5. Code style inconsistencies emerge (indentation, spacing)
15
15
  *
16
- * @since v2.16.0
17
16
  */
18
17
  import { Gate, GateContext } from './base.js';
19
18
  import { Failure, Provenance } from '../types/index.js';
@@ -31,5 +30,10 @@ export declare class ContextWindowArtifactsGate extends Gate {
31
30
  private shouldSkipFile;
32
31
  private analyzeFile;
33
32
  private measureHalf;
34
- private measureFunctionLengths;
33
+ private measureFunctionLengthsFromAdapter;
34
+ private countSingleCharVariablesFromAdapter;
35
+ private getAvgIdentifierLengthFromAdapter;
36
+ private measureFunctionLengthsLegacy;
37
+ private countSingleCharVariablesLegacy;
38
+ private getAvgIdentifierLengthLegacy;
35
39
  }
@@ -13,11 +13,11 @@
13
13
  * 4. Error handling becomes sparser toward the bottom
14
14
  * 5. Code style inconsistencies emerge (indentation, spacing)
15
15
  *
16
- * @since v2.16.0
17
16
  */
18
17
  import { Gate } from './base.js';
19
18
  import { FileScanner } from '../utils/scanner.js';
20
19
  import { Logger } from '../utils/logger.js';
20
+ import { languageAdapters } from './language-adapters/index.js';
21
21
  import fs from 'fs-extra';
22
22
  import path from 'path';
23
23
  export class ContextWindowArtifactsGate extends Gate {
@@ -36,7 +36,7 @@ export class ContextWindowArtifactsGate extends Gate {
36
36
  if (!this.config.enabled)
37
37
  return [];
38
38
  const failures = [];
39
- const scanPatterns = context.patterns || ['**/*.{ts,js,tsx,jsx,py}'];
39
+ const scanPatterns = context.patterns || languageAdapters.getScanPatterns();
40
40
  const files = await FileScanner.findFiles({
41
41
  cwd: context.cwd,
42
42
  patterns: scanPatterns,
@@ -51,7 +51,7 @@ export class ContextWindowArtifactsGate extends Gate {
51
51
  const lines = content.split('\n');
52
52
  if (lines.length < this.config.min_file_lines)
53
53
  continue;
54
- const metrics = this.analyzeFile(content, file);
54
+ const metrics = this.analyzeFile(content, file, path.join(context.cwd, file));
55
55
  if (metrics && metrics.signals.length >= this.config.signals_required &&
56
56
  metrics.degradationScore >= this.config.degradation_threshold) {
57
57
  const signalList = metrics.signals.map(s => ` • ${s}`).join('\n');
@@ -68,13 +68,13 @@ export class ContextWindowArtifactsGate extends Gate {
68
68
  return (normalized.includes('/examples/') ||
69
69
  normalized.includes('/src/gates/'));
70
70
  }
71
- analyzeFile(content, file) {
71
+ analyzeFile(content, file, fullPath) {
72
72
  const lines = content.split('\n');
73
73
  const midpoint = Math.floor(lines.length / 2);
74
74
  const topContent = lines.slice(0, midpoint).join('\n');
75
75
  const bottomContent = lines.slice(midpoint).join('\n');
76
- const topMetrics = this.measureHalf(topContent);
77
- const bottomMetrics = this.measureHalf(bottomContent);
76
+ const topMetrics = this.measureHalf(topContent, fullPath);
77
+ const bottomMetrics = this.measureHalf(bottomContent, fullPath);
78
78
  const signals = [];
79
79
  let degradationScore = 0;
80
80
  // Signal 1: Comment density drops (use threshold to avoid tiny-denominator noise)
@@ -136,42 +136,44 @@ export class ContextWindowArtifactsGate extends Gate {
136
136
  signals,
137
137
  };
138
138
  }
139
- measureHalf(content) {
139
+ measureHalf(content, fullPath) {
140
+ const adapter = languageAdapters.getAdapter(fullPath);
140
141
  const lines = content.split('\n');
141
- const codeLines = lines.filter(l => l.trim() && !l.trim().startsWith('//') && !l.trim().startsWith('#') && !l.trim().startsWith('*'));
142
- // Only count inline comments (//), not JSDoc/block comments (/** ... */ or * ...)
143
- // JSDoc tends to cluster at file top, skewing "degradation" unfairly
142
+ // Strip comments using adapter if available, otherwise fallback
143
+ const codeWithoutComments = adapter ? adapter.stripComments(content) : content;
144
+ const codeLines = codeWithoutComments.split('\n').filter(l => l.trim());
145
+ // Count inline comments (// for C-style, # for Python/Ruby)
146
+ // Exclude JSDoc/block comments (/** ... */ or * ...) which cluster at top unfairly
144
147
  const commentLines = lines.filter(l => {
145
148
  const trimmed = l.trim();
146
149
  return trimmed.startsWith('//') || trimmed.startsWith('#');
147
150
  });
148
151
  // Comment density
149
152
  const commentDensity = codeLines.length > 0 ? commentLines.length / codeLines.length : 0;
150
- // Function lengths
151
- const funcLengths = this.measureFunctionLengths(content);
153
+ // Function lengths using adapter if available
154
+ const funcLengths = adapter
155
+ ? this.measureFunctionLengthsFromAdapter(content, adapter)
156
+ : this.measureFunctionLengthsLegacy(content);
152
157
  const avgFunctionLength = funcLengths.length > 0
153
158
  ? funcLengths.reduce((a, b) => a + b, 0) / funcLengths.length
154
159
  : 0;
155
- // Single-char variables (excluding common loop vars i, j, k in for loops)
156
- const singleCharMatches = content.match(/\b(?:const|let|var)\s+([a-z])\b/g) || [];
157
- const singleCharVarCount = singleCharMatches.length;
158
- // Error handling density
159
- const tryCount = (content.match(/\btry\s*\{/g) || []).length;
160
+ // Single-char variables using naming patterns from adapter if available
161
+ const singleCharVarCount = adapter
162
+ ? this.countSingleCharVariablesFromAdapter(adapter, content)
163
+ : this.countSingleCharVariablesLegacy(content);
164
+ // Error handling density use adapter if available
165
+ const errorHandlers = adapter ? adapter.extractErrorHandlers(content) : [];
166
+ const totalErrHandling = errorHandlers.length;
160
167
  const funcCount = Math.max(1, funcLengths.length);
161
- const errorHandlingDensity = tryCount / funcCount;
168
+ const errorHandlingDensity = totalErrHandling / funcCount;
162
169
  // Empty blocks
163
170
  const emptyBlockCount = (content.match(/\{\s*\}/g) || []).length;
164
171
  // TODO/FIXME/HACK count
165
172
  const todoCount = (content.match(/\b(TODO|FIXME|HACK|XXX)\b/gi) || []).length;
166
- // Average identifier length
167
- const identifiers = content.match(/\b(?:const|let|var|function)\s+([a-zA-Z_$][a-zA-Z0-9_$]*)/g) || [];
168
- const identNames = identifiers.map(m => {
169
- const parts = m.split(/\s+/);
170
- return parts[parts.length - 1];
171
- });
172
- const avgIdentifierLength = identNames.length > 0
173
- ? identNames.reduce((sum, n) => sum + n.length, 0) / identNames.length
174
- : 0;
173
+ // Average identifier length using naming patterns from adapter if available
174
+ const avgIdentifierLength = adapter
175
+ ? this.getAvgIdentifierLengthFromAdapter(adapter, content)
176
+ : this.getAvgIdentifierLengthLegacy(content);
175
177
  return {
176
178
  commentDensity,
177
179
  avgFunctionLength,
@@ -182,18 +184,48 @@ export class ContextWindowArtifactsGate extends Gate {
182
184
  avgIdentifierLength,
183
185
  };
184
186
  }
185
- measureFunctionLengths(content) {
187
+ measureFunctionLengthsFromAdapter(content, adapter) {
188
+ if (!adapter)
189
+ return [];
190
+ const functions = adapter.extractFunctions(content);
191
+ return functions.map(func => func.endLine - func.startLine + 1);
192
+ }
193
+ countSingleCharVariablesFromAdapter(adapter, content) {
194
+ if (!adapter)
195
+ return 0;
196
+ const patterns = adapter.extractNamingPatterns(content);
197
+ return patterns.filter(p => p.name.length === 1 && p.kind === 'variable').length;
198
+ }
199
+ getAvgIdentifierLengthFromAdapter(adapter, content) {
200
+ if (!adapter)
201
+ return 0;
202
+ const patterns = adapter.extractNamingPatterns(content);
203
+ if (patterns.length === 0)
204
+ return 0;
205
+ const totalLength = patterns.reduce((sum, p) => sum + p.name.length, 0);
206
+ return totalLength / patterns.length;
207
+ }
208
+ measureFunctionLengthsLegacy(content) {
186
209
  const lines = content.split('\n');
187
210
  const lengths = [];
188
- const funcStarts = [
211
+ // Brace-based function patterns (JS/TS, Go, Rust, Java, Kotlin, C#)
212
+ const bracePatterns = [
189
213
  /^(?:export\s+)?(?:async\s+)?function\s+\w+/,
190
214
  /^(?:export\s+)?(?:const|let|var)\s+\w+\s*=\s*(?:async\s+)?(?:\([^)]*\)|\w+)\s*=>/,
191
215
  /^\s+(?:async\s+)?\w+\s*\([^)]*\)\s*\{/,
216
+ /^func\s+/, // Go
217
+ /^\s*(?:pub\s+)?(?:async\s+)?fn\s+/, // Rust
218
+ /^\s*(?:public|private|protected|internal|static|override)\s+.*\w+\s*\(/, // Java/C#/Kotlin
219
+ ];
220
+ // Indent-based function patterns (Python, Ruby)
221
+ const indentPatterns = [
222
+ /^\s*(?:async\s+)?def\s+\w+/, // Python/Ruby
192
223
  ];
193
224
  for (let i = 0; i < lines.length; i++) {
194
- for (const pattern of funcStarts) {
225
+ // Try brace-based languages first
226
+ let matched = false;
227
+ for (const pattern of bracePatterns) {
195
228
  if (pattern.test(lines[i])) {
196
- // Count function body length
197
229
  let braceDepth = 0;
198
230
  let started = false;
199
231
  let bodyLines = 0;
@@ -211,6 +243,29 @@ export class ContextWindowArtifactsGate extends Gate {
211
243
  if (started && braceDepth === 0)
212
244
  break;
213
245
  }
246
+ if (bodyLines > 0)
247
+ lengths.push(bodyLines);
248
+ matched = true;
249
+ break;
250
+ }
251
+ }
252
+ if (matched)
253
+ continue;
254
+ // Try indent-based languages
255
+ for (const pattern of indentPatterns) {
256
+ if (pattern.test(lines[i])) {
257
+ const indent = lines[i].match(/^(\s*)/)?.[1]?.length || 0;
258
+ let bodyLines = 0;
259
+ for (let j = i + 1; j < lines.length; j++) {
260
+ if (lines[j].trim() === '') {
261
+ bodyLines++;
262
+ continue;
263
+ }
264
+ const curIndent = lines[j].match(/^(\s*)/)?.[1]?.length || 0;
265
+ if (curIndent <= indent)
266
+ break;
267
+ bodyLines++;
268
+ }
214
269
  if (bodyLines > 0)
215
270
  lengths.push(bodyLines);
216
271
  break;
@@ -219,4 +274,25 @@ export class ContextWindowArtifactsGate extends Gate {
219
274
  }
220
275
  return lengths;
221
276
  }
277
+ countSingleCharVariablesLegacy(content) {
278
+ const singleCharPatternsJS = content.match(/\b(?:const|let|var)\s+([a-z])\b/g) || [];
279
+ const singleCharPatternsPy = content.match(/^\s*([a-z])\s*=/gm) || [];
280
+ const singleCharPatternsGo = content.match(/\b(\w)\s*:=/g) || [];
281
+ return singleCharPatternsJS.length + singleCharPatternsPy.length + singleCharPatternsGo.length;
282
+ }
283
+ getAvgIdentifierLengthLegacy(content) {
284
+ const identPatternsJS = content.match(/\b(?:const|let|var|function)\s+([a-zA-Z_$][a-zA-Z0-9_$]*)/g) || [];
285
+ const identPatternsPy = content.match(/\bdef\s+([a-zA-Z_]\w*)/g) || [];
286
+ const identPatternsGo = content.match(/\bfunc\s+(?:\([^)]+\)\s+)?([a-zA-Z_]\w*)/g) || [];
287
+ const identPatternsRs = content.match(/\bfn\s+([a-zA-Z_]\w*)/g) || [];
288
+ const identPatternsRb = content.match(/\bdef\s+(?:self\.)?([a-zA-Z_]\w*)/g) || [];
289
+ const allIdents = [...identPatternsJS, ...identPatternsPy, ...identPatternsGo, ...identPatternsRs, ...identPatternsRb];
290
+ const identNames = allIdents.map(m => {
291
+ const parts = m.split(/\s+/);
292
+ return parts[parts.length - 1];
293
+ });
294
+ return identNames.length > 0
295
+ ? identNames.reduce((sum, n) => sum + n.length, 0) / identNames.length
296
+ : 0;
297
+ }
222
298
  }
@@ -24,5 +24,7 @@ export declare class DeepAnalysisGate extends Gate {
24
24
  private provider;
25
25
  constructor(config: DeepGateConfig);
26
26
  protected get provenance(): Provenance;
27
+ /** Check if a file is a likely entry point (higher analysis priority) */
28
+ private isEntryPoint;
27
29
  run(context: GateContext): Promise<Failure[]>;
28
30
  }
@@ -13,8 +13,11 @@ import { createProvider } from '../inference/index.js';
13
13
  import { extractFacts, factsToPromptString, chunkFacts, buildAnalysisPrompt, buildCrossFilePrompt, verifyFindings } from '../deep/index.js';
14
14
  import { checkLocalPatterns } from '../storage/local-memory.js';
15
15
  import { Logger } from '../utils/logger.js';
16
- /** Max files to analyze before truncating (prevents OOM on huge repos) */
17
- const MAX_ANALYZABLE_FILES = 500;
16
+ import path from 'path';
17
+ /** Default batch size for streaming analysis of large repos.
18
+ * Files are processed in batches to avoid OOM on huge repos.
19
+ * Optional soft limit via rigour.yml: deep.maxFiles */
20
+ const DEFAULT_BATCH_SIZE = 200;
18
21
  /** Setup timeout: 120s for model download, 30s for API connection */
19
22
  const SETUP_TIMEOUT_MS = 120_000;
20
23
  export class DeepAnalysisGate extends Gate {
@@ -27,6 +30,18 @@ export class DeepAnalysisGate extends Gate {
27
30
  get provenance() {
28
31
  return 'deep-analysis';
29
32
  }
33
+ /** Check if a file is a likely entry point (higher analysis priority) */
34
+ isEntryPoint(filePath) {
35
+ const basename = path.basename(filePath).toLowerCase();
36
+ const entryNames = [
37
+ 'index.ts', 'index.js', 'index.tsx', 'index.jsx', 'index.mjs',
38
+ 'main.ts', 'main.js', 'main.py', 'main.go', 'main.rs', 'main.java', 'main.kt',
39
+ 'app.ts', 'app.js', 'app.py', 'app.go', 'app.rb',
40
+ 'server.ts', 'server.js', 'server.py', 'server.go',
41
+ 'mod.rs', 'lib.rs',
42
+ ];
43
+ return entryNames.includes(basename);
44
+ }
30
45
  async run(context) {
31
46
  const { onProgress } = this.config;
32
47
  const failures = [];
@@ -53,21 +68,36 @@ export class DeepAnalysisGate extends Gate {
53
68
  onProgress?.(' No analyzable files found. Check ignore patterns and file extensions.');
54
69
  return [];
55
70
  }
56
- // Cap file count to prevent OOM on huge repos
57
- if (allFacts.length > MAX_ANALYZABLE_FILES) {
58
- onProgress?.(` ⚠ Found ${allFacts.length} files, capping at ${MAX_ANALYZABLE_FILES} (largest files prioritized).`);
59
- // Sort by line count descending analyze the biggest files first
60
- allFacts.sort((a, b) => b.lineCount - a.lineCount);
61
- allFacts = allFacts.slice(0, MAX_ANALYZABLE_FILES);
71
+ // Smart prioritization: entry points first, then by complexity
72
+ allFacts.sort((a, b) => {
73
+ const aEntry = this.isEntryPoint(a.path) ? 1 : 0;
74
+ const bEntry = this.isEntryPoint(b.path) ? 1 : 0;
75
+ if (aEntry !== bEntry)
76
+ return bEntry - aEntry;
77
+ return b.lineCount - a.lineCount;
78
+ });
79
+ // Optional soft limit — if user configures deep.maxFiles, respect it
80
+ // Otherwise process ALL files in batches (no hard cap)
81
+ if (this.config.options.maxFiles && allFacts.length > this.config.options.maxFiles) {
82
+ onProgress?.(` Limiting to ${this.config.options.maxFiles} files (configured in rigour.yml).`);
83
+ allFacts = allFacts.slice(0, this.config.options.maxFiles);
62
84
  }
63
85
  const agentCount = this.config.options.agents || 1;
64
86
  const isCloud = !!this.config.options.apiKey;
65
87
  onProgress?.(` Found ${allFacts.length} files to analyze${agentCount > 1 ? ` with ${agentCount} parallel agents` : ''}.`);
66
88
  // Step 1.5: Check local project memory for known patterns (instant, no LLM)
89
+ // Wrapped in try/catch: sqlite3 may not be available in all environments
67
90
  const fileList = allFacts.map(f => f.path).filter(Boolean);
68
- const localFindings = checkLocalPatterns(context.cwd, fileList);
69
- if (localFindings.length > 0) {
70
- onProgress?.(` 🧠 Local memory: ${localFindings.length} known pattern(s) matched instantly.`);
91
+ let localFindings = [];
92
+ try {
93
+ localFindings = await checkLocalPatterns(context.cwd, fileList);
94
+ if (localFindings.length > 0) {
95
+ onProgress?.(` 🧠 Local memory: ${localFindings.length} known pattern(s) matched instantly.`);
96
+ }
97
+ }
98
+ catch (error) {
99
+ Logger.debug(`Local memory check skipped (${error.message?.substring(0, 80)})`);
100
+ onProgress?.(' ℹ Local memory unavailable — continuing with LLM analysis only.');
71
101
  }
72
102
  // Step 2: LLM interprets facts (in chunks)
73
103
  const chunks = chunkFacts(allFacts);
@@ -12,8 +12,6 @@
12
12
  * - Using heavy/popular packages when lighter alternatives exist
13
13
  * - Installing multiple HTTP clients, date libs, etc. across different sessions
14
14
  *
15
- * @since v2.0.0 (forbidden deps)
16
- * @since v5.1.0 (unused, heavy alternatives, duplicate purpose)
17
15
  */
18
16
  import { Failure, Config } from '../types/index.js';
19
17
  import { Gate, GateContext } from './base.js';
@@ -12,8 +12,6 @@
12
12
  * - Using heavy/popular packages when lighter alternatives exist
13
13
  * - Installing multiple HTTP clients, date libs, etc. across different sessions
14
14
  *
15
- * @since v2.0.0 (forbidden deps)
16
- * @since v5.1.0 (unused, heavy alternatives, duplicate purpose)
17
15
  */
18
16
  import fs from 'fs-extra';
19
17
  import path from 'path';
@@ -29,7 +27,7 @@ const HEAVY_ALTERNATIVES = {
29
27
  'lodash': 'lodash-es (tree-shakeable) or native Array/Object methods',
30
28
  'underscore': 'native ES6+ methods (Array.map, Object.entries, etc.)',
31
29
  'axios': 'native fetch API (built into Node 18+)',
32
- 'request': 'node-fetch or native fetch (deprecated since 2020)',
30
+ 'request': 'got, undici, or native fetch (request deprecated since 2020)',
33
31
  'bluebird': 'native Promise (built-in since ES2015)',
34
32
  'jquery': 'native DOM APIs (querySelector, fetch, classList)',
35
33
  'classnames': 'clsx (0.3KB vs 1KB) or template literals',
@@ -38,6 +36,24 @@ const HEAVY_ALTERNATIVES = {
38
36
  'is-even': 'n % 2 === 0 (one-liner)',
39
37
  'is-odd': 'n % 2 !== 0 (one-liner)',
40
38
  'chalk': 'picocolors (14x smaller, faster)',
39
+ 'colors': 'picocolors (colors has had supply chain attacks)',
40
+ 'node-fetch': 'native fetch API (built into Node 18+)',
41
+ 'cross-fetch': 'native fetch API (built into Node 18+)',
42
+ 'isomorphic-fetch': 'native fetch API (built into Node 18+)',
43
+ 'whatwg-fetch': 'native fetch API (built into Node 18+)',
44
+ 'faker': '@faker-js/faker (faker was hijacked in supply chain attack)',
45
+ 'glob': 'fast-glob or fs.glob (built into Node 22+)',
46
+ 'rimraf': 'fs.rm with { recursive: true } (built into Node 14+)',
47
+ 'mkdirp': 'fs.mkdir with { recursive: true } (built into Node 10+)',
48
+ 'ncp': 'fs.cp with { recursive: true } (built into Node 16+)',
49
+ 'node-uuid': 'crypto.randomUUID() (node-uuid is unmaintained)',
50
+ 'q': 'native Promise (Q is legacy)',
51
+ 'async': 'native Promise.all/allSettled/race (async lib is legacy)',
52
+ 'superagent': 'native fetch API (built into Node 18+)',
53
+ 'path-exists': 'fs.existsSync() or fs.access() (one-liner)',
54
+ 'path-is-absolute': 'path.isAbsolute() (built-in)',
55
+ 'string-width': 'Intl.Segmenter for grapheme-aware measurement',
56
+ 'strip-ansi': 'picocolors includes strip (or regex one-liner)',
41
57
  };
42
58
  /**
43
59
  * Functional groups — if >1 package from same group is installed,
@@ -156,10 +172,12 @@ export class DependencyGate extends Gate {
156
172
  */
157
173
  async detectUnusedDeps(context, pkg, depNames, allowlist) {
158
174
  const failures = [];
159
- // Only check production + dev dependencies (not peer)
175
+ // Check production + dev dependencies. Peer deps and optional deps are valid — skip them.
160
176
  const prodDeps = Object.keys(pkg.dependencies || {});
161
177
  const devDeps = Object.keys(pkg.devDependencies || {});
162
- const checkDeps = [...prodDeps, ...devDeps];
178
+ const peerDeps = new Set(Object.keys(pkg.peerDependencies || {}));
179
+ const optionalDeps = new Set(Object.keys(pkg.optionalDependencies || {}));
180
+ const checkDeps = [...prodDeps, ...devDeps].filter(d => !peerDeps.has(d) && !optionalDeps.has(d));
163
181
  if (checkDeps.length === 0)
164
182
  return [];
165
183
  // Default allowlist patterns for known side-effect packages
@@ -17,8 +17,6 @@
17
17
  * C# — Deprecated .NET APIs (WebClient, BinaryFormatter, etc.)
18
18
  * Java — Deprecated JDK APIs (Date, Vector, Hashtable, etc.)
19
19
  *
20
- * @since v3.0.0
21
- * @since v3.0.3 — Go, C#, Java deprecated API detection added
22
20
  */
23
21
  import { Gate, GateContext } from './base.js';
24
22
  import { Failure, Provenance } from '../types/index.js';
@@ -17,12 +17,11 @@
17
17
  * C# — Deprecated .NET APIs (WebClient, BinaryFormatter, etc.)
18
18
  * Java — Deprecated JDK APIs (Date, Vector, Hashtable, etc.)
19
19
  *
20
- * @since v3.0.0
21
- * @since v3.0.3 — Go, C#, Java deprecated API detection added
22
20
  */
23
21
  import { Gate } from './base.js';
24
22
  import { FileScanner } from '../utils/scanner.js';
25
23
  import { Logger } from '../utils/logger.js';
24
+ import { languageAdapters } from './language-adapters/index.js';
26
25
  import fs from 'fs-extra';
27
26
  import path from 'path';
28
27
  import { NODE_DEPRECATED_RULES, WEB_DEPRECATED_RULES, PYTHON_DEPRECATED_RULES, GO_DEPRECATED_RULES, CSHARP_DEPRECATED_RULES, JAVA_DEPRECATED_RULES } from './deprecated-apis-rules.js';
@@ -65,24 +64,38 @@ export class DeprecatedApisGate extends Gate {
65
64
  try {
66
65
  const fullPath = path.join(context.cwd, file);
67
66
  const content = await fs.readFile(fullPath, 'utf-8');
68
- const ext = path.extname(file);
69
- if (['.ts', '.js', '.tsx', '.jsx'].includes(ext)) {
70
- if (this.config.check_node)
71
- this.checkNodeDeprecated(content, file, deprecated);
72
- if (this.config.check_web)
73
- this.checkWebDeprecated(content, file, deprecated);
74
- }
75
- else if (ext === '.py' && this.config.check_python) {
76
- this.checkPythonDeprecated(content, file, deprecated);
77
- }
78
- else if (ext === '.go' && this.config.check_go) {
79
- this.checkGoDeprecated(content, file, deprecated);
80
- }
81
- else if (ext === '.cs' && this.config.check_csharp) {
82
- this.checkCSharpDeprecated(content, file, deprecated);
83
- }
84
- else if ((ext === '.java' || ext === '.kt') && this.config.check_java) {
85
- this.checkJavaDeprecated(content, file, deprecated);
67
+ const adapter = languageAdapters.getAdapter(file);
68
+ if (!adapter)
69
+ continue;
70
+ /** Map adapter IDs to the config flag that gates them; `js` is keyed on check_node only, check_web is tested inside the switch below. */
71
+ const configCheck = {
72
+ js: this.config.check_node,
73
+ python: this.config.check_python,
74
+ go: this.config.check_go,
75
+ csharp: this.config.check_csharp,
76
+ java: this.config.check_java,
77
+ };
78
+ if (configCheck[adapter.id] === false)
79
+ continue;
80
+ switch (adapter.id) {
81
+ case 'js':
82
+ if (this.config.check_node)
83
+ this.checkNodeDeprecated(content, file, deprecated);
84
+ if (this.config.check_web)
85
+ this.checkWebDeprecated(content, file, deprecated);
86
+ break;
87
+ case 'python':
88
+ this.checkPythonDeprecated(content, file, deprecated);
89
+ break;
90
+ case 'go':
91
+ this.checkGoDeprecated(content, file, deprecated);
92
+ break;
93
+ case 'csharp':
94
+ this.checkCSharpDeprecated(content, file, deprecated);
95
+ break;
96
+ case 'java':
97
+ this.checkJavaDeprecated(content, file, deprecated);
98
+ break;
86
99
  }
87
100
  }
88
101
  catch { /* skip */ }
@@ -0,0 +1,61 @@
1
+ /**
2
+ * Duplication Drift Gate (v2)
3
+ *
4
+ * Detects when AI generates near-identical functions across files because
5
+ * it doesn't remember what it already wrote. This is an AI-specific failure
6
+ * mode — humans reuse via copy-paste (same file), AI re-invents (cross-file).
7
+ *
8
+ * v2 upgrades:
9
+ * - tree-sitter AST node type sequences replace hand-rolled regex tokenizer
10
+ * - Jaccard similarity on AST node multisets (structural, not textual)
11
+ * - Catches duplicates even when every variable name is different
12
+ * - MD5 kept as fast-path for exact matches, Jaccard runs on remaining pairs
13
+ *
14
+ * Detection strategy (steps 1–3 prepare each function; steps 4–6 are the three comparison passes; step 7 reports):
15
+ * 1. Extract function bodies, normalize text (strip comments/whitespace)
16
+ * 2. Parse with tree-sitter → walk AST → collect node type multiset
17
+ * 3. Generate semantic embeddings via all-MiniLM-L6-v2 (384D)
18
+ * 4. Pass 1 (fast): MD5 hash → exact duplicates (O(n), <10ms)
19
+ * 5. Pass 2 (Jaccard): AST node multiset similarity → structural near-duplicates (O(n²) bounded)
20
+ * 6. Pass 3 (semantic): Embedding cosine similarity → semantic duplicates (O(n²) bounded)
21
+ * 7. Flag functions with similarity > threshold in different files
22
+ *
23
+ * Why AST node types > raw tokens:
24
+ * - `getUserById(id) { return db.find(x => x.id === id) }`
25
+ * - `fetchUser(userId) { return database.filter(u => u.id === userId)[0] }`
26
+ * Both produce similar AST: [return_statement, call_expression, arrow_function,
27
+ * binary_expression, member_expression]. Variable names are invisible.
28
+ */
29
+ import { Gate, GateContext } from '../base.js';
30
+ import { Failure, Provenance } from '../../types/index.js';
31
+ import { FunctionSignature } from './similarity.js';
32
+ export interface DuplicationDriftConfig {
33
+ enabled?: boolean;
34
+ similarity_threshold?: number;
35
+ semantic_threshold?: number;
36
+ semantic_enabled?: boolean;
37
+ min_body_lines?: number;
38
+ approved_duplications?: string[];
39
+ }
40
+ export type { FunctionSignature };
41
+ export declare class DuplicationDriftGate extends Gate {
42
+ private config;
43
+ private parser;
44
+ constructor(config?: DuplicationDriftConfig);
45
+ protected get provenance(): Provenance;
46
+ run(context: GateContext): Promise<Failure[]>;
47
+ /**
48
+ * Parse the file with tree-sitter, find function nodes that match
49
+ * our extracted functions (by line number), and replace their token
50
+ * multisets with AST node type sequences.
51
+ */
52
+ private enrichWithASTTokens;
53
+ private extractJSFunctions;
54
+ private extractPyFunctions;
55
+ /**
56
+ * Enrich functions with semantic embeddings for Pass 3.
57
+ * Only called for functions not already claimed by Pass 1/2.
58
+ * Uses generateEmbedding() from pattern-index/embeddings.ts.
59
+ */
60
+ private enrichWithEmbeddings;
61
+ }