@rigour-labs/core 3.0.6 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. package/dist/deep/fact-extractor.d.ts +80 -0
  2. package/dist/deep/fact-extractor.js +626 -0
  3. package/dist/deep/index.d.ts +14 -0
  4. package/dist/deep/index.js +12 -0
  5. package/dist/deep/prompts.d.ts +22 -0
  6. package/dist/deep/prompts.js +374 -0
  7. package/dist/deep/verifier.d.ts +16 -0
  8. package/dist/deep/verifier.js +388 -0
  9. package/dist/gates/deep-analysis.d.ts +28 -0
  10. package/dist/gates/deep-analysis.js +302 -0
  11. package/dist/gates/runner.d.ts +4 -2
  12. package/dist/gates/runner.js +46 -1
  13. package/dist/index.d.ts +10 -0
  14. package/dist/index.js +12 -2
  15. package/dist/inference/cloud-provider.d.ts +34 -0
  16. package/dist/inference/cloud-provider.js +126 -0
  17. package/dist/inference/index.d.ts +17 -0
  18. package/dist/inference/index.js +23 -0
  19. package/dist/inference/model-manager.d.ts +26 -0
  20. package/dist/inference/model-manager.js +106 -0
  21. package/dist/inference/sidecar-provider.d.ts +15 -0
  22. package/dist/inference/sidecar-provider.js +153 -0
  23. package/dist/inference/types.d.ts +77 -0
  24. package/dist/inference/types.js +19 -0
  25. package/dist/settings.d.ts +104 -0
  26. package/dist/settings.js +186 -0
  27. package/dist/storage/db.d.ts +16 -0
  28. package/dist/storage/db.js +132 -0
  29. package/dist/storage/findings.d.ts +14 -0
  30. package/dist/storage/findings.js +38 -0
  31. package/dist/storage/index.d.ts +9 -0
  32. package/dist/storage/index.js +8 -0
  33. package/dist/storage/patterns.d.ts +35 -0
  34. package/dist/storage/patterns.js +62 -0
  35. package/dist/storage/scans.d.ts +42 -0
  36. package/dist/storage/scans.js +55 -0
  37. package/dist/templates/universal-config.js +19 -0
  38. package/dist/types/index.d.ts +438 -15
  39. package/dist/types/index.js +41 -1
  40. package/package.json +6 -2
@@ -0,0 +1,374 @@
1
+ /**
2
+ * System prompt that defines the LLM's role and output format.
+ * It contains the reviewer rules, the JSON output schema, and the list of
+ * category identifiers. Both buildAnalysisPrompt and buildCrossFilePrompt
+ * place it at the very start of the prompt they return.
3
+ */
4
+ export const DEEP_SYSTEM_PROMPT = `You are an expert code reviewer and software architect performing deep quality analysis. You receive AST-extracted facts about a codebase and must identify quality issues, anti-patterns, and best practice violations.
5
+
6
+ IMPORTANT RULES:
7
+ 1. ONLY report issues you can verify from the provided facts. Do NOT hallucinate files, classes, or functions.
8
+ 2. Every finding MUST reference a real file and entity from the facts.
9
+ 3. Be specific: include file paths, struct/class names, function names, line counts.
10
+ 4. Assign confidence scores honestly: 0.9+ only for certain issues, 0.5-0.7 for probable issues.
11
+ 5. Respond ONLY with valid JSON matching the schema below. No explanation text outside JSON.
12
+ 6. AIM for 5-15 findings per batch. Be thorough — report ALL issues you can identify, not just the most obvious ones.
13
+ 7. For Go code: treat structs as classes, receiver methods as class methods. Check Go idioms specifically.
14
+
15
+ OUTPUT SCHEMA:
16
+ {
17
+ "findings": [
18
+ {
19
+ "category": "string (see CATEGORIES below)",
20
+ "severity": "string (critical|high|medium|low|info)",
21
+ "file": "string (exact file path from facts)",
22
+ "line": "number or null",
23
+ "description": "string (what the issue is, referencing specific entities)",
24
+ "suggestion": "string (actionable fix recommendation)",
25
+ "confidence": "number 0.0-1.0"
26
+ }
27
+ ]
28
+ }
29
+
30
+ CATEGORIES:
31
+ SOLID Principles:
32
+ srp_violation - Single file/struct/class handles multiple unrelated responsibilities
33
+ ocp_violation - Code requires modification (not extension) for new behavior
34
+ lsp_violation - Subtypes break substitutability contracts
35
+ isp_violation - Interface has too many methods forcing unnecessary implementations
36
+ dip_violation - High-level modules depend directly on low-level implementations
37
+
38
+ Design Patterns & Anti-patterns:
39
+ god_class - Class/struct with too many fields, methods, or responsibilities (>8 methods or >300 lines)
40
+ god_function - Function exceeding 50 lines or doing too many things
41
+ feature_envy - Function/method uses another module's data more than its own
42
+ shotgun_surgery - A single change requires modifying many files
43
+ long_params - Function with 4+ parameters (use struct/options pattern)
44
+ data_clump - Same group of fields/params repeated across multiple structs/functions
45
+ inappropriate_intimacy - Two modules too tightly coupled, accessing each other's internals
46
+ primitive_obsession - Using primitives instead of domain types (string for email, int for ID)
47
+ lazy_class - Struct/class that does too little to justify its existence
48
+ speculative_generality - Over-engineered abstractions not justified by current usage
49
+ refused_bequest - Subtype/implementation ignores inherited behavior
50
+
51
+ DRY & Duplication:
52
+ dry_violation - Duplicated logic across files that should be extracted
53
+ copy_paste_code - Nearly identical functions/methods in different files
54
+
55
+ Error Handling:
56
+ error_inconsistency - Mixed error handling strategies in same package/module
57
+ empty_catch - Empty catch/except blocks that silently swallow errors
58
+ error_swallowing - Errors logged but not propagated when they should be
59
+ missing_error_check - Return values (especially errors) not checked
60
+ panic_in_library - Library code using panic/os.Exit instead of returning errors
61
+
62
+ Concurrency (Go/Rust/async languages):
63
+ race_condition - Shared mutable state accessed without synchronization
64
+ goroutine_leak - Goroutines spawned without cancellation/context mechanism
65
+ missing_context - Functions that should accept context.Context but don't
66
+ channel_misuse - Unbuffered channels that could deadlock, or missing close()
67
+ mutex_scope - Mutex held too long or across I/O operations
68
+
69
+ Testing:
70
+ test_quality - Insufficient assertions, no edge cases, weak coverage
71
+ test_coupling - Tests tightly coupled to implementation details
72
+ missing_test - Complex public function/method with no corresponding test
73
+ test_duplication - Multiple tests verifying the same behavior redundantly
74
+
75
+ Architecture:
76
+ architecture - Layer violations, wrong dependency direction
77
+ circular_dependency - Modules that import each other
78
+ package_cohesion - Package/directory contains unrelated concerns
79
+ api_design - Exported API is confusing, inconsistent, or poorly structured
80
+ missing_abstraction - Direct usage where an interface/abstraction would improve design
81
+
82
+ Language Idioms:
83
+ language_idiom - Language-specific anti-patterns
84
+ naming_convention - Names don't follow language conventions (Go: MixedCaps, Python: snake_case)
85
+ dead_code - Unreferenced exports, unused functions
86
+ magic_number - Numeric literals without named constants
87
+
88
+ Performance & Security:
89
+ performance - Obvious performance anti-patterns (N+1 queries, unbounded allocations)
90
+ resource_leak - Opened resources (files, connections, readers) not properly closed
91
+ hardcoded_config - Configuration values hardcoded instead of externalized
92
+
93
+ Code Smells:
94
+ code_smell - General smell with refactoring suggestion
95
+ complex_conditional - Deeply nested or overly complex conditional logic
96
+ long_file - File exceeds reasonable length for its responsibility`;
97
+ /**
98
+ * Language-specific analysis guidance appended to prompts.
+ * Keyed by language identifier (go, typescript, javascript, python, rust,
+ * java, csharp). The prompt builders in this file look up the dominant
+ * language here and fall back to an empty string when there is no entry.
+ * NOTE(review): the Go "Error wrapping" bullet contrasts %w with fmt.Errorf,
+ * although %w is itself a fmt.Errorf verb — consider rewording that bullet.
99
+ */
100
+ const LANGUAGE_GUIDANCE = {
101
+ go: `
102
+ GO-SPECIFIC CHECKS (apply these strictly):
103
+ - Error handling: Every function returning error must be checked. Look for _ = fn() patterns.
104
+ - Context propagation: HTTP handlers and long-running ops should accept context.Context.
105
+ - Interface design: Go interfaces should be small (1-3 methods). Large interfaces violate ISP.
106
+ - Goroutine safety: Goroutines without context/done channels are potential leaks.
107
+ - Defer usage: Missing defer for Close/Unlock calls → resource leaks.
108
+ - Struct design: Structs with >8 fields may need decomposition.
109
+ - Receiver consistency: All methods on a type should use pointer OR value receiver, not mixed.
110
+ - Package naming: Should be short, lowercase, no underscores.
111
+ - Error wrapping: Errors should be wrapped with %w for context, not just fmt.Errorf.
112
+ - Init functions: Avoid init() — makes testing hard and creates hidden dependencies.
113
+ - Global state: Package-level mutable variables are a code smell.`,
114
+ typescript: `
115
+ TYPESCRIPT-SPECIFIC CHECKS:
116
+ - Use strict null checks. Watch for missing null/undefined guards.
117
+ - Prefer interfaces over type aliases for object shapes.
118
+ - Avoid 'any' type — use 'unknown' with type guards.
119
+ - Async functions should have proper error boundaries.
120
+ - Watch for promise chains without .catch() or try/catch.
121
+ - Large barrel files (index.ts re-exporting everything) hurt tree-shaking.
122
+ - Avoid enums — use 'as const' objects or union types.`,
123
+ javascript: `
124
+ JAVASCRIPT-SPECIFIC CHECKS:
125
+ - Missing null/undefined checks on function parameters.
126
+ - Callback hell — should use async/await.
127
+ - var usage — should use const/let.
128
+ - == instead of === (loose equality).
129
+ - Prototype pollution risks in object manipulation.`,
130
+ python: `
131
+ PYTHON-SPECIFIC CHECKS:
132
+ - Missing type hints on public functions.
133
+ - Bare except clauses that catch all exceptions.
134
+ - Mutable default arguments (def fn(x=[])).
135
+ - Not using context managers (with statement) for resources.
136
+ - Import * polluting namespace.
137
+ - Missing __init__.py or improper package structure.`,
138
+ rust: `
139
+ RUST-SPECIFIC CHECKS:
140
+ - Unwrap/expect in library code instead of proper error handling (?).
141
+ - Clone where borrow would suffice.
142
+ - Large enums that should be split.
143
+ - Missing Send/Sync bounds on async code.
144
+ - Unsafe blocks without safety documentation.`,
145
+ java: `
146
+ JAVA-SPECIFIC CHECKS:
147
+ - God classes with too many responsibilities.
148
+ - Missing @Override annotations.
149
+ - Raw types instead of generics.
150
+ - Checked exceptions caught and ignored.
151
+ - Static utility classes that should be injected services.`,
152
+ csharp: `
153
+ C#-SPECIFIC CHECKS:
154
+ - Not using 'using' for IDisposable resources.
155
+ - Async void methods (should be async Task).
156
+ - Missing null checks (use nullable reference types).
157
+ - Large switch statements that should use polymorphism.`,
158
+ };
159
/**
 * Build the analysis prompt for a batch of file facts.
 *
 * @param factsStr Serialized facts for the batch. The string is embedded
 *   verbatim in the prompt and scanned for `(<word>,` tokens; the most
 *   frequent word is treated as the dominant language.
 * @param checks Optional map of check name to enabled flag. When omitted,
 *   all twelve built-in checks are enabled.
 * @returns DEEP_SYSTEM_PROMPT followed by the analysis focus list, any
 *   guidance registered for the dominant language, and the facts.
 */
export function buildAnalysisPrompt(factsStr, checks) {
    // Check names and language tokens come from caller input, so the lookup
    // tables are read through an own-property test. A bare `table[key]` would
    // return inherited members for keys such as "constructor" or "toString"
    // and interpolate a function's source text into the prompt.
    const lookup = (table, key) => Object.prototype.hasOwnProperty.call(table, key) ? table[key] : undefined;
    const enabledChecks = checks
        ? Object.entries(checks)
            .filter(([, enabled]) => enabled)
            .map(([check]) => check)
        : ['solid', 'dry', 'design_patterns', 'language_idioms', 'error_handling',
            'test_quality', 'architecture', 'code_smells', 'concurrency',
            'performance', 'naming', 'resource_management'];
    const checkDescriptions = {
        solid: 'SOLID principle violations (SRP, OCP, LSP, ISP, DIP)',
        dry: 'DRY violations — duplicated logic, copy-paste code across files',
        design_patterns: 'Design pattern anti-patterns: god class/struct, god function, feature envy, shotgun surgery, long parameter lists, data clumps, inappropriate intimacy, primitive obsession, lazy class, speculative generality',
        language_idioms: 'Language-specific anti-patterns, naming convention violations, dead code',
        error_handling: 'Error handling: inconsistencies, empty catches, swallowed errors, missing error checks, panic in library code',
        test_quality: 'Test quality: insufficient assertions, missing edge cases, test coupling, missing tests for complex code',
        architecture: 'Architecture: layer violations, circular dependencies, package cohesion, API design, missing abstractions',
        code_smells: 'Code smells: complex conditionals, magic numbers, long files, hardcoded config',
        concurrency: 'Concurrency: race conditions, goroutine leaks, missing context, channel misuse, mutex scope',
        performance: 'Performance anti-patterns: resource leaks, unbounded allocations, N+1 patterns',
        naming: 'Naming conventions: language-appropriate naming, unclear/misleading names',
        resource_management: 'Resource management: unclosed files/connections, missing defer/cleanup',
    };
    // Unknown check names are listed as-is so a caller-defined check still
    // shows up in the focus list.
    const checksStr = enabledChecks
        .map(c => `- ${lookup(checkDescriptions, c) || c}`)
        .join('\n');
    // Detect dominant language from facts
    const langCounts = new Map();
    const langPattern = /\((\w+),/g;
    let langMatch;
    while ((langMatch = langPattern.exec(factsStr)) !== null) {
        const lang = langMatch[1];
        langCounts.set(lang, (langCounts.get(lang) || 0) + 1);
    }
    let dominantLang = '';
    let maxCount = 0;
    for (const [lang, count] of langCounts) {
        // Strict comparison: on a tie the first token seen stays dominant.
        if (count > maxCount) {
            maxCount = count;
            dominantLang = lang;
        }
    }
    const langGuide = lookup(LANGUAGE_GUIDANCE, dominantLang) || '';
    return `${DEEP_SYSTEM_PROMPT}

ANALYSIS FOCUS:
${checksStr}
${langGuide}

AST-EXTRACTED FACTS:
${factsStr}

Analyze the codebase facts above. Identify ALL quality issues matching the analysis focus areas. Be thorough — check every file for every category. Return findings as JSON.`;
}
214
/**
 * Resolve a path-style relative import against the importing file's directory.
 * Only `.` and `..` segments are collapsed; file extensions are left untouched.
 */
function resolveRelativeImport(fromPath, specifier) {
    const root = fromPath.startsWith('/') ? '/' : '';
    const segments = fromPath
        .split('/')
        .slice(0, -1)
        .filter(part => part !== '' && part !== '.');
    for (const part of specifier.split('/')) {
        if (part === '' || part === '.') {
            continue;
        }
        if (part === '..' && segments.length > 0 && segments[segments.length - 1] !== '..') {
            segments.pop();
        }
        else {
            // Also reached for a `..` that climbs above the known root: keep it.
            segments.push(part);
        }
    }
    return (root + segments.join('/')) || '.';
}
/**
 * Build a cross-file analysis prompt that looks at patterns across the whole codebase.
 *
 * @param allFacts Facts for every analysed file. Each entry is read for
 *   `path`, `language`, `lineCount`, `hasTests`, `imports`, `functions`,
 *   `classes`, `errorHandling[].strategy` and the optional `structs`,
 *   `interfaces`, `goroutines` and `channels`.
 * @returns DEEP_SYSTEM_PROMPT followed by the cross-file request, guidance for
 *   the most common language, a codebase summary and aggregate counts.
 */
export function buildCrossFilePrompt(allFacts) {
    // Build a high-level codebase summary
    const summary = [];
    // Error handling consistency
    const errorStrategies = new Map();
    for (const f of allFacts) {
        for (const eh of f.errorHandling) {
            const strategies = errorStrategies.get(f.path) || new Set();
            strategies.add(eh.strategy);
            errorStrategies.set(f.path, strategies);
        }
    }
    const allStrategies = new Set();
    for (const strats of errorStrategies.values()) {
        for (const s of strats)
            allStrategies.add(s);
    }
    // Only reported when more than two distinct strategies are in use.
    if (allStrategies.size > 2) {
        summary.push(`ERROR HANDLING: ${allStrategies.size} different strategies used across codebase: ${[...allStrategies].join(', ')}`);
    }
    // Pattern consistency (naming)
    const classNames = allFacts.flatMap(f => [
        ...f.classes.map(c => c.name),
        ...(f.structs || []).map(s => s.name),
    ]);
    const suffixes = classNames.map(n => {
        const match = n.match(/(Service|Controller|Handler|Manager|Repository|Factory|Provider|Util|Helper|Store|Client|Config|Options|Middleware|Router|Server)$/);
        return match?.[1];
    }).filter(Boolean);
    if (suffixes.length > 0) {
        const suffixCounts = new Map();
        for (const s of suffixes) {
            suffixCounts.set(s, (suffixCounts.get(s) || 0) + 1);
        }
        summary.push(`NAMING PATTERNS: ${[...suffixCounts.entries()].map(([k, v]) => `${v}x ${k}`).join(', ')}`);
    }
    // Files with many dependents. Path-style specifiers are resolved against
    // the importing file's directory first: counting the raw text would merge
    // unrelated modules that share a specifier (e.g. `./types` in two
    // directories) and split one module reached through different paths.
    const dependentCounts = new Map();
    for (const f of allFacts) {
        for (const imp of f.imports) {
            if (!imp.startsWith('.')) {
                continue;
            }
            const isPathStyle = imp === '.' || imp === '..' || imp.startsWith('./') || imp.startsWith('../');
            // Other dot-prefixed specifiers are counted verbatim, as before.
            const target = isPathStyle ? resolveRelativeImport(f.path, imp) : imp;
            dependentCounts.set(target, (dependentCounts.get(target) || 0) + 1);
        }
    }
    const highDependents = [...dependentCounts.entries()]
        .filter(([, count]) => count >= 5)
        .sort((a, b) => b[1] - a[1]);
    if (highDependents.length > 0) {
        summary.push(`HIGH-DEPENDENCY MODULES: ${highDependents.map(([m, c]) => `${m} (${c} dependents)`).join(', ')}`);
    }
    // Package/directory structure
    const dirCounts = new Map();
    for (const f of allFacts) {
        const dir = f.path.split('/').slice(0, -1).join('/') || '.';
        dirCounts.set(dir, (dirCounts.get(dir) || 0) + 1);
    }
    const largeDirs = [...dirCounts.entries()]
        .filter(([, count]) => count >= 10)
        .sort((a, b) => b[1] - a[1]);
    if (largeDirs.length > 0) {
        summary.push(`LARGE PACKAGES: ${largeDirs.map(([d, c]) => `${d}/ (${c} files)`).join(', ')}`);
    }
    // Test coverage gaps
    const untestedFiles = allFacts.filter(f => !f.hasTests && f.lineCount > 100 && f.functions.length > 2);
    if (untestedFiles.length > 0) {
        // Capped at ten entries to keep the summary short.
        summary.push(`UNTESTED COMPLEX FILES: ${untestedFiles.slice(0, 10).map(f => `${f.path} (${f.lineCount}L, ${f.functions.length} fns)`).join(', ')}`);
    }
    // Concurrency summary (Go)
    const concurrentFiles = allFacts.filter(f => (f.goroutines || 0) > 0 || (f.channels || 0) > 0);
    if (concurrentFiles.length > 0) {
        summary.push(`CONCURRENT FILES: ${concurrentFiles.map(f => `${f.path} (${f.goroutines || 0} goroutines, ${f.channels || 0} channels)`).join(', ')}`);
    }
    // Detect dominant language
    const langs = new Map();
    for (const f of allFacts) {
        langs.set(f.language, (langs.get(f.language) || 0) + 1);
    }
    let dominantLang = '';
    let maxCount = 0;
    for (const [lang, count] of langs) {
        if (count > maxCount) {
            maxCount = count;
            dominantLang = lang;
        }
    }
    // Own-property lookup so a language value that matches an inherited object
    // member (e.g. "constructor") cannot leak a function into the prompt.
    const langGuide = Object.prototype.hasOwnProperty.call(LANGUAGE_GUIDANCE, dominantLang)
        ? LANGUAGE_GUIDANCE[dominantLang] || ''
        : '';
    return `${DEEP_SYSTEM_PROMPT}

CROSS-FILE ANALYSIS REQUEST:
Look at the codebase-wide patterns and identify:
1. Inconsistent patterns across files (error handling, naming, structure)
2. Module coupling issues (high dependency counts, circular deps)
3. Architecture-level concerns (package cohesion, layer violations)
4. Missing abstractions (repeated patterns that should be unified)
5. Test coverage gaps (complex code without tests)
6. Concurrency safety issues across the codebase
${langGuide}

CODEBASE SUMMARY:
${summary.join('\n')}

FILE COUNT: ${allFacts.length}
TOTAL STRUCTS/CLASSES: ${allFacts.reduce((a, f) => a + f.classes.length + (f.structs?.length || 0), 0)}
TOTAL FUNCTIONS: ${allFacts.reduce((a, f) => a + f.functions.length, 0)}
TOTAL INTERFACES: ${allFacts.reduce((a, f) => a + (f.interfaces?.length || 0), 0)}
TEST FILES: ${allFacts.filter(f => f.hasTests).length}

Return findings as JSON. Aim for 5-15 cross-cutting findings.`;
}
332
/**
 * Split file facts into batches whose estimated size stays within a budget.
 * Facts are first bucketed by parent directory so that files from the same
 * directory sit next to each other, then packed greedily in that order.
 * A single fact larger than the budget still gets a batch of its own.
 *
 * @param facts File facts to batch; each needs a `path` plus whatever
 *   estimateFactSize reads.
 * @param maxCharsPerChunk Estimated-character budget per batch (default 6000).
 * @returns Array of batches, each a non-empty array of facts.
 */
export function chunkFacts(facts, maxCharsPerChunk = 6000) {
    // Bucket by directory; a Map preserves first-seen directory order.
    const byDirectory = new Map();
    for (const fact of facts) {
        const lastSlash = fact.path.lastIndexOf('/');
        const directory = fact.path.slice(0, Math.max(lastSlash, 0)) || '.';
        let bucket = byDirectory.get(directory);
        if (!bucket) {
            bucket = [];
            byDirectory.set(directory, bucket);
        }
        bucket.push(fact);
    }
    // Pack greedily over the directory-ordered sequence.
    const batches = [];
    let batch = [];
    let batchSize = 0;
    for (const fact of [...byDirectory.values()].flat()) {
        const cost = estimateFactSize(fact);
        const overflows = batchSize + cost > maxCharsPerChunk;
        if (overflows && batch.length > 0) {
            batches.push(batch);
            batch = [];
            batchSize = 0;
        }
        batch.push(fact);
        batchSize += cost;
    }
    if (batch.length > 0) {
        batches.push(batch);
    }
    return batches;
}
365
/**
 * Rough character-cost estimate for one file's facts, used to size batches.
 * Every entity contributes its name length plus fixed per-item weights;
 * `structs` and `interfaces` are optional and count as empty when absent.
 */
function estimateFactSize(f) {
    const total = (items, costOf) => items.reduce((sum, item) => sum + costOf(item), 0);
    const structs = f.structs || [];
    const interfaces = f.interfaces || [];
    return (f.path.length + 50
        + total(f.classes, c => c.name.length + c.methods.length * 20 + 50)
        + total(structs, s => s.name.length + s.methods.length * 20 + s.embeds.length * 15 + 60)
        + total(interfaces, i => i.name.length + i.methods.length * 15 + 40)
        + total(f.functions, fn => fn.name.length + fn.params.length * 15 + 50)
        + f.imports.length * 30
        + f.errorHandling.length * 30);
}
@@ -0,0 +1,16 @@
1
+ /**
2
+ * LLM Verification Layer — Step 3 of the three-step pipeline.
3
+ * Checks LLM findings against AST-extracted facts to confirm they reference real code entities.
4
+ * Drops hallucinated findings, tags verified ones.
5
+ */
6
+ import type { DeepFinding } from '../inference/types.js';
7
+ import type { FileFacts } from './fact-extractor.js';
8
+ export interface VerifiedFinding extends DeepFinding {
9
+ verified: boolean; // verification tag for this finding (the module header says verified findings are tagged)
10
+ verificationNotes?: string; // optional free-text notes; presumably explains the verification outcome — TODO confirm against verifier.js
11
+ }
12
+ /**
13
+ * Verify LLM findings against AST-extracted facts.
14
+ * Returns only findings that pass verification.
+ *
+ * @param findings Findings produced by the LLM.
+ * @param facts AST-extracted facts for the analysed files, used as the reference the findings are checked against.
+ * @returns The findings that passed verification, as VerifiedFinding objects.
15
+ */
16
+ export declare function verifyFindings(findings: DeepFinding[], facts: FileFacts[]): VerifiedFinding[];