@rigour-labs/core 3.0.6 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/deep/fact-extractor.d.ts +80 -0
- package/dist/deep/fact-extractor.js +626 -0
- package/dist/deep/index.d.ts +14 -0
- package/dist/deep/index.js +12 -0
- package/dist/deep/prompts.d.ts +22 -0
- package/dist/deep/prompts.js +374 -0
- package/dist/deep/verifier.d.ts +16 -0
- package/dist/deep/verifier.js +388 -0
- package/dist/gates/deep-analysis.d.ts +28 -0
- package/dist/gates/deep-analysis.js +302 -0
- package/dist/gates/runner.d.ts +4 -2
- package/dist/gates/runner.js +46 -1
- package/dist/index.d.ts +10 -0
- package/dist/index.js +12 -2
- package/dist/inference/cloud-provider.d.ts +34 -0
- package/dist/inference/cloud-provider.js +126 -0
- package/dist/inference/index.d.ts +17 -0
- package/dist/inference/index.js +23 -0
- package/dist/inference/model-manager.d.ts +26 -0
- package/dist/inference/model-manager.js +106 -0
- package/dist/inference/sidecar-provider.d.ts +15 -0
- package/dist/inference/sidecar-provider.js +153 -0
- package/dist/inference/types.d.ts +77 -0
- package/dist/inference/types.js +19 -0
- package/dist/settings.d.ts +104 -0
- package/dist/settings.js +186 -0
- package/dist/storage/db.d.ts +16 -0
- package/dist/storage/db.js +132 -0
- package/dist/storage/findings.d.ts +14 -0
- package/dist/storage/findings.js +38 -0
- package/dist/storage/index.d.ts +9 -0
- package/dist/storage/index.js +8 -0
- package/dist/storage/patterns.d.ts +35 -0
- package/dist/storage/patterns.js +62 -0
- package/dist/storage/scans.d.ts +42 -0
- package/dist/storage/scans.js +55 -0
- package/dist/templates/universal-config.js +19 -0
- package/dist/types/index.d.ts +438 -15
- package/dist/types/index.js +41 -1
- package/package.json +6 -2
|
@@ -0,0 +1,374 @@
|
|
|
1
|
+
/**
 * System prompt shared by every deep-analysis request.
 *
 * It fixes the model's role, lists the reporting rules, spells out the JSON
 * shape each finding must follow, and enumerates the category identifiers
 * the model may put in a finding's "category" field.
 *
 * The text is assembled from one array entry per prompt line and joined
 * with newlines; empty entries are the blank separator lines.
 */
export const DEEP_SYSTEM_PROMPT = [
    // Role
    'You are an expert code reviewer and software architect performing deep quality analysis. You receive AST-extracted facts about a codebase and must identify quality issues, anti-patterns, and best practice violations.',
    '',
    // Reporting rules
    'IMPORTANT RULES:',
    '1. ONLY report issues you can verify from the provided facts. Do NOT hallucinate files, classes, or functions.',
    '2. Every finding MUST reference a real file and entity from the facts.',
    '3. Be specific: include file paths, struct/class names, function names, line counts.',
    '4. Assign confidence scores honestly: 0.9+ only for certain issues, 0.5-0.7 for probable issues.',
    '5. Respond ONLY with valid JSON matching the schema below. No explanation text outside JSON.',
    '6. AIM for 5-15 findings per batch. Be thorough — report ALL issues you can identify, not just the most obvious ones.',
    '7. For Go code: treat structs as classes, receiver methods as class methods. Check Go idioms specifically.',
    '',
    // Response schema
    'OUTPUT SCHEMA:',
    '{',
    '"findings": [',
    '{',
    '"category": "string (see CATEGORIES below)",',
    '"severity": "string (critical|high|medium|low|info)",',
    '"file": "string (exact file path from facts)",',
    '"line": "number or null",',
    '"description": "string (what the issue is, referencing specific entities)",',
    '"suggestion": "string (actionable fix recommendation)",',
    '"confidence": "number 0.0-1.0"',
    '}',
    ']',
    '}',
    '',
    // Category catalogue
    'CATEGORIES:',
    'SOLID Principles:',
    'srp_violation - Single file/struct/class handles multiple unrelated responsibilities',
    'ocp_violation - Code requires modification (not extension) for new behavior',
    'lsp_violation - Subtypes break substitutability contracts',
    'isp_violation - Interface has too many methods forcing unnecessary implementations',
    'dip_violation - High-level modules depend directly on low-level implementations',
    '',
    'Design Patterns & Anti-patterns:',
    'god_class - Class/struct with too many fields, methods, or responsibilities (>8 methods or >300 lines)',
    'god_function - Function exceeding 50 lines or doing too many things',
    'feature_envy - Function/method uses another module\'s data more than its own',
    'shotgun_surgery - A single change requires modifying many files',
    'long_params - Function with 4+ parameters (use struct/options pattern)',
    'data_clump - Same group of fields/params repeated across multiple structs/functions',
    'inappropriate_intimacy - Two modules too tightly coupled, accessing each other\'s internals',
    'primitive_obsession - Using primitives instead of domain types (string for email, int for ID)',
    'lazy_class - Struct/class that does too little to justify its existence',
    'speculative_generality - Over-engineered abstractions not justified by current usage',
    'refused_bequest - Subtype/implementation ignores inherited behavior',
    '',
    'DRY & Duplication:',
    'dry_violation - Duplicated logic across files that should be extracted',
    'copy_paste_code - Nearly identical functions/methods in different files',
    '',
    'Error Handling:',
    'error_inconsistency - Mixed error handling strategies in same package/module',
    'empty_catch - Empty catch/except blocks that silently swallow errors',
    'error_swallowing - Errors logged but not propagated when they should be',
    'missing_error_check - Return values (especially errors) not checked',
    'panic_in_library - Library code using panic/os.Exit instead of returning errors',
    '',
    'Concurrency (Go/Rust/async languages):',
    'race_condition - Shared mutable state accessed without synchronization',
    'goroutine_leak - Goroutines spawned without cancellation/context mechanism',
    'missing_context - Functions that should accept context.Context but don\'t',
    'channel_misuse - Unbuffered channels that could deadlock, or missing close()',
    'mutex_scope - Mutex held too long or across I/O operations',
    '',
    'Testing:',
    'test_quality - Insufficient assertions, no edge cases, weak coverage',
    'test_coupling - Tests tightly coupled to implementation details',
    'missing_test - Complex public function/method with no corresponding test',
    'test_duplication - Multiple tests verifying the same behavior redundantly',
    '',
    'Architecture:',
    'architecture - Layer violations, wrong dependency direction',
    'circular_dependency - Modules that import each other',
    'package_cohesion - Package/directory contains unrelated concerns',
    'api_design - Exported API is confusing, inconsistent, or poorly structured',
    'missing_abstraction - Direct usage where an interface/abstraction would improve design',
    '',
    'Language Idioms:',
    'language_idiom - Language-specific anti-patterns',
    'naming_convention - Names don\'t follow language conventions (Go: MixedCaps, Python: snake_case)',
    'dead_code - Unreferenced exports, unused functions',
    'magic_number - Numeric literals without named constants',
    '',
    'Performance & Security:',
    'performance - Obvious performance anti-patterns (N+1 queries, unbounded allocations)',
    'resource_leak - Opened resources (files, connections, readers) not properly closed',
    'hardcoded_config - Configuration values hardcoded instead of externalized',
    '',
    'Code Smells:',
    'code_smell - General smell with refactoring suggestion',
    'complex_conditional - Deeply nested or overly complex conditional logic',
    'long_file - File exceeds reasonable length for its responsibility',
].join('\n');
|
|
97
|
+
/**
 * Language-specific analysis guidance appended to prompts.
 *
 * Keys are lowercase language identifiers; each value is a block of
 * checklist text that begins with a newline so it can be dropped straight
 * into a prompt template.
 *
 * The table has a null prototype and is frozen. Callers index it with keys
 * derived from free-form text, and on a plain object literal a key such as
 * "constructor" or "toString" would resolve to an inherited function and
 * end up interpolated into the prompt. With no prototype, any key that is
 * not a listed language yields `undefined`.
 */
const LANGUAGE_GUIDANCE = Object.freeze(Object.assign(Object.create(null), {
    go: `
GO-SPECIFIC CHECKS (apply these strictly):
- Error handling: Every function returning error must be checked. Look for _ = fn() patterns.
- Context propagation: HTTP handlers and long-running ops should accept context.Context.
- Interface design: Go interfaces should be small (1-3 methods). Large interfaces violate ISP.
- Goroutine safety: Goroutines without context/done channels are potential leaks.
- Defer usage: Missing defer for Close/Unlock calls → resource leaks.
- Struct design: Structs with >8 fields may need decomposition.
- Receiver consistency: All methods on a type should use pointer OR value receiver, not mixed.
- Package naming: Should be short, lowercase, no underscores.
- Error wrapping: Errors should be wrapped with %w for context, not just fmt.Errorf.
- Init functions: Avoid init() — makes testing hard and creates hidden dependencies.
- Global state: Package-level mutable variables are a code smell.`,
    typescript: `
TYPESCRIPT-SPECIFIC CHECKS:
- Use strict null checks. Watch for missing null/undefined guards.
- Prefer interfaces over type aliases for object shapes.
- Avoid 'any' type — use 'unknown' with type guards.
- Async functions should have proper error boundaries.
- Watch for promise chains without .catch() or try/catch.
- Large barrel files (index.ts re-exporting everything) hurt tree-shaking.
- Avoid enums — use 'as const' objects or union types.`,
    javascript: `
JAVASCRIPT-SPECIFIC CHECKS:
- Missing null/undefined checks on function parameters.
- Callback hell — should use async/await.
- var usage — should use const/let.
- == instead of === (loose equality).
- Prototype pollution risks in object manipulation.`,
    python: `
PYTHON-SPECIFIC CHECKS:
- Missing type hints on public functions.
- Bare except clauses that catch all exceptions.
- Mutable default arguments (def fn(x=[])).
- Not using context managers (with statement) for resources.
- Import * polluting namespace.
- Missing __init__.py or improper package structure.`,
    rust: `
RUST-SPECIFIC CHECKS:
- Unwrap/expect in library code instead of proper error handling (?).
- Clone where borrow would suffice.
- Large enums that should be split.
- Missing Send/Sync bounds on async code.
- Unsafe blocks without safety documentation.`,
    java: `
JAVA-SPECIFIC CHECKS:
- God classes with too many responsibilities.
- Missing @Override annotations.
- Raw types instead of generics.
- Checked exceptions caught and ignored.
- Static utility classes that should be injected services.`,
    csharp: `
C#-SPECIFIC CHECKS:
- Not using 'using' for IDisposable resources.
- Async void methods (should be async Task).
- Missing null checks (use nullable reference types).
- Large switch statements that should use polymorphism.`,
}));
|
|
159
|
+
/**
 * Build the analysis prompt for a batch of file facts.
 *
 * @param factsStr Pre-rendered facts text for the batch. It is embedded
 *   verbatim in the prompt and also scanned for `(<word>,` tokens to guess
 *   the dominant language.
 * @param checks Optional map of check id -> enabled flag. Only ids with a
 *   truthy flag are listed under ANALYSIS FOCUS. When omitted (or otherwise
 *   falsy) every known check is listed.
 * @returns The full prompt: system prompt, focus list, optional
 *   language-specific guidance, the facts, and the closing instruction.
 */
export function buildAnalysisPrompt(factsStr, checks) {
    // Own-property test: check ids and detected language tokens are
    // arbitrary strings, so a bare `table[key]` could resolve to an
    // inherited member such as `constructor` and leak into the prompt.
    const hasOwn = (table, key) => Object.prototype.hasOwnProperty.call(table, key);
    const checkDescriptions = {
        solid: 'SOLID principle violations (SRP, OCP, LSP, ISP, DIP)',
        dry: 'DRY violations — duplicated logic, copy-paste code across files',
        design_patterns: 'Design pattern anti-patterns: god class/struct, god function, feature envy, shotgun surgery, long parameter lists, data clumps, inappropriate intimacy, primitive obsession, lazy class, speculative generality',
        language_idioms: 'Language-specific anti-patterns, naming convention violations, dead code',
        error_handling: 'Error handling: inconsistencies, empty catches, swallowed errors, missing error checks, panic in library code',
        test_quality: 'Test quality: insufficient assertions, missing edge cases, test coupling, missing tests for complex code',
        architecture: 'Architecture: layer violations, circular dependencies, package cohesion, API design, missing abstractions',
        code_smells: 'Code smells: complex conditionals, magic numbers, long files, hardcoded config',
        concurrency: 'Concurrency: race conditions, goroutine leaks, missing context, channel misuse, mutex scope',
        performance: 'Performance anti-patterns: resource leaks, unbounded allocations, N+1 patterns',
        naming: 'Naming conventions: language-appropriate naming, unclear/misleading names',
        resource_management: 'Resource management: unclosed files/connections, missing defer/cleanup',
    };
    // With no explicit selection, enable every described check in table order.
    const enabledChecks = checks
        ? Object.entries(checks)
            .filter(([, enabled]) => enabled)
            .map(([check]) => check)
        : Object.keys(checkDescriptions);
    // Unknown check ids are passed through as their raw id.
    const checksStr = enabledChecks
        .map(c => `- ${hasOwn(checkDescriptions, c) ? checkDescriptions[c] : c}`)
        .join('\n');
    // Detect dominant language from facts: count each word that directly
    // follows "(" and precedes ",".
    const langCounts = new Map();
    for (const [, lang] of factsStr.matchAll(/\((\w+),/g)) {
        langCounts.set(lang, (langCounts.get(lang) || 0) + 1);
    }
    // Strict ">" keeps the first-seen token on ties.
    let dominantLang = '';
    let maxCount = 0;
    for (const [lang, count] of langCounts) {
        if (count > maxCount) {
            maxCount = count;
            dominantLang = lang;
        }
    }
    // No guidance when the dominant token is not a known language.
    const langGuide = hasOwn(LANGUAGE_GUIDANCE, dominantLang)
        ? LANGUAGE_GUIDANCE[dominantLang]
        : '';
    return `${DEEP_SYSTEM_PROMPT}

ANALYSIS FOCUS:
${checksStr}
${langGuide}

AST-EXTRACTED FACTS:
${factsStr}

Analyze the codebase facts above. Identify ALL quality issues matching the analysis focus areas. Be thorough — check every file for every category. Return findings as JSON.`;
}
|
|
214
|
+
/**
 * Build a cross-file analysis prompt that looks at patterns across the whole codebase.
 *
 * Each entry of `allFacts` is read for: `path`, `language`, `lineCount`,
 * `hasTests`, `classes[].name`, `functions`, `imports` (strings),
 * `errorHandling[].strategy`, and the optional `structs[].name`,
 * `interfaces`, `goroutines` and `channels`.
 *
 * @param allFacts Facts for every analysed file.
 * @returns Prompt text: the system prompt, the cross-file request, optional
 *   language guidance, a codebase summary (one line per signal that crossed
 *   its threshold) and aggregate counts.
 */
export function buildCrossFilePrompt(allFacts) {
    /**
     * Resolve a path-like import specifier against the importing file's
     * directory, so the same module reached through different relative
     * specifiers shares one key and identically spelled specifiers from
     * different directories do not. Specifiers that are not `./` or `../`
     * paths are returned untouched.
     */
    const resolveImport = (fromFile, spec) => {
        const isPathLike = spec === '.' || spec === '..'
            || spec.startsWith('./') || spec.startsWith('../');
        if (!isPathLike) {
            return spec;
        }
        const parts = fromFile.split('/').slice(0, -1);
        for (const seg of spec.split('/')) {
            if (seg === '' || seg === '.') {
                continue;
            }
            const last = parts[parts.length - 1];
            // Only step up through a real directory name; otherwise keep the "..".
            const canStepUp = last !== undefined && last !== '' && last !== '.' && last !== '..';
            if (seg === '..' && canStepUp) {
                parts.pop();
            }
            else {
                parts.push(seg);
            }
        }
        return parts.join('/') || '.';
    };
    // Build a high-level codebase summary
    const summary = [];
    // Error handling consistency: distinct strategies across all files.
    const allStrategies = new Set();
    for (const f of allFacts) {
        for (const eh of f.errorHandling) {
            allStrategies.add(eh.strategy);
        }
    }
    if (allStrategies.size > 2) {
        summary.push(`ERROR HANDLING: ${allStrategies.size} different strategies used across codebase: ${[...allStrategies].join(', ')}`);
    }
    // Pattern consistency (naming): tally well-known role suffixes.
    const classNames = allFacts.flatMap(f => [
        ...f.classes.map(c => c.name),
        ...(f.structs || []).map(s => s.name),
    ]);
    const suffixes = classNames.map(n => {
        const match = n.match(/(Service|Controller|Handler|Manager|Repository|Factory|Provider|Util|Helper|Store|Client|Config|Options|Middleware|Router|Server)$/);
        return match?.[1];
    }).filter(Boolean);
    if (suffixes.length > 0) {
        const suffixCounts = new Map();
        for (const s of suffixes) {
            suffixCounts.set(s, (suffixCounts.get(s) || 0) + 1);
        }
        summary.push(`NAMING PATTERNS: ${[...suffixCounts.entries()].map(([k, v]) => `${v}x ${k}`).join(', ')}`);
    }
    // Dependency flow: how often each local (dot-prefixed) import target is imported.
    const dependentCounts = new Map();
    for (const f of allFacts) {
        for (const imp of f.imports) {
            if (imp.startsWith('.')) {
                const target = resolveImport(f.path, imp);
                dependentCounts.set(target, (dependentCounts.get(target) || 0) + 1);
            }
        }
    }
    const highDependents = [...dependentCounts.entries()]
        .filter(([, count]) => count >= 5)
        .sort((a, b) => b[1] - a[1]);
    if (highDependents.length > 0) {
        summary.push(`HIGH-DEPENDENCY MODULES: ${highDependents.map(([m, c]) => `${m} (${c} dependents)`).join(', ')}`);
    }
    // Package/directory structure
    const dirCounts = new Map();
    for (const f of allFacts) {
        const dir = f.path.split('/').slice(0, -1).join('/') || '.';
        dirCounts.set(dir, (dirCounts.get(dir) || 0) + 1);
    }
    const largeDirs = [...dirCounts.entries()]
        .filter(([, count]) => count >= 10)
        .sort((a, b) => b[1] - a[1]);
    if (largeDirs.length > 0) {
        summary.push(`LARGE PACKAGES: ${largeDirs.map(([d, c]) => `${d}/ (${c} files)`).join(', ')}`);
    }
    // Test coverage gaps: at most the first 10 offenders are listed.
    const untestedFiles = allFacts.filter(f => !f.hasTests && f.lineCount > 100 && f.functions.length > 2);
    if (untestedFiles.length > 0) {
        summary.push(`UNTESTED COMPLEX FILES: ${untestedFiles.slice(0, 10).map(f => `${f.path} (${f.lineCount}L, ${f.functions.length} fns)`).join(', ')}`);
    }
    // Concurrency summary (Go)
    const concurrentFiles = allFacts.filter(f => (f.goroutines || 0) > 0 || (f.channels || 0) > 0);
    if (concurrentFiles.length > 0) {
        summary.push(`CONCURRENT FILES: ${concurrentFiles.map(f => `${f.path} (${f.goroutines || 0} goroutines, ${f.channels || 0} channels)`).join(', ')}`);
    }
    // Detect dominant language: most frequent `language`, first seen wins ties.
    const langs = new Map();
    for (const f of allFacts) {
        langs.set(f.language, (langs.get(f.language) || 0) + 1);
    }
    let dominantLang = '';
    let maxCount = 0;
    for (const [lang, count] of langs) {
        if (count > maxCount) {
            maxCount = count;
            dominantLang = lang;
        }
    }
    // Own-property check so an odd language value cannot pick up an
    // inherited Object.prototype member.
    const langGuide = Object.prototype.hasOwnProperty.call(LANGUAGE_GUIDANCE, dominantLang)
        ? LANGUAGE_GUIDANCE[dominantLang]
        : '';
    return `${DEEP_SYSTEM_PROMPT}

CROSS-FILE ANALYSIS REQUEST:
Look at the codebase-wide patterns and identify:
1. Inconsistent patterns across files (error handling, naming, structure)
2. Module coupling issues (high dependency counts, circular deps)
3. Architecture-level concerns (package cohesion, layer violations)
4. Missing abstractions (repeated patterns that should be unified)
5. Test coverage gaps (complex code without tests)
6. Concurrency safety issues across the codebase
${langGuide}

CODEBASE SUMMARY:
${summary.join('\n')}

FILE COUNT: ${allFacts.length}
TOTAL STRUCTS/CLASSES: ${allFacts.reduce((a, f) => a + f.classes.length + (f.structs?.length || 0), 0)}
TOTAL FUNCTIONS: ${allFacts.reduce((a, f) => a + f.functions.length, 0)}
TOTAL INTERFACES: ${allFacts.reduce((a, f) => a + (f.interfaces?.length || 0), 0)}
TEST FILES: ${allFacts.filter(f => f.hasTests).length}

Return findings as JSON. Aim for 5-15 cross-cutting findings.`;
}
|
|
332
|
+
/**
 * Split file facts into batches whose estimated size stays within a budget.
 *
 * Facts are first bucketed by parent directory (in first-seen order) so that
 * files from the same directory sit next to each other, then packed greedily
 * in that order. A batch is closed as soon as adding the next fact would push
 * its estimated size over `maxCharsPerChunk`; a single fact larger than the
 * budget still gets a batch of its own.
 *
 * @param facts File facts to batch; each needs a `path` plus whatever
 *   `estimateFactSize` reads.
 * @param maxCharsPerChunk Estimated-size budget per batch (default 6000).
 * @returns Array of batches, each a non-empty array of facts.
 */
export function chunkFacts(facts, maxCharsPerChunk = 6000) {
    // Bucket by directory; a Map keeps buckets in first-seen order.
    const byDirectory = new Map();
    facts.forEach((fact) => {
        const lastSlash = fact.path.lastIndexOf('/');
        // Everything before the final "/" is the directory; "." when that is empty.
        const directory = (lastSlash > 0 ? fact.path.slice(0, lastSlash) : '') || '.';
        if (!byDirectory.has(directory)) {
            byDirectory.set(directory, []);
        }
        byDirectory.get(directory).push(fact);
    });
    // Directory-ordered stream of facts to pack.
    const ordered = [...byDirectory.values()].flat();
    const batches = [];
    let pending = [];
    let pendingSize = 0;
    for (const fact of ordered) {
        const cost = estimateFactSize(fact);
        const wouldOverflow = pendingSize + cost > maxCharsPerChunk;
        // Never flush an empty batch: an oversized fact goes through alone.
        if (wouldOverflow && pending.length > 0) {
            batches.push(pending);
            pending = [];
            pendingSize = 0;
        }
        pending.push(fact);
        pendingSize += cost;
    }
    if (pending.length > 0) {
        batches.push(pending);
    }
    return batches;
}
|
|
365
|
+
/**
 * Rough estimate of how much prompt space one file's facts will take.
 *
 * The figure is a heuristic built from fixed per-item weights:
 * path length + 50, then per class (name + 20 per method + 50), per struct
 * (name + 20 per method + 15 per embed + 60), per interface (name + 15 per
 * method + 40), per function (name + 15 per param + 50), and 30 for each
 * import and each error-handling entry. `structs` and `interfaces` may be
 * absent; the other collections are read unconditionally.
 *
 * @param f File facts object.
 * @returns Estimated size as a number.
 */
function estimateFactSize(f) {
    let total = f.path.length + 50;
    for (const cls of f.classes) {
        total += cls.name.length + cls.methods.length * 20 + 50;
    }
    for (const struct of f.structs || []) {
        total += struct.name.length + struct.methods.length * 20 + struct.embeds.length * 15 + 60;
    }
    for (const iface of f.interfaces || []) {
        total += iface.name.length + iface.methods.length * 15 + 40;
    }
    for (const fn of f.functions) {
        total += fn.name.length + fn.params.length * 15 + 50;
    }
    total += f.imports.length * 30;
    total += f.errorHandling.length * 30;
    return total;
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* LLM Verification Layer — Step 3 of the three-step pipeline.
|
|
3
|
+
* AST verifies that LLM findings reference real code entities.
|
|
4
|
+
* Drops hallucinated findings, tags verified ones.
|
|
5
|
+
*/
|
|
6
|
+
import type { DeepFinding } from '../inference/types.js';
|
|
7
|
+
import type { FileFacts } from './fact-extractor.js';
|
|
8
|
+
/**
 * A `DeepFinding` carrying the result of the verification step.
 */
export interface VerifiedFinding extends DeepFinding {
    /**
     * Verification outcome flag.
     * NOTE(review): presumably `true` when the finding's referenced code
     * entities were matched against the extracted facts — confirm in verifier.js.
     */
    verified: boolean;
    /**
     * Optional free-text notes attached during verification.
     * NOTE(review): when and with what content this is set is decided by
     * the implementation, which is not visible from this declaration.
     */
    verificationNotes?: string;
}
|
|
12
|
+
/**
 * Verify LLM findings against AST-extracted facts.
 * Returns only findings that pass verification.
 *
 * @param findings Findings produced by the LLM.
 * @param facts AST-extracted facts for the analysed files, used as the
 *   reference the findings are checked against.
 * @returns The findings that passed verification, as `VerifiedFinding`s.
 */
export declare function verifyFindings(findings: DeepFinding[], facts: FileFacts[]): VerifiedFinding[];
|