@rigour-labs/core 3.0.5 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/deep/fact-extractor.d.ts +80 -0
- package/dist/deep/fact-extractor.js +626 -0
- package/dist/deep/index.d.ts +14 -0
- package/dist/deep/index.js +12 -0
- package/dist/deep/prompts.d.ts +22 -0
- package/dist/deep/prompts.js +374 -0
- package/dist/deep/verifier.d.ts +16 -0
- package/dist/deep/verifier.js +388 -0
- package/dist/gates/deep-analysis.d.ts +28 -0
- package/dist/gates/deep-analysis.js +302 -0
- package/dist/gates/deprecated-apis-rules-lang.d.ts +21 -0
- package/dist/gates/deprecated-apis-rules-lang.js +311 -0
- package/dist/gates/deprecated-apis-rules-node.d.ts +19 -0
- package/dist/gates/deprecated-apis-rules-node.js +199 -0
- package/dist/gates/deprecated-apis-rules.d.ts +6 -0
- package/dist/gates/deprecated-apis-rules.js +6 -0
- package/dist/gates/deprecated-apis.js +1 -502
- package/dist/gates/hallucinated-imports-lang.d.ts +16 -0
- package/dist/gates/hallucinated-imports-lang.js +374 -0
- package/dist/gates/hallucinated-imports-stdlib.d.ts +12 -0
- package/dist/gates/hallucinated-imports-stdlib.js +228 -0
- package/dist/gates/hallucinated-imports.d.ts +0 -98
- package/dist/gates/hallucinated-imports.js +10 -678
- package/dist/gates/phantom-apis-data.d.ts +33 -0
- package/dist/gates/phantom-apis-data.js +398 -0
- package/dist/gates/phantom-apis.js +1 -393
- package/dist/gates/phantom-apis.test.js +52 -0
- package/dist/gates/promise-safety-helpers.d.ts +19 -0
- package/dist/gates/promise-safety-helpers.js +101 -0
- package/dist/gates/promise-safety-rules.d.ts +7 -0
- package/dist/gates/promise-safety-rules.js +19 -0
- package/dist/gates/promise-safety.d.ts +1 -21
- package/dist/gates/promise-safety.js +51 -257
- package/dist/gates/runner.d.ts +4 -2
- package/dist/gates/runner.js +46 -1
- package/dist/gates/test-quality-lang.d.ts +30 -0
- package/dist/gates/test-quality-lang.js +188 -0
- package/dist/gates/test-quality.d.ts +0 -14
- package/dist/gates/test-quality.js +13 -186
- package/dist/index.d.ts +10 -0
- package/dist/index.js +12 -2
- package/dist/inference/cloud-provider.d.ts +34 -0
- package/dist/inference/cloud-provider.js +126 -0
- package/dist/inference/index.d.ts +17 -0
- package/dist/inference/index.js +23 -0
- package/dist/inference/model-manager.d.ts +26 -0
- package/dist/inference/model-manager.js +106 -0
- package/dist/inference/sidecar-provider.d.ts +15 -0
- package/dist/inference/sidecar-provider.js +153 -0
- package/dist/inference/types.d.ts +77 -0
- package/dist/inference/types.js +19 -0
- package/dist/pattern-index/indexer-helpers.d.ts +38 -0
- package/dist/pattern-index/indexer-helpers.js +111 -0
- package/dist/pattern-index/indexer-lang.d.ts +13 -0
- package/dist/pattern-index/indexer-lang.js +244 -0
- package/dist/pattern-index/indexer-ts.d.ts +22 -0
- package/dist/pattern-index/indexer-ts.js +258 -0
- package/dist/pattern-index/indexer.d.ts +4 -106
- package/dist/pattern-index/indexer.js +58 -707
- package/dist/pattern-index/staleness-data.d.ts +6 -0
- package/dist/pattern-index/staleness-data.js +262 -0
- package/dist/pattern-index/staleness.js +1 -258
- package/dist/settings.d.ts +104 -0
- package/dist/settings.js +186 -0
- package/dist/storage/db.d.ts +16 -0
- package/dist/storage/db.js +132 -0
- package/dist/storage/findings.d.ts +14 -0
- package/dist/storage/findings.js +38 -0
- package/dist/storage/index.d.ts +9 -0
- package/dist/storage/index.js +8 -0
- package/dist/storage/patterns.d.ts +35 -0
- package/dist/storage/patterns.js +62 -0
- package/dist/storage/scans.d.ts +42 -0
- package/dist/storage/scans.js +55 -0
- package/dist/templates/index.d.ts +12 -16
- package/dist/templates/index.js +11 -527
- package/dist/templates/paradigms.d.ts +2 -0
- package/dist/templates/paradigms.js +46 -0
- package/dist/templates/presets.d.ts +14 -0
- package/dist/templates/presets.js +227 -0
- package/dist/templates/universal-config.d.ts +2 -0
- package/dist/templates/universal-config.js +190 -0
- package/dist/types/index.d.ts +438 -15
- package/dist/types/index.js +41 -1
- package/package.json +6 -2
|
@@ -0,0 +1,626 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* AST Fact Extractor — Step 1 of the three-step pipeline.
|
|
3
|
+
 * Extracts structured facts from code files using lightweight regex heuristics (no tree-sitter grammar is loaded).
|
|
4
|
+
* These facts ground the LLM analysis and prevent hallucination.
|
|
5
|
+
*/
|
|
6
|
+
import fs from 'fs-extra';
|
|
7
|
+
import path from 'path';
|
|
8
|
+
import { globby } from 'globby';
|
|
9
|
+
/**
 * Walks a project directory and gathers per-file facts for the deep-analysis pipeline.
 *
 * Extraction is regex-based, so no tree-sitter grammar has to be loaded and the
 * same code path serves every supported language.
 *
 * @param cwd Directory that the glob patterns and file paths are resolved against.
 * @param ignore Optional extra glob patterns to exclude on top of the built-in list.
 * @returns One facts object per file that could be read and was not skipped by
 *          `extractFileFacts`.
 */
export async function extractFacts(cwd, ignore) {
    const sourceGlobs = ['**/*.{ts,js,tsx,jsx,py,go,rs,cs,java,rb,kt}'];
    const alwaysIgnored = [
        '**/node_modules/**', '**/dist/**', '**/build/**',
        '**/.git/**', '**/vendor/**', '**/__pycache__/**',
        '**/*.min.js', '**/*.bundle.js',
    ];
    const globOptions = {
        cwd,
        // Caller-supplied patterns come first, followed by the built-in exclusions.
        ignore: [...(ignore || []), ...alwaysIgnored],
        followSymbolicLinks: false,
    };
    const collected = [];
    for (const relativePath of await globby(sourceGlobs, globOptions)) {
        try {
            const text = await fs.readFile(path.join(cwd, relativePath), 'utf-8');
            const fileFacts = extractFileFacts(relativePath, text);
            if (fileFacts) {
                collected.push(fileFacts);
            }
        }
        catch {
            // Best effort: a file that cannot be read or processed is simply left out.
        }
    }
    return collected;
}
|
|
37
|
+
/**
 * Builds the facts record for a single file from its text content.
 *
 * @param filePath Path of the file as it should appear in the facts; also drives
 *                 language detection and the test-file heuristics.
 * @param content  Full text of the file.
 * @returns The facts record, or `null` when the file has fewer than three lines.
 */
function extractFileFacts(filePath, content) {
    const lines = content.split('\n');
    if (lines.length < 3)
        return null; // Skip trivial files
    const language = detectLanguage(filePath);
    const isGo = language === 'go';
    const facts = {
        path: filePath,
        language,
        lineCount: lines.length,
        classes: extractClasses(content, language),
        // Go has its own receiver-aware extractor, so the generic pass is skipped
        // for Go files instead of being computed and then thrown away.
        functions: isGo ? extractGoFunctions(content) : extractFunctions(content, language),
        imports: extractImports(content, language),
        exports: extractExports(content, language),
        errorHandling: extractErrorHandling(content, language),
        testAssertions: countAssertions(content),
        hasTests: isTestFile(filePath, content),
    };
    // Go-specific extraction (no other language has dedicated handling here)
    if (isGo) {
        Object.assign(facts, {
            structs: extractGoStructs(content),
            interfaces: extractGoInterfaces(content),
            goroutines: (content.match(/\bgo\s+\w+/g) || []).length,
            // `\bchan\b` (not `ch?an`, which also matched the English word "can");
            // `make(chan ...)` is already covered by the keyword itself.
            channels: (content.match(/\bchan\b|<-\s*\w+|\w+\s*<-/g) || []).length,
            defers: (content.match(/\bdefer\b/g) || []).length,
            mutexes: (content.match(/sync\.(?:Mutex|RWMutex|WaitGroup|Once|Pool|Map)|\.Lock\(\)|\.Unlock\(\)|\.RLock\(\)|\.RUnlock\(\)/g) || []).length,
        });
    }
    // General quality metrics (all languages)
    Object.assign(facts, {
        commentRatio: countCommentRatio(content, language),
        magicNumbers: countMagicNumbers(content, language),
        todoCount: (content.match(/\b(?:TODO|FIXME|HACK|XXX|WORKAROUND)\b/gi) || []).length,
    });
    return facts;
}
|
|
74
|
+
/**
 * Maps a file path to a language label based on its (case-insensitive) extension.
 *
 * @param filePath Path or file name to inspect.
 * @returns The language label, or `'unknown'` for any extension not listed below.
 */
function detectLanguage(filePath) {
    switch (path.extname(filePath).toLowerCase()) {
        case '.ts':
        case '.tsx':
            return 'typescript';
        case '.js':
        case '.jsx':
            return 'javascript';
        case '.py':
            return 'python';
        case '.go':
            return 'go';
        case '.rs':
            return 'rust';
        case '.cs':
            return 'csharp';
        case '.java':
            return 'java';
        case '.rb':
            return 'ruby';
        case '.kt':
            return 'kotlin';
        default:
            return 'unknown';
    }
}
|
|
89
|
+
/**
 * Finds class declarations and summarises each one (extent, methods, dependencies).
 *
 * Heuristics, not a parser: class extent is found by indentation for Python and
 * by naive brace counting elsewhere (braces inside strings/comments are counted).
 *
 * @param content Full text of the file.
 * @param lang    Language label; `'python'` selects the indentation-based logic.
 * @returns One entry per class with `name`, 1-based `lineStart`/`lineEnd`,
 *          `lineCount`, `methods`, `publicMethods`, `methodCount` and `dependencies`.
 */
function extractClasses(content, lang) {
    const isPython = lang === 'python';
    const lines = content.split('\n');
    const classes = [];
    // Words that can precede "(" at the start of a line without declaring a method.
    const NOT_METHODS = new Set([
        'if', 'for', 'while', 'switch', 'catch', 'return', 'function', 'super', 'new',
        'typeof', 'await', 'throw', 'do', 'else', 'case', 'yield', 'delete', 'void',
        'class', 'extends', 'implements', 'foreach', 'lock', 'using', 'when',
    ]);
    const classPattern = isPython ? /^\s*class\s+(\w+)/ : /\bclass\s+(\w+)/;
    // [modifiers / return type tokens] [#]name [<generics>] (
    const signaturePattern = /^\s*((?:[\w<>\[\],.?]+\s+)*)(#?)(\w+)\s*(?:<[^<>()]*>)?\s*\(/;

    // Python: the class runs until the next non-blank line indented at or left of the header.
    const findIndentedEnd = (start) => {
        const baseIndent = lines[start].search(/\S/);
        let end = start + 1;
        for (let j = start + 1; j < lines.length; j++) {
            const indent = lines[j].search(/\S/);
            if (indent >= 0 && indent <= baseIndent)
                break;
            end = j + 1;
        }
        return end;
    };
    // Brace languages: the class ends on the line where the first opened brace is balanced.
    const findBraceEnd = (start) => {
        let depth = 0;
        let opened = false;
        for (let j = start; j < lines.length; j++) {
            for (const ch of lines[j]) {
                if (ch === '{') {
                    depth++;
                    opened = true;
                }
                else if (ch === '}') {
                    depth--;
                }
            }
            if (opened && depth <= 0)
                return j + 1;
        }
        return start + 1;
    };

    for (let i = 0; i < lines.length; i++) {
        // The word "class" inside a comment must not produce a phantom class.
        if (!isPython && /^\s*(?:\/\/|\/\*|\*)/.test(lines[i]))
            continue;
        const match = lines[i].match(classPattern);
        if (!match)
            continue;
        const name = match[1];
        if (name === 'extends' || name === 'implements')
            continue; // Anonymous class expression, e.g. `class extends Base {`
        const lineStart = i + 1;
        const lineEnd = isPython ? findIndentedEnd(i) : findBraceEnd(i);
        const classLines = lines.slice(i, lineEnd);
        const classContent = classLines.join('\n');
        const methods = [];
        const publicMethods = [];
        if (isPython) {
            const defPattern = /^\s+(?:async\s+)?def\s+(\w+)/gm;
            let defMatch;
            while ((defMatch = defPattern.exec(classContent)) !== null) {
                const methodName = defMatch[1];
                if (methodName === name || methodName === 'constructor')
                    continue;
                methods.push(methodName);
                // Leading underscore is the Python convention for non-public members.
                if (!methodName.startsWith('_'))
                    publicMethods.push(methodName);
            }
        }
        else {
            // Only lines that start directly inside the class body (brace depth 1)
            // can declare a method; deeper lines are statements inside method bodies.
            let depth = 0;
            for (const text of classLines) {
                const depthAtLineStart = depth;
                for (const ch of text) {
                    if (ch === '{')
                        depth++;
                    else if (ch === '}')
                        depth--;
                }
                if (depthAtLineStart !== 1)
                    continue;
                const signature = text.match(signaturePattern);
                if (!signature)
                    continue;
                const modifiers = signature[1].split(/\s+/).filter(Boolean);
                const methodName = signature[3];
                if (methodName === name || methodName === 'constructor')
                    continue; // Skip constructor
                if (NOT_METHODS.has(methodName) || modifiers.some(word => NOT_METHODS.has(word)))
                    continue;
                methods.push(methodName);
                const hidden = signature[2] === '#'
                    || modifiers.includes('private')
                    || modifiers.includes('protected');
                if (!hidden)
                    publicMethods.push(methodName);
            }
        }
        // Dependencies: types of members declared with an access modifier
        // (typically TypeScript constructor parameter properties).
        const depPattern = /\b(?:private|protected|readonly|public)\s+(\w+)\??:\s*(\w+)/g;
        const dependencies = [];
        let depMatch;
        while ((depMatch = depPattern.exec(classContent)) !== null) {
            dependencies.push(depMatch[2]);
        }
        classes.push({
            name,
            lineStart,
            lineEnd,
            methodCount: methods.length,
            methods,
            publicMethods,
            lineCount: lineEnd - lineStart + 1,
            dependencies,
        });
    }
    return classes;
}
|
|
170
|
+
/**
 * Finds function declarations (one per line at most) and summarises each one.
 *
 * Recognises top-level Python `def`/`async def`, and for other languages
 * `function` declarations, arrow functions and function expressions bound with
 * `const`/`let`. Function extent is found by indentation (Python) or naive brace
 * counting limited to 500 lines.
 *
 * @param content Full text of the file.
 * @param lang    Language label; `'python'` selects the indentation-based logic.
 * @returns One entry per function with name, 1-based line range, parameters,
 *          nesting depth and `hasReturn` / `isAsync` / `isExported` flags.
 */
function extractFunctions(content, lang) {
    const isPython = lang === 'python';
    const lines = content.split('\n');
    const functions = [];
    const patterns = isPython
        ? [/^(?:async\s+)?def\s+(\w+)\s*\(([^)]*)\)/]
        : [
            /(?:export\s+)?(?:async\s+)?function\s+(\w+)\s*\(([^)]*)\)/,
            /(?:export\s+)?(?:const|let)\s+(\w+)\s*=\s*(?:async\s+)?\([^)]*\)\s*(?::\s*\w+(?:<[^>]+>)?)?\s*=>/,
            /(?:export\s+)?(?:const|let)\s+(\w+)\s*=\s*(?:async\s+)?function/,
        ];

    // Returns the 1-based last line of the function that starts at index `start`.
    const findEnd = (start) => {
        let end = start + 1;
        if (isPython) {
            const baseIndent = lines[start].search(/\S/);
            for (let j = start + 1; j < lines.length; j++) {
                const indent = lines[j].search(/\S/);
                if (indent >= 0 && indent <= baseIndent)
                    break;
                end = j + 1;
            }
            return end;
        }
        let depth = 0;
        let opened = false;
        const stop = Math.min(lines.length, start + 500);
        for (let j = start; j < stop; j++) {
            for (const ch of lines[j]) {
                if (ch === '{') {
                    depth++;
                    opened = true;
                }
                else if (ch === '}') {
                    depth--;
                }
            }
            if (opened && depth <= 0)
                return j + 1;
            // Arrow functions without braces end on the line holding the arrow.
            if (!opened && lines[j].includes('=>') && !lines[j].includes('{'))
                return j + 1;
        }
        return end;
    };

    for (let i = 0; i < lines.length; i++) {
        const line = lines[i];
        for (const pattern of patterns) {
            const match = line.match(pattern);
            if (!match)
                continue;
            const name = match[1];
            if (!name || name === 'if' || name === 'for' || name === 'while')
                continue;
            const lineStart = i + 1;
            const lineEnd = findEnd(i);
            const params = (match[2] || '')
                .split(',')
                .map(p => p.trim())
                .filter(p => p.length > 0);
            const funcContent = lines.slice(i, lineEnd).join('\n');
            functions.push({
                name,
                lineStart,
                lineEnd,
                lineCount: lineEnd - lineStart + 1,
                paramCount: params.length,
                params,
                maxNesting: calculateMaxNesting(funcContent, lang),
                // Whole-word checks: a bare `return` at end of line counts, while
                // identifiers such as `asyncHandler` or `exportData` do not set flags.
                hasReturn: /\breturn\b/.test(funcContent),
                isAsync: /\basync\b/.test(line),
                isExported: /\bexport\b/.test(line),
            });
            break; // One match per line
        }
    }
    return functions;
}
|
|
247
|
+
/**
 * Lists the module specifiers a file imports.
 *
 * Python: `import x` / `from x import y` at the start of a line.
 * Other languages: ES `import ... from '...'` (including multi-line clauses),
 * side-effect `import '...'`, then `require('...')` and dynamic `import('...')`.
 *
 * @param content Full text of the file.
 * @param lang    Language label; `'python'` selects the Python syntax.
 * @returns Module specifiers; ES static imports come first, call-style imports after.
 */
function extractImports(content, lang) {
    const imports = [];
    let match;
    if (lang === 'python') {
        const pattern = /^(?:from\s+(\S+)\s+)?import\s+(.+)$/gm;
        while ((match = pattern.exec(content)) !== null) {
            imports.push(match[1] || match[2].trim());
        }
        return imports;
    }
    // The import clause may span several lines, so it is matched with a character
    // class that includes whitespace instead of `.` (which stops at a newline).
    // The clause is optional so that side-effect imports are captured too.
    const staticImport = /\bimport\s+(?:[\w*\s{},$]+?\s+from\s+)?['"]([^'"]+)['"]/g;
    while ((match = staticImport.exec(content)) !== null) {
        imports.push(match[1]);
    }
    const callImport = /\b(?:require|import)\s*\(\s*['"]([^'"]+)['"]\s*\)/g;
    while ((match = callImport.exec(content)) !== null) {
        imports.push(match[1]);
    }
    return imports;
}
|
|
269
|
+
/**
 * Lists the names a TypeScript/JavaScript file exports.
 *
 * Covers exported declarations (`export [default] [abstract] [async] class|function|...`)
 * and export lists (`export { a, b as c }`, with or without `from`).
 *
 * @param content Full text of the file.
 * @param lang    Language label; anything other than `'typescript'`/`'javascript'` yields `[]`.
 * @returns Exported names in source order.
 */
function extractExports(content, lang) {
    if (lang !== 'typescript' && lang !== 'javascript') {
        return [];
    }
    const found = []; // { at: offset in content, name }
    let match;
    const declaration = /\bexport\s+(?:declare\s+)?(?:default\s+)?(?:abstract\s+)?(?:async\s+)?(?:function\s*\*\s*|(?:class|function|const|let|var|interface|type|enum|namespace)\s+)(\w+)/g;
    while ((match = declaration.exec(content)) !== null) {
        found.push({ at: match.index, name: match[1] });
    }
    const exportList = /\bexport\s+(?:type\s+)?\{([^}]*)\}/g;
    while ((match = exportList.exec(content)) !== null) {
        for (const entry of match[1].split(',')) {
            // `local as exported` publishes the name after `as`; a leading inline
            // `type` keyword is not part of the name.
            const words = entry.trim().replace(/^type\s+/, '').split(/\s+as\s+/);
            const exported = words[words.length - 1].trim();
            if (/^[\w$]+$/.test(exported)) {
                found.push({ at: match.index, name: exported });
            }
        }
    }
    // Two passes were used above; restore source order (stable for equal offsets).
    found.sort((left, right) => left.at - right.at);
    return found.map(item => item.name);
}
|
|
280
|
+
/**
 * Locates error handlers and classifies what each one does with the error.
 *
 * Two handler kinds are reported:
 *  - `try-catch`: a `try` followed within 50 lines by `catch (...)`, `catch {`
 *    or (Python) `except`. The handler body is read by brace matching, or by
 *    indentation for Python, and classified as `ignore` (empty or comments
 *    only), `log`, `throw`, `return` or `custom`.
 *  - `promise-catch`: a `.catch(` call; `ignore` when the callback body is `{}`.
 *
 * @param content Full text of the file.
 * @param lang    Language label; `'python'` enables `try:` / `except` handling.
 * @returns Entries of the form `{ type, lineStart, isEmpty, strategy }`, where
 *          `lineStart` is the 1-based line of the `try` (or of the `.catch(`).
 */
function extractErrorHandling(content, lang) {
    const isPython = lang === 'python';
    const lines = content.split('\n');
    const handlers = [];

    // Text between the braces of the block that opens at or after `column` on
    // line `lineIndex`. Braces inside the `(...)` binding are ignored, so a
    // destructured binding such as `catch ({ message }) {` is not mistaken for
    // the body. Returns null when no block opens within 200 lines.
    const readBraceBody = (lineIndex, column) => {
        let body = '';
        let opened = false;
        let depth = 0;
        let parens = 0;
        const stop = Math.min(lines.length, lineIndex + 200);
        for (let k = lineIndex; k < stop; k++) {
            const text = k === lineIndex ? lines[k].slice(column) : lines[k];
            for (const ch of text) {
                if (!opened) {
                    if (ch === '(') {
                        parens++;
                    }
                    else if (ch === ')') {
                        parens--;
                    }
                    else if (ch === '{' && parens === 0) {
                        opened = true;
                        depth = 1;
                    }
                    continue;
                }
                if (ch === '{') {
                    depth++;
                }
                else if (ch === '}') {
                    depth--;
                    if (depth === 0)
                        return body;
                }
                body += ch;
            }
            if (opened)
                body += '\n';
        }
        return opened ? body : null;
    };

    // Python: anything after the colon on the `except` line plus all following
    // lines that are blank or indented deeper than the `except` itself.
    const readIndentedBody = (lineIndex) => {
        const header = lines[lineIndex];
        const baseIndent = header.search(/\S/);
        const colon = header.indexOf(':');
        const collected = [colon >= 0 ? header.slice(colon + 1) : ''];
        for (let k = lineIndex + 1; k < lines.length; k++) {
            const indent = lines[k].search(/\S/);
            if (indent >= 0 && indent <= baseIndent)
                break;
            collected.push(lines[k]);
        }
        return collected.join('\n');
    };

    const classify = (body) => {
        // Comments do not handle anything, so they are removed before deciding.
        const code = isPython
            ? body.replace(/#.*$/gm, '')
            : body.replace(/\/\*[\s\S]*?\*\//g, '').replace(/\/\/.*$/gm, '');
        const trimmed = code.trim();
        if (trimmed === '' || (isPython && /^(?:pass|\.\.\.)$/.test(trimmed)))
            return 'ignore';
        if (code.includes('console.log') || code.includes('console.error') || code.includes('print('))
            return 'log';
        if (code.includes('throw') || (isPython && /\braise\b/.test(code)))
            return 'throw';
        if (code.includes('return'))
            return 'return';
        return 'custom';
    };

    for (let i = 0; i < lines.length; i++) {
        const line = lines[i];
        // try-catch blocks
        if (/\btry\s*\{/.test(line) || (isPython && /^\s*try\s*:/.test(line))) {
            const limit = Math.min(lines.length, i + 50);
            for (let j = i + 1; j < limit; j++) {
                // `[({]` also accepts the optional catch binding form `catch {`.
                const braceCatch = lines[j].match(/\bcatch\s*[({]/);
                const pyExcept = isPython && /^\s*except\b/.test(lines[j]);
                if (!braceCatch && !pyExcept)
                    continue;
                let body;
                if (braceCatch) {
                    body = readBraceBody(j, (braceCatch.index || 0) + 'catch'.length);
                    if (body === null) {
                        // No block found: fall back to a short look-ahead window.
                        body = lines.slice(j + 1, Math.min(lines.length, j + 5)).join('\n');
                    }
                }
                else {
                    body = readIndentedBody(j);
                }
                const strategy = classify(body);
                handlers.push({
                    type: 'try-catch',
                    lineStart: i + 1,
                    isEmpty: strategy === 'ignore',
                    strategy,
                });
                break;
            }
        }
        // .catch() handlers
        const promiseCatchAt = line.search(/\.catch\s*\(/);
        if (promiseCatchAt >= 0) {
            // Start at this line's `.catch(` so a later handler in the look-ahead
            // window cannot be mistaken for this one.
            const window = [line.slice(promiseCatchAt)]
                .concat(lines.slice(i + 1, Math.min(lines.length, i + 5)))
                .join('\n');
            const isEmpty = /^\.catch\s*\(\s*(?:(?:async\s+)?(?:\([^()]*\)|[\w$]+)\s*=>\s*\{\s*\}|function\s*[\w$]*\s*\([^()]*\)\s*\{\s*\})\s*\)/.test(window);
            handlers.push({
                type: 'promise-catch',
                lineStart: i + 1,
                isEmpty,
                strategy: isEmpty ? 'ignore' : 'custom',
            });
        }
    }
    return handlers;
}
|
|
327
|
+
/**
 * Estimates the deepest nesting level inside a piece of code.
 *
 * Python: indentation columns divided by four, with a tab counted as four
 * columns so that tab-indented code is not reported as flat.
 * Other languages: the maximum number of simultaneously open `{` braces
 * (braces inside strings and comments are counted too).
 *
 * @param content Source text to measure (typically one function).
 * @param lang    Language label; `'python'` selects the indentation-based logic.
 * @returns The maximum nesting level found, 0 for empty input.
 */
function calculateMaxNesting(content, lang) {
    const TAB_WIDTH = 4;
    let maxNesting = 0;
    if (lang === 'python') {
        for (const line of content.split('\n')) {
            const firstCode = line.search(/\S/);
            if (firstCode < 0)
                continue; // Blank line: carries no nesting information
            const leading = line.slice(0, firstCode);
            const tabs = leading.split('\t').length - 1;
            // Every tab already contributes one character to `leading.length`.
            const columns = leading.length + tabs * (TAB_WIDTH - 1);
            maxNesting = Math.max(maxNesting, Math.floor(columns / 4));
        }
        return maxNesting;
    }
    let open = 0;
    for (const ch of content) {
        if (ch === '{') {
            open++;
            maxNesting = Math.max(maxNesting, open);
        }
        else if (ch === '}') {
            open--;
        }
    }
    return maxNesting;
}
|
|
352
|
+
/**
 * Counts assertion-like constructs in a file.
 *
 * Each pattern is counted independently and the totals are summed, so a single
 * statement such as `expect(x).toBe(1)` contributes to more than one pattern.
 *
 * @param content Full text of the file.
 * @returns Total number of pattern matches.
 */
function countAssertions(content) {
    const assertionShapes = [
        /\bexpect\s*\(/g,
        /\bassert\w*\s*[.(]/g,
        /\bshould\./g,
        /\.to(Be|Equal|Have|Throw|Match|Include|Contain)/g,
    ];
    return assertionShapes.reduce((total, shape) => {
        const hits = content.match(shape);
        return hits ? total + hits.length : total;
    }, 0);
}
|
|
367
|
+
/**
 * Decides whether a file looks like a test file, from its path or its content.
 *
 * Path signals: `.test.` / `.spec.` / `_test.` in the name, a `test_*.py` file
 * name, or a directory segment that is `__tests__`, `test`, `tests` (optionally
 * prefixed, e.g. `unit-tests`). Content signals: a call to `describe`, `it` or
 * `test` that is not a method or part of a longer identifier, `def test_`, or
 * `@pytest`.
 *
 * @param filePath Path of the file (either separator style).
 * @param content  Full text of the file.
 * @returns `true` when any signal matches.
 */
function isTestFile(filePath, content) {
    const normalized = filePath.replace(/\\/g, '/');
    // `_test.` is matched without a leading dot so Go's `foo_test.go` qualifies.
    if (/\.(?:test|spec)\.|_test\./.test(normalized))
        return true;
    if (/(?:^|\/)test_[^/]*\.py$/.test(normalized))
        return true;
    // Whole directory segments only, so `latest/` or `contest/` do not qualify.
    if (/(?:^|\/)(?:[^/]*[-_.])?(?:__tests__|tests?)\//.test(normalized))
        return true;
    // The lookbehind rejects `emit(`, `init(`, `regex.test(` and similar.
    if (/(?<![.\w$])(?:describe|it|test)(?:\.(?:only|skip|each|todo))?\s*\(/.test(content))
        return true;
    return content.includes('def test_') || content.includes('@pytest');
}
|
|
378
|
+
// ── Go-specific extractors ──
|
|
379
|
+
/**
 * Finds top-level Go struct declarations (`type Name struct {`) and summarises them.
 *
 * Fields are counted per declared name (`a, b int` is two fields). A line that
 * consists of a single, optionally pointer- or package-qualified type name
 * (`Base`, `*Base`, `sync.Mutex`) is recorded as an embedded type. Methods are
 * collected from the whole file by matching receivers of the struct's type.
 *
 * @param content Full text of the Go file.
 * @returns One entry per struct with `name`, 1-based `lineStart`/`lineEnd`,
 *          `lineCount`, `fieldCount` (fields plus embeds), `embeds`, `methods`
 *          and `methodCount`.
 */
function extractGoStructs(content) {
    const lines = content.split('\n');
    const structs = [];
    for (let i = 0; i < lines.length; i++) {
        const header = lines[i].match(/^type\s+(\w+)\s+struct\s*\{/);
        if (!header)
            continue;
        const name = header[1];
        const lineStart = i + 1;
        let lineEnd = lineStart;
        let depth = 0;
        let opened = false;
        let namedFields = 0;
        const embeds = [];
        for (let j = i; j < lines.length; j++) {
            for (const ch of lines[j]) {
                if (ch === '{') {
                    depth++;
                    opened = true;
                }
                else if (ch === '}') {
                    depth--;
                }
            }
            if (j > i && depth > 0) {
                // Drop a trailing line comment before classifying the declaration.
                const decl = lines[j].replace(/\/\/.*$/, '').trim();
                // Lines starting with a brace open or close a nested literal/struct.
                if (decl && !decl.startsWith('{') && !decl.startsWith('}')) {
                    if (/^\*?[\w.]+$/.test(decl)) {
                        embeds.push(decl.replace(/^\*/, ''));
                    }
                    else if (/\s/.test(decl)) {
                        const names = decl.match(/^(\w+(?:\s*,\s*\w+)*)\s+\S/);
                        namedFields += names ? names[1].split(',').length : 1;
                    }
                }
            }
            if (opened && depth <= 0) {
                lineEnd = j + 1;
                break;
            }
        }
        // Methods whose receiver is this struct: named or unnamed receiver,
        // value or pointer, optionally with type parameters.
        const methods = [];
        const methodPattern = new RegExp(`^func\\s*\\(\\s*(?:\\w+\\s+)?\\*?${name}(?:\\[[^\\]]*\\])?\\s*\\)\\s+(\\w+)\\s*\\(`, 'gm');
        let methodMatch;
        while ((methodMatch = methodPattern.exec(content)) !== null) {
            methods.push(methodMatch[1]);
        }
        structs.push({
            name,
            lineStart,
            lineEnd,
            fieldCount: namedFields + embeds.length,
            methodCount: methods.length,
            methods,
            lineCount: lineEnd - lineStart + 1,
            embeds,
        });
    }
    return structs;
}
|
|
440
|
+
/**
 * Finds top-level Go interface declarations (`type Name interface {`) and lists
 * the method names declared on the lines inside their braces.
 *
 * @param content Full text of the Go file.
 * @returns One entry per interface: `{ name, lineStart, methodCount, methods }`,
 *          with a 1-based `lineStart`.
 */
function extractGoInterfaces(content) {
    const sourceLines = content.split('\n');
    const found = [];
    sourceLines.forEach((headerLine, headerIndex) => {
        const header = /^type\s+(\w+)\s+interface\s*\{/.exec(headerLine);
        if (header === null) {
            return;
        }
        const methodNames = [];
        let depth = 0;
        let opened = false;
        let cursor = headerIndex;
        while (cursor < sourceLines.length) {
            const current = sourceLines[cursor];
            for (const symbol of current) {
                if (symbol === '{') {
                    depth += 1;
                    opened = true;
                }
                if (symbol === '}') {
                    depth -= 1;
                }
            }
            // Lines after the header that are still inside the braces may declare a method.
            if (cursor > headerIndex && depth > 0) {
                const signature = /^(\w+)\s*\(/.exec(current.trim());
                if (signature !== null) {
                    methodNames.push(signature[1]);
                }
            }
            if (opened && depth <= 0) {
                break;
            }
            cursor += 1;
        }
        found.push({
            name: header[1],
            lineStart: headerIndex + 1,
            methodCount: methodNames.length,
            methods: methodNames,
        });
    });
    return found;
}
|
|
477
|
+
/**
 * Finds top-level Go functions and methods and summarises each one.
 *
 * Methods are named `Receiver.Method`. Named and unnamed receivers, pointer
 * receivers and type parameters (`func Map[T any](...)`) are recognised.
 * Function extent is found by naive brace counting limited to 500 lines.
 *
 * @param content Full text of the Go file.
 * @returns One entry per function with name, 1-based line range, parameters,
 *          nesting depth, and `hasReturn` / `isAsync` / `isExported` flags, where
 *          `isAsync` means the body starts a goroutine.
 */
function extractGoFunctions(content) {
    const lines = content.split('\n');
    const functions = [];
    // func [(recv [*]Type[T])] Name[T](params)
    const signature = /^func\s+(?:\(\s*(?:\w+\s+)?\*?(\w+)(?:\[[^\]]*\])?\s*\)\s+)?(\w+)\s*(?:\[[^\]]*\])?\s*\(([^)]*)\)/;
    for (let i = 0; i < lines.length; i++) {
        const match = lines[i].match(signature);
        if (!match)
            continue;
        const receiver = match[1] || '';
        const baseName = match[2];
        const lineStart = i + 1;
        let lineEnd = lineStart;
        let depth = 0;
        let opened = false;
        const stop = Math.min(lines.length, i + 500);
        for (let j = i; j < stop; j++) {
            for (const ch of lines[j]) {
                if (ch === '{') {
                    depth++;
                    opened = true;
                }
                else if (ch === '}') {
                    depth--;
                }
            }
            if (opened && depth <= 0) {
                lineEnd = j + 1;
                break;
            }
        }
        const params = (match[3] || '')
            .split(',')
            .map(p => p.trim())
            .filter(p => p.length > 0);
        const funcContent = lines.slice(i, lineEnd).join('\n');
        functions.push({
            name: receiver ? `${receiver}.${baseName}` : baseName,
            lineStart,
            lineEnd,
            lineCount: lineEnd - lineStart + 1,
            paramCount: params.length,
            params,
            maxNesting: calculateMaxNesting(funcContent, 'go'),
            hasReturn: /\breturn\b/.test(funcContent),
            // A real `go` statement: `go func...` or `go call(...)`, not the
            // letters "go " inside another word or sentence.
            isAsync: /\bgo\s+(?:func\b|[\w.]+\s*\()/.test(funcContent),
            // Go exports identifiers whose first character is an upper-case
            // letter; `_name` is not exported.
            isExported: /^\p{Lu}/u.test(baseName),
        });
    }
    return functions;
}
|
|
525
|
+
// ── General quality metrics ──
|
|
526
|
+
/**
 * Computes the percentage of lines in a file that are comment lines.
 *
 * Counted as comments: `#` lines (Python and Ruby), `//` lines (other
 * languages), and every non-blank line of a `/* ... *​/` block or of a Python
 * triple-quoted block that starts at the beginning of a line.
 *
 * @param content Full text of the file.
 * @param lang    Language label used to pick the comment syntax.
 * @returns Comment lines as a rounded percentage (0-100) of all lines.
 */
function countCommentRatio(content, lang) {
    const lines = content.split('\n');
    const hashComments = lang === 'python' || lang === 'ruby';
    let commentLines = 0;
    let blockTerminator = ''; // non-empty while inside a multi-line comment/docstring
    for (const rawLine of lines) {
        const line = rawLine.trim();
        if (blockTerminator) {
            if (line.length > 0)
                commentLines++;
            if (line.includes(blockTerminator))
                blockTerminator = '';
            continue;
        }
        if (hashComments) {
            if (line.startsWith('#')) {
                commentLines++;
            }
            else if (lang === 'python') {
                const quote = line.startsWith('"""') ? '"""' : (line.startsWith("'''") ? "'''" : '');
                if (quote) {
                    commentLines++;
                    // A docstring that is not closed on its opening line continues below.
                    if (!line.slice(quote.length).includes(quote))
                        blockTerminator = quote;
                }
            }
            continue;
        }
        if (line.startsWith('//')) {
            commentLines++;
        }
        else if (line.startsWith('/*')) {
            commentLines++;
            if (!line.slice(2).includes('*/'))
                blockTerminator = '*/';
        }
    }
    return lines.length > 0 ? Math.round((commentLines / lines.length) * 100) : 0;
}
|
|
538
|
+
/**
 * Counts numeric literals of two or more digits that are not on a short
 * allow-list of commonplace values (small integers, common HTTP status codes).
 *
 * A number is only considered when it is not adjacent to a dot or a word
 * character, which leaves out decimals and digits inside identifiers.
 *
 * @param content Full text of the file.
 * @param lang    Language label (currently not used by the heuristic).
 * @returns Number of literals found outside the allow-list.
 */
function countMagicNumbers(content, lang) {
    const commonplace = new Set(['0', '1', '-1', '2', '100', '200', '201', '204', '301', '302', '400', '401', '403', '404', '500']);
    const standaloneNumber = /(?<![.\w])\d{2,}(?![.\w])/g;
    let tally = 0;
    let hit = standaloneNumber.exec(content);
    while (hit !== null) {
        if (!commonplace.has(hit[0])) {
            tally += 1;
        }
        hit = standaloneNumber.exec(content);
    }
    return tally;
}
|
|
544
|
+
/**
 * Serializes facts into a compact string for LLM prompts.
 *
 * Files are rendered in the given order and joined with blank lines. Rendering
 * stops before the first file that would push the result past `maxChars`, so
 * the returned string never exceeds the budget. If even the first file does not
 * fit, it is cut at the budget (at a line boundary when one is available).
 *
 * @param facts    File facts as produced by `extractFacts`.
 * @param maxChars Maximum length of the returned string (default 8000).
 * @returns The prompt text; an empty string when `facts` is empty.
 */
export function factsToPromptString(facts, maxChars = 8000) {
    const SEPARATOR = '\n\n';

    // Renders one file's facts as a header line followed by indented detail lines.
    const describeFile = (f) => {
        const filePart = [`FILE: ${f.path} (${f.language}, ${f.lineCount} lines)`];
        // Quality metrics (only the noteworthy ones)
        const metrics = [];
        if (f.commentRatio !== undefined && f.commentRatio < 5 && f.lineCount > 50)
            metrics.push(`comments:${f.commentRatio}%`);
        if (f.magicNumbers && f.magicNumbers > 3)
            metrics.push(`magic_numbers:${f.magicNumbers}`);
        if (f.todoCount && f.todoCount > 0)
            metrics.push(`todos:${f.todoCount}`);
        if (metrics.length > 0)
            filePart.push(` METRICS: ${metrics.join(', ')}`);
        // Classes (JS/TS/Python/Java/C#)
        for (const cls of f.classes) {
            filePart.push(` CLASS ${cls.name} (${cls.lineCount} lines, ${cls.methodCount} methods: ${cls.methods.join(', ')})`);
            if (cls.dependencies.length > 0) {
                filePart.push(` deps: ${cls.dependencies.join(', ')}`);
            }
        }
        // Go structs
        if (f.structs) {
            for (const s of f.structs) {
                const embedStr = s.embeds.length > 0 ? `, embeds: ${s.embeds.join(', ')}` : '';
                filePart.push(` STRUCT ${s.name} (${s.lineCount} lines, ${s.fieldCount} fields, ${s.methodCount} methods: ${s.methods.join(', ')}${embedStr})`);
            }
        }
        // Go interfaces
        if (f.interfaces) {
            for (const iface of f.interfaces) {
                filePart.push(` INTERFACE ${iface.name} (${iface.methodCount} methods: ${iface.methods.join(', ')})`);
            }
        }
        // Go concurrency signals
        if (f.language === 'go') {
            const goSignals = [];
            if (f.goroutines && f.goroutines > 0)
                goSignals.push(`goroutines:${f.goroutines}`);
            if (f.channels && f.channels > 0)
                goSignals.push(`channels:${f.channels}`);
            if (f.defers && f.defers > 0)
                goSignals.push(`defers:${f.defers}`);
            if (f.mutexes && f.mutexes > 0)
                goSignals.push(`mutexes:${f.mutexes}`);
            if (goSignals.length > 0)
                filePart.push(` CONCURRENCY: ${goSignals.join(', ')}`);
        }
        // Functions (all languages)
        for (const fn of f.functions) {
            if (fn.lineCount < 8)
                continue; // Skip tiny functions
            const flags = [
                fn.isAsync ? 'async' : '',
                fn.isExported ? 'exported' : '',
                fn.maxNesting > 3 ? `nesting:${fn.maxNesting}` : '',
                fn.paramCount > 4 ? `params:${fn.paramCount}` : '',
            ].filter(Boolean).join(', ');
            filePart.push(` FN ${fn.name}(${fn.params.join(', ')}) [${fn.lineCount} lines${flags ? ', ' + flags : ''}]`);
        }
        if (f.errorHandling.length > 0) {
            const unique = [...new Set(f.errorHandling.map(e => e.strategy))];
            filePart.push(` ERROR_HANDLING: ${unique.join(', ')} (${f.errorHandling.filter(e => e.isEmpty).length} empty catches)`);
        }
        if (f.hasTests) {
            filePart.push(` TESTS: ${f.testAssertions} assertions`);
        }
        if (f.imports.length > 0) {
            filePart.push(` IMPORTS: ${f.imports.length} (${f.imports.slice(0, 8).join(', ')}${f.imports.length > 8 ? '...' : ''})`);
        }
        return filePart.join('\n');
    };

    const parts = [];
    let used = 0; // running length of parts.join(SEPARATOR), kept incrementally
    for (const f of facts) {
        const section = describeFile(f);
        const cost = section.length + (parts.length > 0 ? SEPARATOR.length : 0);
        if (used + cost > maxChars) {
            if (parts.length === 0) {
                // Nothing fits: keep as much of the first file as the budget allows.
                const clipped = section.slice(0, Math.max(0, maxChars));
                const lastBreak = clipped.lastIndexOf('\n');
                parts.push(lastBreak > 0 ? clipped.slice(0, lastBreak) : clipped);
            }
            break;
        }
        parts.push(section);
        used += cost;
    }
    return parts.join(SEPARATOR);
}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Deep Analysis Pipeline — AST → LLM → Verify
|
|
3
|
+
*
|
|
4
|
+
* Step 1: AST extracts structured facts from code
|
|
5
|
+
* Step 2: LLM interprets facts and identifies quality issues
|
|
6
|
+
* Step 3: AST verifies LLM isn't hallucinating
|
|
7
|
+
*
|
|
8
|
+
* Neither AST nor LLM works alone. Together they're accurate.
|
|
9
|
+
*/
|
|
10
|
+
export { extractFacts, factsToPromptString } from './fact-extractor.js';
|
|
11
|
+
export type { FileFacts, ClassFact, FunctionFact, ErrorHandlingFact, StructFact, InterfaceFact } from './fact-extractor.js';
|
|
12
|
+
export { buildAnalysisPrompt, buildCrossFilePrompt, chunkFacts, DEEP_SYSTEM_PROMPT } from './prompts.js';
|
|
13
|
+
export { verifyFindings } from './verifier.js';
|
|
14
|
+
export type { VerifiedFinding } from './verifier.js';
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Deep Analysis Pipeline — AST → LLM → Verify
|
|
3
|
+
*
|
|
4
|
+
* Step 1: AST extracts structured facts from code
|
|
5
|
+
* Step 2: LLM interprets facts and identifies quality issues
|
|
6
|
+
* Step 3: AST verifies LLM isn't hallucinating
|
|
7
|
+
*
|
|
8
|
+
* Neither AST nor LLM works alone. Together they're accurate.
|
|
9
|
+
*/
|
|
10
|
+
export { extractFacts, factsToPromptString } from './fact-extractor.js';
|
|
11
|
+
export { buildAnalysisPrompt, buildCrossFilePrompt, chunkFacts, DEEP_SYSTEM_PROMPT } from './prompts.js';
|
|
12
|
+
export { verifyFindings } from './verifier.js';
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Prompt Engineering — Step 2 of the three-step pipeline.
|
|
3
|
+
* Constructs structured prompts that ask the LLM to interpret AST-extracted facts.
|
|
4
|
+
*/
|
|
5
|
+
import type { FileFacts } from './fact-extractor.js';
|
|
6
|
+
/**
|
|
7
|
+
* System prompt that defines the LLM's role and output format.
|
|
8
|
+
*/
|
|
9
|
+
export declare const DEEP_SYSTEM_PROMPT = "You are an expert code reviewer and software architect performing deep quality analysis. You receive AST-extracted facts about a codebase and must identify quality issues, anti-patterns, and best practice violations.\n\nIMPORTANT RULES:\n1. ONLY report issues you can verify from the provided facts. Do NOT hallucinate files, classes, or functions.\n2. Every finding MUST reference a real file and entity from the facts.\n3. Be specific: include file paths, struct/class names, function names, line counts.\n4. Assign confidence scores honestly: 0.9+ only for certain issues, 0.5-0.7 for probable issues.\n5. Respond ONLY with valid JSON matching the schema below. No explanation text outside JSON.\n6. AIM for 5-15 findings per batch. Be thorough \u2014 report ALL issues you can identify, not just the most obvious ones.\n7. For Go code: treat structs as classes, receiver methods as class methods. Check Go idioms specifically.\n\nOUTPUT SCHEMA:\n{\n \"findings\": [\n {\n \"category\": \"string (see CATEGORIES below)\",\n \"severity\": \"string (critical|high|medium|low|info)\",\n \"file\": \"string (exact file path from facts)\",\n \"line\": \"number or null\",\n \"description\": \"string (what the issue is, referencing specific entities)\",\n \"suggestion\": \"string (actionable fix recommendation)\",\n \"confidence\": \"number 0.0-1.0\"\n }\n ]\n}\n\nCATEGORIES:\n SOLID Principles:\n srp_violation - Single file/struct/class handles multiple unrelated responsibilities\n ocp_violation - Code requires modification (not extension) for new behavior\n lsp_violation - Subtypes break substitutability contracts\n isp_violation - Interface has too many methods forcing unnecessary implementations\n dip_violation - High-level modules depend directly on low-level implementations\n\n Design Patterns & Anti-patterns:\n god_class - Class/struct with too many fields, methods, or responsibilities (>8 methods or >300 lines)\n god_function - Function 
exceeding 50 lines or doing too many things\n feature_envy - Function/method uses another module's data more than its own\n shotgun_surgery - A single change requires modifying many files\n long_params - Function with 4+ parameters (use struct/options pattern)\n data_clump - Same group of fields/params repeated across multiple structs/functions\n inappropriate_intimacy - Two modules too tightly coupled, accessing each other's internals\n primitive_obsession - Using primitives instead of domain types (string for email, int for ID)\n lazy_class - Struct/class that does too little to justify its existence\n speculative_generality - Over-engineered abstractions not justified by current usage\n refused_bequest - Subtype/implementation ignores inherited behavior\n\n DRY & Duplication:\n dry_violation - Duplicated logic across files that should be extracted\n copy_paste_code - Nearly identical functions/methods in different files\n\n Error Handling:\n error_inconsistency - Mixed error handling strategies in same package/module\n empty_catch - Empty catch/except blocks that silently swallow errors\n error_swallowing - Errors logged but not propagated when they should be\n missing_error_check - Return values (especially errors) not checked\n panic_in_library - Library code using panic/os.Exit instead of returning errors\n\n Concurrency (Go/Rust/async languages):\n race_condition - Shared mutable state accessed without synchronization\n goroutine_leak - Goroutines spawned without cancellation/context mechanism\n missing_context - Functions that should accept context.Context but don't\n channel_misuse - Unbuffered channels that could deadlock, or missing close()\n mutex_scope - Mutex held too long or across I/O operations\n\n Testing:\n test_quality - Insufficient assertions, no edge cases, weak coverage\n test_coupling - Tests tightly coupled to implementation details\n missing_test - Complex public function/method with no corresponding test\n test_duplication - Multiple 
tests verifying the same behavior redundantly\n\n Architecture:\n architecture - Layer violations, wrong dependency direction\n circular_dependency - Modules that import each other\n package_cohesion - Package/directory contains unrelated concerns\n api_design - Exported API is confusing, inconsistent, or poorly structured\n missing_abstraction - Direct usage where an interface/abstraction would improve design\n\n Language Idioms:\n language_idiom - Language-specific anti-patterns\n naming_convention - Names don't follow language conventions (Go: MixedCaps, Python: snake_case)\n dead_code - Unreferenced exports, unused functions\n magic_number - Numeric literals without named constants\n\n Performance & Security:\n performance - Obvious performance anti-patterns (N+1 queries, unbounded allocations)\n resource_leak - Opened resources (files, connections, readers) not properly closed\n hardcoded_config - Configuration values hardcoded instead of externalized\n\n Code Smells:\n code_smell - General smell with refactoring suggestion\n complex_conditional - Deeply nested or overly complex conditional logic\n long_file - File exceeds reasonable length for its responsibility";
|
|
10
|
+
/**
|
|
11
|
+
* Build the analysis prompt for a batch of file facts.
|
|
12
|
+
*/
|
|
13
|
+
export declare function buildAnalysisPrompt(factsStr: string, checks?: Record<string, boolean>): string;
|
|
14
|
+
/**
|
|
15
|
+
* Build a cross-file analysis prompt that looks at patterns across the whole codebase.
|
|
16
|
+
*/
|
|
17
|
+
export declare function buildCrossFilePrompt(allFacts: FileFacts[]): string;
|
|
18
|
+
/**
|
|
19
|
+
* Chunk file facts into batches that fit within a character budget (`maxCharsPerChunk`), which stands in for the LLM's token limit.
|
|
20
|
+
* Groups related files (same directory) together.
|
|
21
|
+
*/
|
|
22
|
+
export declare function chunkFacts(facts: FileFacts[], maxCharsPerChunk?: number): FileFacts[][];
|