@rigour-labs/core 3.0.6 → 4.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/deep/fact-extractor.d.ts +80 -0
- package/dist/deep/fact-extractor.js +626 -0
- package/dist/deep/fact-extractor.test.d.ts +1 -0
- package/dist/deep/fact-extractor.test.js +547 -0
- package/dist/deep/index.d.ts +14 -0
- package/dist/deep/index.js +12 -0
- package/dist/deep/prompts.d.ts +22 -0
- package/dist/deep/prompts.js +374 -0
- package/dist/deep/prompts.test.d.ts +1 -0
- package/dist/deep/prompts.test.js +220 -0
- package/dist/deep/verifier.d.ts +16 -0
- package/dist/deep/verifier.js +388 -0
- package/dist/deep/verifier.test.d.ts +1 -0
- package/dist/deep/verifier.test.js +514 -0
- package/dist/gates/deep-analysis.d.ts +28 -0
- package/dist/gates/deep-analysis.js +302 -0
- package/dist/gates/runner.d.ts +4 -2
- package/dist/gates/runner.js +46 -1
- package/dist/index.d.ts +10 -0
- package/dist/index.js +12 -2
- package/dist/inference/cloud-provider.d.ts +34 -0
- package/dist/inference/cloud-provider.js +126 -0
- package/dist/inference/index.d.ts +17 -0
- package/dist/inference/index.js +23 -0
- package/dist/inference/model-manager.d.ts +26 -0
- package/dist/inference/model-manager.js +106 -0
- package/dist/inference/sidecar-provider.d.ts +15 -0
- package/dist/inference/sidecar-provider.js +153 -0
- package/dist/inference/types.d.ts +77 -0
- package/dist/inference/types.js +19 -0
- package/dist/settings.d.ts +104 -0
- package/dist/settings.js +186 -0
- package/dist/storage/db.d.ts +16 -0
- package/dist/storage/db.js +132 -0
- package/dist/storage/findings.d.ts +14 -0
- package/dist/storage/findings.js +38 -0
- package/dist/storage/index.d.ts +9 -0
- package/dist/storage/index.js +8 -0
- package/dist/storage/patterns.d.ts +35 -0
- package/dist/storage/patterns.js +62 -0
- package/dist/storage/scans.d.ts +42 -0
- package/dist/storage/scans.js +55 -0
- package/dist/templates/universal-config.js +19 -0
- package/dist/types/index.d.ts +438 -15
- package/dist/types/index.js +41 -1
- package/package.json +6 -2
|
@@ -0,0 +1,626 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* AST Fact Extractor — Step 1 of the three-step pipeline.
|
|
3
|
+
* Extracts structured facts from code files using lightweight regex heuristics (no tree-sitter AST is built).
|
|
4
|
+
* These facts ground the LLM analysis and prevent hallucination.
|
|
5
|
+
*/
|
|
6
|
+
import fs from 'fs-extra';
|
|
7
|
+
import path from 'path';
|
|
8
|
+
import { globby } from 'globby';
|
|
9
|
+
/**
 * Collects one facts record per supported source file found under `cwd`.
 *
 * Extraction is regex-based, so no tree-sitter grammar has to be loaded.
 * Files are read one after another, in the order the glob returns them.
 *
 * @param {string} cwd - Directory to scan; glob results are resolved against it.
 * @param {string[]} [ignore] - Extra glob patterns to exclude, on top of the built-in ones.
 * @returns {Promise<object[]>} Facts records for every file `extractFileFacts` accepted.
 */
export async function extractFacts(cwd, ignore) {
    const sourceGlobs = ['**/*.{ts,js,tsx,jsx,py,go,rs,cs,java,rb,kt}'];
    // Caller-supplied patterns come first, followed by the always-excluded locations.
    const excludedGlobs = [
        ...(ignore || []),
        '**/node_modules/**', '**/dist/**', '**/build/**',
        '**/.git/**', '**/vendor/**', '**/__pycache__/**',
        '**/*.min.js', '**/*.bundle.js',
    ];
    const relativePaths = await globby(sourceGlobs, {
        cwd,
        ignore: excludedGlobs,
        followSymbolicLinks: false,
    });
    const collected = [];
    for (const relativePath of relativePaths) {
        let fileFacts = null;
        try {
            const text = await fs.readFile(path.join(cwd, relativePath), 'utf-8');
            fileFacts = extractFileFacts(relativePath, text);
        }
        catch {
            // Best effort: a file that cannot be read or processed is skipped.
            continue;
        }
        if (fileFacts) {
            collected.push(fileFacts);
        }
    }
    return collected;
}
|
|
37
|
+
/**
 * Builds the facts record for a single file.
 *
 * @param {string} filePath - Path of the file (stored as-is in the record).
 * @param {string} content - Full text of the file.
 * @returns {object|null} The facts record, or `null` when the file has fewer
 *   than three lines and is considered trivial.
 */
function extractFileFacts(filePath, content) {
    const lines = content.split('\n');
    if (lines.length < 3)
        return null; // Skip trivial files
    const language = detectLanguage(filePath);
    const isGo = language === 'go';
    const facts = {
        path: filePath,
        language,
        lineCount: lines.length,
        classes: extractClasses(content, language),
        // Go has its own extractor (receiver-aware); the generic one is skipped
        // for Go instead of being run and then overwritten.
        functions: isGo ? extractGoFunctions(content) : extractFunctions(content, language),
        imports: extractImports(content, language),
        exports: extractExports(content, language),
        errorHandling: extractErrorHandling(content, language),
        testAssertions: countAssertions(content),
        hasTests: isTestFile(filePath, content),
    };
    // Go-specific extraction
    if (isGo) {
        facts.structs = extractGoStructs(content);
        facts.interfaces = extractGoInterfaces(content);
        facts.goroutines = (content.match(/\bgo\s+\w+/g) || []).length;
        // `\bchan\b` (not `ch?an`): the optional "h" also matched the word "can"
        // in comments and strings, inflating the channel count.
        facts.channels = (content.match(/\bchan\b|make\s*\(\s*chan\b|<-\s*\w+|\w+\s*<-/g) || []).length;
        facts.defers = (content.match(/\bdefer\b/g) || []).length;
        facts.mutexes = (content.match(/sync\.(?:Mutex|RWMutex|WaitGroup|Once|Pool|Map)|\.Lock\(\)|\.Unlock\(\)|\.RLock\(\)|\.RUnlock\(\)/g) || []).length;
    }
    // General quality metrics (all languages)
    facts.commentRatio = countCommentRatio(content, language);
    facts.magicNumbers = countMagicNumbers(content, language);
    facts.todoCount = (content.match(/\b(?:TODO|FIXME|HACK|XXX|WORKAROUND)\b/gi) || []).length;
    return facts;
}
|
|
74
|
+
/**
 * Maps a file path to a language label using its lower-cased extension.
 *
 * @param {string} filePath - Path or file name.
 * @returns {string} The language label, or `'unknown'` for any other extension.
 */
function detectLanguage(filePath) {
    const languageByExtension = new Map([
        ['.ts', 'typescript'],
        ['.tsx', 'typescript'],
        ['.js', 'javascript'],
        ['.jsx', 'javascript'],
        ['.py', 'python'],
        ['.go', 'go'],
        ['.rs', 'rust'],
        ['.cs', 'csharp'],
        ['.java', 'java'],
        ['.rb', 'ruby'],
        ['.kt', 'kotlin'],
    ]);
    const extension = path.extname(filePath).toLowerCase();
    if (languageByExtension.has(extension)) {
        return languageByExtension.get(extension);
    }
    return 'unknown';
}
|
|
89
|
+
/**
 * Finds class declarations and summarises each one.
 *
 * Detection is line/regex based: a class starts on any line matching the class
 * pattern and ends at the matching closing brace (or, for Python, at the last
 * non-blank line indented deeper than the `class` line).
 *
 * Method detection for brace languages is a heuristic: any `name(` inside the
 * class text is a candidate. Control-flow and operator keywords are filtered
 * out, but ordinary call expressions inside method bodies can still be counted.
 *
 * @param {string} content - Full file text.
 * @param {string} lang - Language label; only `'python'` changes the rules.
 * @returns {object[]} One record per class: name, lineStart/lineEnd (1-based),
 *   lineCount, methods, publicMethods, methodCount and dependencies.
 */
function extractClasses(content, lang) {
    // Words that may precede "(" inside a class body without naming a method.
    const NON_METHOD_WORDS = new Set([
        'if', 'else', 'for', 'foreach', 'while', 'do', 'switch', 'case', 'catch', 'try',
        'return', 'throw', 'new', 'await', 'yield', 'typeof', 'instanceof', 'delete', 'void',
        'function', 'super', 'this', 'using', 'lock', 'synchronized',
    ]);
    const isPython = lang === 'python';
    const classes = [];
    const lines = content.split('\n');
    // Class pattern: works for TS/JS/Java/C#/Python
    const classPattern = isPython
        ? /^\s*class\s+(\w+)/
        : /(?:export\s+)?(?:abstract\s+)?class\s+(\w+)/;
    for (let i = 0; i < lines.length; i++) {
        const match = lines[i].match(classPattern);
        if (!match)
            continue;
        const name = match[1];
        const lineStart = i + 1;
        // Find class end by brace matching (or indentation for Python)
        let lineEnd = lineStart;
        if (isPython) {
            const baseIndent = lines[i].search(/\S/);
            for (let j = i + 1; j < lines.length; j++) {
                const indent = lines[j].search(/\S/);
                if (indent < 0)
                    continue; // Blank line: neither ends nor extends the class
                if (indent <= baseIndent)
                    break;
                lineEnd = j + 1;
            }
        }
        else {
            let braces = 0;
            let started = false;
            for (let j = i; j < lines.length; j++) {
                for (const char of lines[j]) {
                    if (char === '{') {
                        braces++;
                        started = true;
                    }
                    if (char === '}')
                        braces--;
                }
                if (started && braces <= 0) {
                    lineEnd = j + 1;
                    break;
                }
            }
        }
        // Extract methods within the class
        const classContent = lines.slice(i, lineEnd).join('\n');
        const methodPattern = isPython
            ? /^\s+(?:async\s+)?def\s+(\w+)/gm
            : /(?:public|private|protected|static|async|get|set)?\s*(?:async\s+)?(\w+)\s*\(/gm;
        const methods = [];
        const publicMethods = [];
        let methodMatch;
        while ((methodMatch = methodPattern.exec(classContent)) !== null) {
            const methodName = methodMatch[1];
            if (methodName === name || methodName === 'constructor')
                continue; // Skip constructor
            if (!isPython && NON_METHOD_WORDS.has(methodName))
                continue; // `if (`, `for (`, `super(` ... are not methods
            methods.push(methodName);
            if (!methodMatch[0].includes('private') && !methodMatch[0].includes('protected')) {
                // Python has no visibility keywords; a leading underscore marks non-public
                if (!isPython || !methodName.startsWith('_')) {
                    publicMethods.push(methodName);
                }
            }
        }
        // Extract dependencies (TypeScript-style `private foo: Type` declarations)
        const depPattern = /(?:private|readonly|public)\s+(\w+):\s*(\w+)/g;
        const deps = [];
        let depMatch;
        while ((depMatch = depPattern.exec(classContent)) !== null) {
            deps.push(depMatch[2]);
        }
        classes.push({
            name,
            lineStart,
            lineEnd,
            methodCount: methods.length,
            methods,
            publicMethods,
            lineCount: lineEnd - lineStart + 1,
            dependencies: deps,
        });
    }
    return classes;
}
|
|
170
|
+
/**
 * Finds function definitions line by line and records size/shape facts.
 *
 * For Python only top-level `def` / `async def` lines (no leading indent) are
 * matched. For every other language the JS/TS shapes are tried in order:
 * `function name(...)`, `const name = (...) =>`, `const name = function (...)`
 * and `const name = param =>`. At most one function is recorded per line.
 *
 * @param {string} content - Full file text.
 * @param {string} lang - Language label; only `'python'` changes the rules.
 * @returns {object[]} One record per function: name, lineStart/lineEnd
 *   (1-based), lineCount, params, paramCount, maxNesting, hasReturn, isAsync
 *   and isExported.
 */
function extractFunctions(content, lang) {
    const functions = [];
    const lines = content.split('\n');
    const isPython = lang === 'python';
    // Capture group 1 is always the name, group 2 (when present) the raw parameter list.
    const patterns = isPython
        ? [/^(?:async\s+)?def\s+(\w+)\s*\(([^)]*)\)/]
        : [
            /(?:export\s+)?(?:async\s+)?function\s+(\w+)\s*\(([^)]*)\)/,
            /(?:export\s+)?(?:const|let)\s+(\w+)\s*=\s*(?:async\s+)?\(([^)]*)\)\s*(?::\s*\w+(?:<[^>]+>)?)?\s*=>/,
            /(?:export\s+)?(?:const|let)\s+(\w+)\s*=\s*(?:async\s+)?function\b(?:\s*\*?\s*\w*\s*\(([^)]*)\))?/,
            // Single-parameter arrow written without parentheses: `const f = x => ...`
            /(?:export\s+)?(?:const|let)\s+(\w+)\s*=\s*(?:async\s+)?([\w$]+)\s*=>/,
        ];
    for (let i = 0; i < lines.length; i++) {
        for (const pattern of patterns) {
            const match = lines[i].match(pattern);
            if (!match)
                continue;
            const name = match[1];
            if (!name || name === 'if' || name === 'for' || name === 'while')
                continue;
            const lineStart = i + 1;
            let lineEnd = lineStart;
            // Find function end
            if (isPython) {
                const baseIndent = lines[i].search(/\S/);
                for (let j = i + 1; j < lines.length; j++) {
                    const indent = lines[j].search(/\S/);
                    if (indent < 0)
                        continue; // Blank line: neither ends nor extends the body
                    if (indent <= baseIndent)
                        break;
                    lineEnd = j + 1;
                }
            }
            else {
                let braces = 0;
                let started = false;
                // Look at most 500 lines ahead for the closing brace
                for (let j = i; j < Math.min(lines.length, i + 500); j++) {
                    for (const char of lines[j]) {
                        if (char === '{') {
                            braces++;
                            started = true;
                        }
                        if (char === '}')
                            braces--;
                    }
                    if (started && braces <= 0) {
                        lineEnd = j + 1;
                        break;
                    }
                    // Arrow functions without braces
                    if (!started && lines[j].includes('=>') && !lines[j].includes('{')) {
                        lineEnd = j + 1;
                        started = true;
                        break;
                    }
                }
            }
            // Extract params
            const paramStr = match[2] || '';
            const params = paramStr.split(',').map(p => p.trim()).filter(p => p.length > 0);
            // Nesting depth
            const funcContent = lines.slice(i, lineEnd).join('\n');
            const maxNesting = calculateMaxNesting(funcContent, lang);
            functions.push({
                name,
                lineStart,
                lineEnd,
                lineCount: lineEnd - lineStart + 1,
                paramCount: params.length,
                params,
                maxNesting,
                hasReturn: funcContent.includes('return ') || funcContent.includes('return;'),
                // Whole-word checks so identifiers such as `exporter` or `asyncPool` do not count
                isAsync: /\basync\b/.test(lines[i]),
                isExported: /\bexport\b/.test(lines[i]),
            });
            break; // One match per line
        }
    }
    return functions;
}
|
|
247
|
+
/**
 * Lists the modules a file imports.
 *
 * Python: `from X import ...` yields `X`; `import a, b as c` yields `a` and `b`
 * (aliases and trailing comments are dropped). Only statements that start at
 * column 0 are seen.
 *
 * Other languages (JS/TS syntax): `import ... from 'x'` (including clauses
 * that span several lines), side-effect imports `import 'x'`, and
 * `require('x')` calls, reported in that order.
 *
 * @param {string} content - Full file text.
 * @param {string} lang - Language label; only `'python'` changes the rules.
 * @returns {string[]} Module specifiers in the order described above.
 */
function extractImports(content, lang) {
    const imports = [];
    let match;
    if (lang === 'python') {
        const pattern = /^(?:from\s+(\S+)\s+)?import\s+(.+)$/gm;
        while ((match = pattern.exec(content)) !== null) {
            if (match[1]) {
                imports.push(match[1]);
                continue;
            }
            // `import os, sys as system  # note` -> os, sys
            const withoutComment = match[2].replace(/#.*$/, '');
            for (const entry of withoutComment.split(',')) {
                const moduleName = entry.trim().split(/\s+/)[0];
                if (moduleName)
                    imports.push(moduleName);
            }
        }
        return imports;
    }
    // The import clause may only contain identifiers, `*`, braces, commas and
    // whitespace, so it can span lines without running past a string literal.
    const fromPattern = /\bimport\s+[\w$*{},\s]+?from\s*['"]([^'"]*)['"]/g;
    while ((match = fromPattern.exec(content)) !== null) {
        imports.push(match[1]);
    }
    const sideEffectPattern = /\bimport\s*['"]([^'"]+)['"]/g;
    while ((match = sideEffectPattern.exec(content)) !== null) {
        imports.push(match[1]);
    }
    const reqPattern = /require\s*\(\s*['"](.*?)['"]\s*\)/g;
    while ((match = reqPattern.exec(content)) !== null) {
        imports.push(match[1]);
    }
    return imports;
}
|
|
269
|
+
/**
 * Lists the names a TypeScript/JavaScript file exports through declaration
 * exports (`export class X`, `export async function f`, `export const y`, ...).
 *
 * Export lists (`export { a, b }`), re-exports and anonymous default exports
 * are not covered. Every other language yields an empty list.
 *
 * @param {string} content - Full file text.
 * @param {string} lang - Language label.
 * @returns {string[]} Exported identifiers in source order.
 */
function extractExports(content, lang) {
    const exports = [];
    if (lang !== 'typescript' && lang !== 'javascript') {
        return exports;
    }
    // Optional modifiers (`default`, `declare`, `abstract`, `async`) may precede
    // the declaration keyword; generator functions are written `function*`.
    const pattern = /export\s+(?:default\s+)?(?:declare\s+)?(?:abstract\s+)?(?:async\s+)?(?:function\s*\*\s*|(?:const\s+enum|class|function|const|let|var|interface|type|enum)\s+)(\w+)/g;
    let match;
    while ((match = pattern.exec(content)) !== null) {
        exports.push(match[1]);
    }
    return exports;
}
|
|
280
|
+
/**
 * Locates error handlers and classifies what each one does with the error.
 *
 * Two handler kinds are reported:
 *  - `try-catch`: a `try {` (or Python `try:`) paired with the first
 *    `catch` / `except` found on the same line or within the following 49
 *    lines. `catch {` without a binding is recognised, and `.catch(` method
 *    calls are not mistaken for a catch clause.
 *  - `promise-catch`: a `.catch(` call.
 *
 * The strategy is derived from the handler's own body (brace-matched, or
 * indentation-based for Python) with comments removed:
 * `ignore` (empty / comment-only / Python `pass`), `log`, `throw`, `return`,
 * otherwise `custom`.
 *
 * Nested `try` blocks are still a heuristic: an outer `try` is paired with the
 * nearest following catch clause, which may belong to an inner `try`.
 *
 * @param {string} content - Full file text.
 * @param {string} lang - Language label; `'python'` enables `try:` / `except`.
 * @returns {object[]} Records of shape `{ type, lineStart, isEmpty, strategy }`
 *   where `lineStart` is the 1-based line of the `try` or `.catch(`.
 */
function extractErrorHandling(content, lang) {
    const SEARCH_WINDOW = 50; // lines (from the `try`) searched for its handler
    const BODY_WINDOW = 200; // cap on lines read for one handler body
    const isPython = lang === 'python';
    const tryBrace = /\btry\s*\{/;
    const tryPython = /^\s*try\s*:/;
    // `catch (e)` or bare `catch {`; the lookbehind rejects promise `.catch(`
    const catchClause = /(?<!\.)\bcatch\s*(?:\([^)]*\)|(?=\{))/;
    const exceptClause = /^\s*except\b/;
    const promiseCatchCall = /\.catch\s*\(/;
    // `.catch(() => {})`, `.catch((e) => {})`, `.catch(e => {})`, `.catch(function (e) {})`
    const emptyPromiseHandler = /^\.catch\s*\(\s*(?:(?:async\s+)?(?:\([^)]*\)|[\w$]+)\s*=>|(?:async\s+)?function\s*[\w$]*\s*\([^)]*\))\s*\{\s*\}\s*\)/;
    const handlers = [];
    const lines = content.split('\n');
    // Text between the first "{" at/after (lineIndex, column) and its matching "}".
    const readBracedBody = (lineIndex, column) => {
        const stop = Math.min(lines.length, lineIndex + BODY_WINDOW);
        let depth = 0;
        let body = '';
        for (let k = lineIndex; k < stop; k++) {
            const text = k === lineIndex ? lines[k].slice(column) : lines[k];
            for (const ch of text) {
                if (ch === '}' && depth > 0) {
                    depth--;
                    if (depth === 0)
                        return body;
                }
                if (depth > 0)
                    body += ch;
                if (ch === '{')
                    depth++;
            }
            if (depth > 0)
                body += '\n';
        }
        return body; // Unterminated block: classify what was collected
    };
    // Text after the header's colon plus every following line indented deeper than the header.
    const readIndentedBody = (lineIndex) => {
        const header = lines[lineIndex];
        const baseIndent = header.search(/\S/);
        const colon = header.indexOf(':');
        let body = colon >= 0 ? `${header.slice(colon + 1)}\n` : '';
        const stop = Math.min(lines.length, lineIndex + BODY_WINDOW);
        for (let k = lineIndex + 1; k < stop; k++) {
            const indent = lines[k].search(/\S/);
            if (indent < 0)
                continue;
            if (indent <= baseIndent)
                break;
            body += `${lines[k]}\n`;
        }
        return body;
    };
    // Maps a handler body to its strategy label.
    const classify = (body, pythonBody) => {
        const code = pythonBody
            ? body.replace(/#.*$/gm, '')
            : body.replace(/\/\*[\s\S]*?\*\//g, '').replace(/\/\/.*$/gm, '');
        const trimmed = code.trim();
        if (trimmed === '' || (pythonBody && /^(?:pass|\.\.\.)$/.test(trimmed)))
            return 'ignore';
        if (code.includes('console.log') || code.includes('console.error') || code.includes('print('))
            return 'log';
        if (code.includes('throw') || (pythonBody && /\braise\b/.test(code)))
            return 'throw';
        if (code.includes('return'))
            return 'return';
        return 'custom';
    };
    for (let i = 0; i < lines.length; i++) {
        const line = lines[i];
        // try-catch blocks
        const tryMatch = tryBrace.exec(line) || (isPython ? tryPython.exec(line) : null);
        if (tryMatch) {
            const afterTry = tryMatch.index + tryMatch[0].length;
            const stop = Math.min(lines.length, i + SEARCH_WINDOW);
            for (let j = i; j < stop; j++) {
                // On the `try` line itself only the text after `try {` is considered
                const offset = j === i ? afterTry : 0;
                const text = lines[j].slice(offset);
                const braceCatch = catchClause.exec(text);
                const pythonExcept = !braceCatch && isPython && j > i && exceptClause.test(text);
                if (!braceCatch && !pythonExcept)
                    continue;
                const body = braceCatch
                    ? readBracedBody(j, offset + braceCatch.index + braceCatch[0].length)
                    : readIndentedBody(j);
                const strategy = classify(body, !braceCatch);
                handlers.push({
                    type: 'try-catch',
                    lineStart: i + 1,
                    isEmpty: strategy === 'ignore',
                    strategy,
                });
                break;
            }
        }
        // .catch() handlers
        const promiseCatch = promiseCatchCall.exec(line);
        if (promiseCatch) {
            // Anchor at this call so a later `.catch` in the look-ahead cannot decide the result
            const tail = [
                line.slice(promiseCatch.index),
                ...lines.slice(i + 1, Math.min(lines.length, i + 5)),
            ].join('\n');
            const isEmpty = emptyPromiseHandler.test(tail);
            handlers.push({
                type: 'promise-catch',
                lineStart: i + 1,
                isEmpty,
                strategy: isEmpty ? 'ignore' : 'custom',
            });
        }
    }
    return handlers;
}
|
|
327
|
+
/**
 * Estimates the deepest nesting level reached in a piece of code.
 *
 * Python: the largest indentation of any non-blank line, in units of four
 * characters (rounded down). Everything else: the largest number of
 * simultaneously open `{` braces, counted character by character.
 *
 * @param {string} content - Code to measure.
 * @param {string} lang - Language label; only `'python'` changes the rules.
 * @returns {number} The maximum nesting level found (0 when there is none).
 */
function calculateMaxNesting(content, lang) {
    if (lang === 'python') {
        return content.split('\n').reduce((deepest, line) => {
            const firstCodeColumn = line.search(/\S/);
            if (firstCodeColumn < 0) {
                return deepest; // blank line carries no indentation information
            }
            return Math.max(deepest, Math.floor(firstCodeColumn / 4));
        }, 0);
    }
    let openBraces = 0;
    let deepest = 0;
    for (const ch of content) {
        if (ch === '{') {
            openBraces += 1;
            if (openBraces > deepest) {
                deepest = openBraces;
            }
        }
        else if (ch === '}') {
            openBraces -= 1;
        }
    }
    return deepest;
}
|
|
352
|
+
/**
 * Counts assertion-like expressions in a file.
 *
 * Each pattern is counted independently and the totals are summed, so a
 * statement such as `expect(x).toBe(1)` contributes to more than one pattern.
 *
 * @param {string} content - Full file text.
 * @returns {number} Sum of the matches of all assertion patterns.
 */
function countAssertions(content) {
    const assertionPatterns = [
        /\bexpect\s*\(/g,
        /\bassert\w*\s*[.(]/g,
        /\bshould\./g,
        /\.to(Be|Equal|Have|Throw|Match|Include|Contain)/g,
    ];
    return assertionPatterns.reduce((total, pattern) => {
        const hits = content.match(pattern);
        return hits === null ? total : total + hits.length;
    }, 0);
}
|
|
367
|
+
/**
 * Decides whether a file looks like a test file, from its path and content.
 *
 * Path signals: `.test.` / `.spec.` in the name, a `_test.<ext>` suffix (Go
 * and Python style), a `test_*.py` name, a `__tests__` segment, or a directory
 * named `test` / `tests` (also as a suffix such as `e2e-tests/`).
 *
 * Content signals: a call to `describe`, `it` or `test` as a standalone
 * identifier (not `split(`, `emit(` or `regex.test(`), `def test_`, or
 * `@pytest`.
 *
 * @param {string} filePath - Path of the file.
 * @param {string} content - Full file text.
 * @returns {boolean} `true` when any signal matches.
 */
function isTestFile(filePath, content) {
    const nameSignals = [
        /\.(test|spec|_test)\./,
        /_test\.[^./\\]+$/,
        /(?:^|[\\/])test_[^\\/]*\.py$/,
    ];
    if (nameSignals.some(pattern => pattern.test(filePath)))
        return true;
    // `tests?/` must be a whole path segment or follow a separator character,
    // so directories such as `latest/` or `contest/` do not qualify.
    if (filePath.includes('__tests__') || /(?:^|[\\/._-])tests?[\\/]/.test(filePath))
        return true;
    // The lookbehind rejects method calls (`re.test(`) and longer identifiers (`split(`)
    if (/(?<![.\w$])(?:describe|it|test)(?:\.\w+)*\s*\(/.test(content))
        return true;
    if (content.includes('def test_') || content.includes('@pytest'))
        return true;
    return false;
}
|
|
378
|
+
// ── Go-specific extractors ──
|
|
379
|
+
/**
 * Finds Go struct declarations (`type Name struct {`, optionally generic) and
 * summarises their fields, embedded types and receiver methods.
 *
 * Field lines are read between the opening and closing brace. A line holding a
 * single (optionally pointer, optionally package-qualified) type name is an
 * embedded type; `a, b int` counts as two fields. Trailing `//` comments are
 * ignored. Lines inside nested anonymous structs are still counted as fields of
 * the outer struct.
 *
 * @param {string} content - Full Go file text.
 * @returns {object[]} One record per struct: name, lineStart/lineEnd (1-based),
 *   lineCount, fieldCount (fields + embeds), embeds, methods and methodCount.
 */
function extractGoStructs(content) {
    const structs = [];
    const lines = content.split('\n');
    for (let i = 0; i < lines.length; i++) {
        const match = lines[i].match(/^type\s+(\w+)\s*(?:\[[^\]]*\])?\s+struct\s*\{/);
        if (!match)
            continue;
        const name = match[1];
        const lineStart = i + 1;
        let lineEnd = lineStart;
        let braces = 0;
        let started = false;
        const fields = [];
        const embeds = [];
        for (let j = i; j < lines.length; j++) {
            for (const char of lines[j]) {
                if (char === '{') {
                    braces++;
                    started = true;
                }
                if (char === '}')
                    braces--;
            }
            // Count fields (lines inside struct with type declarations)
            if (j > i && braces > 0) {
                // Drop a trailing line comment; comment-only lines become empty and are skipped
                const fieldLine = lines[j].replace(/\/\/.*$/, '').trim();
                const isDeclaration = fieldLine !== ''
                    && !fieldLine.startsWith('{')
                    && !fieldLine.startsWith('}')
                    && !fieldLine.startsWith('/*');
                if (isDeclaration) {
                    const namedFields = fieldLine.match(/^(\w+(?:\s*,\s*\w+)*)\s+\S/);
                    if (/^\*?[\w.]+$/.test(fieldLine)) {
                        // Embedded type: `Base`, `*Base`, `sync.Mutex`, `*http.Client`
                        embeds.push(fieldLine.replace(/^\*/, ''));
                    }
                    else if (namedFields) {
                        // `a, b int` declares one field per listed name
                        for (const fieldName of namedFields[1].split(',')) {
                            fields.push(fieldName.trim());
                        }
                    }
                    else if (fieldLine.includes(' ')) {
                        fields.push(fieldLine.split(/\s+/)[0]);
                    }
                }
            }
            if (started && braces <= 0) {
                lineEnd = j + 1;
                break;
            }
        }
        // Find methods with this struct as receiver (value, pointer or generic receiver)
        const methods = [];
        const methodPattern = new RegExp(`^func\\s*\\(\\s*\\w+\\s+\\*?${name}(?:\\[[^\\]]*\\])?\\s*\\)\\s+(\\w+)\\s*\\(`, 'gm');
        let methodMatch;
        while ((methodMatch = methodPattern.exec(content)) !== null) {
            methods.push(methodMatch[1]);
        }
        structs.push({
            name,
            lineStart,
            lineEnd,
            fieldCount: fields.length + embeds.length,
            methodCount: methods.length,
            methods,
            lineCount: lineEnd - lineStart + 1,
            embeds,
        });
    }
    return structs;
}
|
|
440
|
+
/**
 * Finds Go interface declarations (`type Name interface {` at column 0) and
 * lists the method names declared inside them.
 *
 * Only lines of the form `Name(` inside the braces count as methods; embedded
 * interfaces such as `io.Closer` are not listed.
 *
 * @param {string} content - Full Go file text.
 * @returns {object[]} Records of shape `{ name, lineStart, methodCount, methods }`
 *   with a 1-based `lineStart`.
 */
function extractGoInterfaces(content) {
    const sourceLines = content.split('\n');
    const found = [];
    sourceLines.forEach((declarationLine, declarationIndex) => {
        const header = /^type\s+(\w+)\s+interface\s*\{/.exec(declarationLine);
        if (header === null) {
            return;
        }
        const methodNames = [];
        let depth = 0;
        let opened = false;
        for (let row = declarationIndex; row < sourceLines.length; row += 1) {
            const text = sourceLines[row];
            for (const ch of text) {
                if (ch === '{') {
                    depth += 1;
                    opened = true;
                }
                else if (ch === '}') {
                    depth -= 1;
                }
            }
            // Lines after the header that are still inside the braces may declare a method
            if (row > declarationIndex && depth > 0) {
                const signature = /^(\w+)\s*\(/.exec(text.trim());
                if (signature !== null) {
                    methodNames.push(signature[1]);
                }
            }
            if (opened && depth <= 0) {
                break;
            }
        }
        found.push({
            name: header[1],
            lineStart: declarationIndex + 1,
            methodCount: methodNames.length,
            methods: methodNames,
        });
    });
    return found;
}
|
|
477
|
+
/**
 * Finds top-level Go functions and receiver methods (`func` at column 0).
 *
 * Methods are named `Receiver.Method`. Generic type-parameter lists on the
 * function or the receiver are accepted. The body is delimited by brace
 * matching, looking at most 500 lines ahead.
 *
 * @param {string} content - Full Go file text.
 * @returns {object[]} One record per function: name, lineStart/lineEnd
 *   (1-based), lineCount, params, paramCount, maxNesting, hasReturn, isAsync
 *   (the body starts a goroutine or mentions one) and isExported (the function
 *   name starts with an upper-case letter).
 */
function extractGoFunctions(content) {
    const functions = [];
    const lines = content.split('\n');
    // Match both standalone funcs and receiver methods:
    // group 1 = receiver type, group 2 = function name, group 3 = raw parameter list
    const signature = /^func\s+(?:\(\s*(?:\w+\s+)?\*?(\w+)(?:\[[^\]]*\])?\s*\)\s*)?(\w+)\s*(?:\[[^\]]*\])?\s*\(([^)]*)\)/;
    for (let i = 0; i < lines.length; i++) {
        const match = lines[i].match(signature);
        if (!match)
            continue;
        const receiver = match[1] || '';
        const name = receiver ? `${receiver}.${match[2]}` : match[2];
        const paramStr = match[3] || '';
        const lineStart = i + 1;
        let lineEnd = lineStart;
        let braces = 0;
        let started = false;
        for (let j = i; j < Math.min(lines.length, i + 500); j++) {
            for (const char of lines[j]) {
                if (char === '{') {
                    braces++;
                    started = true;
                }
                if (char === '}')
                    braces--;
            }
            if (started && braces <= 0) {
                lineEnd = j + 1;
                break;
            }
        }
        const params = paramStr.split(',').map(p => p.trim()).filter(p => p.length > 0);
        const funcContent = lines.slice(i, lineEnd).join('\n');
        const maxNesting = calculateMaxNesting(funcContent, 'go');
        functions.push({
            name,
            lineStart,
            lineEnd,
            lineCount: lineEnd - lineStart + 1,
            paramCount: params.length,
            params,
            maxNesting,
            hasReturn: /\breturn\b/.test(funcContent),
            // `go <expr>` as a whole word, so words ending in "go " do not count
            isAsync: /\bgo\s+\w/.test(funcContent) || funcContent.includes('goroutine'),
            // Go exports identifiers that begin with an upper-case letter; `_x` is not exported
            isExported: /^\p{Lu}/u.test(match[2]),
        });
    }
    return functions;
}
|
|
525
|
+
// ── General quality metrics ──
|
|
526
|
+
/**
 * Computes the share of lines that start with a comment marker, as a whole
 * percentage of all lines in the file.
 *
 * Python and Ruby use `#` (Python also counts lines opening a `"""` or `'''`
 * string); every other language uses the C-style markers `//`, `/*` and a
 * leading `*` (continuation lines of a block comment).
 *
 * @param {string} content - Full file text.
 * @param {string} lang - Language label.
 * @returns {number} Rounded percentage in the range 0-100.
 */
function countCommentRatio(content, lang) {
    const lines = content.split('\n');
    let commentPatterns;
    if (lang === 'python') {
        commentPatterns = [/^\s*#/, /^\s*"""/, /^\s*'''/];
    }
    else if (lang === 'ruby') {
        // Ruby line comments start with `#`, not `//`
        commentPatterns = [/^\s*#/];
    }
    else {
        commentPatterns = [/^\s*\/\//, /^\s*\/\*/, /^\s*\*/];
    }
    let commentLines = 0;
    for (const line of lines) {
        if (commentPatterns.some(p => p.test(line)))
            commentLines++;
    }
    return lines.length > 0 ? Math.round((commentLines / lines.length) * 100) : 0;
}
|
|
538
|
+
/**
 * Counts numeric literals of two or more digits that are not on the allow-list.
 *
 * A number only counts when it is not attached to a `.`, a letter, a digit or
 * an underscore on either side, which leaves out decimals and digits inside
 * identifiers.
 *
 * @param {string} content - Full file text.
 * @param {string} lang - Language label (currently not used by the count).
 * @returns {number} Number of non-allow-listed literals found.
 */
function countMagicNumbers(content, lang) {
    // Exclude 0, 1, -1, common HTTP codes, common sizes
    const allowList = new Set(['0', '1', '-1', '2', '100', '200', '201', '204', '301', '302', '400', '401', '403', '404', '500']);
    const numberPattern = /(?<![.\w])\d{2,}(?![.\w])/g;
    let total = 0;
    let hit = numberPattern.exec(content);
    while (hit !== null) {
        if (!allowList.has(hit[0])) {
            total += 1;
        }
        hit = numberPattern.exec(content);
    }
    return total;
}
|
|
544
|
+
/**
 * Serializes facts records into a compact, line-oriented string for LLM prompts.
 *
 * Files are emitted in input order, separated by a blank line. The result
 * never exceeds `maxChars`: serialization stops before the first file that
 * would push it over the budget. When even the first file does not fit, that
 * file's text is cut to `maxChars` so the caller still receives something.
 *
 * Per file, only noteworthy details are listed: metrics above their
 * thresholds, classes, Go structs/interfaces/concurrency signals, functions of
 * eight lines or more, error-handling strategies, test assertions and the
 * first eight imports.
 *
 * @param {object[]} facts - Records as produced by the fact extractor.
 * @param {number} [maxChars=8000] - Upper bound on the length of the result.
 * @returns {string} The serialized facts.
 */
export function factsToPromptString(facts, maxChars = 8000) {
    const SEPARATOR = '\n\n';
    const parts = [];
    let usedChars = 0; // length of parts.join(SEPARATOR), tracked incrementally
    for (const f of facts) {
        const filePart = [`FILE: ${f.path} (${f.language}, ${f.lineCount} lines)`];
        // Quality metrics
        const metrics = [];
        if (f.commentRatio !== undefined && f.commentRatio < 5 && f.lineCount > 50)
            metrics.push(`comments:${f.commentRatio}%`);
        if (f.magicNumbers && f.magicNumbers > 3)
            metrics.push(`magic_numbers:${f.magicNumbers}`);
        if (f.todoCount && f.todoCount > 0)
            metrics.push(`todos:${f.todoCount}`);
        if (metrics.length > 0)
            filePart.push(` METRICS: ${metrics.join(', ')}`);
        // Classes (JS/TS/Python/Java/C#)
        for (const cls of f.classes) {
            filePart.push(` CLASS ${cls.name} (${cls.lineCount} lines, ${cls.methodCount} methods: ${cls.methods.join(', ')})`);
            if (cls.dependencies.length > 0) {
                filePart.push(` deps: ${cls.dependencies.join(', ')}`);
            }
        }
        // Go structs
        if (f.structs) {
            for (const s of f.structs) {
                const embedStr = s.embeds.length > 0 ? `, embeds: ${s.embeds.join(', ')}` : '';
                filePart.push(` STRUCT ${s.name} (${s.lineCount} lines, ${s.fieldCount} fields, ${s.methodCount} methods: ${s.methods.join(', ')}${embedStr})`);
            }
        }
        // Go interfaces
        if (f.interfaces) {
            for (const iface of f.interfaces) {
                filePart.push(` INTERFACE ${iface.name} (${iface.methodCount} methods: ${iface.methods.join(', ')})`);
            }
        }
        // Go concurrency signals
        if (f.language === 'go') {
            const goSignals = [];
            if (f.goroutines && f.goroutines > 0)
                goSignals.push(`goroutines:${f.goroutines}`);
            if (f.channels && f.channels > 0)
                goSignals.push(`channels:${f.channels}`);
            if (f.defers && f.defers > 0)
                goSignals.push(`defers:${f.defers}`);
            if (f.mutexes && f.mutexes > 0)
                goSignals.push(`mutexes:${f.mutexes}`);
            if (goSignals.length > 0)
                filePart.push(` CONCURRENCY: ${goSignals.join(', ')}`);
        }
        // Functions (all languages)
        for (const fn of f.functions) {
            if (fn.lineCount < 8)
                continue; // Skip tiny functions
            const flags = [
                fn.isAsync ? 'async' : '',
                fn.isExported ? 'exported' : '',
                fn.maxNesting > 3 ? `nesting:${fn.maxNesting}` : '',
                fn.paramCount > 4 ? `params:${fn.paramCount}` : '',
            ].filter(Boolean).join(', ');
            filePart.push(` FN ${fn.name}(${fn.params.join(', ')}) [${fn.lineCount} lines${flags ? ', ' + flags : ''}]`);
        }
        if (f.errorHandling.length > 0) {
            const strategies = f.errorHandling.map(e => e.strategy);
            const unique = [...new Set(strategies)];
            filePart.push(` ERROR_HANDLING: ${unique.join(', ')} (${f.errorHandling.filter(e => e.isEmpty).length} empty catches)`);
        }
        if (f.hasTests) {
            filePart.push(` TESTS: ${f.testAssertions} assertions`);
        }
        if (f.imports.length > 0) {
            filePart.push(` IMPORTS: ${f.imports.length} (${f.imports.slice(0, 8).join(', ')}${f.imports.length > 8 ? '...' : ''})`);
        }
        // Budget check happens BEFORE the section is added, so the result stays within maxChars
        const section = filePart.join('\n');
        const cost = (parts.length > 0 ? SEPARATOR.length : 0) + section.length;
        if (usedChars + cost > maxChars) {
            if (parts.length === 0) {
                parts.push(section.slice(0, Math.max(0, maxChars)));
            }
            break;
        }
        parts.push(section);
        usedChars += cost;
    }
    return parts.join(SEPARATOR);
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|