@rigour-labs/core 3.0.5 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85) hide show
  1. package/dist/deep/fact-extractor.d.ts +80 -0
  2. package/dist/deep/fact-extractor.js +626 -0
  3. package/dist/deep/index.d.ts +14 -0
  4. package/dist/deep/index.js +12 -0
  5. package/dist/deep/prompts.d.ts +22 -0
  6. package/dist/deep/prompts.js +374 -0
  7. package/dist/deep/verifier.d.ts +16 -0
  8. package/dist/deep/verifier.js +388 -0
  9. package/dist/gates/deep-analysis.d.ts +28 -0
  10. package/dist/gates/deep-analysis.js +302 -0
  11. package/dist/gates/deprecated-apis-rules-lang.d.ts +21 -0
  12. package/dist/gates/deprecated-apis-rules-lang.js +311 -0
  13. package/dist/gates/deprecated-apis-rules-node.d.ts +19 -0
  14. package/dist/gates/deprecated-apis-rules-node.js +199 -0
  15. package/dist/gates/deprecated-apis-rules.d.ts +6 -0
  16. package/dist/gates/deprecated-apis-rules.js +6 -0
  17. package/dist/gates/deprecated-apis.js +1 -502
  18. package/dist/gates/hallucinated-imports-lang.d.ts +16 -0
  19. package/dist/gates/hallucinated-imports-lang.js +374 -0
  20. package/dist/gates/hallucinated-imports-stdlib.d.ts +12 -0
  21. package/dist/gates/hallucinated-imports-stdlib.js +228 -0
  22. package/dist/gates/hallucinated-imports.d.ts +0 -98
  23. package/dist/gates/hallucinated-imports.js +10 -678
  24. package/dist/gates/phantom-apis-data.d.ts +33 -0
  25. package/dist/gates/phantom-apis-data.js +398 -0
  26. package/dist/gates/phantom-apis.js +1 -393
  27. package/dist/gates/phantom-apis.test.js +52 -0
  28. package/dist/gates/promise-safety-helpers.d.ts +19 -0
  29. package/dist/gates/promise-safety-helpers.js +101 -0
  30. package/dist/gates/promise-safety-rules.d.ts +7 -0
  31. package/dist/gates/promise-safety-rules.js +19 -0
  32. package/dist/gates/promise-safety.d.ts +1 -21
  33. package/dist/gates/promise-safety.js +51 -257
  34. package/dist/gates/runner.d.ts +4 -2
  35. package/dist/gates/runner.js +46 -1
  36. package/dist/gates/test-quality-lang.d.ts +30 -0
  37. package/dist/gates/test-quality-lang.js +188 -0
  38. package/dist/gates/test-quality.d.ts +0 -14
  39. package/dist/gates/test-quality.js +13 -186
  40. package/dist/index.d.ts +10 -0
  41. package/dist/index.js +12 -2
  42. package/dist/inference/cloud-provider.d.ts +34 -0
  43. package/dist/inference/cloud-provider.js +126 -0
  44. package/dist/inference/index.d.ts +17 -0
  45. package/dist/inference/index.js +23 -0
  46. package/dist/inference/model-manager.d.ts +26 -0
  47. package/dist/inference/model-manager.js +106 -0
  48. package/dist/inference/sidecar-provider.d.ts +15 -0
  49. package/dist/inference/sidecar-provider.js +153 -0
  50. package/dist/inference/types.d.ts +77 -0
  51. package/dist/inference/types.js +19 -0
  52. package/dist/pattern-index/indexer-helpers.d.ts +38 -0
  53. package/dist/pattern-index/indexer-helpers.js +111 -0
  54. package/dist/pattern-index/indexer-lang.d.ts +13 -0
  55. package/dist/pattern-index/indexer-lang.js +244 -0
  56. package/dist/pattern-index/indexer-ts.d.ts +22 -0
  57. package/dist/pattern-index/indexer-ts.js +258 -0
  58. package/dist/pattern-index/indexer.d.ts +4 -106
  59. package/dist/pattern-index/indexer.js +58 -707
  60. package/dist/pattern-index/staleness-data.d.ts +6 -0
  61. package/dist/pattern-index/staleness-data.js +262 -0
  62. package/dist/pattern-index/staleness.js +1 -258
  63. package/dist/settings.d.ts +104 -0
  64. package/dist/settings.js +186 -0
  65. package/dist/storage/db.d.ts +16 -0
  66. package/dist/storage/db.js +132 -0
  67. package/dist/storage/findings.d.ts +14 -0
  68. package/dist/storage/findings.js +38 -0
  69. package/dist/storage/index.d.ts +9 -0
  70. package/dist/storage/index.js +8 -0
  71. package/dist/storage/patterns.d.ts +35 -0
  72. package/dist/storage/patterns.js +62 -0
  73. package/dist/storage/scans.d.ts +42 -0
  74. package/dist/storage/scans.js +55 -0
  75. package/dist/templates/index.d.ts +12 -16
  76. package/dist/templates/index.js +11 -527
  77. package/dist/templates/paradigms.d.ts +2 -0
  78. package/dist/templates/paradigms.js +46 -0
  79. package/dist/templates/presets.d.ts +14 -0
  80. package/dist/templates/presets.js +227 -0
  81. package/dist/templates/universal-config.d.ts +2 -0
  82. package/dist/templates/universal-config.js +190 -0
  83. package/dist/types/index.d.ts +438 -15
  84. package/dist/types/index.js +41 -1
  85. package/package.json +6 -2
@@ -0,0 +1,626 @@
1
+ /**
2
+ * AST Fact Extractor — Step 1 of the three-step pipeline.
3
+ * Extracts structured facts from code files using lightweight regex heuristics (no tree-sitter grammar is loaded).
4
+ * These facts ground the LLM analysis and prevent hallucination.
5
+ */
6
+ import fs from 'fs-extra';
7
+ import path from 'path';
8
+ import { globby } from 'globby';
9
/**
 * Gather regex-derived facts for every supported source file below `cwd`.
 *
 * Files are discovered with globby (symbolic links are not followed) and read
 * one at a time. A file that cannot be read or analysed is skipped silently,
 * and files for which `extractFileFacts` returns `null` are left out.
 *
 * @param cwd - Directory the glob patterns are resolved against.
 * @param ignore - Optional extra ignore globs, placed before the built-in ones.
 * @returns The facts objects of all analysed files, in globby's order.
 */
export async function extractFacts(cwd, ignore) {
    const sourceGlobs = ['**/*.{ts,js,tsx,jsx,py,go,rs,cs,java,rb,kt}'];
    const builtInIgnores = [
        '**/node_modules/**', '**/dist/**', '**/build/**',
        '**/.git/**', '**/vendor/**', '**/__pycache__/**',
        '**/*.min.js', '**/*.bundle.js',
    ];
    const ignoreGlobs = [...(ignore || []), ...builtInIgnores];
    const relativePaths = await globby(sourceGlobs, {
        cwd,
        ignore: ignoreGlobs,
        followSymbolicLinks: false,
    });
    const collected = [];
    for (const relativePath of relativePaths) {
        try {
            const absolutePath = path.join(cwd, relativePath);
            const source = await fs.readFile(absolutePath, 'utf-8');
            const fileFacts = extractFileFacts(relativePath, source);
            if (fileFacts) {
                collected.push(fileFacts);
            }
        }
        catch {
            // Best effort: unreadable or unanalysable files are skipped.
        }
    }
    return collected;
}
37
/**
 * Extract facts from a single file's content.
 *
 * @param filePath - Path stored on the result; also used for language and
 *   test-file detection.
 * @param content - Full text of the file.
 * @returns The facts object, or `null` when the file has fewer than three lines.
 */
function extractFileFacts(filePath, content) {
    const lines = content.split('\n');
    if (lines.length < 3)
        return null; // Skip trivial files
    const language = detectLanguage(filePath);
    const countMatches = (pattern) => (content.match(pattern) || []).length;
    const facts = {
        path: filePath,
        language,
        lineCount: lines.length,
        classes: extractClasses(content, language),
        functions: extractFunctions(content, language),
        imports: extractImports(content, language),
        exports: extractExports(content, language),
        errorHandling: extractErrorHandling(content, language),
        testAssertions: countAssertions(content),
        hasTests: isTestFile(filePath, content),
    };
    // Go-specific extraction
    if (language === 'go') {
        facts.structs = extractGoStructs(content);
        facts.interfaces = extractGoInterfaces(content);
        facts.goroutines = countMatches(/\bgo\s+\w+/g);
        // `chan` must be matched as a whole word: the previous `ch?an` also
        // counted every occurrence of the English word "can".
        facts.channels = countMatches(/\bchan\b|make\s*\(\s*chan\b|<-\s*\w+|\w+\s*<-/g);
        facts.defers = countMatches(/\bdefer\b/g);
        facts.mutexes = countMatches(/sync\.(?:Mutex|RWMutex|WaitGroup|Once|Pool|Map)|\.Lock\(\)|\.Unlock\(\)|\.RLock\(\)|\.RUnlock\(\)/g);
        // Go functions include methods with receivers, so replace the generic
        // result with the receiver-aware one.
        facts.functions = extractGoFunctions(content);
    }
    // General quality metrics (all languages)
    facts.commentRatio = countCommentRatio(content, language);
    facts.magicNumbers = countMagicNumbers(content, language);
    facts.todoCount = countMatches(/\b(?:TODO|FIXME|HACK|XXX|WORKAROUND)\b/gi);
    return facts;
}
74
/**
 * Map a file path to a language label based on its (case-insensitive)
 * extension. Extensions that are not listed yield 'unknown'.
 */
function detectLanguage(filePath) {
    switch (path.extname(filePath).toLowerCase()) {
        case '.ts':
        case '.tsx':
            return 'typescript';
        case '.js':
        case '.jsx':
            return 'javascript';
        case '.py':
            return 'python';
        case '.go':
            return 'go';
        case '.rs':
            return 'rust';
        case '.cs':
            return 'csharp';
        case '.java':
            return 'java';
        case '.rb':
            return 'ruby';
        case '.kt':
            return 'kotlin';
        default:
            return 'unknown';
    }
}
89
/**
 * Find class declarations and summarise each one (line span, methods,
 * public methods, constructor-style dependencies).
 *
 * This is a line/regex heuristic, not a parser:
 *  - the end of a class is found by counting braces (indentation for Python),
 *    so braces inside strings or comments can skew the span;
 *  - every `name(` inside the class body is treated as a method candidate, so
 *    plain calls inside method bodies are still counted. Control-flow and
 *    operator keywords are filtered out.
 *
 * @param content - Full file text.
 * @param lang - Language label; only 'python' changes the strategy.
 * @returns One entry per class declaration, in source order.
 */
function extractClasses(content, lang) {
    // Words that can precede "(" without being a method name.
    const nonMethodWords = new Set([
        'if', 'for', 'while', 'switch', 'catch', 'return', 'function', 'super',
        'new', 'typeof', 'await', 'throw', 'delete', 'void', 'yield', 'else',
        'do', 'with', 'in', 'of',
    ]);
    const classes = [];
    const lines = content.split('\n');
    const isPython = lang === 'python';
    // \b keeps identifiers such as "subclass" from matching.
    const classPattern = isPython
        ? /^\s*class\s+(\w+)/
        : /\bclass\s+(\w+)/;
    for (let i = 0; i < lines.length; i++) {
        // A comment line that merely mentions "class Foo" is not a declaration.
        if (!isPython && /^\s*(?:\/\/|\/\*|\*)/.test(lines[i]))
            continue;
        const match = lines[i].match(classPattern);
        if (!match)
            continue;
        const name = match[1];
        const lineStart = i + 1;
        // Find class end by brace matching (or indentation for Python)
        let lineEnd = lineStart;
        if (isPython) {
            const baseIndent = lines[i].search(/\S/);
            for (let j = i + 1; j < lines.length; j++) {
                const indent = lines[j].search(/\S/);
                // The first non-blank line at or left of the class indent ends the body.
                if (indent >= 0 && indent <= baseIndent)
                    break;
                lineEnd = j + 1;
            }
        }
        else {
            let braces = 0;
            let started = false;
            for (let j = i; j < lines.length; j++) {
                for (const char of lines[j]) {
                    if (char === '{') {
                        braces++;
                        started = true;
                    }
                    else if (char === '}') {
                        braces--;
                    }
                }
                if (started && braces <= 0) {
                    lineEnd = j + 1;
                    break;
                }
            }
        }
        // Extract methods within the class
        const classContent = lines.slice(i, lineEnd).join('\n');
        const methodPattern = isPython
            ? /^\s+def\s+(\w+)/gm
            : /(?:public|private|protected|static|async|get|set)?\s*(?:async\s+)?(\w+)\s*\(/gm;
        const methods = [];
        const publicMethods = [];
        let methodMatch;
        while ((methodMatch = methodPattern.exec(classContent)) !== null) {
            const methodName = methodMatch[1];
            if (methodName === name || methodName === 'constructor')
                continue; // Skip constructor
            if (!isPython && nonMethodWords.has(methodName))
                continue; // `if (`, `while (`, ... are not methods
            methods.push(methodName);
            const hasHiddenModifier = methodMatch[0].includes('private') || methodMatch[0].includes('protected');
            // Python has no modifiers; a leading underscore marks non-public names.
            if (!hasHiddenModifier && (!isPython || !methodName.startsWith('_'))) {
                publicMethods.push(methodName);
            }
        }
        // Extract dependencies: types of `private|readonly|public name: Type` declarations.
        const depPattern = /(?:private|readonly|public)\s+(\w+):\s*(\w+)/g;
        const deps = [];
        let depMatch;
        while ((depMatch = depPattern.exec(classContent)) !== null) {
            deps.push(depMatch[2]);
        }
        classes.push({
            name,
            lineStart,
            lineEnd,
            methodCount: methods.length,
            methods,
            publicMethods,
            lineCount: lineEnd - lineStart + 1,
            dependencies: deps,
        });
    }
    return classes;
}
170
/**
 * Find function declarations, arrow functions and function expressions
 * (or column-0 `def`s for Python) and summarise each one.
 *
 * Heuristic notes:
 *  - only the declaration line is inspected, so parameter lists spread over
 *    several lines are not captured;
 *  - parameters are split on commas, which over-counts generic types that
 *    contain commas;
 *  - the end of a brace-delimited function is searched for at most 500 lines.
 *
 * @param content - Full file text.
 * @param lang - Language label; only 'python' changes the strategy.
 * @returns One entry per detected function, in source order.
 */
function extractFunctions(content, lang) {
    const functions = [];
    const lines = content.split('\n');
    const isPython = lang === 'python';
    // Group 1 is the name, group 2 (when present) the raw parameter list.
    const patterns = isPython
        ? [/^(?:async\s+)?def\s+(\w+)\s*\(([^)]*)\)/]
        : [
            /(?:export\s+)?(?:async\s+)?function\s+(\w+)\s*\(([^)]*)\)/,
            // Arrow functions: parameters are now captured so paramCount is no longer always 0.
            /(?:export\s+)?(?:const|let)\s+(\w+)\s*=\s*(?:async\s+)?\(([^)]*)\)\s*(?::\s*\w+(?:<[^>]+>)?)?\s*=>/,
            // Function expressions: \b keeps `= functional(...)` from matching.
            /(?:export\s+)?(?:const|let)\s+(\w+)\s*=\s*(?:async\s+)?function\b(?:\s*\*?\s*\w*\s*\(([^)]*)\))?/,
        ];
    for (let i = 0; i < lines.length; i++) {
        for (const pattern of patterns) {
            const match = lines[i].match(pattern);
            if (!match)
                continue;
            const name = match[1];
            if (!name || name === 'if' || name === 'for' || name === 'while')
                continue;
            const lineStart = i + 1;
            let lineEnd = lineStart;
            // Find function end
            if (isPython) {
                const baseIndent = lines[i].search(/\S/);
                for (let j = i + 1; j < lines.length; j++) {
                    const indent = lines[j].search(/\S/);
                    // The first non-blank line at or left of the def's indent ends the body.
                    if (indent >= 0 && indent <= baseIndent)
                        break;
                    lineEnd = j + 1;
                }
            }
            else {
                let braces = 0;
                let started = false;
                const scanLimit = Math.min(lines.length, i + 500);
                for (let j = i; j < scanLimit; j++) {
                    for (const char of lines[j]) {
                        if (char === '{') {
                            braces++;
                            started = true;
                        }
                        else if (char === '}') {
                            braces--;
                        }
                    }
                    if (started && braces <= 0) {
                        lineEnd = j + 1;
                        break;
                    }
                    // Arrow functions without braces end on the line holding "=>".
                    if (!started && lines[j].includes('=>') && !lines[j].includes('{')) {
                        lineEnd = j + 1;
                        break;
                    }
                }
            }
            // Extract params
            const paramStr = match[2] || '';
            const params = paramStr.split(',').map(p => p.trim()).filter(p => p.length > 0);
            // Nesting depth
            const funcContent = lines.slice(i, lineEnd).join('\n');
            const maxNesting = calculateMaxNesting(funcContent, lang);
            functions.push({
                name,
                lineStart,
                lineEnd,
                lineCount: lineEnd - lineStart + 1,
                paramCount: params.length,
                params,
                maxNesting,
                hasReturn: funcContent.includes('return ') || funcContent.includes('return;'),
                // Whole-word checks: a name like `asyncQueue` or `exporter` must not set the flags.
                isAsync: /\basync\b/.test(lines[i]),
                isExported: /\bexport\b/.test(lines[i]),
            });
            break; // One match per line
        }
    }
    return functions;
}
247
/**
 * Collect the module specifiers a file imports.
 *
 * Python: `from X import ...` yields `X`; `import a, b as c` yields `a` and `b`
 * (only statements starting at column 0 are considered).
 * Other languages are treated as JavaScript/TypeScript: `import ... from 'x'`
 * (including multi-line import clauses), side-effect `import 'x'` and
 * `require('x')` are recognised, in that order.
 *
 * @param content - Full file text.
 * @param lang - Language label; only 'python' changes the strategy.
 * @returns Module specifiers; duplicates are kept.
 */
function extractImports(content, lang) {
    const imports = [];
    if (lang === 'python') {
        const pattern = /^(?:from\s+(\S+)\s+)?import\s+(.+)$/gm;
        let match;
        while ((match = pattern.exec(content)) !== null) {
            if (match[1]) {
                imports.push(match[1]);
                continue;
            }
            // `import os, sys as system  # note` -> ['os', 'sys']
            const modules = match[2]
                .replace(/#.*$/, '')
                .split(',')
                .map(part => part.trim().split(/\s+as\s+/)[0].trim())
                .filter(part => part.length > 0);
            imports.push(...modules);
        }
        return imports;
    }
    const collect = (pattern) => {
        let match;
        while ((match = pattern.exec(content)) !== null) {
            imports.push(match[1]);
        }
    };
    // The import clause may span lines but never contains quotes or semicolons.
    collect(/\bimport\s+(?:type\s+)?[\w$\s{},*]+?\bfrom\s+['"]([^'"]*)['"]/g);
    // Side-effect imports: import './polyfill';
    collect(/\bimport\s+['"]([^'"]+)['"]/g);
    collect(/require\s*\(\s*['"](.*?)['"]\s*\)/g);
    return imports;
}
269
/**
 * List the names a TypeScript/JavaScript file exports.
 *
 * Recognises exported declarations (`export [default] [abstract] [async]
 * class|function|const|let|var|interface|type|enum Name`, generator functions
 * and `const enum`) followed by export lists (`export { a, b as c }`, where
 * the exported alias is reported). Other languages yield an empty list.
 *
 * @param content - Full file text.
 * @param lang - Language label.
 * @returns Exported names: declarations first, then export-list entries.
 */
function extractExports(content, lang) {
    const exports = [];
    if (lang !== 'typescript' && lang !== 'javascript') {
        return exports;
    }
    // `async`/`abstract`/`declare` were previously not allowed between
    // `export` and the keyword, so `export async function f` was missed.
    const declPattern = /\bexport\s+(?:declare\s+)?(?:default\s+)?(?:abstract\s+)?(?:async\s+)?(?:function\s*\*\s*|(?:const\s+enum|class|function|const|let|var|interface|type|enum)\s+)(\w+)/g;
    let match;
    while ((match = declPattern.exec(content)) !== null) {
        exports.push(match[1]);
    }
    const listPattern = /\bexport\s+(?:type\s+)?\{([^}]*)\}/g;
    while ((match = listPattern.exec(content)) !== null) {
        for (const item of match[1].split(',')) {
            const words = item.trim().replace(/^type\s+/, '').split(/\s+as\s+/);
            const exportedName = words[words.length - 1].trim();
            if (/^[\w$]+$/.test(exportedName)) {
                exports.push(exportedName);
            }
        }
    }
    return exports;
}
280
/**
 * Locate try/catch (or Python try/except) blocks and promise `.catch()`
 * handlers, and classify what each handler does.
 *
 * For every `try`, the first catch clause within the next 50 lines is used.
 * Its body is read up to the matching closing brace (or by indentation for
 * Python), comments are removed, and the remaining code decides the strategy:
 * 'ignore' (nothing left), 'log', 'throw', 'return' or 'custom'.
 *
 * @param content - Full file text.
 * @param lang - Language label; 'python' additionally enables try:/except.
 * @returns Handlers in source order; `lineStart` is the 1-based line of the
 *   `try` (or of the `.catch(` call).
 */
function extractErrorHandling(content, lang) {
    const handlers = [];
    const lines = content.split('\n');
    const isPython = lang === 'python';
    // Empty promise handlers: .catch(() => {}), .catch((e) => {}), .catch(e => {}), .catch(function (e) {})
    const emptyPromiseCatch = /\.catch\s*\(\s*(?:(?:async\s+)?(?:\(\s*[\w$]*\s*\)|[\w$]+)\s*=>\s*\{\s*\}|function\s*\(\s*[\w$]*\s*\)\s*\{\s*\})\s*\)/;
    for (let i = 0; i < lines.length; i++) {
        const line = lines[i];
        // try-catch blocks
        const braceTry = /\btry\s*\{/.exec(line);
        if (braceTry || (isPython && /^\s*try\s*:/.test(line))) {
            const body = findCatchBody(lines, i, braceTry ? braceTry.index : 0, isPython);
            if (body !== null) {
                const code = stripCatchComments(body, isPython).trim();
                const isEmpty = code === '' || (isPython && /^(?:pass|\.\.\.)$/.test(code));
                let strategy = 'custom';
                if (isEmpty)
                    strategy = 'ignore';
                else if (code.includes('console.log') || code.includes('console.error') || code.includes('print('))
                    strategy = 'log';
                else if (code.includes('throw') || (isPython && /\braise\b/.test(code)))
                    strategy = 'throw';
                else if (code.includes('return'))
                    strategy = 'return';
                handlers.push({
                    type: 'try-catch',
                    lineStart: i + 1,
                    isEmpty,
                    strategy,
                });
            }
        }
        // .catch() handlers
        if (/\.catch\s*\(/.test(line)) {
            const nextContent = lines.slice(i, Math.min(lines.length, i + 5)).join('\n');
            const isEmpty = emptyPromiseCatch.test(nextContent);
            handlers.push({
                type: 'promise-catch',
                lineStart: i + 1,
                isEmpty,
                strategy: isEmpty ? 'ignore' : 'custom',
            });
        }
    }
    return handlers;
}
/** Return the body text of the first catch/except clause after a try, or null if none is found within 50 lines. */
function findCatchBody(lines, tryLine, tryColumn, isPython) {
    const limit = Math.min(lines.length, tryLine + 50);
    for (let j = tryLine; j < limit; j++) {
        const offset = j === tryLine ? tryColumn : 0;
        // `catch (e) {` and optional-binding `catch {`, but not a promise `.catch(`.
        const braceCatch = /(?<![.\w$])catch\s*(?=[({])/.exec(lines[j].slice(offset));
        if (braceCatch) {
            return readBracedBody(lines, j, offset + braceCatch.index + 'catch'.length);
        }
        if (isPython && j > tryLine && /^\s*except\b/.test(lines[j])) {
            return readIndentedBody(lines, j);
        }
    }
    return null;
}
/** Read the text between the first top-level `{` at/after (lineIndex, column) and its matching `}` (at most 200 lines). */
function readBracedBody(lines, lineIndex, column) {
    const limit = Math.min(lines.length, lineIndex + 200);
    let body = '';
    let depth = 0;
    let parens = 0; // skips braces inside the catch parameter, e.g. catch ({ message })
    for (let j = lineIndex; j < limit; j++) {
        const text = lines[j];
        for (let k = j === lineIndex ? column : 0; k < text.length; k++) {
            const ch = text[k];
            if (depth === 0) {
                if (ch === '(')
                    parens++;
                else if (ch === ')')
                    parens--;
                else if (ch === '{' && parens <= 0)
                    depth = 1;
                continue;
            }
            if (ch === '{') {
                depth++;
            }
            else if (ch === '}') {
                depth--;
                if (depth === 0)
                    return body;
            }
            body += ch;
        }
        if (depth > 0)
            body += '\n';
    }
    return body;
}
/** Read a Python except body: inline code after the colon plus following lines indented deeper than the header. */
function readIndentedBody(lines, headerLine) {
    const header = lines[headerLine];
    const baseIndent = header.search(/\S/);
    const inline = /^\s*except\b[^:]*:(.*)$/.exec(header);
    const parts = inline ? [inline[1]] : [];
    const limit = Math.min(lines.length, headerLine + 200);
    for (let j = headerLine + 1; j < limit; j++) {
        const indent = lines[j].search(/\S/);
        if (indent < 0)
            continue; // blank line
        if (indent <= baseIndent)
            break;
        parts.push(lines[j]);
    }
    return parts.join('\n');
}
/** Remove comments so that a handler containing only comments counts as empty. */
function stripCatchComments(text, isPython) {
    if (isPython) {
        return text.replace(/#.*$/gm, '');
    }
    return text.replace(/\/\*[\s\S]*?\*\//g, '').replace(/\/\/.*$/gm, '');
}
327
/**
 * Estimate the deepest nesting level inside a piece of code.
 *
 * Python: the deepest indentation divided by 4, where each leading tab counts
 * as 4 columns (previously a tab counted as one column, so tab-indented code
 * always reported 0).
 * Other languages: the maximum number of simultaneously open `{` braces;
 * braces inside strings and comments are counted as well.
 *
 * @param content - Code to inspect (typically one function).
 * @param lang - Language label; only 'python' changes the strategy.
 * @returns The maximum nesting level found (0 for empty input).
 */
function calculateMaxNesting(content, lang) {
    let maxNesting = 0;
    if (lang === 'python') {
        for (const line of content.split('\n')) {
            const firstCode = line.search(/\S/);
            if (firstCode < 0)
                continue; // blank line
            let columns = 0;
            for (let k = 0; k < firstCode; k++) {
                columns += line[k] === '\t' ? 4 : 1;
            }
            maxNesting = Math.max(maxNesting, Math.floor(columns / 4));
        }
        return maxNesting;
    }
    let current = 0;
    for (const char of content) {
        if (char === '{') {
            current++;
            maxNesting = Math.max(maxNesting, current);
        }
        else if (char === '}') {
            current--;
        }
    }
    return maxNesting;
}
352
/**
 * Count assertion-like constructs in a file: `expect(` calls, `assert...`
 * calls or member accesses, `should.` chains and `.toBe/.toEqual/...` style
 * matchers. Each pattern is counted independently and the totals are summed.
 */
function countAssertions(content) {
    const assertionPatterns = [
        /\bexpect\s*\(/g,
        /\bassert\w*\s*[.(]/g,
        /\bshould\./g,
        /\.to(Be|Equal|Have|Throw|Match|Include|Contain)/g,
    ];
    return assertionPatterns.reduce((total, pattern) => {
        const hits = content.match(pattern);
        return hits === null ? total : total + hits.length;
    }, 0);
}
367
/**
 * Decide whether a file looks like a test file, from its path or content.
 *
 * Path signals: `.test.` / `.spec.` infixes, `*_test.<ext>` (Go, Python, ...),
 * `*_spec.rb`, `test_*.py`, or a `__tests__`, `test` or `tests` directory.
 * Content signals: calls to `describe(`, `it(` or `test(` (optionally with a
 * `.only/.skip/.each/.todo/.concurrent` modifier), `def test_`, `@pytest`.
 *
 * Call names are matched as whole identifiers that are not member accesses,
 * so `emit(`, `init(`, `wait(` or `regex.test(` no longer mark ordinary source
 * files as tests, and directories such as `latest/` are not taken for `test/`.
 *
 * @param filePath - File path; both `/` and `\` separators are accepted.
 * @param content - Full file text.
 */
function isTestFile(filePath, content) {
    const normalizedPath = filePath.replace(/\\/g, '/');
    if (/\.(test|spec|_test)\./.test(normalizedPath))
        return true;
    if (/_test\.\w+$/.test(normalizedPath) || /_spec\.rb$/.test(normalizedPath) || /(^|\/)test_[^/]*\.py$/.test(normalizedPath))
        return true;
    if (normalizedPath.includes('__tests__') || /(^|\/)tests?\//.test(normalizedPath))
        return true;
    if (/(?<![.\w$])(?:describe|it|test)(?:\.(?:only|skip|each|todo|concurrent))?\(/.test(content))
        return true;
    if (content.includes('def test_') || content.includes('@pytest'))
        return true;
    return false;
}
378
+ // ── Go-specific extractors ──
379
/**
 * Find Go `type Name struct { ... }` declarations and summarise them.
 *
 * Inside the struct body each line (with any trailing `//` comment removed)
 * is classified as:
 *  - an embedded type when it is a single, optionally pointer- or
 *    package-qualified identifier (`Base`, `*Base`, `sync.Mutex`);
 *  - one or more fields when it starts with an identifier list followed by a
 *    type (`host, port string` counts as two fields).
 * Lines of nested anonymous structs are counted as fields of the outer struct.
 * Methods are functions anywhere in `content` whose receiver type is the
 * struct (named or unnamed receiver, value or pointer).
 *
 * @param content - Full Go source text.
 * @returns One entry per struct declaration, in source order.
 */
function extractGoStructs(content) {
    const structs = [];
    const lines = content.split('\n');
    for (let i = 0; i < lines.length; i++) {
        const match = lines[i].match(/^type\s+(\w+)\s+struct\s*\{/);
        if (!match)
            continue;
        const name = match[1];
        const lineStart = i + 1;
        let lineEnd = lineStart;
        let braces = 0;
        let started = false;
        const fields = [];
        const embeds = [];
        for (let j = i; j < lines.length; j++) {
            for (const char of lines[j]) {
                if (char === '{') {
                    braces++;
                    started = true;
                }
                else if (char === '}') {
                    braces--;
                }
            }
            // Lines strictly inside the struct (the closing line leaves braces at 0).
            if (j > i && braces > 0) {
                const code = lines[j].replace(/\/\/.*$/, '').trim();
                if (code && !code.startsWith('{')) {
                    if (/^\*?[\w.]+$/.test(code)) {
                        embeds.push(code.replace(/^\*/, ''));
                    }
                    else {
                        const fieldNames = code.match(/^(\w+(?:\s*,\s*\w+)*)\s+\S/);
                        if (fieldNames) {
                            fields.push(...fieldNames[1].split(/\s*,\s*/));
                        }
                    }
                }
            }
            if (started && braces <= 0) {
                lineEnd = j + 1;
                break;
            }
        }
        // Find methods with this struct as receiver; `name` is \w+ so it is regex-safe.
        const methods = [];
        const methodPattern = new RegExp(`^func\\s*\\(\\s*(?:\\w+\\s+)?\\*?${name}(?:\\[[^\\]]*\\])?\\s*\\)\\s*(\\w+)\\s*\\(`, 'gm');
        let methodMatch;
        while ((methodMatch = methodPattern.exec(content)) !== null) {
            methods.push(methodMatch[1]);
        }
        structs.push({
            name,
            lineStart,
            lineEnd,
            fieldCount: fields.length + embeds.length,
            methodCount: methods.length,
            methods,
            lineCount: lineEnd - lineStart + 1,
            embeds,
        });
    }
    return structs;
}
440
/**
 * Find Go `type Name interface { ... }` declarations and list the method
 * names declared inside each one (lines of the form `Name(` within the body).
 * The body is delimited by counting braces from the declaration line.
 */
function extractGoInterfaces(content) {
    const sourceLines = content.split('\n');
    const found = [];
    sourceLines.forEach((headerLine, headerIndex) => {
        const header = headerLine.match(/^type\s+(\w+)\s+interface\s*\{/);
        if (!header) {
            return;
        }
        const methodNames = [];
        let depth = 0;
        let opened = false;
        for (let row = headerIndex; row < sourceLines.length; row++) {
            const text = sourceLines[row];
            for (const ch of text) {
                if (ch === '{') {
                    depth += 1;
                    opened = true;
                }
                if (ch === '}') {
                    depth -= 1;
                }
            }
            // Only lines after the header that are still inside the braces.
            if (row > headerIndex && depth > 0) {
                const signature = text.trim().match(/^(\w+)\s*\(/);
                if (signature) {
                    methodNames.push(signature[1]);
                }
            }
            if (opened && depth <= 0) {
                break;
            }
        }
        found.push({
            name: header[1],
            lineStart: headerIndex + 1,
            methodCount: methodNames.length,
            methods: methodNames,
        });
    });
    return found;
}
477
/**
 * Find Go functions and receiver methods declared at column 0.
 *
 * Methods are reported as `Receiver.Method`. Named and unnamed receivers,
 * pointer receivers and type-parameter lists (`Stack[T]`, `Map[T any]`) are
 * accepted. The parameter list must be on the declaration line and is split
 * on commas. The end of the body is searched for at most 500 lines by
 * counting braces.
 *
 * @param content - Full Go source text.
 * @returns One entry per function/method, in source order.
 */
function extractGoFunctions(content) {
    const functions = [];
    const lines = content.split('\n');
    // Group 1: receiver type (optional), group 2: name, group 3: raw parameters.
    const declaration = /^func\s+(?:\(\s*(?:\w+\s+)?\*?(\w+)(?:\[[^\]]*\])?\s*\)\s*)?(\w+)(?:\[[^\]]*\])?\s*\(([^)]*)\)/;
    for (let i = 0; i < lines.length; i++) {
        const match = lines[i].match(declaration);
        if (!match)
            continue;
        const receiver = match[1] || '';
        const name = receiver ? `${receiver}.${match[2]}` : match[2];
        const paramStr = match[3] || '';
        const lineStart = i + 1;
        let lineEnd = lineStart;
        let braces = 0;
        let started = false;
        const scanLimit = Math.min(lines.length, i + 500);
        for (let j = i; j < scanLimit; j++) {
            for (const char of lines[j]) {
                if (char === '{') {
                    braces++;
                    started = true;
                }
                else if (char === '}') {
                    braces--;
                }
            }
            if (started && braces <= 0) {
                lineEnd = j + 1;
                break;
            }
        }
        const params = paramStr.split(',').map(p => p.trim()).filter(p => p.length > 0);
        const funcContent = lines.slice(i, lineEnd).join('\n');
        const maxNesting = calculateMaxNesting(funcContent, 'go');
        functions.push({
            name,
            lineStart,
            lineEnd,
            lineCount: lineEnd - lineStart + 1,
            paramCount: params.length,
            params,
            maxNesting,
            hasReturn: /\breturn\b/.test(funcContent),
            // A `go` statement launches a call or a func literal; matching it as a
            // whole word avoids hits on text such as "cargo " or "ago ".
            isAsync: /\bgo\s+(?:func\b|[\w.]+\s*\()/.test(funcContent) || funcContent.includes('goroutine'),
            // Go exports identifiers that start with an upper-case letter; the
            // previous toUpperCase() comparison also accepted "_" and digits.
            isExported: /^\p{Lu}/u.test(match[2]),
        });
    }
    return functions;
}
525
+ // ── General quality metrics ──
526
/**
 * Percentage (0-100, rounded) of lines that start with a comment marker.
 *
 * Python and Ruby use `#` (Python additionally counts lines opening a `"""`
 * docstring); all other languages use `//`, `/*` and leading `*`. Only the
 * start of each line is inspected, so trailing comments and the inner lines
 * of Python docstrings are not counted.
 *
 * @param content - Full file text.
 * @param lang - Language label.
 */
function countCommentRatio(content, lang) {
    const lines = content.split('\n');
    let commentPatterns;
    if (lang === 'python') {
        commentPatterns = [/^\s*#/, /^\s*"""/];
    }
    else if (lang === 'ruby') {
        // Ruby comments start with '#'; they were previously tested against the C-style markers.
        commentPatterns = [/^\s*#/];
    }
    else {
        commentPatterns = [/^\s*\/\//, /^\s*\/\*/, /^\s*\*/];
    }
    const commentLines = lines.filter(line => commentPatterns.some(p => p.test(line))).length;
    // split() always yields at least one element, so the division is safe.
    return Math.round((commentLines / lines.length) * 100);
}
538
/**
 * Count integer literals with two or more digits that are not on the
 * allow-list (common HTTP status codes and sizes). A number is only counted
 * when it is not adjacent to a word character or a dot, which excludes
 * identifiers, decimals and member accesses. `lang` is currently unused.
 */
function countMagicNumbers(content, lang) {
    const exempt = new Set(['0', '1', '-1', '2', '100', '200', '201', '204', '301', '302', '400', '401', '403', '404', '500']);
    const literals = content.match(/(?<![.\w])\d{2,}(?![.\w])/g);
    if (literals === null) {
        return 0;
    }
    let total = 0;
    for (const literal of literals) {
        if (!exempt.has(literal)) {
            total += 1;
        }
    }
    return total;
}
544
/**
 * Serialize facts into a compact string for LLM prompts.
 *
 * Files are rendered in order and joined with blank lines. Rendering stops
 * before the first file that would push the result past `maxChars`; the first
 * file is always included so the result is never empty for non-empty input.
 * (Previously the check ran after appending, so the result could exceed the
 * budget by a whole file.)
 *
 * @param facts - Per-file facts as produced by `extractFacts`.
 * @param maxChars - Character budget for the whole string (default 8000).
 * @returns The prompt fragment.
 */
export function factsToPromptString(facts, maxChars = 8000) {
    const separator = '\n\n';
    const parts = [];
    let totalLength = 0; // length of parts.join(separator), maintained incrementally
    for (const f of facts) {
        const filePart = [`FILE: ${f.path} (${f.language}, ${f.lineCount} lines)`];
        // Quality metrics
        const metrics = [];
        if (f.commentRatio !== undefined && f.commentRatio < 5 && f.lineCount > 50)
            metrics.push(`comments:${f.commentRatio}%`);
        if (f.magicNumbers && f.magicNumbers > 3)
            metrics.push(`magic_numbers:${f.magicNumbers}`);
        if (f.todoCount && f.todoCount > 0)
            metrics.push(`todos:${f.todoCount}`);
        if (metrics.length > 0)
            filePart.push(` METRICS: ${metrics.join(', ')}`);
        // Classes (JS/TS/Python/Java/C#)
        for (const cls of f.classes) {
            filePart.push(` CLASS ${cls.name} (${cls.lineCount} lines, ${cls.methodCount} methods: ${cls.methods.join(', ')})`);
            if (cls.dependencies.length > 0) {
                filePart.push(` deps: ${cls.dependencies.join(', ')}`);
            }
        }
        // Go structs
        if (f.structs) {
            for (const s of f.structs) {
                const embedStr = s.embeds.length > 0 ? `, embeds: ${s.embeds.join(', ')}` : '';
                filePart.push(` STRUCT ${s.name} (${s.lineCount} lines, ${s.fieldCount} fields, ${s.methodCount} methods: ${s.methods.join(', ')}${embedStr})`);
            }
        }
        // Go interfaces
        if (f.interfaces) {
            for (const iface of f.interfaces) {
                filePart.push(` INTERFACE ${iface.name} (${iface.methodCount} methods: ${iface.methods.join(', ')})`);
            }
        }
        // Go concurrency signals
        if (f.language === 'go') {
            const goSignals = [];
            if (f.goroutines && f.goroutines > 0)
                goSignals.push(`goroutines:${f.goroutines}`);
            if (f.channels && f.channels > 0)
                goSignals.push(`channels:${f.channels}`);
            if (f.defers && f.defers > 0)
                goSignals.push(`defers:${f.defers}`);
            if (f.mutexes && f.mutexes > 0)
                goSignals.push(`mutexes:${f.mutexes}`);
            if (goSignals.length > 0)
                filePart.push(` CONCURRENCY: ${goSignals.join(', ')}`);
        }
        // Functions (all languages)
        for (const fn of f.functions) {
            if (fn.lineCount < 8)
                continue; // Skip tiny functions
            const flags = [
                fn.isAsync ? 'async' : '',
                fn.isExported ? 'exported' : '',
                fn.maxNesting > 3 ? `nesting:${fn.maxNesting}` : '',
                fn.paramCount > 4 ? `params:${fn.paramCount}` : '',
            ].filter(Boolean).join(', ');
            filePart.push(` FN ${fn.name}(${fn.params.join(', ')}) [${fn.lineCount} lines${flags ? ', ' + flags : ''}]`);
        }
        if (f.errorHandling.length > 0) {
            const unique = [...new Set(f.errorHandling.map(e => e.strategy))];
            const emptyCount = f.errorHandling.filter(e => e.isEmpty).length;
            filePart.push(` ERROR_HANDLING: ${unique.join(', ')} (${emptyCount} empty catches)`);
        }
        if (f.hasTests) {
            filePart.push(` TESTS: ${f.testAssertions} assertions`);
        }
        if (f.imports.length > 0) {
            filePart.push(` IMPORTS: ${f.imports.length} (${f.imports.slice(0, 8).join(', ')}${f.imports.length > 8 ? '...' : ''})`);
        }
        const rendered = filePart.join('\n');
        // Rough token budget check, done before appending so the budget holds.
        const projected = totalLength + (parts.length > 0 ? separator.length : 0) + rendered.length;
        if (parts.length > 0 && projected > maxChars)
            break;
        parts.push(rendered);
        totalLength = projected;
    }
    return parts.join(separator);
}
@@ -0,0 +1,14 @@
1
+ /**
2
+ * Deep Analysis Pipeline — AST → LLM → Verify
3
+ *
4
+ * Step 1: AST extracts structured facts from code
5
+ * Step 2: LLM interprets facts and identifies quality issues
6
+ * Step 3: AST verifies LLM isn't hallucinating
7
+ *
8
+ * Neither AST nor LLM works alone. Together they're accurate.
9
+ */
10
+ export { extractFacts, factsToPromptString } from './fact-extractor.js';
11
+ export type { FileFacts, ClassFact, FunctionFact, ErrorHandlingFact, StructFact, InterfaceFact } from './fact-extractor.js';
12
+ export { buildAnalysisPrompt, buildCrossFilePrompt, chunkFacts, DEEP_SYSTEM_PROMPT } from './prompts.js';
13
+ export { verifyFindings } from './verifier.js';
14
+ export type { VerifiedFinding } from './verifier.js';
@@ -0,0 +1,12 @@
1
+ /**
2
+ * Deep Analysis Pipeline — AST → LLM → Verify
3
+ *
4
+ * Step 1: AST extracts structured facts from code
5
+ * Step 2: LLM interprets facts and identifies quality issues
6
+ * Step 3: AST verifies LLM isn't hallucinating
7
+ *
8
+ * Neither AST nor LLM works alone. Together they're accurate.
9
+ */
10
+ export { extractFacts, factsToPromptString } from './fact-extractor.js';
11
+ export { buildAnalysisPrompt, buildCrossFilePrompt, chunkFacts, DEEP_SYSTEM_PROMPT } from './prompts.js';
12
+ export { verifyFindings } from './verifier.js';
@@ -0,0 +1,22 @@
1
+ /**
2
+ * Prompt Engineering — Step 2 of the three-step pipeline.
3
+ * Constructs structured prompts that ask the LLM to interpret AST-extracted facts.
4
+ */
5
+ import type { FileFacts } from './fact-extractor.js';
6
+ /**
7
+ * System prompt that defines the LLM's role and output format.
8
+ */
9
+ export declare const DEEP_SYSTEM_PROMPT = "You are an expert code reviewer and software architect performing deep quality analysis. You receive AST-extracted facts about a codebase and must identify quality issues, anti-patterns, and best practice violations.\n\nIMPORTANT RULES:\n1. ONLY report issues you can verify from the provided facts. Do NOT hallucinate files, classes, or functions.\n2. Every finding MUST reference a real file and entity from the facts.\n3. Be specific: include file paths, struct/class names, function names, line counts.\n4. Assign confidence scores honestly: 0.9+ only for certain issues, 0.5-0.7 for probable issues.\n5. Respond ONLY with valid JSON matching the schema below. No explanation text outside JSON.\n6. AIM for 5-15 findings per batch. Be thorough \u2014 report ALL issues you can identify, not just the most obvious ones.\n7. For Go code: treat structs as classes, receiver methods as class methods. Check Go idioms specifically.\n\nOUTPUT SCHEMA:\n{\n \"findings\": [\n {\n \"category\": \"string (see CATEGORIES below)\",\n \"severity\": \"string (critical|high|medium|low|info)\",\n \"file\": \"string (exact file path from facts)\",\n \"line\": \"number or null\",\n \"description\": \"string (what the issue is, referencing specific entities)\",\n \"suggestion\": \"string (actionable fix recommendation)\",\n \"confidence\": \"number 0.0-1.0\"\n }\n ]\n}\n\nCATEGORIES:\n SOLID Principles:\n srp_violation - Single file/struct/class handles multiple unrelated responsibilities\n ocp_violation - Code requires modification (not extension) for new behavior\n lsp_violation - Subtypes break substitutability contracts\n isp_violation - Interface has too many methods forcing unnecessary implementations\n dip_violation - High-level modules depend directly on low-level implementations\n\n Design Patterns & Anti-patterns:\n god_class - Class/struct with too many fields, methods, or responsibilities (>8 methods or >300 lines)\n god_function - 
Function exceeding 50 lines or doing too many things\n feature_envy - Function/method uses another module's data more than its own\n shotgun_surgery - A single change requires modifying many files\n long_params - Function with 4+ parameters (use struct/options pattern)\n data_clump - Same group of fields/params repeated across multiple structs/functions\n inappropriate_intimacy - Two modules too tightly coupled, accessing each other's internals\n primitive_obsession - Using primitives instead of domain types (string for email, int for ID)\n lazy_class - Struct/class that does too little to justify its existence\n speculative_generality - Over-engineered abstractions not justified by current usage\n refused_bequest - Subtype/implementation ignores inherited behavior\n\n DRY & Duplication:\n dry_violation - Duplicated logic across files that should be extracted\n copy_paste_code - Nearly identical functions/methods in different files\n\n Error Handling:\n error_inconsistency - Mixed error handling strategies in same package/module\n empty_catch - Empty catch/except blocks that silently swallow errors\n error_swallowing - Errors logged but not propagated when they should be\n missing_error_check - Return values (especially errors) not checked\n panic_in_library - Library code using panic/os.Exit instead of returning errors\n\n Concurrency (Go/Rust/async languages):\n race_condition - Shared mutable state accessed without synchronization\n goroutine_leak - Goroutines spawned without cancellation/context mechanism\n missing_context - Functions that should accept context.Context but don't\n channel_misuse - Unbuffered channels that could deadlock, or missing close()\n mutex_scope - Mutex held too long or across I/O operations\n\n Testing:\n test_quality - Insufficient assertions, no edge cases, weak coverage\n test_coupling - Tests tightly coupled to implementation details\n missing_test - Complex public function/method with no corresponding test\n test_duplication - 
Multiple tests verifying the same behavior redundantly\n\n Architecture:\n architecture - Layer violations, wrong dependency direction\n circular_dependency - Modules that import each other\n package_cohesion - Package/directory contains unrelated concerns\n api_design - Exported API is confusing, inconsistent, or poorly structured\n missing_abstraction - Direct usage where an interface/abstraction would improve design\n\n Language Idioms:\n language_idiom - Language-specific anti-patterns\n naming_convention - Names don't follow language conventions (Go: MixedCaps, Python: snake_case)\n dead_code - Unreferenced exports, unused functions\n magic_number - Numeric literals without named constants\n\n Performance & Security:\n performance - Obvious performance anti-patterns (N+1 queries, unbounded allocations)\n resource_leak - Opened resources (files, connections, readers) not properly closed\n hardcoded_config - Configuration values hardcoded instead of externalized\n\n Code Smells:\n code_smell - General smell with refactoring suggestion\n complex_conditional - Deeply nested or overly complex conditional logic\n long_file - File exceeds reasonable length for its responsibility";
10
+ /**
11
+ * Build the analysis prompt for a batch of file facts.
12
+ */
13
+ export declare function buildAnalysisPrompt(factsStr: string, checks?: Record<string, boolean>): string;
14
+ /**
15
+ * Build a cross-file analysis prompt that looks at patterns across the whole codebase.
16
+ */
17
+ export declare function buildCrossFilePrompt(allFacts: FileFacts[]): string;
18
+ /**
19
+ * Chunk file facts into batches that fit within token limits.
20
+ * Groups related files (same directory) together.
21
+ */
22
+ export declare function chunkFacts(facts: FileFacts[], maxCharsPerChunk?: number): FileFacts[][];