@rigour-labs/core 2.10.0 → 2.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -9,3 +9,4 @@ export { PatternMatcher, checkPatternDuplicate, type MatcherConfig } from './mat
|
|
|
9
9
|
export { StalenessDetector, checkCodeStaleness } from './staleness.js';
|
|
10
10
|
export { SecurityDetector } from './security.js';
|
|
11
11
|
export { OverrideManager, loadConfigOverrides } from './overrides.js';
|
|
12
|
+
export { generateEmbedding, semanticSearch, cosineSimilarity } from './embeddings.js';
|
|
@@ -13,3 +13,5 @@ export { StalenessDetector, checkCodeStaleness } from './staleness.js';
|
|
|
13
13
|
export { SecurityDetector } from './security.js';
|
|
14
14
|
// Override Management
|
|
15
15
|
export { OverrideManager, loadConfigOverrides } from './overrides.js';
|
|
16
|
+
// Embeddings
|
|
17
|
+
export { generateEmbedding, semanticSearch, cosineSimilarity } from './embeddings.js';
|
|
@@ -26,6 +26,32 @@ export declare class PatternIndexer {
|
|
|
26
26
|
* Extract patterns from a single file using TypeScript AST.
|
|
27
27
|
*/
|
|
28
28
|
private extractPatterns;
|
|
29
|
+
/**
|
|
30
|
+
* Extract patterns from Go files.
|
|
31
|
+
*/
|
|
32
|
+
private extractGoPatterns;
|
|
33
|
+
/**
|
|
34
|
+
* Extract patterns from Rust files.
|
|
35
|
+
*/
|
|
36
|
+
private extractRustPatterns;
|
|
37
|
+
/**
|
|
38
|
+
* Generic extraction for C-style languages (Java, C++, PHP, etc.)
|
|
39
|
+
*/
|
|
40
|
+
private extractJVMStylePatterns;
|
|
41
|
+
private extractGenericCPatterns;
|
|
42
|
+
private getCOMLineComments;
|
|
43
|
+
private getJavaDoc;
|
|
44
|
+
private findBraceBlockEnd;
|
|
45
|
+
private getBraceBlockContent;
|
|
46
|
+
/**
|
|
47
|
+
* Extract patterns from Python files using regex.
|
|
48
|
+
*/
|
|
49
|
+
private extractPythonPatterns;
|
|
50
|
+
private detectPythonClassType;
|
|
51
|
+
private detectPythonFunctionType;
|
|
52
|
+
private getPythonDocstring;
|
|
53
|
+
private findPythonBlockEnd;
|
|
54
|
+
private getPythonBlockContent;
|
|
29
55
|
/**
|
|
30
56
|
* Convert an AST node to a PatternEntry if applicable.
|
|
31
57
|
*/
|
|
@@ -12,9 +12,24 @@ import ts from 'typescript';
|
|
|
12
12
|
import { generateEmbedding } from './embeddings.js';
|
|
13
13
|
/** Default configuration for the indexer */
|
|
14
14
|
const DEFAULT_CONFIG = {
|
|
15
|
-
include: ['src/**/*', 'lib/**/*', 'app/**/*', 'components/**/*', 'utils/**/*', 'hooks/**/*'],
|
|
16
|
-
exclude: [
|
|
17
|
-
|
|
15
|
+
include: ['src/**/*', 'lib/**/*', 'app/**/*', 'components/**/*', 'utils/**/*', 'hooks/**/*', '**/tests/**/*', '**/test/**/*'],
|
|
16
|
+
exclude: [
|
|
17
|
+
'**/node_modules/**',
|
|
18
|
+
'**/dist/**',
|
|
19
|
+
'**/build/**',
|
|
20
|
+
'**/.git/**',
|
|
21
|
+
'**/coverage/**',
|
|
22
|
+
'**/venv/**',
|
|
23
|
+
'**/.venv/**',
|
|
24
|
+
'**/__pycache__/**',
|
|
25
|
+
'**/site-packages/**',
|
|
26
|
+
'**/.pytest_cache/**',
|
|
27
|
+
'**/target/**', // Rust build dir
|
|
28
|
+
'**/bin/**',
|
|
29
|
+
'**/.gradle/**',
|
|
30
|
+
'**/.mvn/**'
|
|
31
|
+
],
|
|
32
|
+
extensions: ['.ts', '.tsx', '.js', '.jsx', '.py', '.go', '.rs', '.java', '.cpp', '.h', '.rb', '.php', '.cs', '.kt'],
|
|
18
33
|
indexTests: false,
|
|
19
34
|
indexNodeModules: false,
|
|
20
35
|
minNameLength: 2,
|
|
@@ -180,21 +195,324 @@ export class PatternIndexer {
|
|
|
180
195
|
* Extract patterns from a single file using TypeScript AST.
|
|
181
196
|
*/
|
|
182
197
|
async extractPatterns(filePath, content) {
|
|
198
|
+
const ext = path.extname(filePath).toLowerCase();
|
|
199
|
+
// Specific high-fidelity extractors
|
|
200
|
+
if (ext === '.py')
|
|
201
|
+
return this.extractPythonPatterns(filePath, content);
|
|
202
|
+
if (ext === '.go')
|
|
203
|
+
return this.extractGoPatterns(filePath, content);
|
|
204
|
+
if (ext === '.rs')
|
|
205
|
+
return this.extractRustPatterns(filePath, content);
|
|
206
|
+
if (ext === '.java' || ext === '.kt' || ext === '.cs')
|
|
207
|
+
return this.extractJVMStylePatterns(filePath, content);
|
|
208
|
+
// Fallback for TS/JS or other C-style languages
|
|
183
209
|
const patterns = [];
|
|
184
210
|
const relativePath = path.relative(this.rootDir, filePath);
|
|
185
|
-
//
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
211
|
+
// For TS/JS, use AST
|
|
212
|
+
if (['.ts', '.tsx', '.js', '.jsx'].includes(ext)) {
|
|
213
|
+
const sourceFile = ts.createSourceFile(filePath, content, ts.ScriptTarget.Latest, true, this.getScriptKind(filePath));
|
|
214
|
+
const visit = (node) => {
|
|
215
|
+
const pattern = this.nodeToPattern(node, sourceFile, relativePath, content);
|
|
216
|
+
if (pattern)
|
|
217
|
+
patterns.push(pattern);
|
|
218
|
+
ts.forEachChild(node, visit);
|
|
219
|
+
};
|
|
220
|
+
visit(sourceFile);
|
|
221
|
+
return patterns;
|
|
222
|
+
}
|
|
223
|
+
// Generic C-style fallback (C++, PHP, etc.)
|
|
224
|
+
return this.extractGenericCPatterns(filePath, content);
|
|
225
|
+
}
|
|
226
|
+
/**
|
|
227
|
+
* Extract patterns from Go files.
|
|
228
|
+
*/
|
|
229
|
+
extractGoPatterns(filePath, content) {
|
|
230
|
+
const patterns = [];
|
|
231
|
+
const relativePath = path.relative(this.rootDir, filePath);
|
|
232
|
+
const lines = content.split('\n');
|
|
233
|
+
const funcRegex = /^func\s+(?:\([^)]*\)\s+)?([A-Za-z_][A-Za-z0-9_]*)\s*\(([^)]*)\)\s*([^\{]*)\s*\{/;
|
|
234
|
+
const typeRegex = /^type\s+([A-Za-z_][A-Za-z0-9_]*)\s+(struct|interface)/;
|
|
235
|
+
for (let i = 0; i < lines.length; i++) {
|
|
236
|
+
const line = lines[i];
|
|
237
|
+
// Functions
|
|
238
|
+
const funcMatch = line.match(funcRegex);
|
|
239
|
+
if (funcMatch) {
|
|
240
|
+
const name = funcMatch[1];
|
|
241
|
+
patterns.push(this.createPatternEntry({
|
|
242
|
+
type: 'function',
|
|
243
|
+
name,
|
|
244
|
+
file: relativePath,
|
|
245
|
+
line: i + 1,
|
|
246
|
+
endLine: this.findBraceBlockEnd(lines, i),
|
|
247
|
+
signature: `func ${name}(${funcMatch[2]}) ${funcMatch[3].trim()}`,
|
|
248
|
+
description: this.getCOMLineComments(lines, i - 1),
|
|
249
|
+
keywords: this.extractKeywords(name),
|
|
250
|
+
content: this.getBraceBlockContent(lines, i),
|
|
251
|
+
exported: /^[A-Z]/.test(name)
|
|
252
|
+
}));
|
|
192
253
|
}
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
254
|
+
// Types/Structs
|
|
255
|
+
const typeMatch = line.match(typeRegex);
|
|
256
|
+
if (typeMatch) {
|
|
257
|
+
const name = typeMatch[1];
|
|
258
|
+
patterns.push(this.createPatternEntry({
|
|
259
|
+
type: typeMatch[2],
|
|
260
|
+
name,
|
|
261
|
+
file: relativePath,
|
|
262
|
+
line: i + 1,
|
|
263
|
+
endLine: this.findBraceBlockEnd(lines, i),
|
|
264
|
+
signature: `type ${name} ${typeMatch[2]}`,
|
|
265
|
+
description: this.getCOMLineComments(lines, i - 1),
|
|
266
|
+
keywords: this.extractKeywords(name),
|
|
267
|
+
content: this.getBraceBlockContent(lines, i),
|
|
268
|
+
exported: /^[A-Z]/.test(name)
|
|
269
|
+
}));
|
|
270
|
+
}
|
|
271
|
+
}
|
|
196
272
|
return patterns;
|
|
197
273
|
}
|
|
274
|
+
/**
|
|
275
|
+
* Extract patterns from Rust files.
|
|
276
|
+
*/
|
|
277
|
+
extractRustPatterns(filePath, content) {
|
|
278
|
+
const patterns = [];
|
|
279
|
+
const relativePath = path.relative(this.rootDir, filePath);
|
|
280
|
+
const lines = content.split('\n');
|
|
281
|
+
const fnRegex = /^(?:pub\s+)?(?:async\s+)?fn\s+([A-Za-z_][A-Za-z0-9_]*)\s*[<(][^)]*[>)]\s*(?:->\s*[^\{]+)?\s*\{/;
|
|
282
|
+
const typeRegex = /^(?:pub\s+)?(struct|enum|trait)\s+([A-Za-z_][A-Za-z0-9_]*)/;
|
|
283
|
+
for (let i = 0; i < lines.length; i++) {
|
|
284
|
+
const line = lines[i];
|
|
285
|
+
const fnMatch = line.match(fnRegex);
|
|
286
|
+
if (fnMatch) {
|
|
287
|
+
const name = fnMatch[1];
|
|
288
|
+
patterns.push(this.createPatternEntry({
|
|
289
|
+
type: 'function',
|
|
290
|
+
name,
|
|
291
|
+
file: relativePath,
|
|
292
|
+
line: i + 1,
|
|
293
|
+
endLine: this.findBraceBlockEnd(lines, i),
|
|
294
|
+
signature: line.split('{')[0].trim(),
|
|
295
|
+
description: this.getCOMLineComments(lines, i - 1),
|
|
296
|
+
keywords: this.extractKeywords(name),
|
|
297
|
+
content: this.getBraceBlockContent(lines, i),
|
|
298
|
+
exported: line.startsWith('pub')
|
|
299
|
+
}));
|
|
300
|
+
}
|
|
301
|
+
}
|
|
302
|
+
return patterns;
|
|
303
|
+
}
|
|
304
|
+
/**
|
|
305
|
+
* Generic extraction for C-style languages (Java, C++, PHP, etc.)
|
|
306
|
+
*/
|
|
307
|
+
extractJVMStylePatterns(filePath, content) {
|
|
308
|
+
const patterns = [];
|
|
309
|
+
const relativePath = path.relative(this.rootDir, filePath);
|
|
310
|
+
const lines = content.split('\n');
|
|
311
|
+
// Simplified for classes and methods
|
|
312
|
+
const classRegex = /^(?:public|private|protected|internal)?\s*(?:static\s+)?(?:final\s+)?(?:class|interface|enum)\s+([A-Za-z0-9_]+)/;
|
|
313
|
+
const methodRegex = /^(?:public|private|protected|internal)\s+(?:static\s+)?(?:async\s+)?(?:[A-Za-z0-9_<>\[\]]+\s+)([A-Za-z0-9_]+)\s*\(/;
|
|
314
|
+
for (let i = 0; i < lines.length; i++) {
|
|
315
|
+
const line = lines[i].trim();
|
|
316
|
+
const classMatch = line.match(classRegex);
|
|
317
|
+
if (classMatch) {
|
|
318
|
+
patterns.push(this.createPatternEntry({
|
|
319
|
+
type: 'class',
|
|
320
|
+
name: classMatch[1],
|
|
321
|
+
file: relativePath,
|
|
322
|
+
line: i + 1,
|
|
323
|
+
endLine: this.findBraceBlockEnd(lines, i),
|
|
324
|
+
signature: line,
|
|
325
|
+
description: this.getJavaDoc(lines, i - 1),
|
|
326
|
+
keywords: this.extractKeywords(classMatch[1]),
|
|
327
|
+
content: this.getBraceBlockContent(lines, i),
|
|
328
|
+
exported: line.includes('public')
|
|
329
|
+
}));
|
|
330
|
+
}
|
|
331
|
+
}
|
|
332
|
+
return patterns;
|
|
333
|
+
}
|
|
334
|
+
extractGenericCPatterns(filePath, content) {
|
|
335
|
+
// Fallback for everything else
|
|
336
|
+
return [];
|
|
337
|
+
}
|
|
338
|
+
getCOMLineComments(lines, startIndex) {
|
|
339
|
+
let comments = [];
|
|
340
|
+
for (let i = startIndex; i >= 0; i--) {
|
|
341
|
+
const line = lines[i].trim();
|
|
342
|
+
if (line.startsWith('//'))
|
|
343
|
+
comments.unshift(line.replace('//', '').trim());
|
|
344
|
+
else
|
|
345
|
+
break;
|
|
346
|
+
}
|
|
347
|
+
return comments.join(' ');
|
|
348
|
+
}
|
|
349
|
+
getJavaDoc(lines, startIndex) {
|
|
350
|
+
let comments = [];
|
|
351
|
+
let inDoc = false;
|
|
352
|
+
for (let i = startIndex; i >= 0; i--) {
|
|
353
|
+
const line = lines[i].trim();
|
|
354
|
+
if (line.endsWith('*/'))
|
|
355
|
+
inDoc = true;
|
|
356
|
+
if (inDoc)
|
|
357
|
+
comments.unshift(line.replace('/**', '').replace('*/', '').replace('*', '').trim());
|
|
358
|
+
if (line.startsWith('/**'))
|
|
359
|
+
break;
|
|
360
|
+
}
|
|
361
|
+
return comments.join(' ');
|
|
362
|
+
}
|
|
363
|
+
findBraceBlockEnd(lines, startIndex) {
|
|
364
|
+
let braceCount = 0;
|
|
365
|
+
let started = false;
|
|
366
|
+
for (let i = startIndex; i < lines.length; i++) {
|
|
367
|
+
const line = lines[i];
|
|
368
|
+
if (line.includes('{')) {
|
|
369
|
+
braceCount += (line.match(/\{/g) || []).length;
|
|
370
|
+
started = true;
|
|
371
|
+
}
|
|
372
|
+
if (line.includes('}')) {
|
|
373
|
+
braceCount -= (line.match(/\}/g) || []).length;
|
|
374
|
+
}
|
|
375
|
+
if (started && braceCount === 0)
|
|
376
|
+
return i + 1;
|
|
377
|
+
}
|
|
378
|
+
return lines.length;
|
|
379
|
+
}
|
|
380
|
+
getBraceBlockContent(lines, startIndex) {
|
|
381
|
+
const end = this.findBraceBlockEnd(lines, startIndex);
|
|
382
|
+
return lines.slice(startIndex, end).join('\n');
|
|
383
|
+
}
|
|
384
|
+
/**
|
|
385
|
+
* Extract patterns from Python files using regex.
|
|
386
|
+
*/
|
|
387
|
+
extractPythonPatterns(filePath, content) {
|
|
388
|
+
const patterns = [];
|
|
389
|
+
const relativePath = path.relative(this.rootDir, filePath);
|
|
390
|
+
const lines = content.split('\n');
|
|
391
|
+
// Regex for Class definitions
|
|
392
|
+
const classRegex = /^class\s+([A-Za-z_][A-Za-z0-9_]*)\s*(\([^)]*\))?\s*:/;
|
|
393
|
+
// Regex for Function definitions (including async)
|
|
394
|
+
const funcRegex = /^(?:async\s+)?def\s+([A-Za-z_][A-Za-z0-9_]*)\s*\(([^)]*)\)\s*(?:->\s*[^:]+)?\s*:/;
|
|
395
|
+
// Regex for Constants (Top-level UPPER_CASE variables)
|
|
396
|
+
const constRegex = /^([A-Z][A-Z0-9_]*)\s*=\s*(.+)$/;
|
|
397
|
+
for (let i = 0; i < lines.length; i++) {
|
|
398
|
+
const lineContent = lines[i].trim();
|
|
399
|
+
const originalLine = lines[i];
|
|
400
|
+
const lineNum = i + 1;
|
|
401
|
+
// Classes
|
|
402
|
+
const classMatch = originalLine.match(classRegex);
|
|
403
|
+
if (classMatch) {
|
|
404
|
+
const name = classMatch[1];
|
|
405
|
+
if (name.length >= this.config.minNameLength) {
|
|
406
|
+
patterns.push(this.createPatternEntry({
|
|
407
|
+
type: this.detectPythonClassType(name),
|
|
408
|
+
name,
|
|
409
|
+
file: relativePath,
|
|
410
|
+
line: lineNum,
|
|
411
|
+
endLine: this.findPythonBlockEnd(lines, i),
|
|
412
|
+
signature: `class ${name}${classMatch[2] || ''}`,
|
|
413
|
+
description: this.getPythonDocstring(lines, i + 1),
|
|
414
|
+
keywords: this.extractKeywords(name),
|
|
415
|
+
content: this.getPythonBlockContent(lines, i),
|
|
416
|
+
exported: !name.startsWith('_')
|
|
417
|
+
}));
|
|
418
|
+
continue;
|
|
419
|
+
}
|
|
420
|
+
}
|
|
421
|
+
// Functions
|
|
422
|
+
const funcMatch = originalLine.match(funcRegex);
|
|
423
|
+
if (funcMatch) {
|
|
424
|
+
const name = funcMatch[1];
|
|
425
|
+
if (name.length >= this.config.minNameLength) {
|
|
426
|
+
patterns.push(this.createPatternEntry({
|
|
427
|
+
type: this.detectPythonFunctionType(name),
|
|
428
|
+
name,
|
|
429
|
+
file: relativePath,
|
|
430
|
+
line: lineNum,
|
|
431
|
+
endLine: this.findPythonBlockEnd(lines, i),
|
|
432
|
+
signature: `def ${name}(${funcMatch[2]})`,
|
|
433
|
+
description: this.getPythonDocstring(lines, i + 1),
|
|
434
|
+
keywords: this.extractKeywords(name),
|
|
435
|
+
content: this.getPythonBlockContent(lines, i),
|
|
436
|
+
exported: !name.startsWith('_')
|
|
437
|
+
}));
|
|
438
|
+
continue;
|
|
439
|
+
}
|
|
440
|
+
}
|
|
441
|
+
// Constants
|
|
442
|
+
const constMatch = originalLine.match(constRegex);
|
|
443
|
+
if (constMatch) {
|
|
444
|
+
const name = constMatch[1];
|
|
445
|
+
if (name.length >= this.config.minNameLength) {
|
|
446
|
+
patterns.push(this.createPatternEntry({
|
|
447
|
+
type: 'constant',
|
|
448
|
+
name,
|
|
449
|
+
file: relativePath,
|
|
450
|
+
line: lineNum,
|
|
451
|
+
endLine: lineNum,
|
|
452
|
+
signature: `${name} = ...`,
|
|
453
|
+
description: '',
|
|
454
|
+
keywords: this.extractKeywords(name),
|
|
455
|
+
content: originalLine,
|
|
456
|
+
exported: !name.startsWith('_')
|
|
457
|
+
}));
|
|
458
|
+
}
|
|
459
|
+
}
|
|
460
|
+
}
|
|
461
|
+
return patterns;
|
|
462
|
+
}
|
|
463
|
+
detectPythonClassType(name) {
|
|
464
|
+
if (name.endsWith('Error') || name.endsWith('Exception'))
|
|
465
|
+
return 'error';
|
|
466
|
+
if (name.endsWith('Model'))
|
|
467
|
+
return 'model';
|
|
468
|
+
if (name.endsWith('Schema'))
|
|
469
|
+
return 'schema';
|
|
470
|
+
return 'class';
|
|
471
|
+
}
|
|
472
|
+
detectPythonFunctionType(name) {
|
|
473
|
+
if (name.startsWith('test_'))
|
|
474
|
+
return 'function'; // Tests are filtered by indexTests config
|
|
475
|
+
if (name.includes('middleware'))
|
|
476
|
+
return 'middleware';
|
|
477
|
+
if (name.includes('handler'))
|
|
478
|
+
return 'handler';
|
|
479
|
+
return 'function';
|
|
480
|
+
}
|
|
481
|
+
getPythonDocstring(lines, startIndex) {
|
|
482
|
+
if (startIndex >= lines.length)
|
|
483
|
+
return '';
|
|
484
|
+
const nextLine = lines[startIndex].trim();
|
|
485
|
+
if (nextLine.startsWith('"""') || nextLine.startsWith("'''")) {
|
|
486
|
+
const quote = nextLine.startsWith('"""') ? '"""' : "'''";
|
|
487
|
+
let doc = nextLine.replace(quote, '');
|
|
488
|
+
if (doc.endsWith(quote))
|
|
489
|
+
return doc.replace(quote, '').trim();
|
|
490
|
+
for (let i = startIndex + 1; i < lines.length; i++) {
|
|
491
|
+
if (lines[i].includes(quote)) {
|
|
492
|
+
doc += ' ' + lines[i].split(quote)[0].trim();
|
|
493
|
+
break;
|
|
494
|
+
}
|
|
495
|
+
doc += ' ' + lines[i].trim();
|
|
496
|
+
}
|
|
497
|
+
return doc.trim();
|
|
498
|
+
}
|
|
499
|
+
return '';
|
|
500
|
+
}
|
|
501
|
+
findPythonBlockEnd(lines, startIndex) {
|
|
502
|
+
const startIndent = lines[startIndex].search(/\S/);
|
|
503
|
+
for (let i = startIndex + 1; i < lines.length; i++) {
|
|
504
|
+
if (lines[i].trim() === '')
|
|
505
|
+
continue;
|
|
506
|
+
const currentIndent = lines[i].search(/\S/);
|
|
507
|
+
if (currentIndent <= startIndent)
|
|
508
|
+
return i;
|
|
509
|
+
}
|
|
510
|
+
return lines.length;
|
|
511
|
+
}
|
|
512
|
+
getPythonBlockContent(lines, startIndex) {
|
|
513
|
+
const endLine = this.findPythonBlockEnd(lines, startIndex);
|
|
514
|
+
return lines.slice(startIndex, endLine).join('\n');
|
|
515
|
+
}
|
|
198
516
|
/**
|
|
199
517
|
* Convert an AST node to a PatternEntry if applicable.
|
|
200
518
|
*/
|
package/package.json
CHANGED
|
@@ -22,9 +22,24 @@ import { generateEmbedding } from './embeddings.js';
|
|
|
22
22
|
|
|
23
23
|
/** Default configuration for the indexer */
|
|
24
24
|
const DEFAULT_CONFIG: PatternIndexConfig = {
|
|
25
|
-
include: ['src/**/*', 'lib/**/*', 'app/**/*', 'components/**/*', 'utils/**/*', 'hooks/**/*'],
|
|
26
|
-
exclude: [
|
|
27
|
-
|
|
25
|
+
include: ['src/**/*', 'lib/**/*', 'app/**/*', 'components/**/*', 'utils/**/*', 'hooks/**/*', '**/tests/**/*', '**/test/**/*'],
|
|
26
|
+
exclude: [
|
|
27
|
+
'**/node_modules/**',
|
|
28
|
+
'**/dist/**',
|
|
29
|
+
'**/build/**',
|
|
30
|
+
'**/.git/**',
|
|
31
|
+
'**/coverage/**',
|
|
32
|
+
'**/venv/**',
|
|
33
|
+
'**/.venv/**',
|
|
34
|
+
'**/__pycache__/**',
|
|
35
|
+
'**/site-packages/**',
|
|
36
|
+
'**/.pytest_cache/**',
|
|
37
|
+
'**/target/**', // Rust build dir
|
|
38
|
+
'**/bin/**',
|
|
39
|
+
'**/.gradle/**',
|
|
40
|
+
'**/.mvn/**'
|
|
41
|
+
],
|
|
42
|
+
extensions: ['.ts', '.tsx', '.js', '.jsx', '.py', '.go', '.rs', '.java', '.cpp', '.h', '.rb', '.php', '.cs', '.kt'],
|
|
28
43
|
indexTests: false,
|
|
29
44
|
indexNodeModules: false,
|
|
30
45
|
minNameLength: 2,
|
|
@@ -226,32 +241,346 @@ export class PatternIndexer {
|
|
|
226
241
|
* Extract patterns from a single file using TypeScript AST.
|
|
227
242
|
*/
|
|
228
243
|
private async extractPatterns(filePath: string, content: string): Promise<PatternEntry[]> {
|
|
244
|
+
const ext = path.extname(filePath).toLowerCase();
|
|
245
|
+
|
|
246
|
+
// Specific high-fidelity extractors
|
|
247
|
+
if (ext === '.py') return this.extractPythonPatterns(filePath, content);
|
|
248
|
+
if (ext === '.go') return this.extractGoPatterns(filePath, content);
|
|
249
|
+
if (ext === '.rs') return this.extractRustPatterns(filePath, content);
|
|
250
|
+
if (ext === '.java' || ext === '.kt' || ext === '.cs') return this.extractJVMStylePatterns(filePath, content);
|
|
251
|
+
|
|
252
|
+
// Fallback for TS/JS or other C-style languages
|
|
229
253
|
const patterns: PatternEntry[] = [];
|
|
230
254
|
const relativePath = path.relative(this.rootDir, filePath);
|
|
231
255
|
|
|
232
|
-
//
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
256
|
+
// For TS/JS, use AST
|
|
257
|
+
if (['.ts', '.tsx', '.js', '.jsx'].includes(ext)) {
|
|
258
|
+
const sourceFile = ts.createSourceFile(
|
|
259
|
+
filePath,
|
|
260
|
+
content,
|
|
261
|
+
ts.ScriptTarget.Latest,
|
|
262
|
+
true,
|
|
263
|
+
this.getScriptKind(filePath)
|
|
264
|
+
);
|
|
265
|
+
|
|
266
|
+
const visit = (node: ts.Node) => {
|
|
267
|
+
const pattern = this.nodeToPattern(node, sourceFile, relativePath, content);
|
|
268
|
+
if (pattern) patterns.push(pattern);
|
|
269
|
+
ts.forEachChild(node, visit);
|
|
270
|
+
};
|
|
271
|
+
visit(sourceFile);
|
|
272
|
+
return patterns;
|
|
273
|
+
}
|
|
240
274
|
|
|
241
|
-
//
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
275
|
+
// Generic C-style fallback (C++, PHP, etc.)
|
|
276
|
+
return this.extractGenericCPatterns(filePath, content);
|
|
277
|
+
}
|
|
278
|
+
|
|
279
|
+
/**
|
|
280
|
+
* Extract patterns from Go files.
|
|
281
|
+
*/
|
|
282
|
+
private extractGoPatterns(filePath: string, content: string): PatternEntry[] {
|
|
283
|
+
const patterns: PatternEntry[] = [];
|
|
284
|
+
const relativePath = path.relative(this.rootDir, filePath);
|
|
285
|
+
const lines = content.split('\n');
|
|
286
|
+
|
|
287
|
+
const funcRegex = /^func\s+(?:\([^)]*\)\s+)?([A-Za-z_][A-Za-z0-9_]*)\s*\(([^)]*)\)\s*([^\{]*)\s*\{/;
|
|
288
|
+
const typeRegex = /^type\s+([A-Za-z_][A-Za-z0-9_]*)\s+(struct|interface)/;
|
|
289
|
+
|
|
290
|
+
for (let i = 0; i < lines.length; i++) {
|
|
291
|
+
const line = lines[i];
|
|
292
|
+
|
|
293
|
+
// Functions
|
|
294
|
+
const funcMatch = line.match(funcRegex);
|
|
295
|
+
if (funcMatch) {
|
|
296
|
+
const name = funcMatch[1];
|
|
297
|
+
patterns.push(this.createPatternEntry({
|
|
298
|
+
type: 'function',
|
|
299
|
+
name,
|
|
300
|
+
file: relativePath,
|
|
301
|
+
line: i + 1,
|
|
302
|
+
endLine: this.findBraceBlockEnd(lines, i),
|
|
303
|
+
signature: `func ${name}(${funcMatch[2]}) ${funcMatch[3].trim()}`,
|
|
304
|
+
description: this.getCOMLineComments(lines, i - 1),
|
|
305
|
+
keywords: this.extractKeywords(name),
|
|
306
|
+
content: this.getBraceBlockContent(lines, i),
|
|
307
|
+
exported: /^[A-Z]/.test(name)
|
|
308
|
+
}));
|
|
309
|
+
}
|
|
310
|
+
|
|
311
|
+
// Types/Structs
|
|
312
|
+
const typeMatch = line.match(typeRegex);
|
|
313
|
+
if (typeMatch) {
|
|
314
|
+
const name = typeMatch[1];
|
|
315
|
+
patterns.push(this.createPatternEntry({
|
|
316
|
+
type: typeMatch[2] as any,
|
|
317
|
+
name,
|
|
318
|
+
file: relativePath,
|
|
319
|
+
line: i + 1,
|
|
320
|
+
endLine: this.findBraceBlockEnd(lines, i),
|
|
321
|
+
signature: `type ${name} ${typeMatch[2]}`,
|
|
322
|
+
description: this.getCOMLineComments(lines, i - 1),
|
|
323
|
+
keywords: this.extractKeywords(name),
|
|
324
|
+
content: this.getBraceBlockContent(lines, i),
|
|
325
|
+
exported: /^[A-Z]/.test(name)
|
|
326
|
+
}));
|
|
327
|
+
}
|
|
328
|
+
}
|
|
329
|
+
return patterns;
|
|
330
|
+
}
|
|
331
|
+
|
|
332
|
+
/**
|
|
333
|
+
* Extract patterns from Rust files.
|
|
334
|
+
*/
|
|
335
|
+
private extractRustPatterns(filePath: string, content: string): PatternEntry[] {
|
|
336
|
+
const patterns: PatternEntry[] = [];
|
|
337
|
+
const relativePath = path.relative(this.rootDir, filePath);
|
|
338
|
+
const lines = content.split('\n');
|
|
339
|
+
|
|
340
|
+
const fnRegex = /^(?:pub\s+)?(?:async\s+)?fn\s+([A-Za-z_][A-Za-z0-9_]*)\s*[<(][^)]*[>)]\s*(?:->\s*[^\{]+)?\s*\{/;
|
|
341
|
+
const typeRegex = /^(?:pub\s+)?(struct|enum|trait)\s+([A-Za-z_][A-Za-z0-9_]*)/;
|
|
342
|
+
|
|
343
|
+
for (let i = 0; i < lines.length; i++) {
|
|
344
|
+
const line = lines[i];
|
|
345
|
+
|
|
346
|
+
const fnMatch = line.match(fnRegex);
|
|
347
|
+
if (fnMatch) {
|
|
348
|
+
const name = fnMatch[1];
|
|
349
|
+
patterns.push(this.createPatternEntry({
|
|
350
|
+
type: 'function',
|
|
351
|
+
name,
|
|
352
|
+
file: relativePath,
|
|
353
|
+
line: i + 1,
|
|
354
|
+
endLine: this.findBraceBlockEnd(lines, i),
|
|
355
|
+
signature: line.split('{')[0].trim(),
|
|
356
|
+
description: this.getCOMLineComments(lines, i - 1),
|
|
357
|
+
keywords: this.extractKeywords(name),
|
|
358
|
+
content: this.getBraceBlockContent(lines, i),
|
|
359
|
+
exported: line.startsWith('pub')
|
|
360
|
+
}));
|
|
361
|
+
}
|
|
362
|
+
}
|
|
363
|
+
return patterns;
|
|
364
|
+
}
|
|
365
|
+
|
|
366
|
+
/**
|
|
367
|
+
* Generic extraction for C-style languages (Java, C++, PHP, etc.)
|
|
368
|
+
*/
|
|
369
|
+
private extractJVMStylePatterns(filePath: string, content: string): PatternEntry[] {
|
|
370
|
+
const patterns: PatternEntry[] = [];
|
|
371
|
+
const relativePath = path.relative(this.rootDir, filePath);
|
|
372
|
+
const lines = content.split('\n');
|
|
373
|
+
|
|
374
|
+
// Simplified for classes and methods
|
|
375
|
+
const classRegex = /^(?:public|private|protected|internal)?\s*(?:static\s+)?(?:final\s+)?(?:class|interface|enum)\s+([A-Za-z0-9_]+)/;
|
|
376
|
+
const methodRegex = /^(?:public|private|protected|internal)\s+(?:static\s+)?(?:async\s+)?(?:[A-Za-z0-9_<>\[\]]+\s+)([A-Za-z0-9_]+)\s*\(/;
|
|
377
|
+
|
|
378
|
+
for (let i = 0; i < lines.length; i++) {
|
|
379
|
+
const line = lines[i].trim();
|
|
380
|
+
|
|
381
|
+
const classMatch = line.match(classRegex);
|
|
382
|
+
if (classMatch) {
|
|
383
|
+
patterns.push(this.createPatternEntry({
|
|
384
|
+
type: 'class',
|
|
385
|
+
name: classMatch[1],
|
|
386
|
+
file: relativePath,
|
|
387
|
+
line: i + 1,
|
|
388
|
+
endLine: this.findBraceBlockEnd(lines, i),
|
|
389
|
+
signature: line,
|
|
390
|
+
description: this.getJavaDoc(lines, i - 1),
|
|
391
|
+
keywords: this.extractKeywords(classMatch[1]),
|
|
392
|
+
content: this.getBraceBlockContent(lines, i),
|
|
393
|
+
exported: line.includes('public')
|
|
394
|
+
}));
|
|
395
|
+
}
|
|
396
|
+
}
|
|
397
|
+
return patterns;
|
|
398
|
+
}
|
|
399
|
+
|
|
400
|
+
private extractGenericCPatterns(filePath: string, content: string): PatternEntry[] {
|
|
401
|
+
// Fallback for everything else
|
|
402
|
+
return [];
|
|
403
|
+
}
|
|
404
|
+
|
|
405
|
+
private getCOMLineComments(lines: string[], startIndex: number): string {
|
|
406
|
+
let comments = [];
|
|
407
|
+
for (let i = startIndex; i >= 0; i--) {
|
|
408
|
+
const line = lines[i].trim();
|
|
409
|
+
if (line.startsWith('//')) comments.unshift(line.replace('//', '').trim());
|
|
410
|
+
else break;
|
|
411
|
+
}
|
|
412
|
+
return comments.join(' ');
|
|
413
|
+
}
|
|
414
|
+
|
|
415
|
+
private getJavaDoc(lines: string[], startIndex: number): string {
|
|
416
|
+
let comments = [];
|
|
417
|
+
let inDoc = false;
|
|
418
|
+
for (let i = startIndex; i >= 0; i--) {
|
|
419
|
+
const line = lines[i].trim();
|
|
420
|
+
if (line.endsWith('*/')) inDoc = true;
|
|
421
|
+
if (inDoc) comments.unshift(line.replace('/**', '').replace('*/', '').replace('*', '').trim());
|
|
422
|
+
if (line.startsWith('/**')) break;
|
|
423
|
+
}
|
|
424
|
+
return comments.join(' ');
|
|
425
|
+
}
|
|
426
|
+
|
|
427
|
+
private findBraceBlockEnd(lines: string[], startIndex: number): number {
|
|
428
|
+
let braceCount = 0;
|
|
429
|
+
let started = false;
|
|
430
|
+
for (let i = startIndex; i < lines.length; i++) {
|
|
431
|
+
const line = lines[i];
|
|
432
|
+
if (line.includes('{')) {
|
|
433
|
+
braceCount += (line.match(/\{/g) || []).length;
|
|
434
|
+
started = true;
|
|
435
|
+
}
|
|
436
|
+
if (line.includes('}')) {
|
|
437
|
+
braceCount -= (line.match(/\}/g) || []).length;
|
|
438
|
+
}
|
|
439
|
+
if (started && braceCount === 0) return i + 1;
|
|
440
|
+
}
|
|
441
|
+
return lines.length;
|
|
442
|
+
}
|
|
443
|
+
|
|
444
|
+
private getBraceBlockContent(lines: string[], startIndex: number): string {
|
|
445
|
+
const end = this.findBraceBlockEnd(lines, startIndex);
|
|
446
|
+
return lines.slice(startIndex, end).join('\n');
|
|
447
|
+
}
|
|
448
|
+
|
|
449
|
+
/**
|
|
450
|
+
* Extract patterns from Python files using regex.
|
|
451
|
+
*/
|
|
452
|
+
private extractPythonPatterns(filePath: string, content: string): PatternEntry[] {
|
|
453
|
+
const patterns: PatternEntry[] = [];
|
|
454
|
+
const relativePath = path.relative(this.rootDir, filePath);
|
|
455
|
+
const lines = content.split('\n');
|
|
456
|
+
|
|
457
|
+
// Regex for Class definitions
|
|
458
|
+
const classRegex = /^class\s+([A-Za-z_][A-Za-z0-9_]*)\s*(\([^)]*\))?\s*:/;
|
|
459
|
+
// Regex for Function definitions (including async)
|
|
460
|
+
const funcRegex = /^(?:async\s+)?def\s+([A-Za-z_][A-Za-z0-9_]*)\s*\(([^)]*)\)\s*(?:->\s*[^:]+)?\s*:/;
|
|
461
|
+
// Regex for Constants (Top-level UPPER_CASE variables)
|
|
462
|
+
const constRegex = /^([A-Z][A-Z0-9_]*)\s*=\s*(.+)$/;
|
|
463
|
+
|
|
464
|
+
for (let i = 0; i < lines.length; i++) {
|
|
465
|
+
const lineContent = lines[i].trim();
|
|
466
|
+
const originalLine = lines[i];
|
|
467
|
+
const lineNum = i + 1;
|
|
468
|
+
|
|
469
|
+
// Classes
|
|
470
|
+
const classMatch = originalLine.match(classRegex);
|
|
471
|
+
if (classMatch) {
|
|
472
|
+
const name = classMatch[1];
|
|
473
|
+
if (name.length >= this.config.minNameLength) {
|
|
474
|
+
patterns.push(this.createPatternEntry({
|
|
475
|
+
type: this.detectPythonClassType(name),
|
|
476
|
+
name,
|
|
477
|
+
file: relativePath,
|
|
478
|
+
line: lineNum,
|
|
479
|
+
endLine: this.findPythonBlockEnd(lines, i),
|
|
480
|
+
signature: `class ${name}${classMatch[2] || ''}`,
|
|
481
|
+
description: this.getPythonDocstring(lines, i + 1),
|
|
482
|
+
keywords: this.extractKeywords(name),
|
|
483
|
+
content: this.getPythonBlockContent(lines, i),
|
|
484
|
+
exported: !name.startsWith('_')
|
|
485
|
+
}));
|
|
486
|
+
continue;
|
|
487
|
+
}
|
|
488
|
+
}
|
|
489
|
+
|
|
490
|
+
// Functions
|
|
491
|
+
const funcMatch = originalLine.match(funcRegex);
|
|
492
|
+
if (funcMatch) {
|
|
493
|
+
const name = funcMatch[1];
|
|
494
|
+
if (name.length >= this.config.minNameLength) {
|
|
495
|
+
patterns.push(this.createPatternEntry({
|
|
496
|
+
type: this.detectPythonFunctionType(name),
|
|
497
|
+
name,
|
|
498
|
+
file: relativePath,
|
|
499
|
+
line: lineNum,
|
|
500
|
+
endLine: this.findPythonBlockEnd(lines, i),
|
|
501
|
+
signature: `def ${name}(${funcMatch[2]})`,
|
|
502
|
+
description: this.getPythonDocstring(lines, i + 1),
|
|
503
|
+
keywords: this.extractKeywords(name),
|
|
504
|
+
content: this.getPythonBlockContent(lines, i),
|
|
505
|
+
exported: !name.startsWith('_')
|
|
506
|
+
}));
|
|
507
|
+
continue;
|
|
508
|
+
}
|
|
246
509
|
}
|
|
247
|
-
ts.forEachChild(node, visit);
|
|
248
|
-
};
|
|
249
510
|
|
|
250
|
-
|
|
511
|
+
// Constants
|
|
512
|
+
const constMatch = originalLine.match(constRegex);
|
|
513
|
+
if (constMatch) {
|
|
514
|
+
const name = constMatch[1];
|
|
515
|
+
if (name.length >= this.config.minNameLength) {
|
|
516
|
+
patterns.push(this.createPatternEntry({
|
|
517
|
+
type: 'constant',
|
|
518
|
+
name,
|
|
519
|
+
file: relativePath,
|
|
520
|
+
line: lineNum,
|
|
521
|
+
endLine: lineNum,
|
|
522
|
+
signature: `${name} = ...`,
|
|
523
|
+
description: '',
|
|
524
|
+
keywords: this.extractKeywords(name),
|
|
525
|
+
content: originalLine,
|
|
526
|
+
exported: !name.startsWith('_')
|
|
527
|
+
}));
|
|
528
|
+
}
|
|
529
|
+
}
|
|
530
|
+
}
|
|
251
531
|
|
|
252
532
|
return patterns;
|
|
253
533
|
}
|
|
254
534
|
|
|
535
|
+
private detectPythonClassType(name: string): PatternType {
|
|
536
|
+
if (name.endsWith('Error') || name.endsWith('Exception')) return 'error';
|
|
537
|
+
if (name.endsWith('Model')) return 'model';
|
|
538
|
+
if (name.endsWith('Schema')) return 'schema';
|
|
539
|
+
return 'class';
|
|
540
|
+
}
|
|
541
|
+
|
|
542
|
+
private detectPythonFunctionType(name: string): PatternType {
|
|
543
|
+
if (name.startsWith('test_')) return 'function'; // Tests are filtered by indexTests config
|
|
544
|
+
if (name.includes('middleware')) return 'middleware';
|
|
545
|
+
if (name.includes('handler')) return 'handler';
|
|
546
|
+
return 'function';
|
|
547
|
+
}
|
|
548
|
+
|
|
549
|
+
private getPythonDocstring(lines: string[], startIndex: number): string {
|
|
550
|
+
if (startIndex >= lines.length) return '';
|
|
551
|
+
const nextLine = lines[startIndex].trim();
|
|
552
|
+
if (nextLine.startsWith('"""') || nextLine.startsWith("'''")) {
|
|
553
|
+
const quote = nextLine.startsWith('"""') ? '"""' : "'''";
|
|
554
|
+
let doc = nextLine.replace(quote, '');
|
|
555
|
+
if (doc.endsWith(quote)) return doc.replace(quote, '').trim();
|
|
556
|
+
|
|
557
|
+
for (let i = startIndex + 1; i < lines.length; i++) {
|
|
558
|
+
if (lines[i].includes(quote)) {
|
|
559
|
+
doc += ' ' + lines[i].split(quote)[0].trim();
|
|
560
|
+
break;
|
|
561
|
+
}
|
|
562
|
+
doc += ' ' + lines[i].trim();
|
|
563
|
+
}
|
|
564
|
+
return doc.trim();
|
|
565
|
+
}
|
|
566
|
+
return '';
|
|
567
|
+
}
|
|
568
|
+
|
|
569
|
+
private findPythonBlockEnd(lines: string[], startIndex: number): number {
|
|
570
|
+
const startIndent = lines[startIndex].search(/\S/);
|
|
571
|
+
for (let i = startIndex + 1; i < lines.length; i++) {
|
|
572
|
+
if (lines[i].trim() === '') continue;
|
|
573
|
+
const currentIndent = lines[i].search(/\S/);
|
|
574
|
+
if (currentIndent <= startIndent) return i;
|
|
575
|
+
}
|
|
576
|
+
return lines.length;
|
|
577
|
+
}
|
|
578
|
+
|
|
579
|
+
private getPythonBlockContent(lines: string[], startIndex: number): string {
|
|
580
|
+
const endLine = this.findPythonBlockEnd(lines, startIndex);
|
|
581
|
+
return lines.slice(startIndex, endLine).join('\n');
|
|
582
|
+
}
|
|
583
|
+
|
|
255
584
|
/**
|
|
256
585
|
* Convert an AST node to a PatternEntry if applicable.
|
|
257
586
|
*/
|