@rigour-labs/core 2.10.0 → 2.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,3 +9,4 @@ export { PatternMatcher, checkPatternDuplicate, type MatcherConfig } from './mat
9
9
  export { StalenessDetector, checkCodeStaleness } from './staleness.js';
10
10
  export { SecurityDetector } from './security.js';
11
11
  export { OverrideManager, loadConfigOverrides } from './overrides.js';
12
+ export { generateEmbedding, semanticSearch, cosineSimilarity } from './embeddings.js';
@@ -13,3 +13,5 @@ export { StalenessDetector, checkCodeStaleness } from './staleness.js';
13
13
  export { SecurityDetector } from './security.js';
14
14
  // Override Management
15
15
  export { OverrideManager, loadConfigOverrides } from './overrides.js';
16
+ // Embeddings
17
+ export { generateEmbedding, semanticSearch, cosineSimilarity } from './embeddings.js';
@@ -26,6 +26,32 @@ export declare class PatternIndexer {
26
26
  * Extract patterns from a single file, choosing an extractor by file extension (TypeScript AST for TS/JS).
27
27
  */
28
28
  private extractPatterns;
29
+ /**
30
+ * Extract patterns from Go files.
31
+ */
32
+ private extractGoPatterns;
33
+ /**
34
+ * Extract patterns from Rust files.
35
+ */
36
+ private extractRustPatterns;
37
+ /**
38
+ * Extract patterns from Java, Kotlin and C# files.
39
+ */
40
+ private extractJVMStylePatterns;
41
+ private extractGenericCPatterns;
42
+ private getCOMLineComments;
43
+ private getJavaDoc;
44
+ private findBraceBlockEnd;
45
+ private getBraceBlockContent;
46
+ /**
47
+ * Extract patterns from Python files using regex.
48
+ */
49
+ private extractPythonPatterns;
50
+ private detectPythonClassType;
51
+ private detectPythonFunctionType;
52
+ private getPythonDocstring;
53
+ private findPythonBlockEnd;
54
+ private getPythonBlockContent;
29
55
  /**
30
56
  * Convert an AST node to a PatternEntry if applicable.
31
57
  */
@@ -12,9 +12,24 @@ import ts from 'typescript';
12
12
  import { generateEmbedding } from './embeddings.js';
13
13
  /** Default configuration for the indexer */
14
14
  const DEFAULT_CONFIG = {
15
- include: ['src/**/*', 'lib/**/*', 'app/**/*', 'components/**/*', 'utils/**/*', 'hooks/**/*'],
16
- exclude: ['**/node_modules/**', '**/dist/**', '**/build/**', '**/.git/**', '**/coverage/**'],
17
- extensions: ['.ts', '.tsx', '.js', '.jsx'],
15
+ include: ['src/**/*', 'lib/**/*', 'app/**/*', 'components/**/*', 'utils/**/*', 'hooks/**/*', '**/tests/**/*', '**/test/**/*'],
16
+ exclude: [
17
+ '**/node_modules/**',
18
+ '**/dist/**',
19
+ '**/build/**',
20
+ '**/.git/**',
21
+ '**/coverage/**',
22
+ '**/venv/**',
23
+ '**/.venv/**',
24
+ '**/__pycache__/**',
25
+ '**/site-packages/**',
26
+ '**/.pytest_cache/**',
27
+ '**/target/**', // Rust build dir
28
+ '**/bin/**',
29
+ '**/.gradle/**',
30
+ '**/.mvn/**'
31
+ ],
32
+ extensions: ['.ts', '.tsx', '.js', '.jsx', '.py', '.go', '.rs', '.java', '.cpp', '.h', '.rb', '.php', '.cs', '.kt'],
18
33
  indexTests: false,
19
34
  indexNodeModules: false,
20
35
  minNameLength: 2,
@@ -180,21 +195,324 @@ export class PatternIndexer {
180
195
  * Extract patterns from a single file, choosing an extractor by file extension (TypeScript AST for TS/JS).
181
196
  */
182
197
  async extractPatterns(filePath, content) {
198
+ const ext = path.extname(filePath).toLowerCase();
199
+ // Specific high-fidelity extractors
200
+ if (ext === '.py')
201
+ return this.extractPythonPatterns(filePath, content);
202
+ if (ext === '.go')
203
+ return this.extractGoPatterns(filePath, content);
204
+ if (ext === '.rs')
205
+ return this.extractRustPatterns(filePath, content);
206
+ if (ext === '.java' || ext === '.kt' || ext === '.cs')
207
+ return this.extractJVMStylePatterns(filePath, content);
208
+ // Fallback for TS/JS or other C-style languages
183
209
  const patterns = [];
184
210
  const relativePath = path.relative(this.rootDir, filePath);
185
- // Parse with TypeScript
186
- const sourceFile = ts.createSourceFile(filePath, content, ts.ScriptTarget.Latest, true, this.getScriptKind(filePath));
187
- // Walk the AST
188
- const visit = (node) => {
189
- const pattern = this.nodeToPattern(node, sourceFile, relativePath, content);
190
- if (pattern) {
191
- patterns.push(pattern);
211
+ // For TS/JS, use AST
212
+ if (['.ts', '.tsx', '.js', '.jsx'].includes(ext)) {
213
+ const sourceFile = ts.createSourceFile(filePath, content, ts.ScriptTarget.Latest, true, this.getScriptKind(filePath));
214
+ const visit = (node) => {
215
+ const pattern = this.nodeToPattern(node, sourceFile, relativePath, content);
216
+ if (pattern)
217
+ patterns.push(pattern);
218
+ ts.forEachChild(node, visit);
219
+ };
220
+ visit(sourceFile);
221
+ return patterns;
222
+ }
223
+ // Generic C-style fallback (C++, PHP, etc.)
224
+ return this.extractGenericCPatterns(filePath, content);
225
+ }
226
+ /**
227
+ * Extract patterns from Go files.
228
+ */
229
+ extractGoPatterns(filePath, content) {
230
+ const patterns = [];
231
+ const relativePath = path.relative(this.rootDir, filePath);
232
+ const lines = content.split('\n');
233
+ const funcRegex = /^func\s+(?:\([^)]*\)\s+)?([A-Za-z_][A-Za-z0-9_]*)\s*\(([^)]*)\)\s*([^\{]*)\s*\{/;
234
+ const typeRegex = /^type\s+([A-Za-z_][A-Za-z0-9_]*)\s+(struct|interface)/;
235
+ for (let i = 0; i < lines.length; i++) {
236
+ const line = lines[i];
237
+ // Functions
238
+ const funcMatch = line.match(funcRegex);
239
+ if (funcMatch) {
240
+ const name = funcMatch[1];
241
+ patterns.push(this.createPatternEntry({
242
+ type: 'function',
243
+ name,
244
+ file: relativePath,
245
+ line: i + 1,
246
+ endLine: this.findBraceBlockEnd(lines, i),
247
+ signature: `func ${name}(${funcMatch[2]}) ${funcMatch[3].trim()}`,
248
+ description: this.getCOMLineComments(lines, i - 1),
249
+ keywords: this.extractKeywords(name),
250
+ content: this.getBraceBlockContent(lines, i),
251
+ exported: /^[A-Z]/.test(name)
252
+ }));
192
253
  }
193
- ts.forEachChild(node, visit);
194
- };
195
- visit(sourceFile);
254
+ // Types/Structs
255
+ const typeMatch = line.match(typeRegex);
256
+ if (typeMatch) {
257
+ const name = typeMatch[1];
258
+ patterns.push(this.createPatternEntry({
259
+ type: typeMatch[2],
260
+ name,
261
+ file: relativePath,
262
+ line: i + 1,
263
+ endLine: this.findBraceBlockEnd(lines, i),
264
+ signature: `type ${name} ${typeMatch[2]}`,
265
+ description: this.getCOMLineComments(lines, i - 1),
266
+ keywords: this.extractKeywords(name),
267
+ content: this.getBraceBlockContent(lines, i),
268
+ exported: /^[A-Z]/.test(name)
269
+ }));
270
+ }
271
+ }
196
272
  return patterns;
197
273
  }
274
+ /**
275
+ * Extract patterns from Rust files.
276
+ */
277
+ extractRustPatterns(filePath, content) {
278
+ const patterns = [];
279
+ const relativePath = path.relative(this.rootDir, filePath);
280
+ const lines = content.split('\n');
281
+ const fnRegex = /^(?:pub\s+)?(?:async\s+)?fn\s+([A-Za-z_][A-Za-z0-9_]*)\s*[<(][^)]*[>)]\s*(?:->\s*[^\{]+)?\s*\{/;
282
+ const typeRegex = /^(?:pub\s+)?(struct|enum|trait)\s+([A-Za-z_][A-Za-z0-9_]*)/;
283
+ for (let i = 0; i < lines.length; i++) {
284
+ const line = lines[i];
285
+ const fnMatch = line.match(fnRegex);
286
+ if (fnMatch) {
287
+ const name = fnMatch[1];
288
+ patterns.push(this.createPatternEntry({
289
+ type: 'function',
290
+ name,
291
+ file: relativePath,
292
+ line: i + 1,
293
+ endLine: this.findBraceBlockEnd(lines, i),
294
+ signature: line.split('{')[0].trim(),
295
+ description: this.getCOMLineComments(lines, i - 1),
296
+ keywords: this.extractKeywords(name),
297
+ content: this.getBraceBlockContent(lines, i),
298
+ exported: line.startsWith('pub')
299
+ }));
300
+ }
301
+ }
302
+ return patterns;
303
+ }
304
+ /**
305
+ * Generic extraction for C-style languages (Java, C++, PHP, etc.)
306
+ */
307
+ extractJVMStylePatterns(filePath, content) {
308
+ const patterns = [];
309
+ const relativePath = path.relative(this.rootDir, filePath);
310
+ const lines = content.split('\n');
311
+ // Simplified for classes and methods
312
+ const classRegex = /^(?:public|private|protected|internal)?\s*(?:static\s+)?(?:final\s+)?(?:class|interface|enum)\s+([A-Za-z0-9_]+)/;
313
+ const methodRegex = /^(?:public|private|protected|internal)\s+(?:static\s+)?(?:async\s+)?(?:[A-Za-z0-9_<>\[\]]+\s+)([A-Za-z0-9_]+)\s*\(/;
314
+ for (let i = 0; i < lines.length; i++) {
315
+ const line = lines[i].trim();
316
+ const classMatch = line.match(classRegex);
317
+ if (classMatch) {
318
+ patterns.push(this.createPatternEntry({
319
+ type: 'class',
320
+ name: classMatch[1],
321
+ file: relativePath,
322
+ line: i + 1,
323
+ endLine: this.findBraceBlockEnd(lines, i),
324
+ signature: line,
325
+ description: this.getJavaDoc(lines, i - 1),
326
+ keywords: this.extractKeywords(classMatch[1]),
327
+ content: this.getBraceBlockContent(lines, i),
328
+ exported: line.includes('public')
329
+ }));
330
+ }
331
+ }
332
+ return patterns;
333
+ }
334
+ extractGenericCPatterns(filePath, content) {
335
+ // Fallback for everything else
336
+ return [];
337
+ }
338
+ getCOMLineComments(lines, startIndex) {
339
+ let comments = [];
340
+ for (let i = startIndex; i >= 0; i--) {
341
+ const line = lines[i].trim();
342
+ if (line.startsWith('//'))
343
+ comments.unshift(line.replace('//', '').trim());
344
+ else
345
+ break;
346
+ }
347
+ return comments.join(' ');
348
+ }
349
+ getJavaDoc(lines, startIndex) {
350
+ let comments = [];
351
+ let inDoc = false;
352
+ for (let i = startIndex; i >= 0; i--) {
353
+ const line = lines[i].trim();
354
+ if (line.endsWith('*/'))
355
+ inDoc = true;
356
+ if (inDoc)
357
+ comments.unshift(line.replace('/**', '').replace('*/', '').replace('*', '').trim());
358
+ if (line.startsWith('/**'))
359
+ break;
360
+ }
361
+ return comments.join(' ');
362
+ }
363
+ findBraceBlockEnd(lines, startIndex) {
364
+ let braceCount = 0;
365
+ let started = false;
366
+ for (let i = startIndex; i < lines.length; i++) {
367
+ const line = lines[i];
368
+ if (line.includes('{')) {
369
+ braceCount += (line.match(/\{/g) || []).length;
370
+ started = true;
371
+ }
372
+ if (line.includes('}')) {
373
+ braceCount -= (line.match(/\}/g) || []).length;
374
+ }
375
+ if (started && braceCount === 0)
376
+ return i + 1;
377
+ }
378
+ return lines.length;
379
+ }
380
+ getBraceBlockContent(lines, startIndex) {
381
+ const end = this.findBraceBlockEnd(lines, startIndex);
382
+ return lines.slice(startIndex, end).join('\n');
383
+ }
384
+ /**
385
+ * Extract patterns from Python files using regex.
386
+ */
387
+ extractPythonPatterns(filePath, content) {
388
+ const patterns = [];
389
+ const relativePath = path.relative(this.rootDir, filePath);
390
+ const lines = content.split('\n');
391
+ // Regex for Class definitions
392
+ const classRegex = /^class\s+([A-Za-z_][A-Za-z0-9_]*)\s*(\([^)]*\))?\s*:/;
393
+ // Regex for Function definitions (including async)
394
+ const funcRegex = /^(?:async\s+)?def\s+([A-Za-z_][A-Za-z0-9_]*)\s*\(([^)]*)\)\s*(?:->\s*[^:]+)?\s*:/;
395
+ // Regex for Constants (Top-level UPPER_CASE variables)
396
+ const constRegex = /^([A-Z][A-Z0-9_]*)\s*=\s*(.+)$/;
397
+ for (let i = 0; i < lines.length; i++) {
398
+ const lineContent = lines[i].trim();
399
+ const originalLine = lines[i];
400
+ const lineNum = i + 1;
401
+ // Classes
402
+ const classMatch = originalLine.match(classRegex);
403
+ if (classMatch) {
404
+ const name = classMatch[1];
405
+ if (name.length >= this.config.minNameLength) {
406
+ patterns.push(this.createPatternEntry({
407
+ type: this.detectPythonClassType(name),
408
+ name,
409
+ file: relativePath,
410
+ line: lineNum,
411
+ endLine: this.findPythonBlockEnd(lines, i),
412
+ signature: `class ${name}${classMatch[2] || ''}`,
413
+ description: this.getPythonDocstring(lines, i + 1),
414
+ keywords: this.extractKeywords(name),
415
+ content: this.getPythonBlockContent(lines, i),
416
+ exported: !name.startsWith('_')
417
+ }));
418
+ continue;
419
+ }
420
+ }
421
+ // Functions
422
+ const funcMatch = originalLine.match(funcRegex);
423
+ if (funcMatch) {
424
+ const name = funcMatch[1];
425
+ if (name.length >= this.config.minNameLength) {
426
+ patterns.push(this.createPatternEntry({
427
+ type: this.detectPythonFunctionType(name),
428
+ name,
429
+ file: relativePath,
430
+ line: lineNum,
431
+ endLine: this.findPythonBlockEnd(lines, i),
432
+ signature: `def ${name}(${funcMatch[2]})`,
433
+ description: this.getPythonDocstring(lines, i + 1),
434
+ keywords: this.extractKeywords(name),
435
+ content: this.getPythonBlockContent(lines, i),
436
+ exported: !name.startsWith('_')
437
+ }));
438
+ continue;
439
+ }
440
+ }
441
+ // Constants
442
+ const constMatch = originalLine.match(constRegex);
443
+ if (constMatch) {
444
+ const name = constMatch[1];
445
+ if (name.length >= this.config.minNameLength) {
446
+ patterns.push(this.createPatternEntry({
447
+ type: 'constant',
448
+ name,
449
+ file: relativePath,
450
+ line: lineNum,
451
+ endLine: lineNum,
452
+ signature: `${name} = ...`,
453
+ description: '',
454
+ keywords: this.extractKeywords(name),
455
+ content: originalLine,
456
+ exported: !name.startsWith('_')
457
+ }));
458
+ }
459
+ }
460
+ }
461
+ return patterns;
462
+ }
463
+ detectPythonClassType(name) {
464
+ if (name.endsWith('Error') || name.endsWith('Exception'))
465
+ return 'error';
466
+ if (name.endsWith('Model'))
467
+ return 'model';
468
+ if (name.endsWith('Schema'))
469
+ return 'schema';
470
+ return 'class';
471
+ }
472
+ detectPythonFunctionType(name) {
473
+ if (name.startsWith('test_'))
474
+ return 'function'; // Tests are filtered by indexTests config
475
+ if (name.includes('middleware'))
476
+ return 'middleware';
477
+ if (name.includes('handler'))
478
+ return 'handler';
479
+ return 'function';
480
+ }
481
+ getPythonDocstring(lines, startIndex) {
482
+ if (startIndex >= lines.length)
483
+ return '';
484
+ const nextLine = lines[startIndex].trim();
485
+ if (nextLine.startsWith('"""') || nextLine.startsWith("'''")) {
486
+ const quote = nextLine.startsWith('"""') ? '"""' : "'''";
487
+ let doc = nextLine.replace(quote, '');
488
+ if (doc.endsWith(quote))
489
+ return doc.replace(quote, '').trim();
490
+ for (let i = startIndex + 1; i < lines.length; i++) {
491
+ if (lines[i].includes(quote)) {
492
+ doc += ' ' + lines[i].split(quote)[0].trim();
493
+ break;
494
+ }
495
+ doc += ' ' + lines[i].trim();
496
+ }
497
+ return doc.trim();
498
+ }
499
+ return '';
500
+ }
501
+ findPythonBlockEnd(lines, startIndex) {
502
+ const startIndent = lines[startIndex].search(/\S/);
503
+ for (let i = startIndex + 1; i < lines.length; i++) {
504
+ if (lines[i].trim() === '')
505
+ continue;
506
+ const currentIndent = lines[i].search(/\S/);
507
+ if (currentIndent <= startIndent)
508
+ return i;
509
+ }
510
+ return lines.length;
511
+ }
512
+ getPythonBlockContent(lines, startIndex) {
513
+ const endLine = this.findPythonBlockEnd(lines, startIndex);
514
+ return lines.slice(startIndex, endLine).join('\n');
515
+ }
198
516
  /**
199
517
  * Convert an AST node to a PatternEntry if applicable.
200
518
  */
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@rigour-labs/core",
3
- "version": "2.10.0",
3
+ "version": "2.12.0",
4
4
  "type": "module",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",
@@ -51,3 +51,9 @@ export {
51
51
  OverrideManager,
52
52
  loadConfigOverrides
53
53
  } from './overrides.js';
54
+ // Embeddings
55
+ export {
56
+ generateEmbedding,
57
+ semanticSearch,
58
+ cosineSimilarity
59
+ } from './embeddings.js';
@@ -22,9 +22,24 @@ import { generateEmbedding } from './embeddings.js';
22
22
 
23
23
  /** Default configuration for the indexer */
24
24
  const DEFAULT_CONFIG: PatternIndexConfig = {
25
- include: ['src/**/*', 'lib/**/*', 'app/**/*', 'components/**/*', 'utils/**/*', 'hooks/**/*'],
26
- exclude: ['**/node_modules/**', '**/dist/**', '**/build/**', '**/.git/**', '**/coverage/**'],
27
- extensions: ['.ts', '.tsx', '.js', '.jsx'],
25
+ include: ['src/**/*', 'lib/**/*', 'app/**/*', 'components/**/*', 'utils/**/*', 'hooks/**/*', '**/tests/**/*', '**/test/**/*'],
26
+ exclude: [
27
+ '**/node_modules/**',
28
+ '**/dist/**',
29
+ '**/build/**',
30
+ '**/.git/**',
31
+ '**/coverage/**',
32
+ '**/venv/**',
33
+ '**/.venv/**',
34
+ '**/__pycache__/**',
35
+ '**/site-packages/**',
36
+ '**/.pytest_cache/**',
37
+ '**/target/**', // Rust build dir
38
+ '**/bin/**',
39
+ '**/.gradle/**',
40
+ '**/.mvn/**'
41
+ ],
42
+ extensions: ['.ts', '.tsx', '.js', '.jsx', '.py', '.go', '.rs', '.java', '.cpp', '.h', '.rb', '.php', '.cs', '.kt'],
28
43
  indexTests: false,
29
44
  indexNodeModules: false,
30
45
  minNameLength: 2,
@@ -226,32 +241,346 @@ export class PatternIndexer {
226
241
  * Extract patterns from a single file, choosing an extractor by file extension (TypeScript AST for TS/JS).
227
242
  */
228
243
  private async extractPatterns(filePath: string, content: string): Promise<PatternEntry[]> {
244
+ const ext = path.extname(filePath).toLowerCase();
245
+
246
+ // Specific high-fidelity extractors
247
+ if (ext === '.py') return this.extractPythonPatterns(filePath, content);
248
+ if (ext === '.go') return this.extractGoPatterns(filePath, content);
249
+ if (ext === '.rs') return this.extractRustPatterns(filePath, content);
250
+ if (ext === '.java' || ext === '.kt' || ext === '.cs') return this.extractJVMStylePatterns(filePath, content);
251
+
252
+ // Fallback for TS/JS or other C-style languages
229
253
  const patterns: PatternEntry[] = [];
230
254
  const relativePath = path.relative(this.rootDir, filePath);
231
255
 
232
- // Parse with TypeScript
233
- const sourceFile = ts.createSourceFile(
234
- filePath,
235
- content,
236
- ts.ScriptTarget.Latest,
237
- true,
238
- this.getScriptKind(filePath)
239
- );
256
+ // For TS/JS, use AST
257
+ if (['.ts', '.tsx', '.js', '.jsx'].includes(ext)) {
258
+ const sourceFile = ts.createSourceFile(
259
+ filePath,
260
+ content,
261
+ ts.ScriptTarget.Latest,
262
+ true,
263
+ this.getScriptKind(filePath)
264
+ );
265
+
266
+ const visit = (node: ts.Node) => {
267
+ const pattern = this.nodeToPattern(node, sourceFile, relativePath, content);
268
+ if (pattern) patterns.push(pattern);
269
+ ts.forEachChild(node, visit);
270
+ };
271
+ visit(sourceFile);
272
+ return patterns;
273
+ }
240
274
 
241
- // Walk the AST
242
- const visit = (node: ts.Node) => {
243
- const pattern = this.nodeToPattern(node, sourceFile, relativePath, content);
244
- if (pattern) {
245
- patterns.push(pattern);
275
+ // Generic C-style fallback (C++, PHP, etc.)
276
+ return this.extractGenericCPatterns(filePath, content);
277
+ }
278
+
279
/**
 * Extract patterns from Go files.
 *
 * Scans the file line by line for declarations that start in column 0:
 * `func` declarations (with or without a receiver) and
 * `type Name struct|interface` declarations. A name is reported as exported
 * when it starts with an upper-case ASCII letter.
 *
 * @param filePath Path of the Go file; stored relative to `this.rootDir`.
 * @param content Full text of the file.
 * @returns One entry per matched declaration, in file order.
 */
private extractGoPatterns(filePath: string, content: string): PatternEntry[] {
    const patterns: PatternEntry[] = [];
    const relativePath = path.relative(this.rootDir, filePath);
    const lines = content.split('\n');

    const funcRegex = /^func\s+(?:\([^)]*\)\s+)?([A-Za-z_][A-Za-z0-9_]*)\s*\(([^)]*)\)\s*([^\{]*)\s*\{/;
    const typeRegex = /^type\s+([A-Za-z_][A-Za-z0-9_]*)\s+(struct|interface)/;

    for (let i = 0; i < lines.length; i++) {
        const line = lines[i];

        // A line starts with either `func` or `type`, never both.
        const funcMatch = line.match(funcRegex);
        const typeMatch = funcMatch ? null : line.match(typeRegex);
        if (!funcMatch && !typeMatch) continue;

        // Find the end of the brace block once; it feeds both endLine and content.
        const endLine = this.findBraceBlockEnd(lines, i);
        const blockText = lines.slice(i, endLine).join('\n');

        if (funcMatch) {
            const name = funcMatch[1];
            patterns.push(this.createPatternEntry({
                type: 'function',
                name,
                file: relativePath,
                line: i + 1,
                endLine,
                // Outer trim() drops the trailing space left when there is no return type.
                signature: `func ${name}(${funcMatch[2]}) ${funcMatch[3].trim()}`.trim(),
                description: this.getCOMLineComments(lines, i - 1),
                keywords: this.extractKeywords(name),
                content: blockText,
                exported: /^[A-Z]/.test(name)
            }));
        } else if (typeMatch) {
            const name = typeMatch[1];
            patterns.push(this.createPatternEntry({
                // Cast kept from the original: whether 'struct' / 'interface' are
                // members of PatternType is not visible from here.
                type: typeMatch[2] as any,
                name,
                file: relativePath,
                line: i + 1,
                endLine,
                signature: `type ${name} ${typeMatch[2]}`,
                description: this.getCOMLineComments(lines, i - 1),
                keywords: this.extractKeywords(name),
                content: blockText,
                exported: /^[A-Z]/.test(name)
            }));
        }
    }
    return patterns;
}
331
+
332
/**
 * Extract patterns from Rust files.
 *
 * Matches declarations that start in column 0: `fn` items (optionally `pub`
 * and/or `async`) and `struct` / `enum` / `trait` items. Items nested inside
 * `impl` or `mod` blocks are indented and therefore not matched.
 *
 * @param filePath Path of the Rust file; stored relative to `this.rootDir`.
 * @param content Full text of the file.
 * @returns One entry per matched item, in file order.
 */
private extractRustPatterns(filePath: string, content: string): PatternEntry[] {
    const patterns: PatternEntry[] = [];
    const relativePath = path.relative(this.rootDir, filePath);
    const lines = content.split('\n');

    const fnRegex = /^(?:pub\s+)?(?:async\s+)?fn\s+([A-Za-z_][A-Za-z0-9_]*)\s*[<(][^)]*[>)]\s*(?:->\s*[^\{]+)?\s*\{/;
    const typeRegex = /^(?:pub\s+)?(struct|enum|trait)\s+([A-Za-z_][A-Za-z0-9_]*)/;

    // Attributes such as #[derive(..)] sit between the doc comment and the item;
    // step over them so the comment is still found.
    const docStartIndex = (declIndex: number): number => {
        let index = declIndex - 1;
        while (index >= 0 && lines[index].trim().startsWith('#[')) index--;
        return index;
    };

    for (let i = 0; i < lines.length; i++) {
        const line = lines[i];

        const fnMatch = line.match(fnRegex);
        if (fnMatch) {
            const name = fnMatch[1];
            const endLine = this.findBraceBlockEnd(lines, i);
            patterns.push(this.createPatternEntry({
                type: 'function',
                name,
                file: relativePath,
                line: i + 1,
                endLine,
                signature: line.split('{')[0].trim(),
                description: this.getCOMLineComments(lines, docStartIndex(i)),
                keywords: this.extractKeywords(name),
                content: lines.slice(i, endLine).join('\n'),
                exported: line.startsWith('pub')
            }));
            continue;
        }

        // Previously typeRegex was declared but never applied, so structs,
        // enums and traits were silently skipped.
        const typeMatch = line.match(typeRegex);
        if (typeMatch) {
            const name = typeMatch[2];
            // Unit and tuple structs (`struct Marker;`, `struct Id(u32);`) have no
            // brace block; scanning for one would run into the next item.
            const braceless = !line.includes('{') && line.trim().endsWith(';');
            const endLine = braceless ? i + 1 : this.findBraceBlockEnd(lines, i);
            patterns.push(this.createPatternEntry({
                // NOTE(review): cast mirrors the Go extractor; whether
                // 'struct' | 'enum' | 'trait' belong to PatternType is not visible here.
                type: typeMatch[1] as any,
                name,
                file: relativePath,
                line: i + 1,
                endLine,
                signature: line.split('{')[0].replace(/;\s*$/, '').trim(),
                description: this.getCOMLineComments(lines, docStartIndex(i)),
                keywords: this.extractKeywords(name),
                content: lines.slice(i, endLine).join('\n'),
                exported: line.startsWith('pub')
            }));
        }
    }
    return patterns;
}
365
+
366
/**
 * Extract patterns from Java, Kotlin and C# files.
 *
 * Each line is trimmed and matched against two regexes: one for
 * class / interface / enum declarations and one for methods that start with
 * an explicit visibility modifier. Constructors and methods without a
 * visibility modifier are not matched.
 *
 * @param filePath Path of the source file; stored relative to `this.rootDir`.
 * @param content Full text of the file.
 * @returns One entry per matched declaration, in file order.
 */
private extractJVMStylePatterns(filePath: string, content: string): PatternEntry[] {
    const patterns: PatternEntry[] = [];
    const relativePath = path.relative(this.rootDir, filePath);
    const lines = content.split('\n');

    // Any run of modifiers may precede the keyword. `enum class` is listed first
    // so Kotlin enums are named after the class rather than the word "class".
    const classRegex = /^(?:(?:public|private|protected|internal|static|final|abstract|sealed|open|data|partial)\s+)*(?:enum\s+class|class|interface|enum)\s+([A-Za-z0-9_]+)/;
    const methodRegex = /^(?:public|private|protected|internal)\s+(?:static\s+)?(?:async\s+)?(?:[A-Za-z0-9_<>\[\]]+\s+)([A-Za-z0-9_]+)\s*\(/;

    // True when the declaration on `index` has no brace block of its own:
    // abstract/interface methods ending in `;`, or a header ending in `)` whose
    // next non-blank line does not open a block (e.g. a Kotlin data class).
    const isBodyless = (index: number): boolean => {
        const text = lines[index].trim();
        if (text.includes('{')) return false;
        if (text.endsWith(';')) return true;
        if (!text.endsWith(')')) return false;
        for (let j = index + 1; j < lines.length; j++) {
            const next = lines[j].trim();
            if (next !== '') return !next.startsWith('{');
        }
        return true;
    };

    for (let i = 0; i < lines.length; i++) {
        const line = lines[i].trim();

        // Class headers win over methods so `public class Foo(` stays a class.
        const classMatch = line.match(classRegex);
        const methodMatch = classMatch ? null : line.match(methodRegex);
        const matched = classMatch ?? methodMatch;
        if (!matched) continue;

        const name = matched[1];
        const endLine = isBodyless(i) ? i + 1 : this.findBraceBlockEnd(lines, i);
        patterns.push(this.createPatternEntry({
            type: classMatch ? 'class' : 'function',
            name,
            file: relativePath,
            line: i + 1,
            endLine,
            signature: line,
            description: this.getJavaDoc(lines, i - 1),
            keywords: this.extractKeywords(name),
            content: lines.slice(i, endLine).join('\n'),
            // Word boundaries keep names such as `Republic` from counting as public.
            exported: /\bpublic\b/.test(line)
        }));
    }
    return patterns;
}
399
+
400
/**
 * Fallback extractor for extensions without a dedicated extractor.
 * Currently a stub: both arguments are ignored and no patterns are produced.
 *
 * @param filePath Path of the file (unused).
 * @param content Full text of the file (unused).
 * @returns Always an empty array.
 */
private extractGenericCPatterns(filePath: string, content: string): PatternEntry[] {
    const noPatterns: PatternEntry[] = [];
    return noPatterns;
}
404
+
405
/**
 * Collect the run of `//` line comments that ends at `startIndex`, walking
 * upward until the first line that is not a line comment.
 *
 * @param lines All lines of the file.
 * @param startIndex Index of the line directly above the declaration; may be -1.
 * @returns The comment texts in source order, joined with single spaces.
 */
private getCOMLineComments(lines: string[], startIndex: number): string {
    const comments: string[] = [];
    // Clamp so an index past the end cannot dereference a missing line.
    for (let i = Math.min(startIndex, lines.length - 1); i >= 0; i--) {
        const line = lines[i].trim();
        if (!line.startsWith('//')) break;
        // Strip the whole marker, including doc forms like `///` and `//!`,
        // which previously left a stray `/` or `!` in the description.
        const text = line.replace(/^\/{2,}!?/, '').trim();
        // Skip empty comment lines so the joined text has no double spaces.
        if (text !== '') comments.unshift(text);
    }
    return comments.join(' ');
}
414
+
415
/**
 * Return the text of the block comment that sits directly above a
 * declaration, with the comment markers and leading asterisks removed.
 *
 * Blank lines and annotation lines (starting with `@`) between the comment
 * and the declaration are skipped. If the first remaining line does not end
 * a block comment, the declaration is treated as undocumented.
 *
 * @param lines All lines of the file.
 * @param startIndex Index of the line directly above the declaration; may be -1.
 * @returns The comment text joined with single spaces, or '' when there is none.
 */
private getJavaDoc(lines: string[], startIndex: number): string {
    let end = Math.min(startIndex, lines.length - 1);
    while (end >= 0) {
        const above = lines[end].trim();
        if (above !== '' && !above.startsWith('@')) break;
        end--;
    }
    // Stop here rather than scanning further up: otherwise an unrelated
    // comment from earlier in the file would be attached to this declaration.
    if (end < 0 || !lines[end].trim().endsWith('*/')) return '';

    const comments: string[] = [];
    for (let i = end; i >= 0; i--) {
        let line = lines[i].trim();
        // Any `/*` opener ends the walk, so plain block comments and trailing
        // comments after code cannot make the scan run to the top of the file.
        const openAt = line.indexOf('/*');
        if (openAt !== -1) line = line.slice(openAt);
        const text = line
            .replace(/^\/\*+/, '')
            .replace(/\*+\/$/, '')
            .trim()
            // Only the leading asterisk is decoration; asterisks in the text stay.
            .replace(/^\*\s?/, '')
            .trim();
        if (text !== '') comments.unshift(text);
        if (openAt !== -1) break;
    }
    return comments.join(' ');
}
426
+
427
/**
 * Find where the brace-delimited block that begins at or after `startIndex`
 * ends, by balancing `{` against `}` one line at a time.
 *
 * Every brace character is counted, including ones inside strings or comments.
 *
 * @param lines All lines of the file.
 * @param startIndex Index of the line to start scanning from.
 * @returns The 1-based number of the line on which the count returns to zero
 *          after at least one `{` was seen, or `lines.length` if it never does.
 */
private findBraceBlockEnd(lines: string[], startIndex: number): number {
    let depth = 0;
    let seenOpen = false;
    for (let idx = startIndex; idx < lines.length; idx++) {
        const text = lines[idx];
        const opens = text.split('{').length - 1;
        const closes = text.split('}').length - 1;
        if (opens > 0) seenOpen = true;
        depth += opens - closes;
        // Checked once per line, after all braces on that line are counted.
        if (seenOpen && depth === 0) return idx + 1;
    }
    return lines.length;
}
443
+
444
/**
 * Return the source text of the brace block that starts at `startIndex`.
 *
 * @param lines All lines of the file.
 * @param startIndex Index of the first line of the block.
 * @returns Lines from `startIndex` up to the end reported by
 *          `findBraceBlockEnd`, joined with newlines.
 */
private getBraceBlockContent(lines: string[], startIndex: number): string {
    const blockLines = lines.slice(startIndex, this.findBraceBlockEnd(lines, startIndex));
    return blockLines.join('\n');
}
448
+
449
/**
 * Extract patterns from Python files using regex.
 *
 * Only declarations that start in column 0 are matched: classes, functions
 * (including `async def`) and UPPER_CASE constant assignments. Names shorter
 * than `this.config.minNameLength` are skipped, and a name is reported as
 * exported unless it starts with an underscore.
 *
 * @param filePath Path of the Python file; stored relative to `this.rootDir`.
 * @param content Full text of the file.
 * @returns One entry per matched declaration, in file order.
 */
private extractPythonPatterns(filePath: string, content: string): PatternEntry[] {
    const patterns: PatternEntry[] = [];
    const relativePath = path.relative(this.rootDir, filePath);
    // Accept CRLF as well as LF. A trailing '\r' is a line terminator that '.'
    // cannot match, so `(.+)$` in constRegex used to reject every constant in
    // CRLF files.
    const lines = content.split(/\r?\n/);

    // Regex for Class definitions
    const classRegex = /^class\s+([A-Za-z_][A-Za-z0-9_]*)\s*(\([^)]*\))?\s*:/;
    // Regex for Function definitions (including async)
    const funcRegex = /^(?:async\s+)?def\s+([A-Za-z_][A-Za-z0-9_]*)\s*\(([^)]*)\)\s*(?:->\s*[^:]+)?\s*:/;
    // Regex for Constants (Top-level UPPER_CASE variables)
    const constRegex = /^([A-Z][A-Z0-9_]*)\s*=\s*(.+)$/;

    for (let i = 0; i < lines.length; i++) {
        const originalLine = lines[i];
        const lineNum = i + 1;

        // Classes
        const classMatch = originalLine.match(classRegex);
        if (classMatch) {
            const name = classMatch[1];
            if (name.length >= this.config.minNameLength) {
                // Scan the indented block once; it feeds both endLine and content.
                const endLine = this.findPythonBlockEnd(lines, i);
                patterns.push(this.createPatternEntry({
                    type: this.detectPythonClassType(name),
                    name,
                    file: relativePath,
                    line: lineNum,
                    endLine,
                    signature: `class ${name}${classMatch[2] || ''}`,
                    description: this.getPythonDocstring(lines, i + 1),
                    keywords: this.extractKeywords(name),
                    content: lines.slice(i, endLine).join('\n'),
                    exported: !name.startsWith('_')
                }));
                continue;
            }
        }

        // Functions
        const funcMatch = originalLine.match(funcRegex);
        if (funcMatch) {
            const name = funcMatch[1];
            if (name.length >= this.config.minNameLength) {
                const endLine = this.findPythonBlockEnd(lines, i);
                patterns.push(this.createPatternEntry({
                    type: this.detectPythonFunctionType(name),
                    name,
                    file: relativePath,
                    line: lineNum,
                    endLine,
                    signature: `def ${name}(${funcMatch[2]})`,
                    description: this.getPythonDocstring(lines, i + 1),
                    keywords: this.extractKeywords(name),
                    content: lines.slice(i, endLine).join('\n'),
                    exported: !name.startsWith('_')
                }));
                continue;
            }
        }

        // Constants
        const constMatch = originalLine.match(constRegex);
        if (constMatch) {
            const name = constMatch[1];
            if (name.length >= this.config.minNameLength) {
                patterns.push(this.createPatternEntry({
                    type: 'constant',
                    name,
                    file: relativePath,
                    line: lineNum,
                    endLine: lineNum,
                    signature: `${name} = ...`,
                    description: '',
                    keywords: this.extractKeywords(name),
                    content: originalLine,
                    exported: !name.startsWith('_')
                }));
            }
        }
    }

    return patterns;
}
254
534
 
535
/**
 * Classify a Python class by the suffix of its name.
 *
 * @param name Class name.
 * @returns 'error' for names ending in Error/Exception, 'model' for Model,
 *          'schema' for Schema, otherwise 'class'.
 */
private detectPythonClassType(name: string): PatternType {
    const isError = ['Error', 'Exception'].some((suffix) => name.endsWith(suffix));
    if (isError) return 'error';
    return name.endsWith('Model') ? 'model' : name.endsWith('Schema') ? 'schema' : 'class';
}
541
+
542
/**
 * Classify a Python function by its name.
 *
 * @param name Function name.
 * @returns 'middleware' or 'handler' when the name contains that word and
 *          does not start with `test_`; otherwise 'function'.
 */
private detectPythonFunctionType(name: string): PatternType {
    // The test_ check comes first, so e.g. `test_handler` stays a plain function.
    // NOTE(review): the original comment says tests are filtered by the
    // indexTests config; that filtering is not visible here - confirm.
    const isTest = name.indexOf('test_') === 0;
    if (!isTest) {
        if (name.indexOf('middleware') !== -1) return 'middleware';
        if (name.indexOf('handler') !== -1) return 'handler';
    }
    return 'function';
}
548
+
549
/**
 * Read the triple-quoted docstring that starts on line `startIndex`.
 *
 * @param lines All lines of the file.
 * @param startIndex Index of the line directly after the `class` / `def` line.
 * @returns The docstring collapsed onto one line, or '' when that line does
 *          not open a triple-quoted string.
 */
private getPythonDocstring(lines: string[], startIndex: number): string {
    if (startIndex >= lines.length) return '';
    const firstLine = lines[startIndex].trim();
    // Allow the r/u string prefixes that may precede a docstring (e.g. r"""...""").
    const opener = firstLine.match(/^[rRuU]?("""|''')/);
    if (!opener) return '';

    const quote = opener[1];
    const afterOpener = firstLine.slice(opener[0].length);
    // One-line docstring: look for the closing quote anywhere on the line, so
    // trailing text after it (such as a comment) is not treated as unterminated.
    const closeAt = afterOpener.indexOf(quote);
    if (closeAt !== -1) return afterOpener.slice(0, closeAt).trim();

    const parts: string[] = [afterOpener];
    for (let i = startIndex + 1; i < lines.length; i++) {
        const closing = lines[i].indexOf(quote);
        if (closing !== -1) {
            parts.push(lines[i].slice(0, closing));
            break;
        }
        parts.push(lines[i]);
    }
    // Drop blank lines so the joined text has no runs of spaces.
    return parts.map((part) => part.trim()).filter((part) => part !== '').join(' ');
}
568
+
569
/**
 * Find the end of the indented block whose header is on line `startIndex`.
 *
 * The block ends before the first following code line indented no deeper
 * than the header. Trailing blank lines and trailing comment lines at or
 * below the header's indentation are left out of the block.
 *
 * Limitation: text inside multi-line strings is treated like code, so
 * unindented string content can end the block early.
 *
 * @param lines All lines of the file.
 * @param startIndex Index of the `class` / `def` line.
 * @returns Exclusive end index of the block, which equals the 1-based number
 *          of its last line.
 */
private findPythonBlockEnd(lines: string[], startIndex: number): number {
    const startIndent = lines[startIndex].search(/\S/);
    // Exclusive end so far: one past the last line known to belong to the block.
    let end = startIndex + 1;
    for (let i = startIndex + 1; i < lines.length; i++) {
        const trimmed = lines[i].trim();
        if (trimmed === '') continue;
        if (lines[i].search(/\S/) <= startIndent) {
            // A dedented comment does not end the block on its own; it is
            // only included if indented body lines follow it.
            if (trimmed.startsWith('#')) continue;
            return end;
        }
        end = i + 1;
    }
    return end;
}
578
+
579
/**
 * Return the source text of the indented block that starts at `startIndex`.
 *
 * @param lines All lines of the file.
 * @param startIndex Index of the `class` / `def` line.
 * @returns Lines from `startIndex` up to the end reported by
 *          `findPythonBlockEnd`, joined with newlines.
 */
private getPythonBlockContent(lines: string[], startIndex: number): string {
    const blockLines = lines.slice(startIndex, this.findPythonBlockEnd(lines, startIndex));
    return blockLines.join('\n');
}
583
+
255
584
  /**
256
585
  * Convert an AST node to a PatternEntry if applicable.
257
586
  */