@imayuur/contexthub-repo-parser 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,83 @@
1
+ import type { ParsedFile } from '@imayuur/contexthub-shared-types';
2
+ export declare class RepoParser {
3
+ private repoPath;
4
+ private tsParser;
5
+ constructor(repoPath: string);
6
+ /**
7
+ * Map a file's extension to a language label ('unknown' when the extension is not recognized)
8
+ */
9
+ private detectLanguage;
10
+ /**
11
+ * Line-by-line regex parser for JS/TS symbols, imports and exports (used when tree-sitter yields no result)
12
+ */
13
+ private parseJSTS;
14
+ /**
15
+ * Parse a Python file for symbols, imports and `__all__` exports
16
+ */
17
+ private parsePython;
18
+ /**
19
+ * Parse a Go file for symbols and imports
20
+ */
21
+ private parseGo;
22
+ /**
23
+ * Parse a Rust file for symbols and imports
24
+ */
25
+ private parseRust;
26
+ /**
27
+ * Parse a Java file for symbols and imports
28
+ */
29
+ private parseJava;
30
+ /**
31
+ * Parse a Ruby file for symbols and imports
32
+ */
33
+ private parseRuby;
34
+ /**
35
+ * Parse a PHP file for symbols and imports
36
+ */
37
+ private parsePhp;
38
+ /**
39
+ * Parse a C# file for symbols and imports
40
+ */
41
+ private parseCSharp;
42
+ /**
43
+ * Parse a Swift file for symbols and imports
44
+ */
45
+ private parseSwift;
46
+ /**
47
+ * Parse a Kotlin file for symbols and imports
48
+ */
49
+ private parseKotlin;
50
+ /**
51
+ * Parse a Scala file for symbols and imports
52
+ */
53
+ private parseScala;
54
+ /**
55
+ * Parse a C/C++ file for symbols and `#include` imports
56
+ */
57
+ private parseCPP;
58
+ /**
59
+ * Parse a single file. Sensitive files, symlinks and files over the size limit yield an empty result; read/parse errors are logged, not thrown.
60
+ */
61
+ parseFile(filePath: string): Promise<ParsedFile>;
62
+ /**
63
+ * Parse the files under dirPath that match the glob patterns, skipping files outside the repo root, ignored paths, node_modules/.contexthub and sensitive files
64
+ */
65
+ parseDirectory(dirPath: string, patterns?: string[]): Promise<ParsedFile[]>;
66
+ /**
67
+ * Build a dependency graph from parsed files: file path -> that file's raw import sources (not resolved to paths)
68
+ */
69
+ buildDependencyGraph(files: ParsedFile[]): Map<string, string[]>;
70
+ /**
71
+ * Find the other files that import this one: an import source containing this file's base name, or an imported name equal to it
72
+ */
73
+ findRelatedFiles(filePath: string, allFiles: ParsedFile[]): Promise<string[]>;
74
+ /**
75
+ * Get summary statistics for a parsed codebase: file counts per language and symbol counts per symbol type
76
+ */
77
+ getCodeStats(files: ParsedFile[]): {
78
+ totalFiles: number;
79
+ byLanguage: Record<string, number>;
80
+ totalSymbols: number;
81
+ byType: Record<string, number>;
82
+ };
83
+ }
package/dist/index.js ADDED
@@ -0,0 +1,832 @@
1
+ "use strict";
2
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
+ if (k2 === undefined) k2 = k;
4
+ var desc = Object.getOwnPropertyDescriptor(m, k);
5
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
+ desc = { enumerable: true, get: function() { return m[k]; } };
7
+ }
8
+ Object.defineProperty(o, k2, desc);
9
+ }) : (function(o, m, k, k2) {
10
+ if (k2 === undefined) k2 = k;
11
+ o[k2] = m[k];
12
+ }));
13
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
15
+ }) : function(o, v) {
16
+ o["default"] = v;
17
+ });
18
+ var __importStar = (this && this.__importStar) || (function () {
19
+ var ownKeys = function(o) {
20
+ ownKeys = Object.getOwnPropertyNames || function (o) {
21
+ var ar = [];
22
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
23
+ return ar;
24
+ };
25
+ return ownKeys(o);
26
+ };
27
+ return function (mod) {
28
+ if (mod && mod.__esModule) return mod;
29
+ var result = {};
30
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
31
+ __setModuleDefault(result, mod);
32
+ return result;
33
+ };
34
+ })();
35
+ Object.defineProperty(exports, "__esModule", { value: true });
36
+ exports.RepoParser = void 0;
37
+ const fs = __importStar(require("fs"));
38
+ const path = __importStar(require("path"));
39
+ const glob_1 = require("glob");
40
+ const contexthub_core_1 = require("@imayuur/contexthub-core");
41
+ const tree_sitter_1 = require("./tree-sitter");
42
// Basename patterns for files that may hold credentials or key material.
// Only two wildcard shapes are understood: a leading `*.` (suffix match)
// and a trailing `*` (prefix match). Everything else is matched as a prefix
// of the basename, so `.env` also covers `.env.anything`.
const SENSITIVE_FILE_PATTERNS = [
    '.env', '.env.local', '.env.production', '.env.staging',
    '*.pem', '*.key', '*.p12', '*.pfx', '*.jks', '*.keystore',
    'credentials*', 'secrets*', '.npmrc', '.pypirc',
    'id_rsa*', 'id_ed25519*', '*.crt', '*.cert',
    '.htpasswd', '*.secret',
];
/**
 * Report whether a path's basename matches any sensitive-file pattern.
 * The comparison is case-insensitive and ignores the directory part.
 *
 * @param filePath Path (absolute or relative) of the file to check.
 * @returns true when the file should never be read or indexed.
 */
function isSensitiveFile(filePath) {
    const fileName = path.basename(filePath).toLowerCase();
    for (const rawPattern of SENSITIVE_FILE_PATTERNS) {
        const pattern = rawPattern.toLowerCase();
        let hit;
        if (pattern.startsWith('*.')) {
            // "*.ext" -> basename must end with ".ext"
            hit = fileName.endsWith(pattern.slice(1));
        }
        else if (pattern.endsWith('*')) {
            // "prefix*" -> basename must start with "prefix"
            hit = fileName.startsWith(pattern.slice(0, -1));
        }
        else {
            // Plain name: exact match, or the name used as a prefix.
            hit = fileName === pattern || fileName.startsWith(pattern.replace('*', ''));
        }
        if (hit) {
            return true;
        }
    }
    return false;
}
60
+ class RepoParser {
61
+ constructor(repoPath) {
62
+ this.repoPath = repoPath;
63
+ this.tsParser = new tree_sitter_1.TreeSitterParser();
64
+ }
65
+ /**
66
+ * Detect language from file extension
67
+ */
68
+ detectLanguage(filePath) {
69
+ const ext = filePath.split('.').pop()?.toLowerCase();
70
+ switch (ext) {
71
+ case 'ts':
72
+ case 'tsx':
73
+ return 'typescript';
74
+ case 'js':
75
+ case 'jsx':
76
+ case 'mjs':
77
+ case 'cjs':
78
+ return 'javascript';
79
+ case 'py':
80
+ return 'python';
81
+ case 'java':
82
+ return 'java';
83
+ case 'cpp':
84
+ case 'cc':
85
+ case 'cxx':
86
+ case 'c++':
87
+ return 'cpp';
88
+ case 'c':
89
+ case 'h':
90
+ case 'hpp':
91
+ return 'c';
92
+ case 'go':
93
+ return 'go';
94
+ case 'rs':
95
+ return 'rust';
96
+ case 'rb':
97
+ return 'ruby';
98
+ case 'php':
99
+ return 'php';
100
+ case 'cs':
101
+ return 'csharp';
102
+ case 'swift':
103
+ return 'swift';
104
+ case 'kt':
105
+ case 'kts':
106
+ return 'kotlin';
107
+ case 'scala':
108
+ return 'scala';
109
+ case 'vue':
110
+ case 'svelte':
111
+ return 'framework';
112
+ default:
113
+ return 'unknown';
114
+ }
115
+ }
116
+ /**
117
+ * Simple JS/TS parser for extracting symbols, imports, exports
118
+ */
119
+ parseJSTS(content) {
120
+ const symbols = [];
121
+ const imports = [];
122
+ const exports = [];
123
+ const lines = content.split('\n');
124
+ // Patterns for TypeScript/JavaScript
125
+ const patterns = {
126
+ functionDecl: /^(?:export\s+)?(?:async\s+)?function\s+(\w+)/,
127
+ classDecl: /^(?:export\s+)?(?:abstract\s+)?class\s+(\w+)/,
128
+ constDecl: /^(?:export\s+)?const\s+(\w+)\s*=/,
129
+ letDecl: /^(?:export\s+)?let\s+(\w+)\s*=/,
130
+ interfaceDecl: /^interface\s+(\w+)/,
131
+ typeDecl: /^type\s+(\w+)/,
132
+ enumDecl: /^enum\s+(\w+)/,
133
+ importDefault: /^import\s+(\w+)\s+from\s+['"](.+?)['"]/,
134
+ importNamed: /^import\s+{([^}]+)}\s+from\s+['"](.+?)['"]/,
135
+ importAll: /^import\s+\*\s+as\s+(\w+)\s+from\s+['"](.+?)['"]/,
136
+ exportDefault: /^export\s+default\s+/,
137
+ exportNamed: /^export\s+(?:const|let|function|class|interface|type|enum)\s+(\w+)/
138
+ };
139
+ lines.forEach((line, index) => {
140
+ const trimmed = line.trim();
141
+ const lineNumber = index + 1;
142
+ // Functions
143
+ const funcMatch = trimmed.match(patterns.functionDecl);
144
+ if (funcMatch) {
145
+ symbols.push({ type: 'function', name: funcMatch[1], lineNumber, columnNumber: 0 });
146
+ }
147
+ // Classes
148
+ const classMatch = trimmed.match(patterns.classDecl);
149
+ if (classMatch) {
150
+ symbols.push({ type: 'class', name: classMatch[1], lineNumber, columnNumber: 0 });
151
+ }
152
+ // Constants/Variables
153
+ const constMatch = trimmed.match(patterns.constDecl);
154
+ if (constMatch) {
155
+ symbols.push({ type: 'variable', name: constMatch[1], lineNumber, columnNumber: 0 });
156
+ }
157
+ const letMatch = trimmed.match(patterns.letDecl);
158
+ if (letMatch) {
159
+ symbols.push({ type: 'variable', name: letMatch[1], lineNumber, columnNumber: 0 });
160
+ }
161
+ // Interfaces
162
+ const ifaceMatch = trimmed.match(patterns.interfaceDecl);
163
+ if (ifaceMatch) {
164
+ symbols.push({ type: 'interface', name: ifaceMatch[1], lineNumber, columnNumber: 0 });
165
+ }
166
+ // Type aliases
167
+ const typeMatch = trimmed.match(patterns.typeDecl);
168
+ if (typeMatch) {
169
+ symbols.push({ type: 'interface', name: typeMatch[1], lineNumber, columnNumber: 0 });
170
+ }
171
+ // Enums
172
+ const enumMatch = trimmed.match(patterns.enumDecl);
173
+ if (enumMatch) {
174
+ symbols.push({ type: 'class', name: enumMatch[1], lineNumber, columnNumber: 0 });
175
+ }
176
+ // Default imports
177
+ const importDefMatch = trimmed.match(patterns.importDefault);
178
+ if (importDefMatch) {
179
+ imports.push({
180
+ source: importDefMatch[2],
181
+ imported: [importDefMatch[1]],
182
+ isDefault: true,
183
+ lineNumber
184
+ });
185
+ }
186
+ // Named imports
187
+ const importNamedMatch = trimmed.match(patterns.importNamed);
188
+ if (importNamedMatch) {
189
+ const importedItems = importNamedMatch[1].split(',').map(s => s.trim()).filter(s => s);
190
+ imports.push({
191
+ source: importNamedMatch[2],
192
+ imported: importedItems,
193
+ isDefault: false,
194
+ lineNumber
195
+ });
196
+ }
197
+ // Namespace imports
198
+ const importAllMatch = trimmed.match(patterns.importAll);
199
+ if (importAllMatch) {
200
+ imports.push({
201
+ source: importAllMatch[2],
202
+ imported: [importAllMatch[1]],
203
+ isDefault: true,
204
+ lineNumber
205
+ });
206
+ }
207
+ // Default exports
208
+ if (trimmed.match(patterns.exportDefault)) {
209
+ exports.push({ source: '', imported: ['default'], isDefault: true, lineNumber });
210
+ }
211
+ // Named exports
212
+ const exportMatch = trimmed.match(patterns.exportNamed);
213
+ if (exportMatch) {
214
+ exports.push({ source: '', imported: [exportMatch[1]], isDefault: false, lineNumber });
215
+ }
216
+ });
217
+ return { symbols, imports, exports };
218
+ }
219
+ /**
220
+ * Parse a Python file for symbols
221
+ */
222
+ parsePython(content) {
223
+ const symbols = [];
224
+ const imports = [];
225
+ const exports = [];
226
+ const lines = content.split('\n');
227
+ const patterns = {
228
+ classDecl: /^class\s+(\w+)/,
229
+ functionDecl: /^def\s+(\w+)/,
230
+ importFrom: /^from\s+([\w.]+)\s+import\s+(.+)/,
231
+ import: /^import\s+(.+)/
232
+ };
233
+ lines.forEach((line, index) => {
234
+ const trimmed = line.trim();
235
+ const lineNumber = index + 1;
236
+ const classMatch = trimmed.match(patterns.classDecl);
237
+ if (classMatch) {
238
+ symbols.push({ type: 'class', name: classMatch[1], lineNumber, columnNumber: 0 });
239
+ }
240
+ const funcMatch = trimmed.match(patterns.functionDecl);
241
+ if (funcMatch) {
242
+ symbols.push({ type: 'function', name: funcMatch[1], lineNumber, columnNumber: 0 });
243
+ }
244
+ const importFromMatch = trimmed.match(patterns.importFrom);
245
+ if (importFromMatch) {
246
+ const items = importFromMatch[2].split(',').map(s => s.trim()).filter(s => s !== '*');
247
+ imports.push({ source: importFromMatch[1], imported: items, isDefault: false, lineNumber });
248
+ }
249
+ const importMatch = trimmed.match(patterns.import);
250
+ if (importMatch) {
251
+ imports.push({ source: importMatch[1], imported: ['*'], isDefault: false, lineNumber });
252
+ }
253
+ // __all__ export
254
+ if (trimmed.startsWith('__all__')) {
255
+ exports.push({ source: '', imported: ['__all__'], isDefault: false, lineNumber });
256
+ }
257
+ });
258
+ return { symbols, imports, exports };
259
+ }
260
+ /**
261
+ * Parse a Go file for symbols and imports
262
+ */
263
+ parseGo(content) {
264
+ const symbols = [];
265
+ const imports = [];
266
+ const exports = [];
267
+ const lines = content.split('\n');
268
+ let inImportBlock = false;
269
+ lines.forEach((line, index) => {
270
+ const trimmed = line.trim();
271
+ const lineNumber = index + 1;
272
+ // Handle import block
273
+ if (trimmed.startsWith('import (')) {
274
+ inImportBlock = true;
275
+ return;
276
+ }
277
+ if (inImportBlock && trimmed.startsWith(')')) {
278
+ inImportBlock = false;
279
+ return;
280
+ }
281
+ if (inImportBlock) {
282
+ const match = trimmed.match(/"([^"]+)"/);
283
+ if (match) {
284
+ imports.push({ source: match[1], imported: ['*'], isDefault: false, lineNumber });
285
+ }
286
+ return;
287
+ }
288
+ // Single-line import
289
+ const importMatch = trimmed.match(/^import\s+"([^"]+)"/);
290
+ if (importMatch) {
291
+ imports.push({ source: importMatch[1], imported: ['*'], isDefault: false, lineNumber });
292
+ }
293
+ // Functions (with receiver)
294
+ const methodMatch = trimmed.match(/^func\s+\(([^)]+)\)\s+(\w+)/);
295
+ if (methodMatch) {
296
+ symbols.push({ type: 'method', name: `${methodMatch[1].trim()}.${methodMatch[2]}`, lineNumber, columnNumber: 0 });
297
+ }
298
+ else {
299
+ // Plain function
300
+ const funcMatch = trimmed.match(/^func\s+(\w+)/);
301
+ if (funcMatch) {
302
+ symbols.push({ type: 'function', name: funcMatch[1], lineNumber, columnNumber: 0 });
303
+ }
304
+ }
305
+ // Structs and interfaces
306
+ const typeMatch = trimmed.match(/^type\s+(\w+)\s+(struct|interface)/);
307
+ if (typeMatch) {
308
+ symbols.push({
309
+ type: typeMatch[2] === 'interface' ? 'interface' : 'class',
310
+ name: typeMatch[1],
311
+ lineNumber,
312
+ columnNumber: 0
313
+ });
314
+ }
315
+ });
316
+ return { symbols, imports, exports };
317
+ }
318
+ /**
319
+ * Parse a Rust file for symbols and imports
320
+ */
321
+ parseRust(content) {
322
+ const symbols = [];
323
+ const imports = [];
324
+ const exports = [];
325
+ const lines = content.split('\n');
326
+ lines.forEach((line, index) => {
327
+ const trimmed = line.trim();
328
+ const lineNumber = index + 1;
329
+ // Functions
330
+ const funcMatch = trimmed.match(/^(?:pub\s+)?(?:async\s+)?fn\s+(\w+)/);
331
+ if (funcMatch) {
332
+ symbols.push({ type: 'function', name: funcMatch[1], lineNumber, columnNumber: 0 });
333
+ }
334
+ // Structs, Enums, Traits
335
+ const structMatch = trimmed.match(/^(?:pub\s+)?struct\s+(\w+)/);
336
+ if (structMatch) {
337
+ symbols.push({ type: 'class', name: structMatch[1], lineNumber, columnNumber: 0 });
338
+ }
339
+ const enumMatch = trimmed.match(/^(?:pub\s+)?enum\s+(\w+)/);
340
+ if (enumMatch) {
341
+ symbols.push({ type: 'class', name: enumMatch[1], lineNumber, columnNumber: 0 });
342
+ }
343
+ const traitMatch = trimmed.match(/^(?:pub\s+)?trait\s+(\w+)/);
344
+ if (traitMatch) {
345
+ symbols.push({ type: 'interface', name: traitMatch[1], lineNumber, columnNumber: 0 });
346
+ }
347
+ // Imports
348
+ const importMatch = trimmed.match(/^use\s+([^;]+);/);
349
+ if (importMatch) {
350
+ imports.push({ source: importMatch[1].trim(), imported: ['*'], isDefault: false, lineNumber });
351
+ }
352
+ });
353
+ return { symbols, imports, exports };
354
+ }
355
+ /**
356
+ * Parse a Java file for symbols and imports
357
+ */
358
+ parseJava(content) {
359
+ const symbols = [];
360
+ const imports = [];
361
+ const exports = [];
362
+ const lines = content.split('\n');
363
+ lines.forEach((line, index) => {
364
+ const trimmed = line.trim();
365
+ const lineNumber = index + 1;
366
+ // Classes and Interfaces
367
+ const classMatch = trimmed.match(/^(?:public\s+|private\s+)?class\s+(\w+)/);
368
+ if (classMatch) {
369
+ symbols.push({ type: 'class', name: classMatch[1], lineNumber, columnNumber: 0 });
370
+ }
371
+ const interfaceMatch = trimmed.match(/^(?:public\s+|private\s+)?interface\s+(\w+)/);
372
+ if (interfaceMatch) {
373
+ symbols.push({ type: 'interface', name: interfaceMatch[1], lineNumber, columnNumber: 0 });
374
+ }
375
+ // Methods
376
+ const methodMatch = trimmed.match(/^(?:public|protected|private|static|\s) +[\w<>[\]]+ +(\w+) *\([^)]*\) *(?:throws [^{]+)? *\{/);
377
+ if (methodMatch) {
378
+ if (!['if', 'for', 'while', 'switch', 'catch'].includes(methodMatch[1])) {
379
+ symbols.push({ type: 'method', name: methodMatch[1], lineNumber, columnNumber: 0 });
380
+ }
381
+ }
382
+ // Imports
383
+ const importMatch = trimmed.match(/^import\s+([^;]+);/);
384
+ if (importMatch) {
385
+ imports.push({ source: importMatch[1].trim(), imported: ['*'], isDefault: false, lineNumber });
386
+ }
387
+ });
388
+ return { symbols, imports, exports };
389
+ }
390
+ /**
391
+ * Parse a Ruby file for symbols and imports
392
+ */
393
+ parseRuby(content) {
394
+ const symbols = [];
395
+ const imports = [];
396
+ const exports = [];
397
+ const lines = content.split('\n');
398
+ lines.forEach((line, index) => {
399
+ const trimmed = line.trim();
400
+ const lineNumber = index + 1;
401
+ // Class
402
+ const classMatch = trimmed.match(/^class\s+([\w:]+)/);
403
+ if (classMatch) {
404
+ symbols.push({ type: 'class', name: classMatch[1], lineNumber, columnNumber: 0 });
405
+ }
406
+ // Method / Function
407
+ const defMatch = trimmed.match(/^def\s+([\w!?.]+)/);
408
+ if (defMatch) {
409
+ symbols.push({ type: 'function', name: defMatch[1], lineNumber, columnNumber: 0 });
410
+ }
411
+ // Imports
412
+ const requireMatch = trimmed.match(/^(?:require|require_relative)\s+['"](.+?)['"]/);
413
+ if (requireMatch) {
414
+ imports.push({ source: requireMatch[1], imported: ['*'], isDefault: false, lineNumber });
415
+ }
416
+ });
417
+ return { symbols, imports, exports };
418
+ }
419
+ /**
420
+ * Parse a PHP file for symbols and imports
421
+ */
422
+ parsePhp(content) {
423
+ const symbols = [];
424
+ const imports = [];
425
+ const exports = [];
426
+ const lines = content.split('\n');
427
+ lines.forEach((line, index) => {
428
+ const trimmed = line.trim();
429
+ const lineNumber = index + 1;
430
+ // Class / Interface / Trait
431
+ const classMatch = trimmed.match(/^(?:abstract\s+|final\s+)?(?:class|interface|trait)\s+(\w+)/);
432
+ if (classMatch) {
433
+ symbols.push({ type: 'class', name: classMatch[1], lineNumber, columnNumber: 0 });
434
+ }
435
+ // Function
436
+ const funcMatch = trimmed.match(/^(?:public|protected|private|static|\s)*function\s+(\w+)/);
437
+ if (funcMatch) {
438
+ symbols.push({ type: 'function', name: funcMatch[1], lineNumber, columnNumber: 0 });
439
+ }
440
+ // Imports (use, require, include)
441
+ const useMatch = trimmed.match(/^use\s+([^;]+);/);
442
+ if (useMatch) {
443
+ imports.push({ source: useMatch[1].trim(), imported: ['*'], isDefault: false, lineNumber });
444
+ }
445
+ const reqMatch = trimmed.match(/^(?:require|require_once|include|include_once)\s*['"](.+?)['"]/);
446
+ if (reqMatch) {
447
+ imports.push({ source: reqMatch[1], imported: ['*'], isDefault: false, lineNumber });
448
+ }
449
+ });
450
+ return { symbols, imports, exports };
451
+ }
452
+ /**
453
+ * Parse a C# file for symbols and imports
454
+ */
455
+ parseCSharp(content) {
456
+ const symbols = [];
457
+ const imports = [];
458
+ const exports = [];
459
+ const lines = content.split('\n');
460
+ lines.forEach((line, index) => {
461
+ const trimmed = line.trim();
462
+ const lineNumber = index + 1;
463
+ // Class / Interface / Struct / Record
464
+ const classMatch = trimmed.match(/^(?:public|private|protected|internal|static|\s)*(?:class|interface|struct|record)\s+(\w+)/);
465
+ if (classMatch) {
466
+ symbols.push({ type: 'class', name: classMatch[1], lineNumber, columnNumber: 0 });
467
+ }
468
+ // Method / Function
469
+ const methodMatch = trimmed.match(/^(?:public|private|protected|internal|static|async|override|\s)+[\w<>[\]]+ +(\w+) *\([^)]*\)/);
470
+ if (methodMatch) {
471
+ if (!['if', 'for', 'while', 'switch', 'using', 'catch'].includes(methodMatch[1])) {
472
+ symbols.push({ type: 'method', name: methodMatch[1], lineNumber, columnNumber: 0 });
473
+ }
474
+ }
475
+ // Imports (using)
476
+ const usingMatch = trimmed.match(/^using\s+([^;=]+);/);
477
+ if (usingMatch) {
478
+ imports.push({ source: usingMatch[1].trim(), imported: ['*'], isDefault: false, lineNumber });
479
+ }
480
+ });
481
+ return { symbols, imports, exports };
482
+ }
483
+ /**
484
+ * Parse a Swift file for symbols and imports
485
+ */
486
+ parseSwift(content) {
487
+ const symbols = [];
488
+ const imports = [];
489
+ const exports = [];
490
+ const lines = content.split('\n');
491
+ lines.forEach((line, index) => {
492
+ const trimmed = line.trim();
493
+ const lineNumber = index + 1;
494
+ // Class / Struct / Protocol / Enum
495
+ const classMatch = trimmed.match(/^(?:public|private|internal|fileprivate|\s)*(?:class|struct|protocol|enum)\s+(\w+)/);
496
+ if (classMatch) {
497
+ symbols.push({ type: 'class', name: classMatch[1], lineNumber, columnNumber: 0 });
498
+ }
499
+ // Function
500
+ const funcMatch = trimmed.match(/^(?:public|private|internal|fileprivate|static|class|\s)*func\s+(\w+)/);
501
+ if (funcMatch) {
502
+ symbols.push({ type: 'function', name: funcMatch[1], lineNumber, columnNumber: 0 });
503
+ }
504
+ // Imports
505
+ const importMatch = trimmed.match(/^import\s+(\w+)/);
506
+ if (importMatch) {
507
+ imports.push({ source: importMatch[1], imported: ['*'], isDefault: false, lineNumber });
508
+ }
509
+ });
510
+ return { symbols, imports, exports };
511
+ }
512
+ /**
513
+ * Parse a Kotlin file for symbols and imports
514
+ */
515
+ parseKotlin(content) {
516
+ const symbols = [];
517
+ const imports = [];
518
+ const exports = [];
519
+ const lines = content.split('\n');
520
+ lines.forEach((line, index) => {
521
+ const trimmed = line.trim();
522
+ const lineNumber = index + 1;
523
+ // Class / Interface / Object
524
+ const classMatch = trimmed.match(/^(?:open|abstract|sealed|data|internal|\s)*(?:class|interface|object)\s+(\w+)/);
525
+ if (classMatch) {
526
+ symbols.push({ type: 'class', name: classMatch[1], lineNumber, columnNumber: 0 });
527
+ }
528
+ // Function
529
+ const funcMatch = trimmed.match(/^(?:open|override|internal|public|private|\s)*fun\s+(\w+)/);
530
+ if (funcMatch) {
531
+ symbols.push({ type: 'function', name: funcMatch[1], lineNumber, columnNumber: 0 });
532
+ }
533
+ // Imports
534
+ const importMatch = trimmed.match(/^import\s+([\w.]+)/);
535
+ if (importMatch) {
536
+ imports.push({ source: importMatch[1], imported: ['*'], isDefault: false, lineNumber });
537
+ }
538
+ });
539
+ return { symbols, imports, exports };
540
+ }
541
+ /**
542
+ * Parse a Scala file for symbols and imports
543
+ */
544
+ parseScala(content) {
545
+ const symbols = [];
546
+ const imports = [];
547
+ const exports = [];
548
+ const lines = content.split('\n');
549
+ lines.forEach((line, index) => {
550
+ const trimmed = line.trim();
551
+ const lineNumber = index + 1;
552
+ // Class / Object / Trait
553
+ const classMatch = trimmed.match(/^(?:abstract|case|\s)*(?:class|object|trait)\s+(\w+)/);
554
+ if (classMatch) {
555
+ symbols.push({ type: 'class', name: classMatch[1], lineNumber, columnNumber: 0 });
556
+ }
557
+ // Function
558
+ const funcMatch = trimmed.match(/^(?:override|private|protected|\s)*def\s+(\w+)/);
559
+ if (funcMatch) {
560
+ symbols.push({ type: 'function', name: funcMatch[1], lineNumber, columnNumber: 0 });
561
+ }
562
+ // Imports
563
+ const importMatch = trimmed.match(/^import\s+([\w.{}_]+)/);
564
+ if (importMatch) {
565
+ imports.push({ source: importMatch[1], imported: ['*'], isDefault: false, lineNumber });
566
+ }
567
+ });
568
+ return { symbols, imports, exports };
569
+ }
570
+ /**
571
+ * Parse a C/C++ file for symbols and imports
572
+ */
573
+ parseCPP(content) {
574
+ const symbols = [];
575
+ const imports = [];
576
+ const exports = [];
577
+ const lines = content.split('\n');
578
+ lines.forEach((line, index) => {
579
+ const trimmed = line.trim();
580
+ const lineNumber = index + 1;
581
+ // Class / Struct
582
+ const classMatch = trimmed.match(/^(?:class|struct)\s+(\w+)/);
583
+ if (classMatch) {
584
+ symbols.push({ type: 'class', name: classMatch[1], lineNumber, columnNumber: 0 });
585
+ }
586
+ // Function / Method
587
+ const funcMatch = trimmed.match(/^[\w<>[\]:*&]+ +(\w+::)?(\w+) *\([^)]*\) *(?:const)? *\{?/);
588
+ if (funcMatch) {
589
+ const name = funcMatch[1] ? funcMatch[1] + funcMatch[2] : funcMatch[2];
590
+ if (!['if', 'for', 'while', 'switch', 'catch', 'return'].includes(funcMatch[2])) {
591
+ symbols.push({ type: 'function', name, lineNumber, columnNumber: 0 });
592
+ }
593
+ }
594
+ // Imports (#include)
595
+ const includeMatch = trimmed.match(/^#include\s+['"<](.+?)['">]/);
596
+ if (includeMatch) {
597
+ imports.push({ source: includeMatch[1], imported: ['*'], isDefault: false, lineNumber });
598
+ }
599
+ });
600
+ return { symbols, imports, exports };
601
+ }
602
+ /**
603
+ * Parse a single file (with size limit and sensitive file check)
604
+ */
605
+ async parseFile(filePath) {
606
+ const ext = filePath.split('.').pop()?.toLowerCase();
607
+ let symbols = [];
608
+ let imports = [];
609
+ let exports = [];
610
+ try {
611
+ // Security: Skip sensitive files
612
+ if (isSensitiveFile(filePath)) {
613
+ return {
614
+ path: filePath,
615
+ language: this.detectLanguage(filePath),
616
+ symbols: [],
617
+ imports: [],
618
+ exports: []
619
+ };
620
+ }
621
+ // Security: Don't follow symlinks — must use lstatSync (not statSync!)
622
+ const lstats = fs.lstatSync(filePath);
623
+ if (lstats.isSymbolicLink()) {
624
+ return {
625
+ path: filePath,
626
+ language: this.detectLanguage(filePath),
627
+ symbols: [],
628
+ imports: [],
629
+ exports: []
630
+ };
631
+ }
632
+ // Security: Check file size before reading
633
+ if (lstats.size > contexthub_core_1.MAX_INGEST_FILE_SIZE) {
634
+ return {
635
+ path: filePath,
636
+ language: this.detectLanguage(filePath),
637
+ symbols: [],
638
+ imports: [],
639
+ exports: []
640
+ };
641
+ }
642
+ const content = fs.readFileSync(filePath, 'utf-8');
643
+ const language = this.detectLanguage(filePath);
644
+ let parsed = null;
645
+ if (['typescript', 'javascript', 'tsx', 'python'].includes(language)) {
646
+ parsed = await this.tsParser.parse(content, language);
647
+ }
648
+ if (parsed) {
649
+ symbols = parsed.symbols;
650
+ imports = parsed.imports;
651
+ exports = parsed.exports;
652
+ }
653
+ else {
654
+ // Fallback to regex parser
655
+ if (ext === 'ts' || ext === 'tsx' || ext === 'js' || ext === 'jsx' || ext === 'mjs' || ext === 'cjs') {
656
+ const regexParsed = this.parseJSTS(content);
657
+ symbols = regexParsed.symbols;
658
+ imports = regexParsed.imports;
659
+ exports = regexParsed.exports;
660
+ }
661
+ else if (ext === 'py') {
662
+ const regexParsed = this.parsePython(content);
663
+ symbols = regexParsed.symbols;
664
+ imports = regexParsed.imports;
665
+ exports = regexParsed.exports;
666
+ }
667
+ else if (ext === 'go') {
668
+ const regexParsed = this.parseGo(content);
669
+ symbols = regexParsed.symbols;
670
+ imports = regexParsed.imports;
671
+ exports = regexParsed.exports;
672
+ }
673
+ else if (ext === 'rs') {
674
+ const regexParsed = this.parseRust(content);
675
+ symbols = regexParsed.symbols;
676
+ imports = regexParsed.imports;
677
+ exports = regexParsed.exports;
678
+ }
679
+ else if (ext === 'java') {
680
+ const regexParsed = this.parseJava(content);
681
+ symbols = regexParsed.symbols;
682
+ imports = regexParsed.imports;
683
+ exports = regexParsed.exports;
684
+ }
685
+ else if (ext === 'rb') {
686
+ const regexParsed = this.parseRuby(content);
687
+ symbols = regexParsed.symbols;
688
+ imports = regexParsed.imports;
689
+ exports = regexParsed.exports;
690
+ }
691
+ else if (ext === 'php') {
692
+ const regexParsed = this.parsePhp(content);
693
+ symbols = regexParsed.symbols;
694
+ imports = regexParsed.imports;
695
+ exports = regexParsed.exports;
696
+ }
697
+ else if (ext === 'cs') {
698
+ const regexParsed = this.parseCSharp(content);
699
+ symbols = regexParsed.symbols;
700
+ imports = regexParsed.imports;
701
+ exports = regexParsed.exports;
702
+ }
703
+ else if (ext === 'swift') {
704
+ const regexParsed = this.parseSwift(content);
705
+ symbols = regexParsed.symbols;
706
+ imports = regexParsed.imports;
707
+ exports = regexParsed.exports;
708
+ }
709
+ else if (ext === 'kt' || ext === 'kts') {
710
+ const regexParsed = this.parseKotlin(content);
711
+ symbols = regexParsed.symbols;
712
+ imports = regexParsed.imports;
713
+ exports = regexParsed.exports;
714
+ }
715
+ else if (ext === 'scala') {
716
+ const regexParsed = this.parseScala(content);
717
+ symbols = regexParsed.symbols;
718
+ imports = regexParsed.imports;
719
+ exports = regexParsed.exports;
720
+ }
721
+ else if (['cpp', 'cc', 'cxx', 'c++', 'c', 'h', 'hpp'].includes(ext || '')) {
722
+ const regexParsed = this.parseCPP(content);
723
+ symbols = regexParsed.symbols;
724
+ imports = regexParsed.imports;
725
+ exports = regexParsed.exports;
726
+ }
727
+ }
728
+ }
729
+ catch (e) {
730
+ // Sanitized error — don't expose full path
731
+ console.error(`Failed to parse file:`, e?.message || 'unknown error');
732
+ }
733
+ return {
734
+ path: filePath,
735
+ language: this.detectLanguage(filePath),
736
+ symbols,
737
+ imports,
738
+ exports
739
+ };
740
+ }
741
+ /**
742
+ * Parse all code files in a directory (with security restrictions)
743
+ */
744
+ async parseDirectory(dirPath, patterns = ['**/*.{ts,tsx,js,jsx,py,go,rs,java,rb,php,cs,swift,kt,kts,scala,c,h,cpp,cc,cxx,hpp}']) {
745
+ const results = [];
746
+ const repoRoot = path.resolve(this.repoPath);
747
+ const ignore = new contexthub_core_1.ContexthubIgnore(this.repoPath);
748
+ for (const pattern of patterns) {
749
+ const files = await (0, glob_1.glob)(pattern, { cwd: dirPath, absolute: true });
750
+ let scannedCount = 0;
751
+ for (const file of files) {
752
+ // Security: Cap file count
753
+ if (scannedCount >= contexthub_core_1.MAX_FILES_PER_SCAN) {
754
+ console.error(`File scan limit reached (${contexthub_core_1.MAX_FILES_PER_SCAN}). Stopping.`);
755
+ break;
756
+ }
757
+ // Security: Only parse files within repo boundary
758
+ const resolved = path.resolve(file);
759
+ if (!resolved.startsWith(repoRoot + path.sep) && resolved !== repoRoot) {
760
+ continue;
761
+ }
762
+ const relPath = path.relative(repoRoot, resolved);
763
+ if (ignore.ignores(relPath))
764
+ continue;
765
+ // Skip node_modules, .contexthub, and sensitive files
766
+ if (file.includes('node_modules') || file.includes('.contexthub'))
767
+ continue;
768
+ if (isSensitiveFile(file))
769
+ continue;
770
+ const parsed = await this.parseFile(file);
771
+ results.push(parsed);
772
+ scannedCount++;
773
+ }
774
+ }
775
+ console.error(`Parsed ${results.length} files in directory (limit: ${contexthub_core_1.MAX_FILES_PER_SCAN})`);
776
+ return results;
777
+ }
778
+ /**
779
+ * Build a dependency graph from parsed files
780
+ */
781
+ buildDependencyGraph(files) {
782
+ const graph = new Map();
783
+ for (const file of files) {
784
+ const deps = [];
785
+ for (const imp of file.imports) {
786
+ deps.push(imp.source);
787
+ }
788
+ graph.set(file.path, deps);
789
+ }
790
+ return graph;
791
+ }
792
+ /**
793
+ * Find files that match a pattern or contain a symbol
794
+ */
795
+ async findRelatedFiles(filePath, allFiles) {
796
+ const related = [];
797
+ // Find imports that match this file's name
798
+ const fileName = path.basename(filePath, path.extname(filePath));
799
+ for (const file of allFiles) {
800
+ if (file.path === filePath)
801
+ continue;
802
+ // Check imports
803
+ for (const imp of file.imports) {
804
+ if (imp.source.includes(fileName) || imp.imported.some(i => i === fileName)) {
805
+ related.push(file.path);
806
+ break;
807
+ }
808
+ }
809
+ }
810
+ return [...new Set(related)];
811
+ }
812
+ /**
813
+ * Get summary statistics for a parsed codebase
814
+ */
815
+ getCodeStats(files) {
816
+ const stats = {
817
+ totalFiles: files.length,
818
+ byLanguage: {},
819
+ totalSymbols: 0,
820
+ byType: {}
821
+ };
822
+ for (const file of files) {
823
+ stats.byLanguage[file.language] = (stats.byLanguage[file.language] || 0) + 1;
824
+ for (const symbol of file.symbols) {
825
+ stats.totalSymbols++;
826
+ stats.byType[symbol.type] = (stats.byType[symbol.type] || 0) + 1;
827
+ }
828
+ }
829
+ return stats;
830
+ }
831
+ }
832
+ exports.RepoParser = RepoParser;
@@ -0,0 +1,13 @@
1
+ import { Symbol, ImportExport } from '@imayuur/contexthub-shared-types';
2
/**
 * Extracts symbols, imports and exports from source text using
 * tree-sitter grammars (JavaScript, TypeScript, TSX and Python).
 */
export declare class TreeSitterParser {
    /** Underlying tree-sitter parser instance; assigned during init(). */
    private parser;
    /** Loaded grammars, keyed by language name. */
    private langs;
    /** True once init() has completed successfully. */
    private initialized;
    /**
     * Loads the parser runtime and the bundled grammars.
     * Returns immediately when initialization has already completed.
     */
    init(): Promise<void>;
    /**
     * Parses `content` and collects the symbols, imports and exports found in it.
     * Calls init() first when the parser has not been initialized yet.
     *
     * @param content - Source text to analyse.
     * @param language - Language name; `typescript`, `tsx` and `python` select their
     *   own grammar, any other value is parsed with the JavaScript grammar.
     * @returns The extracted entries, or `null` when the source could not be parsed
     *   (for example no grammar is available or the parser fails).
     */
    parse(content: string, language: string): Promise<{
        symbols: Symbol[];
        imports: ImportExport[];
        exports: ImportExport[];
    } | null>;
    /** Walks the syntax tree and fills the result arrays. */
    private traverse;
}
@@ -0,0 +1,216 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.TreeSitterParser = void 0;
4
+ class TreeSitterParser {
5
+ constructor() {
6
+ this.langs = new Map();
7
+ this.initialized = false;
8
+ }
9
+ async init() {
10
+ if (this.initialized)
11
+ return;
12
+ const { Parser, Language } = require('web-tree-sitter');
13
+ await Parser.init();
14
+ this.parser = new Parser();
15
+ // Load common languages
16
+ const jsPath = require.resolve('@repomix/tree-sitter-wasms/out/tree-sitter-javascript.wasm');
17
+ const tsPath = require.resolve('@repomix/tree-sitter-wasms/out/tree-sitter-typescript.wasm');
18
+ const tsxPath = require.resolve('@repomix/tree-sitter-wasms/out/tree-sitter-tsx.wasm');
19
+ const pyPath = require.resolve('@repomix/tree-sitter-wasms/out/tree-sitter-python.wasm');
20
+ const [jsLang, tsLang, tsxLang, pyLang] = await Promise.all([
21
+ Language.load(jsPath),
22
+ Language.load(tsPath),
23
+ Language.load(tsxPath),
24
+ Language.load(pyPath),
25
+ ]);
26
+ this.langs.set('javascript', jsLang);
27
+ this.langs.set('typescript', tsLang);
28
+ this.langs.set('tsx', tsxLang);
29
+ this.langs.set('python', pyLang);
30
+ this.initialized = true;
31
+ }
32
+ async parse(content, language) {
33
+ if (!this.initialized)
34
+ await this.init();
35
+ // Map file extension/language to tree-sitter language
36
+ let tsLang = 'javascript';
37
+ if (language === 'typescript')
38
+ tsLang = 'typescript';
39
+ if (language === 'tsx')
40
+ tsLang = 'tsx'; // TSX is a distinct grammar in tree-sitter
41
+ if (language === 'python')
42
+ tsLang = 'python';
43
+ const langObj = this.langs.get(tsLang);
44
+ if (!langObj)
45
+ return null;
46
+ this.parser.setLanguage(langObj);
47
+ // Set 5s timeout if supported
48
+ if (typeof this.parser.setTimeoutMicros === 'function') {
49
+ this.parser.setTimeoutMicros(5000000);
50
+ }
51
+ else if (typeof this.parser.setTimeout === 'function') {
52
+ this.parser.setTimeout(5000000);
53
+ }
54
+ let tree;
55
+ try {
56
+ tree = this.parser.parse(content);
57
+ }
58
+ catch (e) {
59
+ // Timeout or parse error
60
+ return null;
61
+ }
62
+ const symbols = [];
63
+ const imports = [];
64
+ const exports = [];
65
+ // Traverse the AST
66
+ this.traverse(tree.rootNode, symbols, imports, exports, tsLang);
67
+ return { symbols, imports, exports };
68
+ }
69
+ traverse(node, symbols, imports, exports, lang) {
70
+ // Determine the type of node
71
+ const type = node.type;
72
+ if (lang === 'javascript' || lang === 'typescript' || lang === 'tsx') {
73
+ if (type === 'function_declaration' || type === 'generator_function_declaration') {
74
+ const nameNode = node.childForFieldName('name');
75
+ if (nameNode) {
76
+ symbols.push({
77
+ type: 'function',
78
+ name: nameNode.text,
79
+ lineNumber: node.startPosition.row + 1,
80
+ columnNumber: node.startPosition.column
81
+ });
82
+ }
83
+ }
84
+ else if (type === 'class_declaration') {
85
+ const nameNode = node.childForFieldName('name');
86
+ if (nameNode) {
87
+ symbols.push({
88
+ type: 'class',
89
+ name: nameNode.text,
90
+ lineNumber: node.startPosition.row + 1,
91
+ columnNumber: node.startPosition.column
92
+ });
93
+ }
94
+ }
95
+ else if (type === 'method_definition') {
96
+ const nameNode = node.childForFieldName('name');
97
+ if (nameNode) {
98
+ symbols.push({
99
+ type: 'method',
100
+ name: nameNode.text,
101
+ lineNumber: node.startPosition.row + 1,
102
+ columnNumber: node.startPosition.column
103
+ });
104
+ }
105
+ }
106
+ else if (type === 'variable_declarator') {
107
+ const nameNode = node.childForFieldName('name');
108
+ if (nameNode && (node.parent?.parent?.type === 'export_statement' || node.parent?.type === 'lexical_declaration')) {
109
+ symbols.push({
110
+ type: 'variable',
111
+ name: nameNode.text,
112
+ lineNumber: node.startPosition.row + 1,
113
+ columnNumber: node.startPosition.column
114
+ });
115
+ }
116
+ }
117
+ else if (type === 'import_statement') {
118
+ const sourceNode = node.childForFieldName('source');
119
+ if (sourceNode) {
120
+ const source = sourceNode.text.replace(/['"]/g, '');
121
+ imports.push({
122
+ source,
123
+ imported: ['*'], // We could extract exact names but * is fine for the graph
124
+ isDefault: false,
125
+ lineNumber: node.startPosition.row + 1
126
+ });
127
+ }
128
+ }
129
+ else if (type === 'export_statement') {
130
+ const sourceNode = node.childForFieldName('source');
131
+ if (sourceNode) {
132
+ exports.push({
133
+ source: sourceNode.text.replace(/['"]/g, ''),
134
+ imported: ['*'],
135
+ isDefault: false,
136
+ lineNumber: node.startPosition.row + 1
137
+ });
138
+ }
139
+ else {
140
+ exports.push({
141
+ source: '',
142
+ imported: ['*'],
143
+ isDefault: false,
144
+ lineNumber: node.startPosition.row + 1
145
+ });
146
+ }
147
+ }
148
+ }
149
+ else if (lang === 'python') {
150
+ if (type === 'function_definition') {
151
+ const nameNode = node.childForFieldName('name');
152
+ if (nameNode) {
153
+ symbols.push({
154
+ type: 'function',
155
+ name: nameNode.text,
156
+ lineNumber: node.startPosition.row + 1,
157
+ columnNumber: node.startPosition.column
158
+ });
159
+ }
160
+ }
161
+ else if (type === 'class_definition') {
162
+ const nameNode = node.childForFieldName('name');
163
+ if (nameNode) {
164
+ symbols.push({
165
+ type: 'class',
166
+ name: nameNode.text,
167
+ lineNumber: node.startPosition.row + 1,
168
+ columnNumber: node.startPosition.column
169
+ });
170
+ }
171
+ }
172
+ else if (type === 'import_statement' || type === 'import_from_statement') {
173
+ let source = '';
174
+ const moduleNameNode = node.childForFieldName('module_name');
175
+ if (moduleNameNode) {
176
+ source = moduleNameNode.text;
177
+ }
178
+ else {
179
+ // simple import
180
+ const firstChild = node.children.find((c) => c.type === 'dotted_name');
181
+ if (firstChild)
182
+ source = firstChild.text;
183
+ }
184
+ if (source) {
185
+ imports.push({
186
+ source,
187
+ imported: ['*'],
188
+ isDefault: false,
189
+ lineNumber: node.startPosition.row + 1
190
+ });
191
+ }
192
+ }
193
+ else if (type === 'assignment') {
194
+ const leftNode = node.childForFieldName('left');
195
+ if (leftNode && leftNode.text === '__all__') {
196
+ const rightNode = node.childForFieldName('right');
197
+ if (rightNode && rightNode.type === 'list') {
198
+ const elements = rightNode.children.filter((c) => c.type === 'string');
199
+ for (const el of elements) {
200
+ exports.push({
201
+ source: '',
202
+ imported: [el.text.replace(/['"]/g, '')],
203
+ isDefault: false,
204
+ lineNumber: node.startPosition.row + 1
205
+ });
206
+ }
207
+ }
208
+ }
209
+ }
210
+ }
211
+ for (let i = 0; i < node.childCount; i++) {
212
+ this.traverse(node.child(i), symbols, imports, exports, lang);
213
+ }
214
+ }
215
+ }
216
+ exports.TreeSitterParser = TreeSitterParser;
package/package.json ADDED
@@ -0,0 +1,58 @@
1
+ {
2
+ "name": "@imayuur/contexthub-repo-parser",
3
+ "version": "1.0.0",
4
+ "description": "Sandboxed repository code analysis for ContextHub",
5
+ "license": "MIT",
6
+ "repository": {
7
+ "type": "git",
8
+ "url": "git+https://github.com/iMayuuR/contexthub.git",
9
+ "directory": "packages/repo-parser"
10
+ },
11
+ "publishConfig": {
12
+ "access": "public"
13
+ },
14
+ "engines": {
15
+ "node": ">=18"
16
+ },
17
+ "main": "dist/index.js",
18
+ "types": "dist/index.d.ts",
19
+ "files": [
20
+ "dist",
21
+ "!dist/__tests__"
22
+ ],
23
+ "scripts": {
24
+ "build": "tsc",
25
+ "dev": "tsc --watch",
26
+ "test": "tsc && node --test dist/__tests__/*.test.js",
27
+ "prepublishOnly": "npm run build"
28
+ },
29
+ "dependencies": {
30
+ "@repomix/tree-sitter-wasms": "^0.1.17",
31
+ "glob": "^10.0.0",
32
+ "tree-sitter-wasms": "^0.1.13",
33
+ "web-tree-sitter": "^0.26.9",
34
+ "@imayuur/contexthub-shared-types": "^1.0.0"
35
+ },
36
+ "devDependencies": {
37
+ "@types/node": "^18.0.0",
38
+ "typescript": "^5.0.0"
39
+ },
40
+ "author": "Mayur Dattatray Patil",
41
+ "bugs": {
42
+ "url": "https://github.com/iMayuuR/contexthub/issues"
43
+ },
44
+ "homepage": "https://github.com/iMayuuR/contexthub#readme",
45
+ "keywords": [
46
+ "contexthub",
47
+ "mcp",
48
+ "ai-memory",
49
+ "cursor",
50
+ "claude"
51
+ ],
52
+ "exports": {
53
+ ".": {
54
+ "types": "./dist/index.d.ts",
55
+ "default": "./dist/index.js"
56
+ }
57
+ }
58
+ }