@imayuur/contexthub-repo-parser 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +83 -0
- package/dist/index.js +832 -0
- package/dist/tree-sitter.d.ts +13 -0
- package/dist/tree-sitter.js +216 -0
- package/package.json +58 -0
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
import type { ParsedFile } from '@imayuur/contexthub-shared-types';
|
|
2
|
+
/**
 * Extracts symbols, imports and exports from source files and offers
 * dependency-graph, related-file and statistics helpers over the results.
 */
export declare class RepoParser {
    /** Repository path handed to the constructor. */
    private repoPath;
    /** Parser instance tried before the regex-based language parsers. */
    private tsParser;
    /**
     * @param repoPath Path of the repository this parser works on.
     */
    constructor(repoPath: string);
    /**
     * Detect language from file extension
     */
    private detectLanguage;
    /**
     * Simple JS/TS parser for extracting symbols, imports, exports
     */
    private parseJSTS;
    /**
     * Parse a Python file for symbols
     */
    private parsePython;
    /**
     * Parse a Go file for symbols and imports
     */
    private parseGo;
    /**
     * Parse a Rust file for symbols and imports
     */
    private parseRust;
    /**
     * Parse a Java file for symbols and imports
     */
    private parseJava;
    /**
     * Parse a Ruby file for symbols and imports
     */
    private parseRuby;
    /**
     * Parse a PHP file for symbols and imports
     */
    private parsePhp;
    /**
     * Parse a C# file for symbols and imports
     */
    private parseCSharp;
    /**
     * Parse a Swift file for symbols and imports
     */
    private parseSwift;
    /**
     * Parse a Kotlin file for symbols and imports
     */
    private parseKotlin;
    /**
     * Parse a Scala file for symbols and imports
     */
    private parseScala;
    /**
     * Parse a C/C++ file for symbols and imports
     */
    private parseCPP;
    /**
     * Parse a single file (with size limit and sensitive file check)
     *
     * @param filePath Path of the file to parse.
     * @returns The parsed representation of the file.
     */
    parseFile(filePath: string): Promise<ParsedFile>;
    /**
     * Parse all code files in a directory (with security restrictions)
     *
     * @param dirPath Directory to parse.
     * @param patterns Optional list of patterns; presumably glob patterns
     *   selecting which files are parsed — TODO confirm against the implementation.
     * @returns One parsed entry per processed file.
     */
    parseDirectory(dirPath: string, patterns?: string[]): Promise<ParsedFile[]>;
    /**
     * Build a dependency graph from parsed files
     *
     * @param files Previously parsed files.
     * @returns A map from a string key to a list of strings; presumably file
     *   path to the paths it depends on — TODO confirm against the implementation.
     */
    buildDependencyGraph(files: ParsedFile[]): Map<string, string[]>;
    /**
     * Find files that match a pattern or contain a symbol
     *
     * @param filePath File to find relatives for.
     * @param allFiles Parsed files to search in.
     * @returns Related file identifiers (presumably paths — TODO confirm).
     */
    findRelatedFiles(filePath: string, allFiles: ParsedFile[]): Promise<string[]>;
    /**
     * Get summary statistics for a parsed codebase
     *
     * @param files Parsed files to summarise.
     * @returns Total file and symbol counts plus two per-key counters
     *   (`byLanguage`, `byType`).
     */
    getCodeStats(files: ParsedFile[]): {
        totalFiles: number;
        byLanguage: Record<string, number>;
        totalSymbols: number;
        byType: Record<string, number>;
    };
}
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,832 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// Module-interop helper: exposes property `k` of `m` on `o` under the name `k2`
// (`k2` defaults to `k`). An already-installed `this.__createBinding` wins.
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    var desc = Object.getOwnPropertyDescriptor(m, k);
    // Reuse m's own descriptor only when it is safe to do so; otherwise install
    // a getter that reads m[k] on every access.
    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
        desc = { enumerable: true, get: function() { return m[k]; } };
    }
    Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
    // Fallback when Object.create is unavailable: copy the current value.
    if (k2 === undefined) k2 = k;
    o[k2] = m[k];
}));
|
|
13
|
+
// Module-interop helper: stores `v` as the "default" property of `o`
// (enumerable when defined through Object.defineProperty).
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
    Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
    // Fallback when Object.create is unavailable: plain assignment.
    o["default"] = v;
});
|
|
18
|
+
// Module-interop helper used for the `require(...)` calls below: returns `mod`
// unchanged when it is flagged `__esModule`; otherwise builds a wrapper object
// that re-exposes every own key except "default" and sets "default" to `mod`.
var __importStar = (this && this.__importStar) || (function () {
    // Resolves the key-listing strategy on first use and replaces itself with it.
    var ownKeys = function(o) {
        ownKeys = Object.getOwnPropertyNames || function (o) {
            var ar = [];
            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
            return ar;
        };
        return ownKeys(o);
    };
    return function (mod) {
        if (mod && mod.__esModule) return mod;
        var result = {};
        // Bind each own key (except "default") of mod onto the wrapper.
        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
        __setModuleDefault(result, mod);
        return result;
    };
})();
|
|
35
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
|
+
exports.RepoParser = void 0;
|
|
37
|
+
const fs = __importStar(require("fs"));
|
|
38
|
+
const path = __importStar(require("path"));
|
|
39
|
+
const glob_1 = require("glob");
|
|
40
|
+
const contexthub_core_1 = require("@imayuur/contexthub-core");
|
|
41
|
+
const tree_sitter_1 = require("./tree-sitter");
|
|
42
|
+
// File-name patterns that mark a file as sensitive. `*.ext` is a suffix match,
// `name*` is a prefix match, and plain names match as a prefix as well.
const SENSITIVE_FILE_PATTERNS = [
    '.env', '.env.local', '.env.production', '.env.staging',
    '*.pem', '*.key', '*.p12', '*.pfx', '*.jks', '*.keystore',
    'credentials*', 'secrets*', '.npmrc', '.pypirc',
    'id_rsa*', 'id_ed25519*', '*.crt', '*.cert',
    '.htpasswd', '*.secret',
];
/**
 * Tell whether the base name of `filePath` matches one of the sensitive
 * file patterns. Matching is case-insensitive and only looks at the last
 * path segment.
 *
 * @param filePath Path (absolute or relative) of the file to check.
 * @returns `true` when any pattern matches, `false` otherwise.
 */
function isSensitiveFile(filePath) {
    const fileName = path.basename(filePath).toLowerCase();
    for (const rawPattern of SENSITIVE_FILE_PATTERNS) {
        const pattern = rawPattern.toLowerCase();
        let matched;
        if (pattern.startsWith('*.')) {
            // "*.pem" -> compare against the ".pem" suffix
            matched = fileName.endsWith(pattern.slice(1));
        }
        else if (pattern.endsWith('*')) {
            // "secrets*" -> compare against the "secrets" prefix
            matched = fileName.startsWith(pattern.slice(0, -1));
        }
        else {
            // Plain names: exact match, or the file name merely starts with the
            // pattern (so ".env" also covers ".env.anything").
            matched = fileName === pattern || fileName.startsWith(pattern.replace('*', ''));
        }
        if (matched) {
            return true;
        }
    }
    return false;
}
|
|
60
|
+
class RepoParser {
|
|
61
|
+
constructor(repoPath) {
|
|
62
|
+
this.repoPath = repoPath;
|
|
63
|
+
this.tsParser = new tree_sitter_1.TreeSitterParser();
|
|
64
|
+
}
|
|
65
|
+
/**
|
|
66
|
+
* Detect language from file extension
|
|
67
|
+
*/
|
|
68
|
+
detectLanguage(filePath) {
|
|
69
|
+
const ext = filePath.split('.').pop()?.toLowerCase();
|
|
70
|
+
switch (ext) {
|
|
71
|
+
case 'ts':
|
|
72
|
+
case 'tsx':
|
|
73
|
+
return 'typescript';
|
|
74
|
+
case 'js':
|
|
75
|
+
case 'jsx':
|
|
76
|
+
case 'mjs':
|
|
77
|
+
case 'cjs':
|
|
78
|
+
return 'javascript';
|
|
79
|
+
case 'py':
|
|
80
|
+
return 'python';
|
|
81
|
+
case 'java':
|
|
82
|
+
return 'java';
|
|
83
|
+
case 'cpp':
|
|
84
|
+
case 'cc':
|
|
85
|
+
case 'cxx':
|
|
86
|
+
case 'c++':
|
|
87
|
+
return 'cpp';
|
|
88
|
+
case 'c':
|
|
89
|
+
case 'h':
|
|
90
|
+
case 'hpp':
|
|
91
|
+
return 'c';
|
|
92
|
+
case 'go':
|
|
93
|
+
return 'go';
|
|
94
|
+
case 'rs':
|
|
95
|
+
return 'rust';
|
|
96
|
+
case 'rb':
|
|
97
|
+
return 'ruby';
|
|
98
|
+
case 'php':
|
|
99
|
+
return 'php';
|
|
100
|
+
case 'cs':
|
|
101
|
+
return 'csharp';
|
|
102
|
+
case 'swift':
|
|
103
|
+
return 'swift';
|
|
104
|
+
case 'kt':
|
|
105
|
+
case 'kts':
|
|
106
|
+
return 'kotlin';
|
|
107
|
+
case 'scala':
|
|
108
|
+
return 'scala';
|
|
109
|
+
case 'vue':
|
|
110
|
+
case 'svelte':
|
|
111
|
+
return 'framework';
|
|
112
|
+
default:
|
|
113
|
+
return 'unknown';
|
|
114
|
+
}
|
|
115
|
+
}
|
|
116
|
+
/**
|
|
117
|
+
* Simple JS/TS parser for extracting symbols, imports, exports
|
|
118
|
+
*/
|
|
119
|
+
parseJSTS(content) {
|
|
120
|
+
const symbols = [];
|
|
121
|
+
const imports = [];
|
|
122
|
+
const exports = [];
|
|
123
|
+
const lines = content.split('\n');
|
|
124
|
+
// Patterns for TypeScript/JavaScript
|
|
125
|
+
const patterns = {
|
|
126
|
+
functionDecl: /^(?:export\s+)?(?:async\s+)?function\s+(\w+)/,
|
|
127
|
+
classDecl: /^(?:export\s+)?(?:abstract\s+)?class\s+(\w+)/,
|
|
128
|
+
constDecl: /^(?:export\s+)?const\s+(\w+)\s*=/,
|
|
129
|
+
letDecl: /^(?:export\s+)?let\s+(\w+)\s*=/,
|
|
130
|
+
interfaceDecl: /^interface\s+(\w+)/,
|
|
131
|
+
typeDecl: /^type\s+(\w+)/,
|
|
132
|
+
enumDecl: /^enum\s+(\w+)/,
|
|
133
|
+
importDefault: /^import\s+(\w+)\s+from\s+['"](.+?)['"]/,
|
|
134
|
+
importNamed: /^import\s+{([^}]+)}\s+from\s+['"](.+?)['"]/,
|
|
135
|
+
importAll: /^import\s+\*\s+as\s+(\w+)\s+from\s+['"](.+?)['"]/,
|
|
136
|
+
exportDefault: /^export\s+default\s+/,
|
|
137
|
+
exportNamed: /^export\s+(?:const|let|function|class|interface|type|enum)\s+(\w+)/
|
|
138
|
+
};
|
|
139
|
+
lines.forEach((line, index) => {
|
|
140
|
+
const trimmed = line.trim();
|
|
141
|
+
const lineNumber = index + 1;
|
|
142
|
+
// Functions
|
|
143
|
+
const funcMatch = trimmed.match(patterns.functionDecl);
|
|
144
|
+
if (funcMatch) {
|
|
145
|
+
symbols.push({ type: 'function', name: funcMatch[1], lineNumber, columnNumber: 0 });
|
|
146
|
+
}
|
|
147
|
+
// Classes
|
|
148
|
+
const classMatch = trimmed.match(patterns.classDecl);
|
|
149
|
+
if (classMatch) {
|
|
150
|
+
symbols.push({ type: 'class', name: classMatch[1], lineNumber, columnNumber: 0 });
|
|
151
|
+
}
|
|
152
|
+
// Constants/Variables
|
|
153
|
+
const constMatch = trimmed.match(patterns.constDecl);
|
|
154
|
+
if (constMatch) {
|
|
155
|
+
symbols.push({ type: 'variable', name: constMatch[1], lineNumber, columnNumber: 0 });
|
|
156
|
+
}
|
|
157
|
+
const letMatch = trimmed.match(patterns.letDecl);
|
|
158
|
+
if (letMatch) {
|
|
159
|
+
symbols.push({ type: 'variable', name: letMatch[1], lineNumber, columnNumber: 0 });
|
|
160
|
+
}
|
|
161
|
+
// Interfaces
|
|
162
|
+
const ifaceMatch = trimmed.match(patterns.interfaceDecl);
|
|
163
|
+
if (ifaceMatch) {
|
|
164
|
+
symbols.push({ type: 'interface', name: ifaceMatch[1], lineNumber, columnNumber: 0 });
|
|
165
|
+
}
|
|
166
|
+
// Type aliases
|
|
167
|
+
const typeMatch = trimmed.match(patterns.typeDecl);
|
|
168
|
+
if (typeMatch) {
|
|
169
|
+
symbols.push({ type: 'interface', name: typeMatch[1], lineNumber, columnNumber: 0 });
|
|
170
|
+
}
|
|
171
|
+
// Enums
|
|
172
|
+
const enumMatch = trimmed.match(patterns.enumDecl);
|
|
173
|
+
if (enumMatch) {
|
|
174
|
+
symbols.push({ type: 'class', name: enumMatch[1], lineNumber, columnNumber: 0 });
|
|
175
|
+
}
|
|
176
|
+
// Default imports
|
|
177
|
+
const importDefMatch = trimmed.match(patterns.importDefault);
|
|
178
|
+
if (importDefMatch) {
|
|
179
|
+
imports.push({
|
|
180
|
+
source: importDefMatch[2],
|
|
181
|
+
imported: [importDefMatch[1]],
|
|
182
|
+
isDefault: true,
|
|
183
|
+
lineNumber
|
|
184
|
+
});
|
|
185
|
+
}
|
|
186
|
+
// Named imports
|
|
187
|
+
const importNamedMatch = trimmed.match(patterns.importNamed);
|
|
188
|
+
if (importNamedMatch) {
|
|
189
|
+
const importedItems = importNamedMatch[1].split(',').map(s => s.trim()).filter(s => s);
|
|
190
|
+
imports.push({
|
|
191
|
+
source: importNamedMatch[2],
|
|
192
|
+
imported: importedItems,
|
|
193
|
+
isDefault: false,
|
|
194
|
+
lineNumber
|
|
195
|
+
});
|
|
196
|
+
}
|
|
197
|
+
// Namespace imports
|
|
198
|
+
const importAllMatch = trimmed.match(patterns.importAll);
|
|
199
|
+
if (importAllMatch) {
|
|
200
|
+
imports.push({
|
|
201
|
+
source: importAllMatch[2],
|
|
202
|
+
imported: [importAllMatch[1]],
|
|
203
|
+
isDefault: true,
|
|
204
|
+
lineNumber
|
|
205
|
+
});
|
|
206
|
+
}
|
|
207
|
+
// Default exports
|
|
208
|
+
if (trimmed.match(patterns.exportDefault)) {
|
|
209
|
+
exports.push({ source: '', imported: ['default'], isDefault: true, lineNumber });
|
|
210
|
+
}
|
|
211
|
+
// Named exports
|
|
212
|
+
const exportMatch = trimmed.match(patterns.exportNamed);
|
|
213
|
+
if (exportMatch) {
|
|
214
|
+
exports.push({ source: '', imported: [exportMatch[1]], isDefault: false, lineNumber });
|
|
215
|
+
}
|
|
216
|
+
});
|
|
217
|
+
return { symbols, imports, exports };
|
|
218
|
+
}
|
|
219
|
+
/**
|
|
220
|
+
* Parse a Python file for symbols
|
|
221
|
+
*/
|
|
222
|
+
parsePython(content) {
|
|
223
|
+
const symbols = [];
|
|
224
|
+
const imports = [];
|
|
225
|
+
const exports = [];
|
|
226
|
+
const lines = content.split('\n');
|
|
227
|
+
const patterns = {
|
|
228
|
+
classDecl: /^class\s+(\w+)/,
|
|
229
|
+
functionDecl: /^def\s+(\w+)/,
|
|
230
|
+
importFrom: /^from\s+([\w.]+)\s+import\s+(.+)/,
|
|
231
|
+
import: /^import\s+(.+)/
|
|
232
|
+
};
|
|
233
|
+
lines.forEach((line, index) => {
|
|
234
|
+
const trimmed = line.trim();
|
|
235
|
+
const lineNumber = index + 1;
|
|
236
|
+
const classMatch = trimmed.match(patterns.classDecl);
|
|
237
|
+
if (classMatch) {
|
|
238
|
+
symbols.push({ type: 'class', name: classMatch[1], lineNumber, columnNumber: 0 });
|
|
239
|
+
}
|
|
240
|
+
const funcMatch = trimmed.match(patterns.functionDecl);
|
|
241
|
+
if (funcMatch) {
|
|
242
|
+
symbols.push({ type: 'function', name: funcMatch[1], lineNumber, columnNumber: 0 });
|
|
243
|
+
}
|
|
244
|
+
const importFromMatch = trimmed.match(patterns.importFrom);
|
|
245
|
+
if (importFromMatch) {
|
|
246
|
+
const items = importFromMatch[2].split(',').map(s => s.trim()).filter(s => s !== '*');
|
|
247
|
+
imports.push({ source: importFromMatch[1], imported: items, isDefault: false, lineNumber });
|
|
248
|
+
}
|
|
249
|
+
const importMatch = trimmed.match(patterns.import);
|
|
250
|
+
if (importMatch) {
|
|
251
|
+
imports.push({ source: importMatch[1], imported: ['*'], isDefault: false, lineNumber });
|
|
252
|
+
}
|
|
253
|
+
// __all__ export
|
|
254
|
+
if (trimmed.startsWith('__all__')) {
|
|
255
|
+
exports.push({ source: '', imported: ['__all__'], isDefault: false, lineNumber });
|
|
256
|
+
}
|
|
257
|
+
});
|
|
258
|
+
return { symbols, imports, exports };
|
|
259
|
+
}
|
|
260
|
+
/**
|
|
261
|
+
* Parse a Go file for symbols and imports
|
|
262
|
+
*/
|
|
263
|
+
parseGo(content) {
|
|
264
|
+
const symbols = [];
|
|
265
|
+
const imports = [];
|
|
266
|
+
const exports = [];
|
|
267
|
+
const lines = content.split('\n');
|
|
268
|
+
let inImportBlock = false;
|
|
269
|
+
lines.forEach((line, index) => {
|
|
270
|
+
const trimmed = line.trim();
|
|
271
|
+
const lineNumber = index + 1;
|
|
272
|
+
// Handle import block
|
|
273
|
+
if (trimmed.startsWith('import (')) {
|
|
274
|
+
inImportBlock = true;
|
|
275
|
+
return;
|
|
276
|
+
}
|
|
277
|
+
if (inImportBlock && trimmed.startsWith(')')) {
|
|
278
|
+
inImportBlock = false;
|
|
279
|
+
return;
|
|
280
|
+
}
|
|
281
|
+
if (inImportBlock) {
|
|
282
|
+
const match = trimmed.match(/"([^"]+)"/);
|
|
283
|
+
if (match) {
|
|
284
|
+
imports.push({ source: match[1], imported: ['*'], isDefault: false, lineNumber });
|
|
285
|
+
}
|
|
286
|
+
return;
|
|
287
|
+
}
|
|
288
|
+
// Single-line import
|
|
289
|
+
const importMatch = trimmed.match(/^import\s+"([^"]+)"/);
|
|
290
|
+
if (importMatch) {
|
|
291
|
+
imports.push({ source: importMatch[1], imported: ['*'], isDefault: false, lineNumber });
|
|
292
|
+
}
|
|
293
|
+
// Functions (with receiver)
|
|
294
|
+
const methodMatch = trimmed.match(/^func\s+\(([^)]+)\)\s+(\w+)/);
|
|
295
|
+
if (methodMatch) {
|
|
296
|
+
symbols.push({ type: 'method', name: `${methodMatch[1].trim()}.${methodMatch[2]}`, lineNumber, columnNumber: 0 });
|
|
297
|
+
}
|
|
298
|
+
else {
|
|
299
|
+
// Plain function
|
|
300
|
+
const funcMatch = trimmed.match(/^func\s+(\w+)/);
|
|
301
|
+
if (funcMatch) {
|
|
302
|
+
symbols.push({ type: 'function', name: funcMatch[1], lineNumber, columnNumber: 0 });
|
|
303
|
+
}
|
|
304
|
+
}
|
|
305
|
+
// Structs and interfaces
|
|
306
|
+
const typeMatch = trimmed.match(/^type\s+(\w+)\s+(struct|interface)/);
|
|
307
|
+
if (typeMatch) {
|
|
308
|
+
symbols.push({
|
|
309
|
+
type: typeMatch[2] === 'interface' ? 'interface' : 'class',
|
|
310
|
+
name: typeMatch[1],
|
|
311
|
+
lineNumber,
|
|
312
|
+
columnNumber: 0
|
|
313
|
+
});
|
|
314
|
+
}
|
|
315
|
+
});
|
|
316
|
+
return { symbols, imports, exports };
|
|
317
|
+
}
|
|
318
|
+
/**
|
|
319
|
+
* Parse a Rust file for symbols and imports
|
|
320
|
+
*/
|
|
321
|
+
parseRust(content) {
|
|
322
|
+
const symbols = [];
|
|
323
|
+
const imports = [];
|
|
324
|
+
const exports = [];
|
|
325
|
+
const lines = content.split('\n');
|
|
326
|
+
lines.forEach((line, index) => {
|
|
327
|
+
const trimmed = line.trim();
|
|
328
|
+
const lineNumber = index + 1;
|
|
329
|
+
// Functions
|
|
330
|
+
const funcMatch = trimmed.match(/^(?:pub\s+)?(?:async\s+)?fn\s+(\w+)/);
|
|
331
|
+
if (funcMatch) {
|
|
332
|
+
symbols.push({ type: 'function', name: funcMatch[1], lineNumber, columnNumber: 0 });
|
|
333
|
+
}
|
|
334
|
+
// Structs, Enums, Traits
|
|
335
|
+
const structMatch = trimmed.match(/^(?:pub\s+)?struct\s+(\w+)/);
|
|
336
|
+
if (structMatch) {
|
|
337
|
+
symbols.push({ type: 'class', name: structMatch[1], lineNumber, columnNumber: 0 });
|
|
338
|
+
}
|
|
339
|
+
const enumMatch = trimmed.match(/^(?:pub\s+)?enum\s+(\w+)/);
|
|
340
|
+
if (enumMatch) {
|
|
341
|
+
symbols.push({ type: 'class', name: enumMatch[1], lineNumber, columnNumber: 0 });
|
|
342
|
+
}
|
|
343
|
+
const traitMatch = trimmed.match(/^(?:pub\s+)?trait\s+(\w+)/);
|
|
344
|
+
if (traitMatch) {
|
|
345
|
+
symbols.push({ type: 'interface', name: traitMatch[1], lineNumber, columnNumber: 0 });
|
|
346
|
+
}
|
|
347
|
+
// Imports
|
|
348
|
+
const importMatch = trimmed.match(/^use\s+([^;]+);/);
|
|
349
|
+
if (importMatch) {
|
|
350
|
+
imports.push({ source: importMatch[1].trim(), imported: ['*'], isDefault: false, lineNumber });
|
|
351
|
+
}
|
|
352
|
+
});
|
|
353
|
+
return { symbols, imports, exports };
|
|
354
|
+
}
|
|
355
|
+
/**
|
|
356
|
+
* Parse a Java file for symbols and imports
|
|
357
|
+
*/
|
|
358
|
+
parseJava(content) {
|
|
359
|
+
const symbols = [];
|
|
360
|
+
const imports = [];
|
|
361
|
+
const exports = [];
|
|
362
|
+
const lines = content.split('\n');
|
|
363
|
+
lines.forEach((line, index) => {
|
|
364
|
+
const trimmed = line.trim();
|
|
365
|
+
const lineNumber = index + 1;
|
|
366
|
+
// Classes and Interfaces
|
|
367
|
+
const classMatch = trimmed.match(/^(?:public\s+|private\s+)?class\s+(\w+)/);
|
|
368
|
+
if (classMatch) {
|
|
369
|
+
symbols.push({ type: 'class', name: classMatch[1], lineNumber, columnNumber: 0 });
|
|
370
|
+
}
|
|
371
|
+
const interfaceMatch = trimmed.match(/^(?:public\s+|private\s+)?interface\s+(\w+)/);
|
|
372
|
+
if (interfaceMatch) {
|
|
373
|
+
symbols.push({ type: 'interface', name: interfaceMatch[1], lineNumber, columnNumber: 0 });
|
|
374
|
+
}
|
|
375
|
+
// Methods
|
|
376
|
+
const methodMatch = trimmed.match(/^(?:public|protected|private|static|\s) +[\w<>[\]]+ +(\w+) *\([^)]*\) *(?:throws [^{]+)? *\{/);
|
|
377
|
+
if (methodMatch) {
|
|
378
|
+
if (!['if', 'for', 'while', 'switch', 'catch'].includes(methodMatch[1])) {
|
|
379
|
+
symbols.push({ type: 'method', name: methodMatch[1], lineNumber, columnNumber: 0 });
|
|
380
|
+
}
|
|
381
|
+
}
|
|
382
|
+
// Imports
|
|
383
|
+
const importMatch = trimmed.match(/^import\s+([^;]+);/);
|
|
384
|
+
if (importMatch) {
|
|
385
|
+
imports.push({ source: importMatch[1].trim(), imported: ['*'], isDefault: false, lineNumber });
|
|
386
|
+
}
|
|
387
|
+
});
|
|
388
|
+
return { symbols, imports, exports };
|
|
389
|
+
}
|
|
390
|
+
/**
|
|
391
|
+
* Parse a Ruby file for symbols and imports
|
|
392
|
+
*/
|
|
393
|
+
parseRuby(content) {
|
|
394
|
+
const symbols = [];
|
|
395
|
+
const imports = [];
|
|
396
|
+
const exports = [];
|
|
397
|
+
const lines = content.split('\n');
|
|
398
|
+
lines.forEach((line, index) => {
|
|
399
|
+
const trimmed = line.trim();
|
|
400
|
+
const lineNumber = index + 1;
|
|
401
|
+
// Class
|
|
402
|
+
const classMatch = trimmed.match(/^class\s+([\w:]+)/);
|
|
403
|
+
if (classMatch) {
|
|
404
|
+
symbols.push({ type: 'class', name: classMatch[1], lineNumber, columnNumber: 0 });
|
|
405
|
+
}
|
|
406
|
+
// Method / Function
|
|
407
|
+
const defMatch = trimmed.match(/^def\s+([\w!?.]+)/);
|
|
408
|
+
if (defMatch) {
|
|
409
|
+
symbols.push({ type: 'function', name: defMatch[1], lineNumber, columnNumber: 0 });
|
|
410
|
+
}
|
|
411
|
+
// Imports
|
|
412
|
+
const requireMatch = trimmed.match(/^(?:require|require_relative)\s+['"](.+?)['"]/);
|
|
413
|
+
if (requireMatch) {
|
|
414
|
+
imports.push({ source: requireMatch[1], imported: ['*'], isDefault: false, lineNumber });
|
|
415
|
+
}
|
|
416
|
+
});
|
|
417
|
+
return { symbols, imports, exports };
|
|
418
|
+
}
|
|
419
|
+
/**
|
|
420
|
+
* Parse a PHP file for symbols and imports
|
|
421
|
+
*/
|
|
422
|
+
parsePhp(content) {
|
|
423
|
+
const symbols = [];
|
|
424
|
+
const imports = [];
|
|
425
|
+
const exports = [];
|
|
426
|
+
const lines = content.split('\n');
|
|
427
|
+
lines.forEach((line, index) => {
|
|
428
|
+
const trimmed = line.trim();
|
|
429
|
+
const lineNumber = index + 1;
|
|
430
|
+
// Class / Interface / Trait
|
|
431
|
+
const classMatch = trimmed.match(/^(?:abstract\s+|final\s+)?(?:class|interface|trait)\s+(\w+)/);
|
|
432
|
+
if (classMatch) {
|
|
433
|
+
symbols.push({ type: 'class', name: classMatch[1], lineNumber, columnNumber: 0 });
|
|
434
|
+
}
|
|
435
|
+
// Function
|
|
436
|
+
const funcMatch = trimmed.match(/^(?:public|protected|private|static|\s)*function\s+(\w+)/);
|
|
437
|
+
if (funcMatch) {
|
|
438
|
+
symbols.push({ type: 'function', name: funcMatch[1], lineNumber, columnNumber: 0 });
|
|
439
|
+
}
|
|
440
|
+
// Imports (use, require, include)
|
|
441
|
+
const useMatch = trimmed.match(/^use\s+([^;]+);/);
|
|
442
|
+
if (useMatch) {
|
|
443
|
+
imports.push({ source: useMatch[1].trim(), imported: ['*'], isDefault: false, lineNumber });
|
|
444
|
+
}
|
|
445
|
+
const reqMatch = trimmed.match(/^(?:require|require_once|include|include_once)\s*['"](.+?)['"]/);
|
|
446
|
+
if (reqMatch) {
|
|
447
|
+
imports.push({ source: reqMatch[1], imported: ['*'], isDefault: false, lineNumber });
|
|
448
|
+
}
|
|
449
|
+
});
|
|
450
|
+
return { symbols, imports, exports };
|
|
451
|
+
}
|
|
452
|
+
/**
|
|
453
|
+
* Parse a C# file for symbols and imports
|
|
454
|
+
*/
|
|
455
|
+
parseCSharp(content) {
|
|
456
|
+
const symbols = [];
|
|
457
|
+
const imports = [];
|
|
458
|
+
const exports = [];
|
|
459
|
+
const lines = content.split('\n');
|
|
460
|
+
lines.forEach((line, index) => {
|
|
461
|
+
const trimmed = line.trim();
|
|
462
|
+
const lineNumber = index + 1;
|
|
463
|
+
// Class / Interface / Struct / Record
|
|
464
|
+
const classMatch = trimmed.match(/^(?:public|private|protected|internal|static|\s)*(?:class|interface|struct|record)\s+(\w+)/);
|
|
465
|
+
if (classMatch) {
|
|
466
|
+
symbols.push({ type: 'class', name: classMatch[1], lineNumber, columnNumber: 0 });
|
|
467
|
+
}
|
|
468
|
+
// Method / Function
|
|
469
|
+
const methodMatch = trimmed.match(/^(?:public|private|protected|internal|static|async|override|\s)+[\w<>[\]]+ +(\w+) *\([^)]*\)/);
|
|
470
|
+
if (methodMatch) {
|
|
471
|
+
if (!['if', 'for', 'while', 'switch', 'using', 'catch'].includes(methodMatch[1])) {
|
|
472
|
+
symbols.push({ type: 'method', name: methodMatch[1], lineNumber, columnNumber: 0 });
|
|
473
|
+
}
|
|
474
|
+
}
|
|
475
|
+
// Imports (using)
|
|
476
|
+
const usingMatch = trimmed.match(/^using\s+([^;=]+);/);
|
|
477
|
+
if (usingMatch) {
|
|
478
|
+
imports.push({ source: usingMatch[1].trim(), imported: ['*'], isDefault: false, lineNumber });
|
|
479
|
+
}
|
|
480
|
+
});
|
|
481
|
+
return { symbols, imports, exports };
|
|
482
|
+
}
|
|
483
|
+
/**
|
|
484
|
+
* Parse a Swift file for symbols and imports
|
|
485
|
+
*/
|
|
486
|
+
parseSwift(content) {
|
|
487
|
+
const symbols = [];
|
|
488
|
+
const imports = [];
|
|
489
|
+
const exports = [];
|
|
490
|
+
const lines = content.split('\n');
|
|
491
|
+
lines.forEach((line, index) => {
|
|
492
|
+
const trimmed = line.trim();
|
|
493
|
+
const lineNumber = index + 1;
|
|
494
|
+
// Class / Struct / Protocol / Enum
|
|
495
|
+
const classMatch = trimmed.match(/^(?:public|private|internal|fileprivate|\s)*(?:class|struct|protocol|enum)\s+(\w+)/);
|
|
496
|
+
if (classMatch) {
|
|
497
|
+
symbols.push({ type: 'class', name: classMatch[1], lineNumber, columnNumber: 0 });
|
|
498
|
+
}
|
|
499
|
+
// Function
|
|
500
|
+
const funcMatch = trimmed.match(/^(?:public|private|internal|fileprivate|static|class|\s)*func\s+(\w+)/);
|
|
501
|
+
if (funcMatch) {
|
|
502
|
+
symbols.push({ type: 'function', name: funcMatch[1], lineNumber, columnNumber: 0 });
|
|
503
|
+
}
|
|
504
|
+
// Imports
|
|
505
|
+
const importMatch = trimmed.match(/^import\s+(\w+)/);
|
|
506
|
+
if (importMatch) {
|
|
507
|
+
imports.push({ source: importMatch[1], imported: ['*'], isDefault: false, lineNumber });
|
|
508
|
+
}
|
|
509
|
+
});
|
|
510
|
+
return { symbols, imports, exports };
|
|
511
|
+
}
|
|
512
|
+
/**
|
|
513
|
+
* Parse a Kotlin file for symbols and imports
|
|
514
|
+
*/
|
|
515
|
+
parseKotlin(content) {
|
|
516
|
+
const symbols = [];
|
|
517
|
+
const imports = [];
|
|
518
|
+
const exports = [];
|
|
519
|
+
const lines = content.split('\n');
|
|
520
|
+
lines.forEach((line, index) => {
|
|
521
|
+
const trimmed = line.trim();
|
|
522
|
+
const lineNumber = index + 1;
|
|
523
|
+
// Class / Interface / Object
|
|
524
|
+
const classMatch = trimmed.match(/^(?:open|abstract|sealed|data|internal|\s)*(?:class|interface|object)\s+(\w+)/);
|
|
525
|
+
if (classMatch) {
|
|
526
|
+
symbols.push({ type: 'class', name: classMatch[1], lineNumber, columnNumber: 0 });
|
|
527
|
+
}
|
|
528
|
+
// Function
|
|
529
|
+
const funcMatch = trimmed.match(/^(?:open|override|internal|public|private|\s)*fun\s+(\w+)/);
|
|
530
|
+
if (funcMatch) {
|
|
531
|
+
symbols.push({ type: 'function', name: funcMatch[1], lineNumber, columnNumber: 0 });
|
|
532
|
+
}
|
|
533
|
+
// Imports
|
|
534
|
+
const importMatch = trimmed.match(/^import\s+([\w.]+)/);
|
|
535
|
+
if (importMatch) {
|
|
536
|
+
imports.push({ source: importMatch[1], imported: ['*'], isDefault: false, lineNumber });
|
|
537
|
+
}
|
|
538
|
+
});
|
|
539
|
+
return { symbols, imports, exports };
|
|
540
|
+
}
|
|
541
|
+
/**
|
|
542
|
+
* Parse a Scala file for symbols and imports
|
|
543
|
+
*/
|
|
544
|
+
parseScala(content) {
|
|
545
|
+
const symbols = [];
|
|
546
|
+
const imports = [];
|
|
547
|
+
const exports = [];
|
|
548
|
+
const lines = content.split('\n');
|
|
549
|
+
lines.forEach((line, index) => {
|
|
550
|
+
const trimmed = line.trim();
|
|
551
|
+
const lineNumber = index + 1;
|
|
552
|
+
// Class / Object / Trait
|
|
553
|
+
const classMatch = trimmed.match(/^(?:abstract|case|\s)*(?:class|object|trait)\s+(\w+)/);
|
|
554
|
+
if (classMatch) {
|
|
555
|
+
symbols.push({ type: 'class', name: classMatch[1], lineNumber, columnNumber: 0 });
|
|
556
|
+
}
|
|
557
|
+
// Function
|
|
558
|
+
const funcMatch = trimmed.match(/^(?:override|private|protected|\s)*def\s+(\w+)/);
|
|
559
|
+
if (funcMatch) {
|
|
560
|
+
symbols.push({ type: 'function', name: funcMatch[1], lineNumber, columnNumber: 0 });
|
|
561
|
+
}
|
|
562
|
+
// Imports
|
|
563
|
+
const importMatch = trimmed.match(/^import\s+([\w.{}_]+)/);
|
|
564
|
+
if (importMatch) {
|
|
565
|
+
imports.push({ source: importMatch[1], imported: ['*'], isDefault: false, lineNumber });
|
|
566
|
+
}
|
|
567
|
+
});
|
|
568
|
+
return { symbols, imports, exports };
|
|
569
|
+
}
|
|
570
|
+
/**
|
|
571
|
+
* Parse a C/C++ file for symbols and imports
|
|
572
|
+
*/
|
|
573
|
+
parseCPP(content) {
|
|
574
|
+
const symbols = [];
|
|
575
|
+
const imports = [];
|
|
576
|
+
const exports = [];
|
|
577
|
+
const lines = content.split('\n');
|
|
578
|
+
lines.forEach((line, index) => {
|
|
579
|
+
const trimmed = line.trim();
|
|
580
|
+
const lineNumber = index + 1;
|
|
581
|
+
// Class / Struct
|
|
582
|
+
const classMatch = trimmed.match(/^(?:class|struct)\s+(\w+)/);
|
|
583
|
+
if (classMatch) {
|
|
584
|
+
symbols.push({ type: 'class', name: classMatch[1], lineNumber, columnNumber: 0 });
|
|
585
|
+
}
|
|
586
|
+
// Function / Method
|
|
587
|
+
const funcMatch = trimmed.match(/^[\w<>[\]:*&]+ +(\w+::)?(\w+) *\([^)]*\) *(?:const)? *\{?/);
|
|
588
|
+
if (funcMatch) {
|
|
589
|
+
const name = funcMatch[1] ? funcMatch[1] + funcMatch[2] : funcMatch[2];
|
|
590
|
+
if (!['if', 'for', 'while', 'switch', 'catch', 'return'].includes(funcMatch[2])) {
|
|
591
|
+
symbols.push({ type: 'function', name, lineNumber, columnNumber: 0 });
|
|
592
|
+
}
|
|
593
|
+
}
|
|
594
|
+
// Imports (#include)
|
|
595
|
+
const includeMatch = trimmed.match(/^#include\s+['"<](.+?)['">]/);
|
|
596
|
+
if (includeMatch) {
|
|
597
|
+
imports.push({ source: includeMatch[1], imported: ['*'], isDefault: false, lineNumber });
|
|
598
|
+
}
|
|
599
|
+
});
|
|
600
|
+
return { symbols, imports, exports };
|
|
601
|
+
}
|
|
602
|
+
/**
|
|
603
|
+
* Parse a single file (with size limit and sensitive file check)
|
|
604
|
+
*/
|
|
605
|
+
async parseFile(filePath) {
|
|
606
|
+
const ext = filePath.split('.').pop()?.toLowerCase();
|
|
607
|
+
let symbols = [];
|
|
608
|
+
let imports = [];
|
|
609
|
+
let exports = [];
|
|
610
|
+
try {
|
|
611
|
+
// Security: Skip sensitive files
|
|
612
|
+
if (isSensitiveFile(filePath)) {
|
|
613
|
+
return {
|
|
614
|
+
path: filePath,
|
|
615
|
+
language: this.detectLanguage(filePath),
|
|
616
|
+
symbols: [],
|
|
617
|
+
imports: [],
|
|
618
|
+
exports: []
|
|
619
|
+
};
|
|
620
|
+
}
|
|
621
|
+
// Security: Don't follow symlinks — must use lstatSync (not statSync!)
|
|
622
|
+
const lstats = fs.lstatSync(filePath);
|
|
623
|
+
if (lstats.isSymbolicLink()) {
|
|
624
|
+
return {
|
|
625
|
+
path: filePath,
|
|
626
|
+
language: this.detectLanguage(filePath),
|
|
627
|
+
symbols: [],
|
|
628
|
+
imports: [],
|
|
629
|
+
exports: []
|
|
630
|
+
};
|
|
631
|
+
}
|
|
632
|
+
// Security: Check file size before reading
|
|
633
|
+
if (lstats.size > contexthub_core_1.MAX_INGEST_FILE_SIZE) {
|
|
634
|
+
return {
|
|
635
|
+
path: filePath,
|
|
636
|
+
language: this.detectLanguage(filePath),
|
|
637
|
+
symbols: [],
|
|
638
|
+
imports: [],
|
|
639
|
+
exports: []
|
|
640
|
+
};
|
|
641
|
+
}
|
|
642
|
+
const content = fs.readFileSync(filePath, 'utf-8');
|
|
643
|
+
const language = this.detectLanguage(filePath);
|
|
644
|
+
let parsed = null;
|
|
645
|
+
if (['typescript', 'javascript', 'tsx', 'python'].includes(language)) {
|
|
646
|
+
parsed = await this.tsParser.parse(content, language);
|
|
647
|
+
}
|
|
648
|
+
if (parsed) {
|
|
649
|
+
symbols = parsed.symbols;
|
|
650
|
+
imports = parsed.imports;
|
|
651
|
+
exports = parsed.exports;
|
|
652
|
+
}
|
|
653
|
+
else {
|
|
654
|
+
// Fallback to regex parser
|
|
655
|
+
if (ext === 'ts' || ext === 'tsx' || ext === 'js' || ext === 'jsx' || ext === 'mjs' || ext === 'cjs') {
|
|
656
|
+
const regexParsed = this.parseJSTS(content);
|
|
657
|
+
symbols = regexParsed.symbols;
|
|
658
|
+
imports = regexParsed.imports;
|
|
659
|
+
exports = regexParsed.exports;
|
|
660
|
+
}
|
|
661
|
+
else if (ext === 'py') {
|
|
662
|
+
const regexParsed = this.parsePython(content);
|
|
663
|
+
symbols = regexParsed.symbols;
|
|
664
|
+
imports = regexParsed.imports;
|
|
665
|
+
exports = regexParsed.exports;
|
|
666
|
+
}
|
|
667
|
+
else if (ext === 'go') {
|
|
668
|
+
const regexParsed = this.parseGo(content);
|
|
669
|
+
symbols = regexParsed.symbols;
|
|
670
|
+
imports = regexParsed.imports;
|
|
671
|
+
exports = regexParsed.exports;
|
|
672
|
+
}
|
|
673
|
+
else if (ext === 'rs') {
|
|
674
|
+
const regexParsed = this.parseRust(content);
|
|
675
|
+
symbols = regexParsed.symbols;
|
|
676
|
+
imports = regexParsed.imports;
|
|
677
|
+
exports = regexParsed.exports;
|
|
678
|
+
}
|
|
679
|
+
else if (ext === 'java') {
|
|
680
|
+
const regexParsed = this.parseJava(content);
|
|
681
|
+
symbols = regexParsed.symbols;
|
|
682
|
+
imports = regexParsed.imports;
|
|
683
|
+
exports = regexParsed.exports;
|
|
684
|
+
}
|
|
685
|
+
else if (ext === 'rb') {
|
|
686
|
+
const regexParsed = this.parseRuby(content);
|
|
687
|
+
symbols = regexParsed.symbols;
|
|
688
|
+
imports = regexParsed.imports;
|
|
689
|
+
exports = regexParsed.exports;
|
|
690
|
+
}
|
|
691
|
+
else if (ext === 'php') {
|
|
692
|
+
const regexParsed = this.parsePhp(content);
|
|
693
|
+
symbols = regexParsed.symbols;
|
|
694
|
+
imports = regexParsed.imports;
|
|
695
|
+
exports = regexParsed.exports;
|
|
696
|
+
}
|
|
697
|
+
else if (ext === 'cs') {
|
|
698
|
+
const regexParsed = this.parseCSharp(content);
|
|
699
|
+
symbols = regexParsed.symbols;
|
|
700
|
+
imports = regexParsed.imports;
|
|
701
|
+
exports = regexParsed.exports;
|
|
702
|
+
}
|
|
703
|
+
else if (ext === 'swift') {
|
|
704
|
+
const regexParsed = this.parseSwift(content);
|
|
705
|
+
symbols = regexParsed.symbols;
|
|
706
|
+
imports = regexParsed.imports;
|
|
707
|
+
exports = regexParsed.exports;
|
|
708
|
+
}
|
|
709
|
+
else if (ext === 'kt' || ext === 'kts') {
|
|
710
|
+
const regexParsed = this.parseKotlin(content);
|
|
711
|
+
symbols = regexParsed.symbols;
|
|
712
|
+
imports = regexParsed.imports;
|
|
713
|
+
exports = regexParsed.exports;
|
|
714
|
+
}
|
|
715
|
+
else if (ext === 'scala') {
|
|
716
|
+
const regexParsed = this.parseScala(content);
|
|
717
|
+
symbols = regexParsed.symbols;
|
|
718
|
+
imports = regexParsed.imports;
|
|
719
|
+
exports = regexParsed.exports;
|
|
720
|
+
}
|
|
721
|
+
else if (['cpp', 'cc', 'cxx', 'c++', 'c', 'h', 'hpp'].includes(ext || '')) {
|
|
722
|
+
const regexParsed = this.parseCPP(content);
|
|
723
|
+
symbols = regexParsed.symbols;
|
|
724
|
+
imports = regexParsed.imports;
|
|
725
|
+
exports = regexParsed.exports;
|
|
726
|
+
}
|
|
727
|
+
}
|
|
728
|
+
}
|
|
729
|
+
catch (e) {
|
|
730
|
+
// Sanitized error — don't expose full path
|
|
731
|
+
console.error(`Failed to parse file:`, e?.message || 'unknown error');
|
|
732
|
+
}
|
|
733
|
+
return {
|
|
734
|
+
path: filePath,
|
|
735
|
+
language: this.detectLanguage(filePath),
|
|
736
|
+
symbols,
|
|
737
|
+
imports,
|
|
738
|
+
exports
|
|
739
|
+
};
|
|
740
|
+
}
|
|
741
|
+
/**
|
|
742
|
+
* Parse all code files in a directory (with security restrictions)
|
|
743
|
+
*/
|
|
744
|
+
async parseDirectory(dirPath, patterns = ['**/*.{ts,tsx,js,jsx,py,go,rs,java,rb,php,cs,swift,kt,kts,scala,c,h,cpp,cc,cxx,hpp}']) {
|
|
745
|
+
const results = [];
|
|
746
|
+
const repoRoot = path.resolve(this.repoPath);
|
|
747
|
+
const ignore = new contexthub_core_1.ContexthubIgnore(this.repoPath);
|
|
748
|
+
for (const pattern of patterns) {
|
|
749
|
+
const files = await (0, glob_1.glob)(pattern, { cwd: dirPath, absolute: true });
|
|
750
|
+
let scannedCount = 0;
|
|
751
|
+
for (const file of files) {
|
|
752
|
+
// Security: Cap file count
|
|
753
|
+
if (scannedCount >= contexthub_core_1.MAX_FILES_PER_SCAN) {
|
|
754
|
+
console.error(`File scan limit reached (${contexthub_core_1.MAX_FILES_PER_SCAN}). Stopping.`);
|
|
755
|
+
break;
|
|
756
|
+
}
|
|
757
|
+
// Security: Only parse files within repo boundary
|
|
758
|
+
const resolved = path.resolve(file);
|
|
759
|
+
if (!resolved.startsWith(repoRoot + path.sep) && resolved !== repoRoot) {
|
|
760
|
+
continue;
|
|
761
|
+
}
|
|
762
|
+
const relPath = path.relative(repoRoot, resolved);
|
|
763
|
+
if (ignore.ignores(relPath))
|
|
764
|
+
continue;
|
|
765
|
+
// Skip node_modules, .contexthub, and sensitive files
|
|
766
|
+
if (file.includes('node_modules') || file.includes('.contexthub'))
|
|
767
|
+
continue;
|
|
768
|
+
if (isSensitiveFile(file))
|
|
769
|
+
continue;
|
|
770
|
+
const parsed = await this.parseFile(file);
|
|
771
|
+
results.push(parsed);
|
|
772
|
+
scannedCount++;
|
|
773
|
+
}
|
|
774
|
+
}
|
|
775
|
+
console.error(`Parsed ${results.length} files in directory (limit: ${contexthub_core_1.MAX_FILES_PER_SCAN})`);
|
|
776
|
+
return results;
|
|
777
|
+
}
|
|
778
|
+
/**
|
|
779
|
+
* Build a dependency graph from parsed files
|
|
780
|
+
*/
|
|
781
|
+
buildDependencyGraph(files) {
|
|
782
|
+
const graph = new Map();
|
|
783
|
+
for (const file of files) {
|
|
784
|
+
const deps = [];
|
|
785
|
+
for (const imp of file.imports) {
|
|
786
|
+
deps.push(imp.source);
|
|
787
|
+
}
|
|
788
|
+
graph.set(file.path, deps);
|
|
789
|
+
}
|
|
790
|
+
return graph;
|
|
791
|
+
}
|
|
792
|
+
/**
|
|
793
|
+
* Find files that match a pattern or contain a symbol
|
|
794
|
+
*/
|
|
795
|
+
async findRelatedFiles(filePath, allFiles) {
|
|
796
|
+
const related = [];
|
|
797
|
+
// Find imports that match this file's name
|
|
798
|
+
const fileName = path.basename(filePath, path.extname(filePath));
|
|
799
|
+
for (const file of allFiles) {
|
|
800
|
+
if (file.path === filePath)
|
|
801
|
+
continue;
|
|
802
|
+
// Check imports
|
|
803
|
+
for (const imp of file.imports) {
|
|
804
|
+
if (imp.source.includes(fileName) || imp.imported.some(i => i === fileName)) {
|
|
805
|
+
related.push(file.path);
|
|
806
|
+
break;
|
|
807
|
+
}
|
|
808
|
+
}
|
|
809
|
+
}
|
|
810
|
+
return [...new Set(related)];
|
|
811
|
+
}
|
|
812
|
+
/**
|
|
813
|
+
* Get summary statistics for a parsed codebase
|
|
814
|
+
*/
|
|
815
|
+
getCodeStats(files) {
|
|
816
|
+
const stats = {
|
|
817
|
+
totalFiles: files.length,
|
|
818
|
+
byLanguage: {},
|
|
819
|
+
totalSymbols: 0,
|
|
820
|
+
byType: {}
|
|
821
|
+
};
|
|
822
|
+
for (const file of files) {
|
|
823
|
+
stats.byLanguage[file.language] = (stats.byLanguage[file.language] || 0) + 1;
|
|
824
|
+
for (const symbol of file.symbols) {
|
|
825
|
+
stats.totalSymbols++;
|
|
826
|
+
stats.byType[symbol.type] = (stats.byType[symbol.type] || 0) + 1;
|
|
827
|
+
}
|
|
828
|
+
}
|
|
829
|
+
return stats;
|
|
830
|
+
}
|
|
831
|
+
}
|
|
832
|
+
exports.RepoParser = RepoParser;
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import { Symbol, ImportExport } from '@imayuur/contexthub-shared-types';
|
|
2
|
+
/**
 * Extracts symbols, imports and exports from source text using
 * web-tree-sitter grammars for JavaScript, TypeScript, TSX and Python.
 */
export declare class TreeSitterParser {
    /** The web-tree-sitter parser instance; created by init(). */
    private parser;
    /** Loaded grammars keyed by 'javascript', 'typescript', 'tsx' and 'python'. */
    private langs;
    /** True once init() has loaded every grammar. */
    private initialized;
    /**
     * Initialise web-tree-sitter and load the grammars.
     * Returns immediately when initialisation has already completed.
     */
    init(): Promise<void>;
    /**
     * Parse source text and collect its symbols, imports and exports.
     * Calls init() first when the parser is not initialised yet.
     *
     * @param content  Source text to parse.
     * @param language 'typescript', 'tsx' or 'python'; any other value is
     *                 parsed with the JavaScript grammar.
     * @returns The collected items, or null when the grammar is not loaded
     *          or the underlying parse throws.
     */
    parse(content: string, language: string): Promise<{
        symbols: Symbol[];
        imports: ImportExport[];
        exports: ImportExport[];
    } | null>;
    /** Walks the syntax tree and appends what it finds to the output arrays. */
    private traverse;
}
|
|
@@ -0,0 +1,216 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.TreeSitterParser = void 0;
|
|
4
|
+
class TreeSitterParser {
|
|
5
|
+
constructor() {
|
|
6
|
+
this.langs = new Map();
|
|
7
|
+
this.initialized = false;
|
|
8
|
+
}
|
|
9
|
+
async init() {
|
|
10
|
+
if (this.initialized)
|
|
11
|
+
return;
|
|
12
|
+
const { Parser, Language } = require('web-tree-sitter');
|
|
13
|
+
await Parser.init();
|
|
14
|
+
this.parser = new Parser();
|
|
15
|
+
// Load common languages
|
|
16
|
+
const jsPath = require.resolve('@repomix/tree-sitter-wasms/out/tree-sitter-javascript.wasm');
|
|
17
|
+
const tsPath = require.resolve('@repomix/tree-sitter-wasms/out/tree-sitter-typescript.wasm');
|
|
18
|
+
const tsxPath = require.resolve('@repomix/tree-sitter-wasms/out/tree-sitter-tsx.wasm');
|
|
19
|
+
const pyPath = require.resolve('@repomix/tree-sitter-wasms/out/tree-sitter-python.wasm');
|
|
20
|
+
const [jsLang, tsLang, tsxLang, pyLang] = await Promise.all([
|
|
21
|
+
Language.load(jsPath),
|
|
22
|
+
Language.load(tsPath),
|
|
23
|
+
Language.load(tsxPath),
|
|
24
|
+
Language.load(pyPath),
|
|
25
|
+
]);
|
|
26
|
+
this.langs.set('javascript', jsLang);
|
|
27
|
+
this.langs.set('typescript', tsLang);
|
|
28
|
+
this.langs.set('tsx', tsxLang);
|
|
29
|
+
this.langs.set('python', pyLang);
|
|
30
|
+
this.initialized = true;
|
|
31
|
+
}
|
|
32
|
+
async parse(content, language) {
|
|
33
|
+
if (!this.initialized)
|
|
34
|
+
await this.init();
|
|
35
|
+
// Map file extension/language to tree-sitter language
|
|
36
|
+
let tsLang = 'javascript';
|
|
37
|
+
if (language === 'typescript')
|
|
38
|
+
tsLang = 'typescript';
|
|
39
|
+
if (language === 'tsx')
|
|
40
|
+
tsLang = 'tsx'; // TSX is a distinct grammar in tree-sitter
|
|
41
|
+
if (language === 'python')
|
|
42
|
+
tsLang = 'python';
|
|
43
|
+
const langObj = this.langs.get(tsLang);
|
|
44
|
+
if (!langObj)
|
|
45
|
+
return null;
|
|
46
|
+
this.parser.setLanguage(langObj);
|
|
47
|
+
// Set 5s timeout if supported
|
|
48
|
+
if (typeof this.parser.setTimeoutMicros === 'function') {
|
|
49
|
+
this.parser.setTimeoutMicros(5000000);
|
|
50
|
+
}
|
|
51
|
+
else if (typeof this.parser.setTimeout === 'function') {
|
|
52
|
+
this.parser.setTimeout(5000000);
|
|
53
|
+
}
|
|
54
|
+
let tree;
|
|
55
|
+
try {
|
|
56
|
+
tree = this.parser.parse(content);
|
|
57
|
+
}
|
|
58
|
+
catch (e) {
|
|
59
|
+
// Timeout or parse error
|
|
60
|
+
return null;
|
|
61
|
+
}
|
|
62
|
+
const symbols = [];
|
|
63
|
+
const imports = [];
|
|
64
|
+
const exports = [];
|
|
65
|
+
// Traverse the AST
|
|
66
|
+
this.traverse(tree.rootNode, symbols, imports, exports, tsLang);
|
|
67
|
+
return { symbols, imports, exports };
|
|
68
|
+
}
|
|
69
|
+
traverse(node, symbols, imports, exports, lang) {
|
|
70
|
+
// Determine the type of node
|
|
71
|
+
const type = node.type;
|
|
72
|
+
if (lang === 'javascript' || lang === 'typescript' || lang === 'tsx') {
|
|
73
|
+
if (type === 'function_declaration' || type === 'generator_function_declaration') {
|
|
74
|
+
const nameNode = node.childForFieldName('name');
|
|
75
|
+
if (nameNode) {
|
|
76
|
+
symbols.push({
|
|
77
|
+
type: 'function',
|
|
78
|
+
name: nameNode.text,
|
|
79
|
+
lineNumber: node.startPosition.row + 1,
|
|
80
|
+
columnNumber: node.startPosition.column
|
|
81
|
+
});
|
|
82
|
+
}
|
|
83
|
+
}
|
|
84
|
+
else if (type === 'class_declaration') {
|
|
85
|
+
const nameNode = node.childForFieldName('name');
|
|
86
|
+
if (nameNode) {
|
|
87
|
+
symbols.push({
|
|
88
|
+
type: 'class',
|
|
89
|
+
name: nameNode.text,
|
|
90
|
+
lineNumber: node.startPosition.row + 1,
|
|
91
|
+
columnNumber: node.startPosition.column
|
|
92
|
+
});
|
|
93
|
+
}
|
|
94
|
+
}
|
|
95
|
+
else if (type === 'method_definition') {
|
|
96
|
+
const nameNode = node.childForFieldName('name');
|
|
97
|
+
if (nameNode) {
|
|
98
|
+
symbols.push({
|
|
99
|
+
type: 'method',
|
|
100
|
+
name: nameNode.text,
|
|
101
|
+
lineNumber: node.startPosition.row + 1,
|
|
102
|
+
columnNumber: node.startPosition.column
|
|
103
|
+
});
|
|
104
|
+
}
|
|
105
|
+
}
|
|
106
|
+
else if (type === 'variable_declarator') {
|
|
107
|
+
const nameNode = node.childForFieldName('name');
|
|
108
|
+
if (nameNode && (node.parent?.parent?.type === 'export_statement' || node.parent?.type === 'lexical_declaration')) {
|
|
109
|
+
symbols.push({
|
|
110
|
+
type: 'variable',
|
|
111
|
+
name: nameNode.text,
|
|
112
|
+
lineNumber: node.startPosition.row + 1,
|
|
113
|
+
columnNumber: node.startPosition.column
|
|
114
|
+
});
|
|
115
|
+
}
|
|
116
|
+
}
|
|
117
|
+
else if (type === 'import_statement') {
|
|
118
|
+
const sourceNode = node.childForFieldName('source');
|
|
119
|
+
if (sourceNode) {
|
|
120
|
+
const source = sourceNode.text.replace(/['"]/g, '');
|
|
121
|
+
imports.push({
|
|
122
|
+
source,
|
|
123
|
+
imported: ['*'], // We could extract exact names but * is fine for the graph
|
|
124
|
+
isDefault: false,
|
|
125
|
+
lineNumber: node.startPosition.row + 1
|
|
126
|
+
});
|
|
127
|
+
}
|
|
128
|
+
}
|
|
129
|
+
else if (type === 'export_statement') {
|
|
130
|
+
const sourceNode = node.childForFieldName('source');
|
|
131
|
+
if (sourceNode) {
|
|
132
|
+
exports.push({
|
|
133
|
+
source: sourceNode.text.replace(/['"]/g, ''),
|
|
134
|
+
imported: ['*'],
|
|
135
|
+
isDefault: false,
|
|
136
|
+
lineNumber: node.startPosition.row + 1
|
|
137
|
+
});
|
|
138
|
+
}
|
|
139
|
+
else {
|
|
140
|
+
exports.push({
|
|
141
|
+
source: '',
|
|
142
|
+
imported: ['*'],
|
|
143
|
+
isDefault: false,
|
|
144
|
+
lineNumber: node.startPosition.row + 1
|
|
145
|
+
});
|
|
146
|
+
}
|
|
147
|
+
}
|
|
148
|
+
}
|
|
149
|
+
else if (lang === 'python') {
|
|
150
|
+
if (type === 'function_definition') {
|
|
151
|
+
const nameNode = node.childForFieldName('name');
|
|
152
|
+
if (nameNode) {
|
|
153
|
+
symbols.push({
|
|
154
|
+
type: 'function',
|
|
155
|
+
name: nameNode.text,
|
|
156
|
+
lineNumber: node.startPosition.row + 1,
|
|
157
|
+
columnNumber: node.startPosition.column
|
|
158
|
+
});
|
|
159
|
+
}
|
|
160
|
+
}
|
|
161
|
+
else if (type === 'class_definition') {
|
|
162
|
+
const nameNode = node.childForFieldName('name');
|
|
163
|
+
if (nameNode) {
|
|
164
|
+
symbols.push({
|
|
165
|
+
type: 'class',
|
|
166
|
+
name: nameNode.text,
|
|
167
|
+
lineNumber: node.startPosition.row + 1,
|
|
168
|
+
columnNumber: node.startPosition.column
|
|
169
|
+
});
|
|
170
|
+
}
|
|
171
|
+
}
|
|
172
|
+
else if (type === 'import_statement' || type === 'import_from_statement') {
|
|
173
|
+
let source = '';
|
|
174
|
+
const moduleNameNode = node.childForFieldName('module_name');
|
|
175
|
+
if (moduleNameNode) {
|
|
176
|
+
source = moduleNameNode.text;
|
|
177
|
+
}
|
|
178
|
+
else {
|
|
179
|
+
// simple import
|
|
180
|
+
const firstChild = node.children.find((c) => c.type === 'dotted_name');
|
|
181
|
+
if (firstChild)
|
|
182
|
+
source = firstChild.text;
|
|
183
|
+
}
|
|
184
|
+
if (source) {
|
|
185
|
+
imports.push({
|
|
186
|
+
source,
|
|
187
|
+
imported: ['*'],
|
|
188
|
+
isDefault: false,
|
|
189
|
+
lineNumber: node.startPosition.row + 1
|
|
190
|
+
});
|
|
191
|
+
}
|
|
192
|
+
}
|
|
193
|
+
else if (type === 'assignment') {
|
|
194
|
+
const leftNode = node.childForFieldName('left');
|
|
195
|
+
if (leftNode && leftNode.text === '__all__') {
|
|
196
|
+
const rightNode = node.childForFieldName('right');
|
|
197
|
+
if (rightNode && rightNode.type === 'list') {
|
|
198
|
+
const elements = rightNode.children.filter((c) => c.type === 'string');
|
|
199
|
+
for (const el of elements) {
|
|
200
|
+
exports.push({
|
|
201
|
+
source: '',
|
|
202
|
+
imported: [el.text.replace(/['"]/g, '')],
|
|
203
|
+
isDefault: false,
|
|
204
|
+
lineNumber: node.startPosition.row + 1
|
|
205
|
+
});
|
|
206
|
+
}
|
|
207
|
+
}
|
|
208
|
+
}
|
|
209
|
+
}
|
|
210
|
+
}
|
|
211
|
+
for (let i = 0; i < node.childCount; i++) {
|
|
212
|
+
this.traverse(node.child(i), symbols, imports, exports, lang);
|
|
213
|
+
}
|
|
214
|
+
}
|
|
215
|
+
}
|
|
216
|
+
exports.TreeSitterParser = TreeSitterParser;
|
package/package.json
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@imayuur/contexthub-repo-parser",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "Sandboxed repository code analysis for ContextHub",
|
|
5
|
+
"license": "MIT",
|
|
6
|
+
"repository": {
|
|
7
|
+
"type": "git",
|
|
8
|
+
"url": "git+https://github.com/iMayuuR/contexthub.git",
|
|
9
|
+
"directory": "packages/repo-parser"
|
|
10
|
+
},
|
|
11
|
+
"publishConfig": {
|
|
12
|
+
"access": "public"
|
|
13
|
+
},
|
|
14
|
+
"engines": {
|
|
15
|
+
"node": ">=18"
|
|
16
|
+
},
|
|
17
|
+
"main": "dist/index.js",
|
|
18
|
+
"types": "dist/index.d.ts",
|
|
19
|
+
"files": [
|
|
20
|
+
"dist",
|
|
21
|
+
"!dist/__tests__"
|
|
22
|
+
],
|
|
23
|
+
"scripts": {
|
|
24
|
+
"build": "tsc",
|
|
25
|
+
"dev": "tsc --watch",
|
|
26
|
+
"test": "tsc && node --test dist/__tests__/*.test.js",
|
|
27
|
+
"prepublishOnly": "npm run build"
|
|
28
|
+
},
|
|
29
|
+
"dependencies": {
|
|
30
|
+
"@repomix/tree-sitter-wasms": "^0.1.17",
|
|
31
|
+
"glob": "^10.0.0",
|
|
32
|
+
"tree-sitter-wasms": "^0.1.13",
|
|
33
|
+
"web-tree-sitter": "^0.26.9",
|
|
34
|
+
"@imayuur/contexthub-shared-types": "^1.0.0"
|
|
35
|
+
},
|
|
36
|
+
"devDependencies": {
|
|
37
|
+
"@types/node": "^18.0.0",
|
|
38
|
+
"typescript": "^5.0.0"
|
|
39
|
+
},
|
|
40
|
+
"author": "Mayur Dattatray Patil",
|
|
41
|
+
"bugs": {
|
|
42
|
+
"url": "https://github.com/iMayuuR/contexthub/issues"
|
|
43
|
+
},
|
|
44
|
+
"homepage": "https://github.com/iMayuuR/contexthub#readme",
|
|
45
|
+
"keywords": [
|
|
46
|
+
"contexthub",
|
|
47
|
+
"mcp",
|
|
48
|
+
"ai-memory",
|
|
49
|
+
"cursor",
|
|
50
|
+
"claude"
|
|
51
|
+
],
|
|
52
|
+
"exports": {
|
|
53
|
+
".": {
|
|
54
|
+
"types": "./dist/index.d.ts",
|
|
55
|
+
"default": "./dist/index.js"
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
}
|