@getmikk/core 2.0.13 → 2.0.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/analysis/index.ts +9 -0
- package/src/analysis/taint-analysis.ts +419 -0
- package/src/analysis/type-flow.ts +247 -0
- package/src/cache/incremental-cache.ts +272 -0
- package/src/cache/index.ts +1 -0
- package/src/contract/contract-generator.ts +31 -3
- package/src/contract/lock-compiler.ts +31 -0
- package/src/contract/schema.ts +2 -0
- package/src/index.ts +2 -0
- package/src/parser/error-recovery.ts +646 -0
- package/src/parser/index.ts +29 -0
- package/src/parser/tree-sitter/parser.ts +35 -8
- package/src/security/index.ts +1 -0
- package/src/security/scanner.ts +342 -0
- package/src/utils/fs.ts +46 -15
- package/src/utils/language-registry.ts +13 -0
- package/src/utils/minimatch.ts +49 -6
|
@@ -0,0 +1,646 @@
|
|
|
1
|
+
import type { ParsedFile, ParsedFunction, ParsedClass, ParsedImport, ParsedParam, CallExpression } from './types.js'
|
|
2
|
+
import * as path from 'node:path'
|
|
3
|
+
import { hashContent } from '../hash/file-hasher.js'
|
|
4
|
+
|
|
5
|
+
// ---------------------------------------------------------------------------
|
|
6
|
+
// Error Recovery Engine — graceful degradation when parsing fails
|
|
7
|
+
// ---------------------------------------------------------------------------
|
|
8
|
+
|
|
9
|
+
/**
 * Outcome of one recovery attempt made by `ErrorRecoveryEngine`.
 *
 * - `success`: `true` when the regex pass extracted at least one function,
 *   class or import; always `false` for the minimal fallback.
 * - `strategy`: name of the strategy that produced this result
 *   (`'regex-recovery'` or `'minimal-fallback'`).
 * - `parsed`: the reconstructed file outline; collections the recovery pass
 *   does not populate are left empty.
 * - `confidence`: heuristic score clamped to the range 0..1; the minimal
 *   fallback reports 0.
 * - `errors`: human-readable messages collected while recovering.
 */
export interface RecoveryResult {
|
|
10
|
+
success: boolean
|
|
11
|
+
strategy: string
|
|
12
|
+
parsed: ParsedFile
|
|
13
|
+
confidence: number
|
|
14
|
+
errors: string[]
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
/**
 * Best-effort fallback used when a real parser cannot handle a file.
 *
 * The engine scans the source line by line with regular expressions and
 * extracts a coarse outline (functions, classes/structs and imports). The
 * outline is intentionally shallow: return types, call graphs and hashes of
 * individual symbols are left empty.
 */
export class ErrorRecoveryEngine {
  /** Regex results at or below this confidence are replaced by the minimal fallback. */
  private static readonly MIN_CONFIDENCE = 0.3

  /** Keywords the Java method pattern could otherwise capture as a method name. */
  private static readonly JAVA_NON_METHOD_NAMES = new Set([
    'if', 'for', 'while', 'switch', 'class', 'interface', 'catch', 'synchronized', 'return', 'new',
  ])

  /**
   * Recovers an outline for a file that failed to parse.
   *
   * @param filePath Path of the file; its extension is used as a language hint.
   * @param content Full text of the file.
   * @param language Language name reported by the caller (e.g. `'python'`).
   * @returns The regex-based result when its confidence exceeds 0.3, otherwise
   *   an empty minimal result that also carries any errors raised by the regex pass.
   */
  async recover(filePath: string, content: string, language: string): Promise<RecoveryResult> {
    const ext = path.extname(filePath).toLowerCase()

    const regexResult = await this.recoverWithRegex(filePath, content, ext, language)
    if (regexResult.confidence > ErrorRecoveryEngine.MIN_CONFIDENCE) {
      return regexResult
    }

    const minimal = await this.recoverMinimal(filePath, content, ext, language)
    // Keep the regex pass diagnostics so callers can see why it was rejected.
    return { ...minimal, errors: [...regexResult.errors, ...minimal.errors] }
  }

  /**
   * Runs the language-specific regex extractor chosen from `language` / `ext`
   * and scores the result. Exceptions thrown by an extractor are recorded in
   * `errors` instead of propagating.
   */
  private async recoverWithRegex(
    filePath: string,
    content: string,
    ext: string,
    language: string
  ): Promise<RecoveryResult> {
    const errors: string[] = []
    const functions: ParsedFunction[] = []
    const classes: ParsedClass[] = []
    const imports: ParsedImport[] = []

    try {
      // Accept both LF and CRLF so a trailing '\r' never leaks into names.
      const lines = content.split(/\r?\n/)

      if (language === 'python' || ext === '.py') {
        this.recoverPython(filePath, content, lines, functions, classes, imports)
      } else if (language === 'typescript' || language === 'javascript' || ext === '.ts' || ext === '.tsx' || ext === '.js' || ext === '.jsx') {
        this.recoverJavaScript(filePath, content, lines, functions, classes, imports)
      } else if (language === 'go' || ext === '.go') {
        this.recoverGo(filePath, content, lines, functions, classes, imports)
      } else if (language === 'rust' || ext === '.rs') {
        this.recoverRust(filePath, content, lines, functions, classes, imports)
      } else if (language === 'java' || ext === '.java') {
        this.recoverJava(filePath, content, lines, functions, classes, imports)
      } else {
        this.recoverGeneric(filePath, content, lines, functions, classes, imports)
      }
    } catch (err) {
      errors.push(`Regex recovery failed: ${err instanceof Error ? err.message : String(err)}`)
    }

    return {
      success: functions.length > 0 || classes.length > 0 || imports.length > 0,
      strategy: 'regex-recovery',
      parsed: this.buildParsedFile(filePath, content, language, functions, classes, imports),
      confidence: this.calculateConfidence(functions, classes, imports, content),
      errors,
    }
  }

  /** Produces an empty outline with zero confidence; used when regex recovery is not trusted. */
  private async recoverMinimal(
    filePath: string,
    content: string,
    ext: string,
    language: string
  ): Promise<RecoveryResult> {
    return {
      success: false,
      strategy: 'minimal-fallback',
      parsed: this.buildParsedFile(filePath, content, language, [], [], []),
      confidence: 0,
      errors: ['All recovery strategies failed'],
    }
  }

  // ---------------------------------------------------------------------------
  // Record builders
  // ---------------------------------------------------------------------------

  /** Assembles a `ParsedFile` around the extracted symbols; all other collections stay empty. */
  private buildParsedFile(
    filePath: string,
    content: string,
    language: string,
    functions: ParsedFunction[],
    classes: ParsedClass[],
    imports: ParsedImport[]
  ): ParsedFile {
    return {
      path: filePath,
      // The caller-supplied name is trusted as-is; it is not validated here.
      language: language as ParsedFile['language'],
      hash: hashContent(content),
      parsedAt: Date.now(),
      functions,
      classes,
      imports,
      generics: [],
      variables: [],
      exports: [],
      routes: [],
      calls: [],
    }
  }

  /** Builds a function record; `startLine` / `endLine` are 1-based. */
  private makeFunction(
    filePath: string,
    name: string,
    startLine: number,
    endLine: number,
    details: { isAsync?: boolean; isExported?: boolean; params?: ParsedParam[]; purpose?: string } = {}
  ): ParsedFunction {
    return {
      id: `fn:${filePath}:${name.toLowerCase()}`,
      name,
      file: filePath,
      startLine,
      endLine,
      isAsync: details.isAsync ?? false,
      isExported: details.isExported ?? false,
      params: details.params ?? [],
      returnType: '',
      purpose: details.purpose ?? '',
      calls: [],
      hash: '',
      edgeCasesHandled: [],
      errorHandling: [],
      detailedLines: [],
    }
  }

  /** Builds a class/struct record; `startLine` / `endLine` are 1-based. */
  private makeClass(
    filePath: string,
    name: string,
    startLine: number,
    endLine: number,
    isExported: boolean
  ): ParsedClass {
    return {
      id: `class:${filePath}:${name.toLowerCase()}`,
      name,
      file: filePath,
      startLine,
      endLine,
      isExported,
      methods: [],
      purpose: '',
      hash: '',
      properties: [],
    }
  }

  /** Builds an unresolved, non-dynamic import record. */
  private makeImport(source: string, names: string[] = [], isDefault = false): ParsedImport {
    return { source, names, resolvedPath: '', isDefault, isDynamic: false }
  }

  /**
   * Splits a single-line parameter list on commas and derives name, type and
   * optionality for each entry.
   *
   * @param raw Text between the parentheses of the declaration.
   * @param isOptional Language-specific test applied to each trimmed parameter.
   */
  private parseParams(raw: string, isOptional: (param: string) => boolean): ParsedParam[] {
    return raw
      .split(',')
      .map(p => p.trim())
      .filter(Boolean)
      .map(p => ({
        // Drop the TypeScript optional marker so `flag?: boolean` is named `flag`.
        name: p.split(':')[0].split('=')[0].trim().replace(/\?\s*$/, '').trim(),
        type: p.includes(':') ? p.split(':')[1].split('=')[0].trim() : '',
        optional: isOptional(p),
      }))
  }

  // ---------------------------------------------------------------------------
  // Language-specific regex recovery
  // ---------------------------------------------------------------------------

  /** Extracts `def`, `class` and `import` / `from … import` lines from Python source. */
  private recoverPython(
    filePath: string,
    content: string,
    lines: string[],
    functions: ParsedFunction[],
    classes: ParsedClass[],
    imports: ParsedImport[]
  ): void {
    const funcRegex = /^\s*(async\s+)?def\s+(\w+)\s*\(([^)]*)\)/
    const classRegex = /^\s*class\s+(\w+)/
    const importRegex = /^\s*(?:from\s+(\S+)\s+)?import\s+(.+)/
    const isOptionalParam = (p: string): boolean => p.includes('=') || /^(?:self|cls)\b/.test(p)

    lines.forEach((line, index) => {
      const fn = line.match(funcRegex)
      if (fn) {
        const [, asyncKeyword, name, rawParams] = fn
        functions.push(
          this.makeFunction(filePath, name, index + 1, this.findPythonFunctionEnd(lines, index), {
            isAsync: Boolean(asyncKeyword),
            // Python convention: a leading underscore marks a private name.
            isExported: !name.startsWith('_'),
            params: this.parseParams(rawParams, isOptionalParam),
            purpose: this.extractPythonDocstring(lines, index),
          })
        )
        return
      }

      const cls = line.match(classRegex)
      if (cls) {
        const name = cls[1]
        classes.push(
          this.makeClass(filePath, name, index + 1, this.findPythonClassEnd(lines, index), !name.startsWith('_'))
        )
        return
      }

      const imp = line.match(importRegex)
      if (imp) {
        const [, fromModule, importsStr] = imp
        // Strip trailing comments, grouping parentheses and line continuations
        // so they do not end up inside imported names.
        const names = importsStr
          .replace(/#.*$/, '')
          .replace(/[()\\]/g, ' ')
          .split(',')
          .map(s => s.trim().split(/\s+as\s+/)[0].trim())
          .filter(Boolean)
        if (names.length > 0 || fromModule) {
          imports.push(this.makeImport(fromModule || names[0], names, !fromModule))
        }
      }
    })
  }

  /** Extracts function declarations, arrow functions, classes and static imports from JS/TS source. */
  private recoverJavaScript(
    filePath: string,
    content: string,
    lines: string[],
    functions: ParsedFunction[],
    classes: ParsedClass[],
    imports: ParsedImport[]
  ): void {
    const funcRegex = /^\s*(export\s+)?(?:default\s+)?(async\s+)?function\s+(\w+)\s*\(([^)]*)\)/
    // Requires `=>` after the parameter list so that a parenthesised expression
    // such as `const total = (a + b) * 2` is not mistaken for a function.
    const arrowRegex = /^\s*(export\s+)?(?:const|let|var)\s+(\w+)\s*=\s*(async\s+)?\(([^)]*)\)\s*(?::[^=]*)?=>/
    const classRegex = /^\s*(export\s+)?(?:default\s+)?class\s+(\w+)/
    const importRegex = /^\s*import\s+(?:\{([^}]+)\}|(\w+))\s+from\s+['"]([^'"]+)['"]/
    const isOptionalParam = (p: string): boolean => p.includes('?') || p.includes('=')

    lines.forEach((line, index) => {
      const fn = line.match(funcRegex)
      const arrow = fn ? null : line.match(arrowRegex)
      // Modifiers come from the captured keywords, not from substring checks,
      // so names like `loadAsync` or `exportData` are classified correctly.
      const decl = fn
        ? { exported: fn[1], asyncKeyword: fn[2], name: fn[3], rawParams: fn[4] }
        : arrow
          ? { exported: arrow[1], asyncKeyword: arrow[3], name: arrow[2], rawParams: arrow[4] }
          : null
      if (decl) {
        functions.push(
          this.makeFunction(filePath, decl.name, index + 1, this.findJSBraceEnd(lines, index), {
            isAsync: Boolean(decl.asyncKeyword),
            isExported: Boolean(decl.exported),
            params: this.parseParams(decl.rawParams, isOptionalParam),
          })
        )
        return
      }

      const cls = line.match(classRegex)
      if (cls) {
        classes.push(
          this.makeClass(filePath, cls[2], index + 1, this.findJSBraceEnd(lines, index), Boolean(cls[1]))
        )
        return
      }

      const imp = line.match(importRegex)
      if (imp) {
        const [, named, defaultImport, source] = imp
        const names = named
          ? named.split(',').map(s => s.trim()).filter(Boolean)
          : defaultImport
            ? [defaultImport]
            : []
        imports.push(this.makeImport(source, names, Boolean(defaultImport)))
      }
    })
  }

  /** Extracts functions/methods, struct types and import paths from Go source. */
  private recoverGo(
    filePath: string,
    content: string,
    lines: string[],
    functions: ParsedFunction[],
    classes: ParsedClass[],
    imports: ParsedImport[]
  ): void {
    // Optional receiver (possibly generic) followed by the name and either a
    // parameter list or a type-parameter list.
    const funcRegex = /^\s*func\s+(?:\(\s*\w+\s+\*?\w+(?:\[[^\]]*\])?\s*\)\s+)?(\w+)\s*[\[(]/
    const structRegex = /^\s*type\s+(\w+)\s+struct/
    // Go exports identifiers whose first character is an upper-case letter;
    // `_` and digits are not upper case even though toUpperCase() leaves them unchanged.
    const isExportedName = (name: string): boolean => /^\p{Lu}/u.test(name)

    lines.forEach((line, index) => {
      const fn = line.match(funcRegex)
      if (fn) {
        functions.push(
          this.makeFunction(filePath, fn[1], index + 1, this.findJSBraceEnd(lines, index), {
            isExported: isExportedName(fn[1]),
          })
        )
        return
      }

      const struct = line.match(structRegex)
      if (struct) {
        classes.push(
          this.makeClass(filePath, struct[1], index + 1, this.findGoStructEnd(lines, index), isExportedName(struct[1]))
        )
      }
    })

    const specRegex = /^(?:[\w.]+\s+)?"([^"]+)"/
    const singleImportRegex = /^import\s+(?:[\w.]+\s+)?"([^"]+)"/
    let inImportBlock = false
    for (const rawLine of lines) {
      const line = rawLine.trim()

      if (inImportBlock) {
        if (line.startsWith(')')) {
          inImportBlock = false
          continue
        }
        // Only real import specs count; comments and blank lines are skipped and
        // an alias (`log "pkg"`, `_ "pkg"`, `. "pkg"`) is not part of the path.
        const spec = line.match(specRegex)
        if (spec) imports.push(this.makeImport(spec[1]))
        continue
      }

      if (/^import\s*\(/.test(line)) {
        // Paths written on the opening line itself, e.g. `import ("fmt")`.
        for (const quoted of line.matchAll(/"([^"]+)"/g)) {
          imports.push(this.makeImport(quoted[1]))
        }
        inImportBlock = !line.includes(')')
        continue
      }

      const single = line.match(singleImportRegex)
      if (single) imports.push(this.makeImport(single[1]))
    }
  }

  /** Extracts `fn`, `struct` and `use` items from Rust source. */
  private recoverRust(
    filePath: string,
    content: string,
    lines: string[],
    functions: ParsedFunction[],
    classes: ParsedClass[],
    imports: ParsedImport[]
  ): void {
    // Group 1: visibility (`pub`, `pub(crate)`, …); group 2: qualifier run; group 3: name.
    const funcRegex = /^\s*(pub(?:\s*\([^)]*\))?\s+)?((?:(?:default|const|async|unsafe|extern(?:\s+"[^"]*")?)\s+)*)fn\s+(\w+)\s*[<(]/
    const structRegex = /^\s*(pub(?:\s*\([^)]*\))?\s+)?struct\s+(\w+)/
    const useRegex = /^\s*(?:pub(?:\s*\([^)]*\))?\s+)?use\s+(.+);/

    lines.forEach((line, index) => {
      const fn = line.match(funcRegex)
      if (fn) {
        functions.push(
          this.makeFunction(filePath, fn[3], index + 1, this.findJSBraceEnd(lines, index), {
            isAsync: /\basync\b/.test(fn[2]),
            // Any `pub` visibility counts as exported, as before; a name that
            // merely contains "pub" (e.g. `publish`) no longer does.
            isExported: Boolean(fn[1]),
          })
        )
        return
      }

      const struct = line.match(structRegex)
      if (struct) {
        classes.push(
          this.makeClass(filePath, struct[2], index + 1, this.findJSBraceEnd(lines, index), Boolean(struct[1]))
        )
        return
      }

      const use = line.match(useRegex)
      if (use) imports.push(this.makeImport(use[1].trim()))
    })
  }

  /** Extracts method-like declarations, classes and imports from Java source. */
  private recoverJava(
    filePath: string,
    content: string,
    lines: string[],
    functions: ParsedFunction[],
    classes: ParsedClass[],
    imports: ParsedImport[]
  ): void {
    // Modifiers, then a type that is not a statement keyword, then `name(`.
    // The lookahead keeps statements such as `return compute(x);` from being
    // read as a method called `compute` with return type `return`.
    const methodRegex = /^\s*(?:(?:public|private|protected|static|final|abstract|synchronized|native|default)\s+)*(?!(?:return|new|throw|else|case)\b)(?:\w+(?:<[^>]+>)?(?:\[\])?)\s+(\w+)\s*\(/
    const classRegex = /^\s*((?:(?:public|private|protected|static|final|abstract)\s+)*)class\s+(\w+)/
    const importRegex = /^\s*import\s+(?:static\s+)?([\w.]+(?:\.\*)?)\s*;/

    lines.forEach((line, index) => {
      const method = line.match(methodRegex)
      if (method && !ErrorRecoveryEngine.JAVA_NON_METHOD_NAMES.has(method[1])) {
        functions.push(
          this.makeFunction(filePath, method[1], index + 1, this.findJSBraceEnd(lines, index), {
            // Whole-word test on the declaration prefix: `publicize()` is not public.
            isExported: /\bpublic\b/.test(method[0]),
          })
        )
        return
      }

      const cls = line.match(classRegex)
      if (cls) {
        classes.push(
          this.makeClass(filePath, cls[2], index + 1, this.findJSBraceEnd(lines, index), /\bpublic\b/.test(cls[1]))
        )
        return
      }

      const imp = line.match(importRegex)
      if (imp) imports.push(this.makeImport(imp[1]))
    })
  }

  /**
   * Language-agnostic fallback: recognises a few common function and class
   * keywords. End lines are estimates (start + 9 for functions, start + 19 for
   * classes) clamped to the length of the file.
   */
  private recoverGeneric(
    filePath: string,
    content: string,
    lines: string[],
    functions: ParsedFunction[],
    classes: ParsedClass[],
    imports: ParsedImport[]
  ): void {
    // `\b` keeps keywords from matching inside longer words (`undef foo(`).
    const funcPatterns = [
      /\bfunction\s+(\w+)\s*\(/,
      /\bdef\s+(\w+)\s*\(/,
      /\bfn\s+(\w+)\s*[<(]/,
      /\bfunc\s+(\w+)\s*\(/,
    ]
    const classPatterns = [
      /\bclass\s+(\w+)/,
      /\bstruct\s+(\w+)/,
      /\btype\s+(\w+)\s+struct/,
    ]
    const firstMatch = (line: string, patterns: RegExp[]): RegExpMatchArray | null => {
      for (const pattern of patterns) {
        const found = line.match(pattern)
        if (found) return found
      }
      return null
    }

    lines.forEach((line, index) => {
      const fn = firstMatch(line, funcPatterns)
      if (fn) {
        functions.push(this.makeFunction(filePath, fn[1], index + 1, Math.min(lines.length, index + 10)))
      }

      const cls = firstMatch(line, classPatterns)
      if (cls) {
        classes.push(this.makeClass(filePath, cls[1], index + 1, Math.min(lines.length, index + 20), false))
      }
    })
  }

  // ---------------------------------------------------------------------------
  // Helper methods
  // ---------------------------------------------------------------------------

  /**
   * Finds the last line of an indentation-delimited Python block.
   *
   * @param lines All lines of the file.
   * @param startLine 0-based index of the `def` / `class` line.
   * @returns 1-based number of the last non-blank line indented deeper than
   *   the header, or the header's own line number when the block has no body.
   */
  private findPythonFunctionEnd(lines: string[], startLine: number): number {
    const baseIndent = this.getIndentLevel(lines[startLine])
    let lastBodyLine = startLine
    for (let i = startLine + 1; i < lines.length; i++) {
      if (lines[i].trim() === '') continue
      if (this.getIndentLevel(lines[i]) <= baseIndent) break
      lastBodyLine = i
    }
    return lastBodyLine + 1
  }

  /** Python classes end by the same indentation rule as functions. */
  private findPythonClassEnd(lines: string[], startLine: number): number {
    return this.findPythonFunctionEnd(lines, startLine)
  }

  /**
   * Finds the end of a brace-delimited declaration by counting `{` / `}`.
   *
   * A declaration that reaches a top-level `;` before any brace opens
   * (`void f();`, `struct Unit;`, `const f = (x) => x;`) ends on that line
   * instead of running into the next braced block. Braces inside string
   * literals and comments are not recognised and are counted like any other.
   *
   * @param lines All lines of the file.
   * @param startLine 0-based index of the declaration line.
   * @returns 1-based number of the closing line, or `lines.length` when the
   *   braces never balance.
   */
  private findJSBraceEnd(lines: string[], startLine: number): number {
    let braceDepth = 0
    let parenDepth = 0
    let opened = false
    for (let i = startLine; i < lines.length; i++) {
      for (const char of lines[i]) {
        if (char === '{') {
          braceDepth++
          opened = true
        } else if (char === '}') {
          // A stray closer before any opener must not push the depth negative.
          if (braceDepth > 0) braceDepth--
        } else if (!opened) {
          if (char === '(') parenDepth++
          else if (char === ')') parenDepth = Math.max(0, parenDepth - 1)
          else if (char === ';' && parenDepth === 0) return i + 1
        }
      }
      if (opened && braceDepth === 0) return i + 1
    }
    return lines.length
  }

  /**
   * Finds the end of a Go struct declaration. Delegates to brace counting so
   * that one-line structs (`type T struct{}`) and nested anonymous structs are
   * bounded correctly.
   */
  private findGoStructEnd(lines: string[], startLine: number): number {
    return this.findJSBraceEnd(lines, startLine)
  }

  /** Number of leading whitespace characters on `line` (a tab counts as one). */
  private getIndentLevel(line: string): number {
    const match = line.match(/^(\s*)/)
    return match ? match[1].length : 0
  }

  /**
   * Returns the first line of the docstring (or a leading `#` comment) that
   * opens the body of a Python function.
   *
   * Only the first non-blank line within the four lines after the header is
   * considered; if it is ordinary code the function is treated as undocumented.
   *
   * @param lines All lines of the file.
   * @param funcLine 0-based index of the `def` line.
   */
  private extractPythonDocstring(lines: string[], funcLine: number): string {
    const limit = Math.min(funcLine + 5, lines.length)
    for (let i = funcLine + 1; i < limit; i++) {
      const trimmed = lines[i].trim()
      if (trimmed === '') continue

      // Optional string prefix (r, u, b, f and two-letter combinations) before the delimiter.
      const doc = trimmed.match(/^[rRuUbBfF]{0,2}("""|''')(.*)$/)
      if (doc) {
        const [, delimiter, rest] = doc
        const closeAt = rest.indexOf(delimiter)
        if (closeAt !== -1) return rest.slice(0, closeAt).trim()
        if (rest.trim() !== '') return rest.trim()
        // Opening delimiter on its own line: the summary is on the next line.
        const next = i + 1 < lines.length ? lines[i + 1] : ''
        return next.split(delimiter)[0].trim()
      }

      if (trimmed.startsWith('#')) {
        return trimmed.substring(1).trim()
      }
      return ''
    }
    return ''
  }

  /**
   * Scores a recovery result between 0 and 1.
   *
   * Up to 0.7 comes from symbol density (one extracted symbol per ten lines
   * saturates it), plus 0.1 when both functions and classes were found and 0.1
   * when imports were found. Nothing extracted yields 0.
   */
  private calculateConfidence(
    functions: ParsedFunction[],
    classes: ParsedClass[],
    imports: ParsedImport[],
    content: string
  ): number {
    const extracted = functions.length + classes.length + imports.length
    if (extracted === 0) return 0

    const lineCount = content.split('\n').length
    const ratio = Math.min(1, extracted / Math.max(1, lineCount / 10))
    let confidence = ratio * 0.7
    if (functions.length > 0 && classes.length > 0) confidence += 0.1
    if (imports.length > 0) confidence += 0.1
    return Math.min(1, Math.max(0, confidence))
  }
}
|
package/src/parser/index.ts
CHANGED
|
@@ -5,6 +5,7 @@ import { GoParser } from './go/go-parser.js'
|
|
|
5
5
|
import { UnsupportedLanguageError } from '../utils/errors.js'
|
|
6
6
|
import type { ParsedFile } from './types.js'
|
|
7
7
|
import { hashContent } from '../hash/file-hasher.js'
|
|
8
|
+
import { IncrementalCache } from '../cache/incremental-cache.js'
|
|
8
9
|
import {
|
|
9
10
|
parserKindForExtension,
|
|
10
11
|
languageForExtension,
|
|
@@ -256,6 +257,9 @@ export async function parseFilesWithDiagnostics(
|
|
|
256
257
|
}
|
|
257
258
|
}
|
|
258
259
|
|
|
260
|
+
// Initialize incremental cache
|
|
261
|
+
const cache = new IncrementalCache(projectRoot)
|
|
262
|
+
|
|
259
263
|
// Normalized project root for absolute path construction.
|
|
260
264
|
const normalizedRoot = nodePath.resolve(projectRoot).replace(/\\/g, '/')
|
|
261
265
|
|
|
@@ -311,12 +315,33 @@ export async function parseFilesWithDiagnostics(
|
|
|
311
315
|
}
|
|
312
316
|
|
|
313
317
|
try {
|
|
318
|
+
// Compute content hash for cache lookup
|
|
319
|
+
const contentHash = hashContent(content)
|
|
320
|
+
|
|
321
|
+
// Check cache first
|
|
322
|
+
const cached = await cache.get(absoluteFp, contentHash)
|
|
323
|
+
if (cached) {
|
|
324
|
+
// Cache hit — reuse parsed result
|
|
325
|
+
if (parserKind === 'oxc') {
|
|
326
|
+
oxcFiles.push(cached)
|
|
327
|
+
} else if (parserKind === 'go') {
|
|
328
|
+
goFiles.push(cached)
|
|
329
|
+
} else {
|
|
330
|
+
treeFiles.push(cached)
|
|
331
|
+
}
|
|
332
|
+
parsedFilesCount += 1
|
|
333
|
+
continue
|
|
334
|
+
}
|
|
335
|
+
|
|
336
|
+
// Cache miss — parse and store
|
|
314
337
|
if (parserKind === 'oxc') {
|
|
315
338
|
const parsed = await oxcParser.parse(absoluteFp, content)
|
|
339
|
+
await cache.set(absoluteFp, contentHash, parsed)
|
|
316
340
|
oxcFiles.push(parsed)
|
|
317
341
|
parsedFilesCount += 1
|
|
318
342
|
} else if (parserKind === 'go') {
|
|
319
343
|
const parsed = await goParser.parse(absoluteFp, content)
|
|
344
|
+
await cache.set(absoluteFp, contentHash, parsed)
|
|
320
345
|
goFiles.push(parsed)
|
|
321
346
|
parsedFilesCount += 1
|
|
322
347
|
} else {
|
|
@@ -335,6 +360,7 @@ export async function parseFilesWithDiagnostics(
|
|
|
335
360
|
}
|
|
336
361
|
const ts = await getTreeSitter()
|
|
337
362
|
const parsed = await ts.parse(absoluteFp, content)
|
|
363
|
+
await cache.set(absoluteFp, contentHash, parsed)
|
|
338
364
|
treeFiles.push(parsed)
|
|
339
365
|
parsedFilesCount += 1
|
|
340
366
|
}
|
|
@@ -414,6 +440,9 @@ export async function parseFilesWithDiagnostics(
|
|
|
414
440
|
...fallbackFiles,
|
|
415
441
|
]
|
|
416
442
|
|
|
443
|
+
// Persist cache metadata
|
|
444
|
+
cache.flush()
|
|
445
|
+
|
|
417
446
|
return {
|
|
418
447
|
files: resolved,
|
|
419
448
|
diagnostics,
|