@getmikk/core 2.0.13 → 2.0.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -4
- package/package.json +2 -1
- package/src/analysis/index.ts +9 -0
- package/src/analysis/taint-analysis.ts +419 -0
- package/src/analysis/type-flow.ts +247 -0
- package/src/cache/incremental-cache.ts +278 -0
- package/src/cache/index.ts +1 -0
- package/src/contract/contract-generator.ts +31 -3
- package/src/contract/contract-reader.ts +1 -0
- package/src/contract/lock-compiler.ts +125 -12
- package/src/contract/schema.ts +4 -0
- package/src/error-handler.ts +2 -1
- package/src/graph/cluster-detector.ts +2 -4
- package/src/graph/dead-code-detector.ts +303 -117
- package/src/graph/graph-builder.ts +21 -161
- package/src/graph/impact-analyzer.ts +1 -0
- package/src/graph/index.ts +2 -0
- package/src/graph/rich-function-index.ts +1080 -0
- package/src/graph/symbol-table.ts +252 -0
- package/src/hash/hash-store.ts +1 -0
- package/src/index.ts +4 -0
- package/src/parser/base-extractor.ts +19 -0
- package/src/parser/boundary-checker.ts +31 -12
- package/src/parser/error-recovery.ts +647 -0
- package/src/parser/function-body-extractor.ts +248 -0
- package/src/parser/go/go-extractor.ts +249 -676
- package/src/parser/index.ts +138 -295
- package/src/parser/language-registry.ts +57 -0
- package/src/parser/oxc-parser.ts +166 -28
- package/src/parser/oxc-resolver.ts +179 -11
- package/src/parser/parser-constants.ts +1 -0
- package/src/parser/rust/rust-extractor.ts +109 -0
- package/src/parser/tree-sitter/parser.ts +400 -66
- package/src/parser/tree-sitter/queries.ts +106 -10
- package/src/parser/types.ts +20 -1
- package/src/search/bm25.ts +21 -8
- package/src/search/direct-search.ts +472 -0
- package/src/search/embedding-provider.ts +249 -0
- package/src/search/index.ts +12 -0
- package/src/search/semantic-search.ts +435 -0
- package/src/security/index.ts +1 -0
- package/src/security/scanner.ts +342 -0
- package/src/utils/artifact-transaction.ts +1 -0
- package/src/utils/atomic-write.ts +1 -0
- package/src/utils/errors.ts +89 -4
- package/src/utils/fs.ts +150 -65
- package/src/utils/json.ts +1 -0
- package/src/utils/language-registry.ts +96 -5
- package/src/utils/minimatch.ts +49 -6
- package/src/utils/path.ts +26 -0
- package/tests/dead-code.test.ts +3 -2
- package/tests/direct-search.test.ts +435 -0
- package/tests/error-recovery.test.ts +143 -0
- package/tests/fixtures/simple-api/src/index.ts +1 -1
- package/tests/go-parser.test.ts +19 -335
- package/tests/js-parser.test.ts +18 -1089
- package/tests/language-registry-all.test.ts +276 -0
- package/tests/language-registry.test.ts +6 -4
- package/tests/parse-diagnostics.test.ts +9 -96
- package/tests/parser.test.ts +42 -771
- package/tests/polyglot-parser.test.ts +117 -0
- package/tests/rich-function-index.test.ts +703 -0
- package/tests/tree-sitter-parser.test.ts +108 -80
- package/tests/ts-parser.test.ts +8 -8
- package/tests/verification.test.ts +175 -0
- package/src/parser/base-parser.ts +0 -16
- package/src/parser/go/go-parser.ts +0 -43
- package/src/parser/javascript/js-extractor.ts +0 -278
- package/src/parser/javascript/js-parser.ts +0 -101
- package/src/parser/typescript/ts-extractor.ts +0 -447
- package/src/parser/typescript/ts-parser.ts +0 -36
package/src/parser/index.ts
CHANGED
|
@@ -1,17 +1,16 @@
|
|
|
1
|
+
|
|
1
2
|
import * as nodePath from 'node:path'
|
|
2
|
-
import {
|
|
3
|
-
|
|
4
|
-
import
|
|
5
|
-
import
|
|
6
|
-
import
|
|
3
|
+
import { LanguageRegistry } from './language-registry.js'
|
|
4
|
+
export { LanguageRegistry } from './language-registry.js'
|
|
5
|
+
import './oxc-parser.js'
|
|
6
|
+
import './tree-sitter/parser.js'
|
|
7
|
+
import './go/go-extractor.js'
|
|
8
|
+
import { BaseExtractor } from './base-extractor.js'
|
|
7
9
|
import { hashContent } from '../hash/file-hasher.js'
|
|
8
|
-
import {
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
isTreeSitterExtension,
|
|
13
|
-
type ParserKind,
|
|
14
|
-
} from '../utils/language-registry.js'
|
|
10
|
+
import { IncrementalCache } from '../cache/incremental-cache.js'
|
|
11
|
+
import { languageForExtension, toParsedFileLanguage } from '../utils/language-registry.js'
|
|
12
|
+
import { ErrorRecoveryEngine } from './error-recovery.js'
|
|
13
|
+
import type { ParsedFile } from './types.js'
|
|
15
14
|
|
|
16
15
|
export type {
|
|
17
16
|
ParsedFile,
|
|
@@ -25,23 +24,15 @@ export type {
|
|
|
25
24
|
ParsedGeneric,
|
|
26
25
|
ParsedRoute
|
|
27
26
|
} from './types.js'
|
|
28
|
-
|
|
29
|
-
export {
|
|
30
|
-
export { TypeScriptExtractor } from './typescript/ts-extractor.js'
|
|
31
|
-
export { TypeScriptResolver } from './typescript/ts-resolver.js'
|
|
32
|
-
export { GoParser } from './go/go-parser.js'
|
|
33
|
-
export { GoExtractor } from './go/go-extractor.js'
|
|
34
|
-
export { GoResolver } from './go/go-resolver.js'
|
|
35
|
-
export { JavaScriptParser } from './javascript/js-parser.js'
|
|
36
|
-
export { JavaScriptExtractor } from './javascript/js-extractor.js'
|
|
37
|
-
export { JavaScriptResolver } from './javascript/js-resolver.js'
|
|
27
|
+
|
|
28
|
+
export { BaseExtractor } from './base-extractor.js'
|
|
38
29
|
export { BoundaryChecker } from './boundary-checker.js'
|
|
39
|
-
export { TreeSitterParser } from './tree-sitter/parser.js'
|
|
40
30
|
|
|
41
31
|
export type ParseDiagnosticStage = 'read' | 'parse' | 'resolve-imports'
|
|
42
32
|
export type ParseDiagnosticReason =
|
|
43
33
|
| 'read-error'
|
|
44
34
|
| 'parse-error'
|
|
35
|
+
| 'parse-error-recovered'
|
|
45
36
|
| 'resolve-error'
|
|
46
37
|
| 'unsupported-extension'
|
|
47
38
|
| 'parser-unavailable'
|
|
@@ -49,7 +40,6 @@ export type ParseDiagnosticReason =
|
|
|
49
40
|
export interface ParseDiagnostic {
|
|
50
41
|
filePath: string
|
|
51
42
|
extension: string
|
|
52
|
-
parser: ParserKind
|
|
53
43
|
stage: ParseDiagnosticStage
|
|
54
44
|
reason: ParseDiagnosticReason
|
|
55
45
|
message: string
|
|
@@ -70,18 +60,9 @@ export interface ParseFilesResult {
|
|
|
70
60
|
summary: ParseFilesSummary
|
|
71
61
|
}
|
|
72
62
|
|
|
73
|
-
const isLikelyParserUnavailable = (parser: ParserKind, message: string): boolean => {
|
|
74
|
-
if (parser !== 'tree-sitter') return false
|
|
75
|
-
const normalized = message.toLowerCase()
|
|
76
|
-
return normalized.includes('web-tree-sitter') ||
|
|
77
|
-
normalized.includes('tree-sitter') ||
|
|
78
|
-
normalized.includes('cannot find module')
|
|
79
|
-
}
|
|
80
|
-
|
|
81
|
-
|
|
82
63
|
const buildFallbackParsedFile = (filePath: string, content: string, ext: string): ParsedFile => ({
|
|
83
64
|
path: filePath,
|
|
84
|
-
language: languageForExtension(ext)
|
|
65
|
+
language: toParsedFileLanguage(languageForExtension(ext)),
|
|
85
66
|
functions: [],
|
|
86
67
|
classes: [],
|
|
87
68
|
generics: [],
|
|
@@ -100,209 +81,105 @@ const normalizeErrorMessage = (err: unknown): string => {
|
|
|
100
81
|
return String(err)
|
|
101
82
|
}
|
|
102
83
|
|
|
103
|
-
/** Get the appropriate parser for a file based on its extension */
|
|
104
|
-
export function getParser(filePath: string): BaseParser {
|
|
105
|
-
const ext = nodePath.extname(filePath).toLowerCase()
|
|
106
|
-
const parserKind = parserKindForExtension(ext)
|
|
107
|
-
|
|
108
|
-
switch (parserKind) {
|
|
109
|
-
case 'oxc':
|
|
110
|
-
return new OxcParser()
|
|
111
|
-
case 'go':
|
|
112
|
-
return new GoParser()
|
|
113
|
-
case 'tree-sitter':
|
|
114
|
-
return createTreeSitterParser()
|
|
115
|
-
default:
|
|
116
|
-
throw new UnsupportedLanguageError(ext || '<no extension>')
|
|
117
|
-
}
|
|
118
|
-
}
|
|
119
|
-
|
|
120
|
-
let _treeSitterParserInstance: BaseParser | null = null
|
|
121
|
-
|
|
122
|
-
const createTreeSitterParser = (): BaseParser => {
|
|
123
|
-
if (!_treeSitterParserInstance) {
|
|
124
|
-
// Return a lazy-loading wrapper that handles missing tree-sitter gracefully.
|
|
125
|
-
_treeSitterParserInstance = new LazyTreeSitterParser()
|
|
126
|
-
}
|
|
127
|
-
return _treeSitterParserInstance
|
|
128
|
-
}
|
|
129
|
-
|
|
130
|
-
class LazyTreeSitterParser extends BaseParser {
|
|
131
|
-
private parser: any = null
|
|
132
|
-
|
|
133
|
-
async init(): Promise<void> {
|
|
134
|
-
if (this.parser) return
|
|
135
|
-
try {
|
|
136
|
-
const { TreeSitterParser } = await import('./tree-sitter/parser.js')
|
|
137
|
-
this.parser = new TreeSitterParser()
|
|
138
|
-
} catch {
|
|
139
|
-
// web-tree-sitter not available
|
|
140
|
-
}
|
|
141
|
-
}
|
|
142
|
-
|
|
143
|
-
async parse(filePath: string, content: string): Promise<ParsedFile> {
|
|
144
|
-
await this.init()
|
|
145
|
-
if (!this.parser) {
|
|
146
|
-
return this.buildEmptyFile(filePath, content)
|
|
147
|
-
}
|
|
148
|
-
return this.parser.parse(filePath, content)
|
|
149
|
-
}
|
|
150
|
-
|
|
151
|
-
async resolveImports(files: ParsedFile[], projectRoot: string): Promise<ParsedFile[]> {
|
|
152
|
-
await this.init()
|
|
153
|
-
if (!this.parser) return files
|
|
154
|
-
return this.parser.resolveImports(files, projectRoot)
|
|
155
|
-
}
|
|
156
|
-
|
|
157
|
-
getSupportedExtensions(): string[] {
|
|
158
|
-
return [...getParserExtensions('tree-sitter')]
|
|
159
|
-
}
|
|
160
|
-
|
|
161
|
-
private buildEmptyFile(filePath: string, content: string): ParsedFile {
|
|
162
|
-
const ext = nodePath.extname(filePath).toLowerCase()
|
|
163
|
-
const lang = languageForExtension(ext)
|
|
164
|
-
return {
|
|
165
|
-
path: filePath,
|
|
166
|
-
language: lang as ParsedFile['language'],
|
|
167
|
-
functions: [],
|
|
168
|
-
classes: [],
|
|
169
|
-
generics: [],
|
|
170
|
-
imports: [],
|
|
171
|
-
exports: [],
|
|
172
|
-
routes: [],
|
|
173
|
-
variables: [],
|
|
174
|
-
calls: [],
|
|
175
|
-
hash: hashContent(content),
|
|
176
|
-
parsedAt: Date.now(),
|
|
177
|
-
}
|
|
178
|
-
}
|
|
179
|
-
}
|
|
180
|
-
|
|
181
84
|
export interface ParseFilesOptions {
|
|
182
85
|
strictParserPreflight?: boolean
|
|
183
|
-
|
|
86
|
+
concurrency?: number
|
|
184
87
|
}
|
|
185
88
|
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
89
|
+
const DEFAULT_CONCURRENCY = 32
|
|
90
|
+
|
|
91
|
+
/**
 * Applies `processor` to every element of `items`, working through the input
 * in consecutive batches so that at most `concurrency` calls are in flight at
 * any moment. Results are returned in input order.
 *
 * Every batch is awaited with `Promise.allSettled`, so a rejection never
 * prevents the remaining items (in that batch or in later ones) from being
 * processed. Once all items have settled, the first rejection in input order
 * is rethrown instead of being silently dropped, which would otherwise leave
 * `undefined` holes in an array typed as `R[]`.
 *
 * @param items - Inputs to process; the array itself is not modified.
 * @param processor - Async function invoked once per item.
 * @param concurrency - Maximum batch size. Fractional values are rounded
 *   down; values below 1 (including 0 and NaN) fall back to 1 so the loop
 *   always advances.
 * @returns The fulfilled values, each at the index of its input item.
 * @throws The rejection reason of the first item (by index) whose processor
 *   call rejected.
 */
async function parallelBatch<T, R>(
    items: T[],
    processor: (item: T) => Promise<R>,
    concurrency: number
): Promise<R[]> {
    // A step of 0 / NaN would never advance, and a fractional step would
    // produce fractional result indices, so normalise to an integer >= 1.
    const batchSize = concurrency >= 1 ? Math.floor(concurrency) : 1
    const results: R[] = new Array(items.length)

    let hasFailure = false
    let firstFailure: unknown

    for (let start = 0; start < items.length; start += batchSize) {
        const batch = items.slice(start, start + batchSize)
        const settled = await Promise.allSettled(batch.map(item => processor(item)))

        for (let offset = 0; offset < settled.length; offset++) {
            const outcome = settled[offset]
            if (outcome.status === 'fulfilled') {
                results[start + offset] = outcome.value
            } else if (!hasFailure) {
                // Remember only the earliest failure; later ones are secondary.
                hasFailure = true
                firstFailure = outcome.reason
            }
        }
    }

    if (hasFailure) {
        throw firstFailure
    }

    return results
}
|
|
198
115
|
|
|
116
|
+
/**
|
|
117
|
+
* Main entry point for scanning and parsing multiple files.
|
|
118
|
+
* Uses LanguageRegistry to dispatch to the correct extractor.
|
|
119
|
+
* PARALLELIZED: Files are parsed concurrently for better performance.
|
|
120
|
+
*/
|
|
199
121
|
export async function parseFilesWithDiagnostics(
|
|
200
122
|
filePaths: string[],
|
|
201
123
|
projectRoot: string,
|
|
202
124
|
readFile: (fp: string) => Promise<string>,
|
|
203
125
|
options: ParseFilesOptions = {},
|
|
204
126
|
): Promise<ParseFilesResult> {
|
|
205
|
-
// Shared parser instances — avoid re-initialisation overhead per file.
|
|
206
|
-
const oxcParser = new OxcParser()
|
|
207
|
-
const goParser = new GoParser()
|
|
208
|
-
|
|
209
|
-
// Lazily loaded to avoid mandatory dependency on tree-sitter for TS/JS-only projects.
|
|
210
|
-
let treeSitterParser: BaseParser | null = null
|
|
211
|
-
const getTreeSitter = async (): Promise<BaseParser> => {
|
|
212
|
-
if (!treeSitterParser) {
|
|
213
|
-
const { TreeSitterParser } = await import('./tree-sitter/parser.js')
|
|
214
|
-
treeSitterParser = new TreeSitterParser()
|
|
215
|
-
}
|
|
216
|
-
return treeSitterParser
|
|
217
|
-
}
|
|
218
|
-
|
|
219
127
|
const diagnostics: ParseDiagnostic[] = []
|
|
220
|
-
const addDiagnostic = (diagnostic: ParseDiagnostic) => diagnostics.push(diagnostic)
|
|
221
|
-
|
|
222
|
-
const treeSitterNeeded = filePaths.some(fp => {
|
|
223
|
-
const ext = nodePath.extname(fp).toLowerCase()
|
|
224
|
-
return isTreeSitterExtension(ext)
|
|
225
|
-
})
|
|
226
|
-
|
|
227
|
-
let treeSitterAvailable = true
|
|
228
|
-
if (treeSitterNeeded) {
|
|
229
|
-
treeSitterAvailable =
|
|
230
|
-
typeof options.treeSitterRuntimeAvailable === 'boolean'
|
|
231
|
-
? options.treeSitterRuntimeAvailable
|
|
232
|
-
: await isTreeSitterRuntimeAvailable()
|
|
233
|
-
if (!treeSitterAvailable) {
|
|
234
|
-
addDiagnostic({
|
|
235
|
-
filePath: '*',
|
|
236
|
-
extension: '*',
|
|
237
|
-
parser: 'tree-sitter',
|
|
238
|
-
stage: 'parse',
|
|
239
|
-
reason: 'parser-unavailable',
|
|
240
|
-
message: 'Tree-sitter runtime unavailable. Install web-tree-sitter and language grammars.',
|
|
241
|
-
})
|
|
242
|
-
if (options.strictParserPreflight) {
|
|
243
|
-
return {
|
|
244
|
-
files: [],
|
|
245
|
-
diagnostics,
|
|
246
|
-
summary: {
|
|
247
|
-
requestedFiles: filePaths.length,
|
|
248
|
-
parsedFiles: 0,
|
|
249
|
-
fallbackFiles: 0,
|
|
250
|
-
unreadableFiles: 0,
|
|
251
|
-
unsupportedFiles: 0,
|
|
252
|
-
diagnostics: diagnostics.length,
|
|
253
|
-
},
|
|
254
|
-
}
|
|
255
|
-
}
|
|
256
|
-
}
|
|
257
|
-
}
|
|
258
|
-
|
|
259
|
-
// Normalized project root for absolute path construction.
|
|
260
128
|
const normalizedRoot = nodePath.resolve(projectRoot).replace(/\\/g, '/')
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
const
|
|
264
|
-
const
|
|
265
|
-
|
|
129
|
+
const concurrency = options.concurrency ?? DEFAULT_CONCURRENCY
|
|
130
|
+
|
|
131
|
+
const cache = new IncrementalCache(projectRoot)
|
|
132
|
+
const registry = LanguageRegistry.getInstance()
|
|
133
|
+
|
|
134
|
+
const filesByExtractor = new Map<BaseExtractor, ParsedFile[]>()
|
|
266
135
|
const fallbackFiles: ParsedFile[] = []
|
|
267
|
-
|
|
136
|
+
const _pendingDiagnostics: Array<{ filePath: string; ext: string; stage: ParseDiagnosticStage; reason: ParseDiagnosticReason; message: string }> = []
|
|
137
|
+
|
|
268
138
|
let parsedFilesCount = 0
|
|
269
139
|
let fallbackFilesCount = 0
|
|
270
140
|
let unreadableFiles = 0
|
|
271
141
|
let unsupportedFiles = 0
|
|
272
142
|
|
|
273
|
-
|
|
274
|
-
// mutable per-instance state (e.g. language switching/counters).
|
|
275
|
-
for (const fp of filePaths) {
|
|
276
|
-
const ext = nodePath.extname(fp).toLowerCase()
|
|
277
|
-
const parserKind = parserKindForExtension(ext)
|
|
278
|
-
|
|
279
|
-
// Build absolute posix path — this is the single source of truth for all IDs.
|
|
143
|
+
const fileResults = await parallelBatch(filePaths, async (fp) => {
|
|
280
144
|
const absoluteFp = nodePath.resolve(normalizedRoot, fp).replace(/\\/g, '/')
|
|
281
|
-
|
|
282
|
-
|
|
145
|
+
const ext = nodePath.extname(absoluteFp).toLowerCase()
|
|
146
|
+
const langDef = registry.getForFile(absoluteFp)
|
|
147
|
+
|
|
283
148
|
try {
|
|
284
|
-
content = await readFile(absoluteFp)
|
|
149
|
+
const content = await readFile(absoluteFp)
|
|
150
|
+
return { absoluteFp, ext, langDef, content }
|
|
285
151
|
} catch (err: unknown) {
|
|
152
|
+
return {
|
|
153
|
+
absoluteFp, ext, langDef, content: null,
|
|
154
|
+
error: normalizeErrorMessage(err)
|
|
155
|
+
}
|
|
156
|
+
}
|
|
157
|
+
}, concurrency)
|
|
158
|
+
|
|
159
|
+
for (const result of fileResults) {
|
|
160
|
+
if (result.error) {
|
|
286
161
|
unreadableFiles += 1
|
|
287
|
-
|
|
288
|
-
filePath: absoluteFp,
|
|
289
|
-
extension: ext,
|
|
290
|
-
parser: parserKind,
|
|
162
|
+
diagnostics.push({
|
|
163
|
+
filePath: result.absoluteFp,
|
|
164
|
+
extension: result.ext,
|
|
291
165
|
stage: 'read',
|
|
292
166
|
reason: 'read-error',
|
|
293
|
-
message:
|
|
167
|
+
message: result.error,
|
|
294
168
|
})
|
|
295
169
|
continue
|
|
296
170
|
}
|
|
297
171
|
|
|
298
|
-
if (
|
|
172
|
+
if (result.content === null) continue
|
|
173
|
+
|
|
174
|
+
const { absoluteFp, ext, langDef, content } = result
|
|
175
|
+
|
|
176
|
+
if (!langDef) {
|
|
299
177
|
unsupportedFiles += 1
|
|
300
178
|
fallbackFilesCount += 1
|
|
301
|
-
fallbackFiles.push(buildFallbackParsedFile(absoluteFp, content
|
|
302
|
-
|
|
179
|
+
fallbackFiles.push(buildFallbackParsedFile(absoluteFp, content!, ext))
|
|
180
|
+
diagnostics.push({
|
|
303
181
|
filePath: absoluteFp,
|
|
304
182
|
extension: ext,
|
|
305
|
-
parser: parserKind,
|
|
306
183
|
stage: 'parse',
|
|
307
184
|
reason: 'unsupported-extension',
|
|
308
185
|
message: `Unsupported extension: ${ext || '<none>'}`,
|
|
@@ -311,111 +188,86 @@ export async function parseFilesWithDiagnostics(
|
|
|
311
188
|
}
|
|
312
189
|
|
|
313
190
|
try {
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
parsedFilesCount += 1
|
|
322
|
-
} else {
|
|
323
|
-
if (!treeSitterAvailable) {
|
|
324
|
-
fallbackFilesCount += 1
|
|
325
|
-
fallbackFiles.push(buildFallbackParsedFile(absoluteFp, content, ext))
|
|
326
|
-
addDiagnostic({
|
|
327
|
-
filePath: absoluteFp,
|
|
328
|
-
extension: ext,
|
|
329
|
-
parser: 'tree-sitter',
|
|
330
|
-
stage: 'parse',
|
|
331
|
-
reason: 'parser-unavailable',
|
|
332
|
-
message: 'Tree-sitter runtime unavailable. Falling back to empty parsed file.',
|
|
333
|
-
})
|
|
334
|
-
continue
|
|
335
|
-
}
|
|
336
|
-
const ts = await getTreeSitter()
|
|
337
|
-
const parsed = await ts.parse(absoluteFp, content)
|
|
338
|
-
treeFiles.push(parsed)
|
|
191
|
+
const contentHash = hashContent(content!)
|
|
192
|
+
const cached = await cache.get(absoluteFp, contentHash)
|
|
193
|
+
|
|
194
|
+
if (cached) {
|
|
195
|
+
const group = filesByExtractor.get(langDef.extractor) || []
|
|
196
|
+
group.push(cached)
|
|
197
|
+
filesByExtractor.set(langDef.extractor, group)
|
|
339
198
|
parsedFilesCount += 1
|
|
199
|
+
continue
|
|
340
200
|
}
|
|
341
|
-
} catch (err: unknown) {
|
|
342
|
-
fallbackFilesCount += 1
|
|
343
|
-
const message = normalizeErrorMessage(err)
|
|
344
|
-
const reason: ParseDiagnosticReason = isLikelyParserUnavailable(parserKind, message)
|
|
345
|
-
? 'parser-unavailable'
|
|
346
|
-
: 'parse-error'
|
|
347
|
-
|
|
348
|
-
fallbackFiles.push(buildFallbackParsedFile(absoluteFp, content, ext))
|
|
349
|
-
addDiagnostic({
|
|
350
|
-
filePath: absoluteFp,
|
|
351
|
-
extension: ext,
|
|
352
|
-
parser: parserKind,
|
|
353
|
-
stage: 'parse',
|
|
354
|
-
reason,
|
|
355
|
-
message,
|
|
356
|
-
})
|
|
357
|
-
}
|
|
358
|
-
}
|
|
359
|
-
|
|
360
|
-
// Resolve imports batch-wise per parser (each has its own resolver).
|
|
361
|
-
let resolvedOxcFiles = oxcFiles
|
|
362
|
-
if (oxcFiles.length > 0) {
|
|
363
|
-
try {
|
|
364
|
-
resolvedOxcFiles = await oxcParser.resolveImports(oxcFiles, normalizedRoot)
|
|
365
|
-
} catch (err: unknown) {
|
|
366
|
-
addDiagnostic({
|
|
367
|
-
filePath: '*',
|
|
368
|
-
extension: '*',
|
|
369
|
-
parser: 'oxc',
|
|
370
|
-
stage: 'resolve-imports',
|
|
371
|
-
reason: 'resolve-error',
|
|
372
|
-
message: normalizeErrorMessage(err),
|
|
373
|
-
})
|
|
374
|
-
}
|
|
375
|
-
}
|
|
376
201
|
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
202
|
+
const parsed = await langDef.extractor.extract(absoluteFp, content!)
|
|
203
|
+
await cache.set(absoluteFp, contentHash, parsed)
|
|
204
|
+
|
|
205
|
+
const group = filesByExtractor.get(langDef.extractor) || []
|
|
206
|
+
group.push(parsed)
|
|
207
|
+
filesByExtractor.set(langDef.extractor, group)
|
|
208
|
+
parsedFilesCount += 1
|
|
381
209
|
} catch (err: unknown) {
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
210
|
+
if (process.env.MIKK_DEBUG) {
|
|
211
|
+
console.error(`[parser] Error extracting ${absoluteFp}:`, err instanceof Error ? err.message : String(err))
|
|
212
|
+
}
|
|
213
|
+
fallbackFilesCount += 1
|
|
214
|
+
|
|
215
|
+
const errorMessage = normalizeErrorMessage(err)
|
|
216
|
+
const language = langDef?.name ?? languageForExtension(ext) ?? 'unknown'
|
|
217
|
+
|
|
218
|
+
const recoveryEngine = new ErrorRecoveryEngine()
|
|
219
|
+
const recoveryResult = await recoveryEngine.recover(absoluteFp, content!, language)
|
|
220
|
+
|
|
221
|
+
if (recoveryResult.success && recoveryResult.confidence > 0.2) {
|
|
222
|
+
fallbackFiles.push(recoveryResult.parsed)
|
|
223
|
+
diagnostics.push({
|
|
224
|
+
filePath: absoluteFp,
|
|
225
|
+
extension: ext,
|
|
226
|
+
stage: 'parse',
|
|
227
|
+
reason: 'parse-error-recovered',
|
|
228
|
+
message: `${errorMessage} | Recovered ${recoveryResult.strategy} (confidence: ${(recoveryResult.confidence * 100).toFixed(0)}%) | ${recoveryResult.parsed.functions.length} fns, ${recoveryResult.parsed.classes.length} classes`,
|
|
229
|
+
})
|
|
230
|
+
} else {
|
|
231
|
+
fallbackFiles.push(buildFallbackParsedFile(absoluteFp, content!, ext))
|
|
232
|
+
diagnostics.push({
|
|
233
|
+
filePath: absoluteFp,
|
|
234
|
+
extension: ext,
|
|
235
|
+
stage: 'parse',
|
|
236
|
+
reason: 'parse-error',
|
|
237
|
+
message: errorMessage,
|
|
238
|
+
})
|
|
239
|
+
}
|
|
390
240
|
}
|
|
391
241
|
}
|
|
392
242
|
|
|
393
|
-
|
|
394
|
-
if (treeFiles.length > 0) {
|
|
243
|
+
for (const [extractor, files] of filesByExtractor.entries()) {
|
|
395
244
|
try {
|
|
396
|
-
const
|
|
397
|
-
|
|
245
|
+
const resolved = await extractor.resolveImports(files, normalizedRoot)
|
|
246
|
+
|
|
247
|
+
for (let i = 0; i < files.length; i++) {
|
|
248
|
+
const originalFile = files[i]
|
|
249
|
+
const resolvedFile = resolved[i]
|
|
250
|
+
if (resolvedFile && resolvedFile !== originalFile) {
|
|
251
|
+
files[i] = resolvedFile
|
|
252
|
+
}
|
|
253
|
+
}
|
|
254
|
+
fallbackFiles.push(...files)
|
|
398
255
|
} catch (err: unknown) {
|
|
399
|
-
|
|
256
|
+
diagnostics.push({
|
|
400
257
|
filePath: '*',
|
|
401
258
|
extension: '*',
|
|
402
|
-
parser: 'tree-sitter',
|
|
403
259
|
stage: 'resolve-imports',
|
|
404
260
|
reason: 'resolve-error',
|
|
405
261
|
message: normalizeErrorMessage(err),
|
|
406
262
|
})
|
|
263
|
+
fallbackFiles.push(...files)
|
|
407
264
|
}
|
|
408
265
|
}
|
|
409
266
|
|
|
410
|
-
|
|
411
|
-
...resolvedOxcFiles,
|
|
412
|
-
...resolvedGoFiles,
|
|
413
|
-
...resolvedTreeFiles,
|
|
414
|
-
...fallbackFiles,
|
|
415
|
-
]
|
|
267
|
+
cache.flush()
|
|
416
268
|
|
|
417
269
|
return {
|
|
418
|
-
files:
|
|
270
|
+
files: fallbackFiles,
|
|
419
271
|
diagnostics,
|
|
420
272
|
summary: {
|
|
421
273
|
requestedFiles: filePaths.length,
|
|
@@ -428,15 +280,6 @@ export async function parseFilesWithDiagnostics(
|
|
|
428
280
|
}
|
|
429
281
|
}
|
|
430
282
|
|
|
431
|
-
/**
|
|
432
|
-
* Parse multiple files, resolve their imports, and return ParsedFile[].
|
|
433
|
-
*
|
|
434
|
-
* Path contract (critical for graph correctness):
|
|
435
|
-
* - filePaths come from discoverFiles() as project-root-relative strings
|
|
436
|
-
* - We resolve them to ABSOLUTE posix paths before passing to parse()
|
|
437
|
-
* - ParsedFile.path is therefore always absolute + forward-slash
|
|
438
|
-
* - OxcResolver also returns absolute paths → import edges always consistent
|
|
439
|
-
*/
|
|
440
283
|
export async function parseFiles(
|
|
441
284
|
filePaths: string[],
|
|
442
285
|
projectRoot: string,
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
import type { SyntaxNode } from 'web-tree-sitter';
|
|
2
|
+
import { BaseExtractor } from './base-extractor.js';
|
|
3
|
+
|
|
4
|
+
/**
 * Everything the registry stores about one language.
 */
export interface LanguageDefinition {
    /** Language name; used as the key under which the definition is stored. */
    name: string;
    /** File extensions that map to this language (looked up by `getForFile`). */
    extensions: string[];
    /** Tree-sitter grammar identifier — presumably the grammar package/wasm name; confirm against the extractor. */
    treeSitterGrammar: string;
    /** Extractor instance responsible for files of this language. */
    extractor: BaseExtractor;
    /** Capability flags describing the language. */
    semanticFeatures: {
        hasTypeSystem: boolean;
        hasGenerics: boolean;
        hasMacros: boolean;
        hasAnnotations: boolean;
        hasPatternMatching: boolean;
    };
    /** Optional language-specific hooks. */
    specialHandling?: {
        importResolution?: (source: string, filePath: string) => Promise<string[]>;
        exportDetection?: (node: SyntaxNode) => boolean;
    };
}

/**
 * Process-wide singleton mapping file extensions to language definitions.
 * Obtain the shared instance through `LanguageRegistry.getInstance()`.
 */
export class LanguageRegistry {
    private static instance: LanguageRegistry | undefined;

    /** Definitions keyed by language name. */
    private languages: Map<string, LanguageDefinition> = new Map();
    /** Extension (exactly as registered) -> language name. */
    private extMap: Map<string, string> = new Map();

    private constructor() {}

    /** Returns the shared registry, creating it on first use. */
    public static getInstance(): LanguageRegistry {
        if (!LanguageRegistry.instance) {
            LanguageRegistry.instance = new LanguageRegistry();
        }
        return LanguageRegistry.instance;
    }

    /**
     * Stores `lang` under its name and points each of its extensions at it.
     * Registering the same name or extension again overwrites the earlier entry.
     */
    public register(lang: LanguageDefinition): void {
        this.languages.set(lang.name, lang);
        for (const ext of lang.extensions) {
            this.extMap.set(ext, lang.name);
        }
    }

    /**
     * Finds the language definition for a file path.
     *
     * The extension is taken from the final path segment only (both `/` and
     * `\` are treated as separators), so a dot in a directory name is never
     * mistaken for an extension. The lookup first tries the extension exactly
     * as written and then its lower-cased form, so `Foo.TS` resolves to a
     * language registered for `.ts`.
     *
     * @param filePath - Relative or absolute path of the file.
     * @returns The matching definition, or `null` when the file name has no
     *   dot or no language is registered for its extension.
     */
    public getForFile(filePath: string): LanguageDefinition | null {
        const baseStart = Math.max(filePath.lastIndexOf('/'), filePath.lastIndexOf('\\')) + 1;
        const baseName = filePath.slice(baseStart);

        const dotIndex = baseName.lastIndexOf('.');
        if (dotIndex < 0) return null;

        const ext = baseName.slice(dotIndex);
        // Exact match first keeps previously working lookups unchanged.
        const langName = this.extMap.get(ext) ?? this.extMap.get(ext.toLowerCase());
        if (!langName) return null;

        return this.languages.get(langName) ?? null;
    }

    /** Lists every registered extension, in registration order. */
    public getAllSupportedExtensions(): string[] {
        return Array.from(this.extMap.keys());
    }

    /** Lists the names of all registered languages, in registration order. */
    public listLanguages(): string[] {
        return Array.from(this.languages.keys());
    }
}
|