@getmikk/core 2.0.14 → 2.0.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -4
- package/package.json +2 -1
- package/src/analysis/type-flow.ts +1 -1
- package/src/cache/incremental-cache.ts +86 -80
- package/src/contract/contract-reader.ts +1 -0
- package/src/contract/lock-compiler.ts +95 -13
- package/src/contract/schema.ts +2 -0
- package/src/error-handler.ts +2 -1
- package/src/graph/cluster-detector.ts +2 -4
- package/src/graph/dead-code-detector.ts +303 -117
- package/src/graph/graph-builder.ts +21 -161
- package/src/graph/impact-analyzer.ts +1 -0
- package/src/graph/index.ts +2 -0
- package/src/graph/rich-function-index.ts +1080 -0
- package/src/graph/symbol-table.ts +252 -0
- package/src/hash/hash-store.ts +1 -0
- package/src/index.ts +2 -0
- package/src/parser/base-extractor.ts +19 -0
- package/src/parser/boundary-checker.ts +31 -12
- package/src/parser/error-recovery.ts +5 -4
- package/src/parser/function-body-extractor.ts +248 -0
- package/src/parser/go/go-extractor.ts +249 -676
- package/src/parser/index.ts +132 -318
- package/src/parser/language-registry.ts +57 -0
- package/src/parser/oxc-parser.ts +166 -28
- package/src/parser/oxc-resolver.ts +179 -11
- package/src/parser/parser-constants.ts +1 -0
- package/src/parser/rust/rust-extractor.ts +109 -0
- package/src/parser/tree-sitter/parser.ts +369 -62
- package/src/parser/tree-sitter/queries.ts +106 -10
- package/src/parser/types.ts +20 -1
- package/src/search/bm25.ts +21 -8
- package/src/search/direct-search.ts +472 -0
- package/src/search/embedding-provider.ts +249 -0
- package/src/search/index.ts +12 -0
- package/src/search/semantic-search.ts +435 -0
- package/src/utils/artifact-transaction.ts +1 -0
- package/src/utils/atomic-write.ts +1 -0
- package/src/utils/errors.ts +89 -4
- package/src/utils/fs.ts +104 -50
- package/src/utils/json.ts +1 -0
- package/src/utils/language-registry.ts +84 -6
- package/src/utils/path.ts +26 -0
- package/tests/dead-code.test.ts +3 -2
- package/tests/direct-search.test.ts +435 -0
- package/tests/error-recovery.test.ts +143 -0
- package/tests/fixtures/simple-api/src/index.ts +1 -1
- package/tests/go-parser.test.ts +19 -335
- package/tests/js-parser.test.ts +18 -1089
- package/tests/language-registry-all.test.ts +276 -0
- package/tests/language-registry.test.ts +6 -4
- package/tests/parse-diagnostics.test.ts +9 -96
- package/tests/parser.test.ts +42 -771
- package/tests/polyglot-parser.test.ts +117 -0
- package/tests/rich-function-index.test.ts +703 -0
- package/tests/tree-sitter-parser.test.ts +108 -80
- package/tests/ts-parser.test.ts +8 -8
- package/tests/verification.test.ts +175 -0
- package/src/parser/base-parser.ts +0 -16
- package/src/parser/go/go-parser.ts +0 -43
- package/src/parser/javascript/js-extractor.ts +0 -278
- package/src/parser/javascript/js-parser.ts +0 -101
- package/src/parser/typescript/ts-extractor.ts +0 -447
- package/src/parser/typescript/ts-parser.ts +0 -36
package/src/parser/index.ts
CHANGED
|
@@ -1,18 +1,16 @@
|
|
|
1
|
+
|
|
1
2
|
import * as nodePath from 'node:path'
|
|
2
|
-
import {
|
|
3
|
-
|
|
4
|
-
import
|
|
5
|
-
import
|
|
6
|
-
import
|
|
3
|
+
import { LanguageRegistry } from './language-registry.js'
|
|
4
|
+
export { LanguageRegistry } from './language-registry.js'
|
|
5
|
+
import './oxc-parser.js'
|
|
6
|
+
import './tree-sitter/parser.js'
|
|
7
|
+
import './go/go-extractor.js'
|
|
8
|
+
import { BaseExtractor } from './base-extractor.js'
|
|
7
9
|
import { hashContent } from '../hash/file-hasher.js'
|
|
8
10
|
import { IncrementalCache } from '../cache/incremental-cache.js'
|
|
9
|
-
import {
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
getParserExtensions,
|
|
13
|
-
isTreeSitterExtension,
|
|
14
|
-
type ParserKind,
|
|
15
|
-
} from '../utils/language-registry.js'
|
|
11
|
+
import { languageForExtension, toParsedFileLanguage } from '../utils/language-registry.js'
|
|
12
|
+
import { ErrorRecoveryEngine } from './error-recovery.js'
|
|
13
|
+
import type { ParsedFile } from './types.js'
|
|
16
14
|
|
|
17
15
|
export type {
|
|
18
16
|
ParsedFile,
|
|
@@ -26,23 +24,15 @@ export type {
|
|
|
26
24
|
ParsedGeneric,
|
|
27
25
|
ParsedRoute
|
|
28
26
|
} from './types.js'
|
|
29
|
-
|
|
30
|
-
export {
|
|
31
|
-
export { TypeScriptExtractor } from './typescript/ts-extractor.js'
|
|
32
|
-
export { TypeScriptResolver } from './typescript/ts-resolver.js'
|
|
33
|
-
export { GoParser } from './go/go-parser.js'
|
|
34
|
-
export { GoExtractor } from './go/go-extractor.js'
|
|
35
|
-
export { GoResolver } from './go/go-resolver.js'
|
|
36
|
-
export { JavaScriptParser } from './javascript/js-parser.js'
|
|
37
|
-
export { JavaScriptExtractor } from './javascript/js-extractor.js'
|
|
38
|
-
export { JavaScriptResolver } from './javascript/js-resolver.js'
|
|
27
|
+
|
|
28
|
+
export { BaseExtractor } from './base-extractor.js'
|
|
39
29
|
export { BoundaryChecker } from './boundary-checker.js'
|
|
40
|
-
export { TreeSitterParser } from './tree-sitter/parser.js'
|
|
41
30
|
|
|
42
31
|
export type ParseDiagnosticStage = 'read' | 'parse' | 'resolve-imports'
|
|
43
32
|
export type ParseDiagnosticReason =
|
|
44
33
|
| 'read-error'
|
|
45
34
|
| 'parse-error'
|
|
35
|
+
| 'parse-error-recovered'
|
|
46
36
|
| 'resolve-error'
|
|
47
37
|
| 'unsupported-extension'
|
|
48
38
|
| 'parser-unavailable'
|
|
@@ -50,7 +40,6 @@ export type ParseDiagnosticReason =
|
|
|
50
40
|
export interface ParseDiagnostic {
|
|
51
41
|
filePath: string
|
|
52
42
|
extension: string
|
|
53
|
-
parser: ParserKind
|
|
54
43
|
stage: ParseDiagnosticStage
|
|
55
44
|
reason: ParseDiagnosticReason
|
|
56
45
|
message: string
|
|
@@ -71,18 +60,9 @@ export interface ParseFilesResult {
|
|
|
71
60
|
summary: ParseFilesSummary
|
|
72
61
|
}
|
|
73
62
|
|
|
74
|
-
const isLikelyParserUnavailable = (parser: ParserKind, message: string): boolean => {
|
|
75
|
-
if (parser !== 'tree-sitter') return false
|
|
76
|
-
const normalized = message.toLowerCase()
|
|
77
|
-
return normalized.includes('web-tree-sitter') ||
|
|
78
|
-
normalized.includes('tree-sitter') ||
|
|
79
|
-
normalized.includes('cannot find module')
|
|
80
|
-
}
|
|
81
|
-
|
|
82
|
-
|
|
83
63
|
const buildFallbackParsedFile = (filePath: string, content: string, ext: string): ParsedFile => ({
|
|
84
64
|
path: filePath,
|
|
85
|
-
language: languageForExtension(ext)
|
|
65
|
+
language: toParsedFileLanguage(languageForExtension(ext)),
|
|
86
66
|
functions: [],
|
|
87
67
|
classes: [],
|
|
88
68
|
generics: [],
|
|
@@ -101,212 +81,105 @@ const normalizeErrorMessage = (err: unknown): string => {
|
|
|
101
81
|
return String(err)
|
|
102
82
|
}
|
|
103
83
|
|
|
104
|
-
/** Get the appropriate parser for a file based on its extension */
|
|
105
|
-
export function getParser(filePath: string): BaseParser {
|
|
106
|
-
const ext = nodePath.extname(filePath).toLowerCase()
|
|
107
|
-
const parserKind = parserKindForExtension(ext)
|
|
108
|
-
|
|
109
|
-
switch (parserKind) {
|
|
110
|
-
case 'oxc':
|
|
111
|
-
return new OxcParser()
|
|
112
|
-
case 'go':
|
|
113
|
-
return new GoParser()
|
|
114
|
-
case 'tree-sitter':
|
|
115
|
-
return createTreeSitterParser()
|
|
116
|
-
default:
|
|
117
|
-
throw new UnsupportedLanguageError(ext || '<no extension>')
|
|
118
|
-
}
|
|
119
|
-
}
|
|
120
|
-
|
|
121
|
-
let _treeSitterParserInstance: BaseParser | null = null
|
|
122
|
-
|
|
123
|
-
const createTreeSitterParser = (): BaseParser => {
|
|
124
|
-
if (!_treeSitterParserInstance) {
|
|
125
|
-
// Return a lazy-loading wrapper that handles missing tree-sitter gracefully.
|
|
126
|
-
_treeSitterParserInstance = new LazyTreeSitterParser()
|
|
127
|
-
}
|
|
128
|
-
return _treeSitterParserInstance
|
|
129
|
-
}
|
|
130
|
-
|
|
131
|
-
class LazyTreeSitterParser extends BaseParser {
|
|
132
|
-
private parser: any = null
|
|
133
|
-
|
|
134
|
-
async init(): Promise<void> {
|
|
135
|
-
if (this.parser) return
|
|
136
|
-
try {
|
|
137
|
-
const { TreeSitterParser } = await import('./tree-sitter/parser.js')
|
|
138
|
-
this.parser = new TreeSitterParser()
|
|
139
|
-
} catch {
|
|
140
|
-
// web-tree-sitter not available
|
|
141
|
-
}
|
|
142
|
-
}
|
|
143
|
-
|
|
144
|
-
async parse(filePath: string, content: string): Promise<ParsedFile> {
|
|
145
|
-
await this.init()
|
|
146
|
-
if (!this.parser) {
|
|
147
|
-
return this.buildEmptyFile(filePath, content)
|
|
148
|
-
}
|
|
149
|
-
return this.parser.parse(filePath, content)
|
|
150
|
-
}
|
|
151
|
-
|
|
152
|
-
async resolveImports(files: ParsedFile[], projectRoot: string): Promise<ParsedFile[]> {
|
|
153
|
-
await this.init()
|
|
154
|
-
if (!this.parser) return files
|
|
155
|
-
return this.parser.resolveImports(files, projectRoot)
|
|
156
|
-
}
|
|
157
|
-
|
|
158
|
-
getSupportedExtensions(): string[] {
|
|
159
|
-
return [...getParserExtensions('tree-sitter')]
|
|
160
|
-
}
|
|
161
|
-
|
|
162
|
-
private buildEmptyFile(filePath: string, content: string): ParsedFile {
|
|
163
|
-
const ext = nodePath.extname(filePath).toLowerCase()
|
|
164
|
-
const lang = languageForExtension(ext)
|
|
165
|
-
return {
|
|
166
|
-
path: filePath,
|
|
167
|
-
language: lang as ParsedFile['language'],
|
|
168
|
-
functions: [],
|
|
169
|
-
classes: [],
|
|
170
|
-
generics: [],
|
|
171
|
-
imports: [],
|
|
172
|
-
exports: [],
|
|
173
|
-
routes: [],
|
|
174
|
-
variables: [],
|
|
175
|
-
calls: [],
|
|
176
|
-
hash: hashContent(content),
|
|
177
|
-
parsedAt: Date.now(),
|
|
178
|
-
}
|
|
179
|
-
}
|
|
180
|
-
}
|
|
181
|
-
|
|
182
84
|
/** Optional settings accepted by `parseFilesWithDiagnostics`. */
export interface ParseFilesOptions {
    /**
     * NOTE(review): the visible part of `parseFilesWithDiagnostics` does not
     * read this flag — confirm whether it is still honoured elsewhere.
     */
    strictParserPreflight?: boolean

    /**
     * Number of files handed to `parallelBatch` per batch when reading file
     * contents. `DEFAULT_CONCURRENCY` is used when this is omitted.
     */
    concurrency?: number
}
|
|
186
88
|
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
89
|
+
const DEFAULT_CONCURRENCY = 32
|
|
90
|
+
|
|
91
|
+
/**
 * Applies `processor` to every item, running at most `concurrency` calls at a
 * time, and returns the results in the same order as `items`.
 *
 * Items are processed in consecutive batches; a batch is fully settled before
 * the next one starts.
 *
 * @param items - Inputs to process.
 * @param processor - Async function invoked once per item.
 * @param concurrency - Maximum batch size. Values below 1 (or NaN) are treated
 *   as 1 and fractional values are floored, so the loop always advances and
 *   result indices stay integral.
 * @returns One result per input item, index-aligned with `items`.
 * @throws The first rejection reason of the first failing batch (wrapped in an
 *   `Error` when it is not one already). Previously rejections were dropped and
 *   left `undefined` holes in the returned array.
 */
async function parallelBatch<T, R>(
    items: T[],
    processor: (item: T) => Promise<R>,
    concurrency: number
): Promise<R[]> {
    // A step of 0 or a negative step would never terminate; NaN would skip every item.
    const batchSize = concurrency >= 1 ? Math.floor(concurrency) : 1
    const results: R[] = new Array(items.length)

    for (let start = 0; start < items.length; start += batchSize) {
        const batch = items.slice(start, start + batchSize)
        // allSettled lets every call in the batch finish before a failure is reported.
        const settled = await Promise.allSettled(batch.map((item) => processor(item)))

        for (let offset = 0; offset < settled.length; offset++) {
            const outcome = settled[offset]
            if (outcome.status === 'rejected') {
                throw outcome.reason instanceof Error
                    ? outcome.reason
                    : new Error(String(outcome.reason))
            }
            results[start + offset] = outcome.value
        }
    }

    return results
}
|
|
199
115
|
|
|
116
|
+
/**
|
|
117
|
+
* Main entry point for scanning and parsing multiple files.
|
|
118
|
+
* Uses LanguageRegistry to dispatch to the correct extractor.
|
|
119
|
+
* File contents are read concurrently in batches (see `options.concurrency`); extraction then runs one file at a time.
|
|
120
|
+
*/
|
|
200
121
|
export async function parseFilesWithDiagnostics(
|
|
201
122
|
filePaths: string[],
|
|
202
123
|
projectRoot: string,
|
|
203
124
|
readFile: (fp: string) => Promise<string>,
|
|
204
125
|
options: ParseFilesOptions = {},
|
|
205
126
|
): Promise<ParseFilesResult> {
|
|
206
|
-
// Shared parser instances — avoid re-initialisation overhead per file.
|
|
207
|
-
const oxcParser = new OxcParser()
|
|
208
|
-
const goParser = new GoParser()
|
|
209
|
-
|
|
210
|
-
// Lazily loaded to avoid mandatory dependency on tree-sitter for TS/JS-only projects.
|
|
211
|
-
let treeSitterParser: BaseParser | null = null
|
|
212
|
-
const getTreeSitter = async (): Promise<BaseParser> => {
|
|
213
|
-
if (!treeSitterParser) {
|
|
214
|
-
const { TreeSitterParser } = await import('./tree-sitter/parser.js')
|
|
215
|
-
treeSitterParser = new TreeSitterParser()
|
|
216
|
-
}
|
|
217
|
-
return treeSitterParser
|
|
218
|
-
}
|
|
219
|
-
|
|
220
127
|
const diagnostics: ParseDiagnostic[] = []
|
|
221
|
-
const addDiagnostic = (diagnostic: ParseDiagnostic) => diagnostics.push(diagnostic)
|
|
222
|
-
|
|
223
|
-
const treeSitterNeeded = filePaths.some(fp => {
|
|
224
|
-
const ext = nodePath.extname(fp).toLowerCase()
|
|
225
|
-
return isTreeSitterExtension(ext)
|
|
226
|
-
})
|
|
227
|
-
|
|
228
|
-
let treeSitterAvailable = true
|
|
229
|
-
if (treeSitterNeeded) {
|
|
230
|
-
treeSitterAvailable =
|
|
231
|
-
typeof options.treeSitterRuntimeAvailable === 'boolean'
|
|
232
|
-
? options.treeSitterRuntimeAvailable
|
|
233
|
-
: await isTreeSitterRuntimeAvailable()
|
|
234
|
-
if (!treeSitterAvailable) {
|
|
235
|
-
addDiagnostic({
|
|
236
|
-
filePath: '*',
|
|
237
|
-
extension: '*',
|
|
238
|
-
parser: 'tree-sitter',
|
|
239
|
-
stage: 'parse',
|
|
240
|
-
reason: 'parser-unavailable',
|
|
241
|
-
message: 'Tree-sitter runtime unavailable. Install web-tree-sitter and language grammars.',
|
|
242
|
-
})
|
|
243
|
-
if (options.strictParserPreflight) {
|
|
244
|
-
return {
|
|
245
|
-
files: [],
|
|
246
|
-
diagnostics,
|
|
247
|
-
summary: {
|
|
248
|
-
requestedFiles: filePaths.length,
|
|
249
|
-
parsedFiles: 0,
|
|
250
|
-
fallbackFiles: 0,
|
|
251
|
-
unreadableFiles: 0,
|
|
252
|
-
unsupportedFiles: 0,
|
|
253
|
-
diagnostics: diagnostics.length,
|
|
254
|
-
},
|
|
255
|
-
}
|
|
256
|
-
}
|
|
257
|
-
}
|
|
258
|
-
}
|
|
259
|
-
|
|
260
|
-
// Initialize incremental cache
|
|
261
|
-
const cache = new IncrementalCache(projectRoot)
|
|
262
|
-
|
|
263
|
-
// Normalized project root for absolute path construction.
|
|
264
128
|
const normalizedRoot = nodePath.resolve(projectRoot).replace(/\\/g, '/')
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
const
|
|
268
|
-
const
|
|
269
|
-
|
|
129
|
+
const concurrency = options.concurrency ?? DEFAULT_CONCURRENCY
|
|
130
|
+
|
|
131
|
+
const cache = new IncrementalCache(projectRoot)
|
|
132
|
+
const registry = LanguageRegistry.getInstance()
|
|
133
|
+
|
|
134
|
+
const filesByExtractor = new Map<BaseExtractor, ParsedFile[]>()
|
|
270
135
|
const fallbackFiles: ParsedFile[] = []
|
|
271
|
-
|
|
136
|
+
const _pendingDiagnostics: Array<{ filePath: string; ext: string; stage: ParseDiagnosticStage; reason: ParseDiagnosticReason; message: string }> = []
|
|
137
|
+
|
|
272
138
|
let parsedFilesCount = 0
|
|
273
139
|
let fallbackFilesCount = 0
|
|
274
140
|
let unreadableFiles = 0
|
|
275
141
|
let unsupportedFiles = 0
|
|
276
142
|
|
|
277
|
-
|
|
278
|
-
// mutable per-instance state (e.g. language switching/counters).
|
|
279
|
-
for (const fp of filePaths) {
|
|
280
|
-
const ext = nodePath.extname(fp).toLowerCase()
|
|
281
|
-
const parserKind = parserKindForExtension(ext)
|
|
282
|
-
|
|
283
|
-
// Build absolute posix path — this is the single source of truth for all IDs.
|
|
143
|
+
const fileResults = await parallelBatch(filePaths, async (fp) => {
|
|
284
144
|
const absoluteFp = nodePath.resolve(normalizedRoot, fp).replace(/\\/g, '/')
|
|
285
|
-
|
|
286
|
-
|
|
145
|
+
const ext = nodePath.extname(absoluteFp).toLowerCase()
|
|
146
|
+
const langDef = registry.getForFile(absoluteFp)
|
|
147
|
+
|
|
287
148
|
try {
|
|
288
|
-
content = await readFile(absoluteFp)
|
|
149
|
+
const content = await readFile(absoluteFp)
|
|
150
|
+
return { absoluteFp, ext, langDef, content }
|
|
289
151
|
} catch (err: unknown) {
|
|
152
|
+
return {
|
|
153
|
+
absoluteFp, ext, langDef, content: null,
|
|
154
|
+
error: normalizeErrorMessage(err)
|
|
155
|
+
}
|
|
156
|
+
}
|
|
157
|
+
}, concurrency)
|
|
158
|
+
|
|
159
|
+
for (const result of fileResults) {
|
|
160
|
+
if (result.error) {
|
|
290
161
|
unreadableFiles += 1
|
|
291
|
-
|
|
292
|
-
filePath: absoluteFp,
|
|
293
|
-
extension: ext,
|
|
294
|
-
parser: parserKind,
|
|
162
|
+
diagnostics.push({
|
|
163
|
+
filePath: result.absoluteFp,
|
|
164
|
+
extension: result.ext,
|
|
295
165
|
stage: 'read',
|
|
296
166
|
reason: 'read-error',
|
|
297
|
-
message:
|
|
167
|
+
message: result.error,
|
|
298
168
|
})
|
|
299
169
|
continue
|
|
300
170
|
}
|
|
301
171
|
|
|
302
|
-
if (
|
|
172
|
+
if (result.content === null) continue
|
|
173
|
+
|
|
174
|
+
const { absoluteFp, ext, langDef, content } = result
|
|
175
|
+
|
|
176
|
+
if (!langDef) {
|
|
303
177
|
unsupportedFiles += 1
|
|
304
178
|
fallbackFilesCount += 1
|
|
305
|
-
fallbackFiles.push(buildFallbackParsedFile(absoluteFp, content
|
|
306
|
-
|
|
179
|
+
fallbackFiles.push(buildFallbackParsedFile(absoluteFp, content!, ext))
|
|
180
|
+
diagnostics.push({
|
|
307
181
|
filePath: absoluteFp,
|
|
308
182
|
extension: ext,
|
|
309
|
-
parser: parserKind,
|
|
310
183
|
stage: 'parse',
|
|
311
184
|
reason: 'unsupported-extension',
|
|
312
185
|
message: `Unsupported extension: ${ext || '<none>'}`,
|
|
@@ -315,136 +188,86 @@ export async function parseFilesWithDiagnostics(
|
|
|
315
188
|
}
|
|
316
189
|
|
|
317
190
|
try {
|
|
318
|
-
|
|
319
|
-
const contentHash = hashContent(content)
|
|
320
|
-
|
|
321
|
-
// Check cache first
|
|
191
|
+
const contentHash = hashContent(content!)
|
|
322
192
|
const cached = await cache.get(absoluteFp, contentHash)
|
|
193
|
+
|
|
323
194
|
if (cached) {
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
} else if (parserKind === 'go') {
|
|
328
|
-
goFiles.push(cached)
|
|
329
|
-
} else {
|
|
330
|
-
treeFiles.push(cached)
|
|
331
|
-
}
|
|
195
|
+
const group = filesByExtractor.get(langDef.extractor) || []
|
|
196
|
+
group.push(cached)
|
|
197
|
+
filesByExtractor.set(langDef.extractor, group)
|
|
332
198
|
parsedFilesCount += 1
|
|
333
199
|
continue
|
|
334
200
|
}
|
|
335
201
|
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
const parsed = await goParser.parse(absoluteFp, content)
|
|
344
|
-
await cache.set(absoluteFp, contentHash, parsed)
|
|
345
|
-
goFiles.push(parsed)
|
|
346
|
-
parsedFilesCount += 1
|
|
347
|
-
} else {
|
|
348
|
-
if (!treeSitterAvailable) {
|
|
349
|
-
fallbackFilesCount += 1
|
|
350
|
-
fallbackFiles.push(buildFallbackParsedFile(absoluteFp, content, ext))
|
|
351
|
-
addDiagnostic({
|
|
352
|
-
filePath: absoluteFp,
|
|
353
|
-
extension: ext,
|
|
354
|
-
parser: 'tree-sitter',
|
|
355
|
-
stage: 'parse',
|
|
356
|
-
reason: 'parser-unavailable',
|
|
357
|
-
message: 'Tree-sitter runtime unavailable. Falling back to empty parsed file.',
|
|
358
|
-
})
|
|
359
|
-
continue
|
|
360
|
-
}
|
|
361
|
-
const ts = await getTreeSitter()
|
|
362
|
-
const parsed = await ts.parse(absoluteFp, content)
|
|
363
|
-
await cache.set(absoluteFp, contentHash, parsed)
|
|
364
|
-
treeFiles.push(parsed)
|
|
365
|
-
parsedFilesCount += 1
|
|
366
|
-
}
|
|
202
|
+
const parsed = await langDef.extractor.extract(absoluteFp, content!)
|
|
203
|
+
await cache.set(absoluteFp, contentHash, parsed)
|
|
204
|
+
|
|
205
|
+
const group = filesByExtractor.get(langDef.extractor) || []
|
|
206
|
+
group.push(parsed)
|
|
207
|
+
filesByExtractor.set(langDef.extractor, group)
|
|
208
|
+
parsedFilesCount += 1
|
|
367
209
|
} catch (err: unknown) {
|
|
210
|
+
if (process.env.MIKK_DEBUG) {
|
|
211
|
+
console.error(`[parser] Error extracting ${absoluteFp}:`, err instanceof Error ? err.message : String(err))
|
|
212
|
+
}
|
|
368
213
|
fallbackFilesCount += 1
|
|
369
|
-
|
|
370
|
-
const
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
parser: 'oxc',
|
|
396
|
-
stage: 'resolve-imports',
|
|
397
|
-
reason: 'resolve-error',
|
|
398
|
-
message: normalizeErrorMessage(err),
|
|
399
|
-
})
|
|
400
|
-
}
|
|
401
|
-
}
|
|
402
|
-
|
|
403
|
-
let resolvedGoFiles = goFiles
|
|
404
|
-
if (goFiles.length > 0) {
|
|
405
|
-
try {
|
|
406
|
-
resolvedGoFiles = await goParser.resolveImports(goFiles, normalizedRoot)
|
|
407
|
-
} catch (err: unknown) {
|
|
408
|
-
addDiagnostic({
|
|
409
|
-
filePath: '*',
|
|
410
|
-
extension: '*',
|
|
411
|
-
parser: 'go',
|
|
412
|
-
stage: 'resolve-imports',
|
|
413
|
-
reason: 'resolve-error',
|
|
414
|
-
message: normalizeErrorMessage(err),
|
|
415
|
-
})
|
|
214
|
+
|
|
215
|
+
const errorMessage = normalizeErrorMessage(err)
|
|
216
|
+
const language = langDef?.name ?? languageForExtension(ext) ?? 'unknown'
|
|
217
|
+
|
|
218
|
+
const recoveryEngine = new ErrorRecoveryEngine()
|
|
219
|
+
const recoveryResult = await recoveryEngine.recover(absoluteFp, content!, language)
|
|
220
|
+
|
|
221
|
+
if (recoveryResult.success && recoveryResult.confidence > 0.2) {
|
|
222
|
+
fallbackFiles.push(recoveryResult.parsed)
|
|
223
|
+
diagnostics.push({
|
|
224
|
+
filePath: absoluteFp,
|
|
225
|
+
extension: ext,
|
|
226
|
+
stage: 'parse',
|
|
227
|
+
reason: 'parse-error-recovered',
|
|
228
|
+
message: `${errorMessage} | Recovered ${recoveryResult.strategy} (confidence: ${(recoveryResult.confidence * 100).toFixed(0)}%) | ${recoveryResult.parsed.functions.length} fns, ${recoveryResult.parsed.classes.length} classes`,
|
|
229
|
+
})
|
|
230
|
+
} else {
|
|
231
|
+
fallbackFiles.push(buildFallbackParsedFile(absoluteFp, content!, ext))
|
|
232
|
+
diagnostics.push({
|
|
233
|
+
filePath: absoluteFp,
|
|
234
|
+
extension: ext,
|
|
235
|
+
stage: 'parse',
|
|
236
|
+
reason: 'parse-error',
|
|
237
|
+
message: errorMessage,
|
|
238
|
+
})
|
|
239
|
+
}
|
|
416
240
|
}
|
|
417
241
|
}
|
|
418
242
|
|
|
419
|
-
|
|
420
|
-
if (treeFiles.length > 0) {
|
|
243
|
+
for (const [extractor, files] of filesByExtractor.entries()) {
|
|
421
244
|
try {
|
|
422
|
-
const
|
|
423
|
-
|
|
245
|
+
const resolved = await extractor.resolveImports(files, normalizedRoot)
|
|
246
|
+
|
|
247
|
+
for (let i = 0; i < files.length; i++) {
|
|
248
|
+
const originalFile = files[i]
|
|
249
|
+
const resolvedFile = resolved[i]
|
|
250
|
+
if (resolvedFile && resolvedFile !== originalFile) {
|
|
251
|
+
files[i] = resolvedFile
|
|
252
|
+
}
|
|
253
|
+
}
|
|
254
|
+
fallbackFiles.push(...files)
|
|
424
255
|
} catch (err: unknown) {
|
|
425
|
-
|
|
256
|
+
diagnostics.push({
|
|
426
257
|
filePath: '*',
|
|
427
258
|
extension: '*',
|
|
428
|
-
parser: 'tree-sitter',
|
|
429
259
|
stage: 'resolve-imports',
|
|
430
260
|
reason: 'resolve-error',
|
|
431
261
|
message: normalizeErrorMessage(err),
|
|
432
262
|
})
|
|
263
|
+
fallbackFiles.push(...files)
|
|
433
264
|
}
|
|
434
265
|
}
|
|
435
266
|
|
|
436
|
-
const resolved: ParsedFile[] = [
|
|
437
|
-
...resolvedOxcFiles,
|
|
438
|
-
...resolvedGoFiles,
|
|
439
|
-
...resolvedTreeFiles,
|
|
440
|
-
...fallbackFiles,
|
|
441
|
-
]
|
|
442
|
-
|
|
443
|
-
// Persist cache metadata
|
|
444
267
|
cache.flush()
|
|
445
268
|
|
|
446
269
|
return {
|
|
447
|
-
files:
|
|
270
|
+
files: fallbackFiles,
|
|
448
271
|
diagnostics,
|
|
449
272
|
summary: {
|
|
450
273
|
requestedFiles: filePaths.length,
|
|
@@ -457,15 +280,6 @@ export async function parseFilesWithDiagnostics(
|
|
|
457
280
|
}
|
|
458
281
|
}
|
|
459
282
|
|
|
460
|
-
/**
|
|
461
|
-
* Parse multiple files, resolve their imports, and return ParsedFile[].
|
|
462
|
-
*
|
|
463
|
-
* Path contract (critical for graph correctness):
|
|
464
|
-
* - filePaths come from discoverFiles() as project-root-relative strings
|
|
465
|
-
* - We resolve them to ABSOLUTE posix paths before passing to parse()
|
|
466
|
-
* - ParsedFile.path is therefore always absolute + forward-slash
|
|
467
|
-
* - OxcResolver also returns absolute paths → import edges always consistent
|
|
468
|
-
*/
|
|
469
283
|
export async function parseFiles(
|
|
470
284
|
filePaths: string[],
|
|
471
285
|
projectRoot: string,
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
import type { SyntaxNode } from 'web-tree-sitter';
|
|
2
|
+
import { BaseExtractor } from './base-extractor.js';
|
|
3
|
+
|
|
4
|
+
/**
 * Describes one language that can be registered with `LanguageRegistry`.
 */
export interface LanguageDefinition {
  /** Key under which `LanguageRegistry.register` stores this definition. */
  name: string;

  /**
   * Extensions that `LanguageRegistry.register` maps to this language.
   * NOTE(review): lookups slice the path from its last `.`, so entries
   * presumably include the leading dot (e.g. `.ts`) — confirm against the
   * actual registrations.
   */
  extensions: string[];

  /** NOTE(review): presumably the tree-sitter grammar identifier; not read in this file. */
  treeSitterGrammar: string;

  /** Extractor instance associated with this language. */
  extractor: BaseExtractor;

  /** Boolean capability flags for the language; not read in this file. */
  semanticFeatures: {
    hasTypeSystem: boolean;
    hasGenerics: boolean;
    hasMacros: boolean;
    hasAnnotations: boolean;
    hasPatternMatching: boolean;
  };

  /** Optional per-language hooks; not invoked in this file. */
  specialHandling?: {
    importResolution?: (source: string, filePath: string) => Promise<string[]>;
    exportDetection?: (node: SyntaxNode) => boolean;
  };
}
|
|
21
|
+
|
|
22
|
+
/**
 * Singleton registry that maps file extensions to registered
 * `LanguageDefinition`s.
 *
 * Obtain the shared instance with `LanguageRegistry.getInstance()`; the
 * constructor is private.
 */
export class LanguageRegistry {
  private static instance: LanguageRegistry;
  /** Language name -> definition. */
  private languages: Map<string, LanguageDefinition> = new Map();
  /** Extension (as registered) -> language name. */
  private extMap: Map<string, string> = new Map();

  private constructor() {}

  /** Returns the shared registry, creating it on first use. */
  public static getInstance(): LanguageRegistry {
    if (!LanguageRegistry.instance) {
      LanguageRegistry.instance = new LanguageRegistry();
    }
    return LanguageRegistry.instance;
  }

  /**
   * Registers (or replaces) a language and maps each of its extensions to it.
   * A later registration of the same name or extension overwrites the earlier one.
   */
  public register(lang: LanguageDefinition): void {
    this.languages.set(lang.name, lang);
    for (const ext of lang.extensions) {
      this.extMap.set(ext, lang.name);
    }
  }

  /**
   * Looks up the language registered for a file's extension.
   *
   * The extension is taken from the last path segment only, so a dot in a
   * directory name is never mistaken for an extension, and a file name without
   * any dot yields `null` instead of a bogus one-character "extension".
   * An exact match is tried first; if that fails the lower-cased extension is
   * tried, so `Foo.TS` resolves to a language registered for `.ts`.
   *
   * @param filePath - Path using `/` or `\` separators.
   * @returns The matching definition, or `null` when none is registered.
   */
  public getForFile(filePath: string): LanguageDefinition | null {
    const lastSeparator = Math.max(filePath.lastIndexOf('/'), filePath.lastIndexOf('\\'));
    const baseName = filePath.slice(lastSeparator + 1);

    const dotIndex = baseName.lastIndexOf('.');
    if (dotIndex < 0) return null;

    const ext = baseName.slice(dotIndex);
    const langName = this.extMap.get(ext) ?? this.extMap.get(ext.toLowerCase());
    if (!langName) return null;

    return this.languages.get(langName) ?? null;
  }

  /** Returns every registered extension, in registration order. */
  public getAllSupportedExtensions(): string[] {
    return Array.from(this.extMap.keys());
  }

  /** Returns the names of all registered languages, in registration order. */
  public listLanguages(): string[] {
    return Array.from(this.languages.keys());
  }
}
|