@getmikk/core 2.0.13 → 2.0.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -4
- package/package.json +2 -1
- package/src/analysis/index.ts +9 -0
- package/src/analysis/taint-analysis.ts +419 -0
- package/src/analysis/type-flow.ts +247 -0
- package/src/cache/incremental-cache.ts +278 -0
- package/src/cache/index.ts +1 -0
- package/src/contract/contract-generator.ts +31 -3
- package/src/contract/contract-reader.ts +1 -0
- package/src/contract/lock-compiler.ts +125 -12
- package/src/contract/schema.ts +4 -0
- package/src/error-handler.ts +2 -1
- package/src/graph/cluster-detector.ts +2 -4
- package/src/graph/dead-code-detector.ts +303 -117
- package/src/graph/graph-builder.ts +21 -161
- package/src/graph/impact-analyzer.ts +1 -0
- package/src/graph/index.ts +2 -0
- package/src/graph/rich-function-index.ts +1080 -0
- package/src/graph/symbol-table.ts +252 -0
- package/src/hash/hash-store.ts +1 -0
- package/src/index.ts +4 -0
- package/src/parser/base-extractor.ts +19 -0
- package/src/parser/boundary-checker.ts +31 -12
- package/src/parser/error-recovery.ts +647 -0
- package/src/parser/function-body-extractor.ts +248 -0
- package/src/parser/go/go-extractor.ts +249 -676
- package/src/parser/index.ts +138 -295
- package/src/parser/language-registry.ts +57 -0
- package/src/parser/oxc-parser.ts +166 -28
- package/src/parser/oxc-resolver.ts +179 -11
- package/src/parser/parser-constants.ts +1 -0
- package/src/parser/rust/rust-extractor.ts +109 -0
- package/src/parser/tree-sitter/parser.ts +400 -66
- package/src/parser/tree-sitter/queries.ts +106 -10
- package/src/parser/types.ts +20 -1
- package/src/search/bm25.ts +21 -8
- package/src/search/direct-search.ts +472 -0
- package/src/search/embedding-provider.ts +249 -0
- package/src/search/index.ts +12 -0
- package/src/search/semantic-search.ts +435 -0
- package/src/security/index.ts +1 -0
- package/src/security/scanner.ts +342 -0
- package/src/utils/artifact-transaction.ts +1 -0
- package/src/utils/atomic-write.ts +1 -0
- package/src/utils/errors.ts +89 -4
- package/src/utils/fs.ts +150 -65
- package/src/utils/json.ts +1 -0
- package/src/utils/language-registry.ts +96 -5
- package/src/utils/minimatch.ts +49 -6
- package/src/utils/path.ts +26 -0
- package/tests/dead-code.test.ts +3 -2
- package/tests/direct-search.test.ts +435 -0
- package/tests/error-recovery.test.ts +143 -0
- package/tests/fixtures/simple-api/src/index.ts +1 -1
- package/tests/go-parser.test.ts +19 -335
- package/tests/js-parser.test.ts +18 -1089
- package/tests/language-registry-all.test.ts +276 -0
- package/tests/language-registry.test.ts +6 -4
- package/tests/parse-diagnostics.test.ts +9 -96
- package/tests/parser.test.ts +42 -771
- package/tests/polyglot-parser.test.ts +117 -0
- package/tests/rich-function-index.test.ts +703 -0
- package/tests/tree-sitter-parser.test.ts +108 -80
- package/tests/ts-parser.test.ts +8 -8
- package/tests/verification.test.ts +175 -0
- package/src/parser/base-parser.ts +0 -16
- package/src/parser/go/go-parser.ts +0 -43
- package/src/parser/javascript/js-extractor.ts +0 -278
- package/src/parser/javascript/js-parser.ts +0 -101
- package/src/parser/typescript/ts-extractor.ts +0 -447
- package/src/parser/typescript/ts-parser.ts +0 -36
|
@@ -1,8 +1,10 @@
|
|
|
1
|
+
/* eslint-disable @typescript-eslint/no-explicit-any */
|
|
1
2
|
import * as path from 'node:path'
|
|
2
3
|
import { TreeSitterResolver } from './resolver.js'
|
|
4
|
+
import { LanguageRegistry } from '../language-registry.js'
|
|
3
5
|
import { createRequire } from 'node:module'
|
|
4
6
|
import { hashContent } from '../../hash/file-hasher.js'
|
|
5
|
-
import {
|
|
7
|
+
import { BaseExtractor } from '../base-extractor.js'
|
|
6
8
|
import type { ParsedFile, ParsedFunction, ParsedClass, ParsedParam, ParsedImport, ParsedGeneric } from '../types.js'
|
|
7
9
|
import * as Queries from './queries.js'
|
|
8
10
|
|
|
@@ -15,20 +17,106 @@ const _require = getRequire()
|
|
|
15
17
|
let Parser: any = null
|
|
16
18
|
let Language: any = null
|
|
17
19
|
let initialized = false
|
|
18
|
-
let initPromise: Promise<
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
20
|
+
let initPromise: Promise<boolean> | null = null
|
|
21
|
+
|
|
22
|
+
/**
 * Heuristically decides whether `module` looks like a usable `web-tree-sitter` export.
 *
 * A candidate is rejected when it is nullish or exposes any of `HEAP8`/`HEAP16`/`HEAP32`
 * (presumably the raw WASM runtime object rather than the parser API — confirm against
 * the web-tree-sitter build in use). Otherwise it is accepted when at least one of these holds:
 *   - it has an `init` function,
 *   - it has a defined `Language` property,
 *   - its `default` export has a truthy `Language`,
 *   - it is a function whose `prototype` carries a truthy `init` or `Language`.
 *
 * @param module Value returned by `require('web-tree-sitter')` (or its `default`).
 * @returns A strict boolean; never `undefined` or a function reference.
 */
function isValidTreeSitterModule(module: any): boolean {
    if (!module) return false
    if (module.HEAP8 || module.HEAP16 || module.HEAP32) return false

    const hasInit = typeof module.init === 'function'
    const hasLanguage = typeof module.Language !== 'undefined'
    const hasDefaultLanguage = Boolean(module.default?.Language)

    // Only constructor-like exports are probed through their prototype.
    const proto = typeof module === 'function' ? module.prototype : undefined
    // Coerce explicitly: the raw `proto?.init` value would otherwise leak out of a
    // function that is declared to return `boolean`.
    const hasPrototypeApi = Boolean(proto?.init || proto?.Language)

    return hasInit || hasLanguage || hasDefaultLanguage || hasPrototypeApi
}
|
|
32
|
+
|
|
33
|
+
/**
 * Lazily loads `web-tree-sitter` and populates the module-level `Parser`,
 * `Language` and `initialized` state.
 *
 * The in-flight attempt is stored in `initPromise`, so callers arriving while a
 * load is running wait for it instead of starting another one.
 *
 * @returns `true` once both `initialized` and `Language` are set, otherwise `false`.
 *          Load errors are logged with `console.warn` and reported as `false`.
 */
async function ensureInitialized(): Promise<boolean> {
    // Fast path: an earlier call already completed successfully.
    if (initialized && Language) return true

    // A single load attempt. Everything is wrapped in try/catch, so the returned
    // promise resolves to `false` on failure rather than rejecting.
    const attemptInit = async (): Promise<boolean> => {
        try {
            let loaded = _require('web-tree-sitter')
            if (!loaded) return false

            // Constructor-style export: look for the API on the prototype first.
            if (typeof loaded === 'function') {
                if (loaded.prototype?.Language) {
                    Language = loaded.prototype.Language
                    Parser = loaded
                    initialized = true
                    return true
                }
                if (loaded.prototype?.init) {
                    await loaded.prototype.init()
                    Language = loaded.prototype.Language
                    Parser = loaded
                    initialized = !!Language
                    return initialized
                }
            }

            // Unrecognised shape: fall back to the `default` export when there is one.
            if (!isValidTreeSitterModule(loaded)) {
                loaded = loaded?.default || loaded
            }

            if (!isValidTreeSitterModule(loaded)) {
                // Still unusable: evict every cached web-tree-sitter entry and require again.
                const cache = _require.cache
                Object.keys(cache || {})
                    .filter(cacheKey => cacheKey.includes('web-tree-sitter'))
                    .forEach(cacheKey => {
                        delete cache[cacheKey]
                    })

                loaded = _require('web-tree-sitter')
                if (typeof loaded === 'function' && loaded.prototype?.Language) {
                    Language = loaded.prototype.Language
                    Parser = loaded
                    initialized = true
                    return true
                }
                if (!isValidTreeSitterModule(loaded)) {
                    loaded = loaded?.default || loaded
                }
            }

            if (!isValidTreeSitterModule(loaded)) return false

            Parser = loaded

            if (typeof loaded.init === 'function') {
                // `Language` is read only after `init()` has resolved.
                await loaded.init()
                Language = loaded.Language
                initialized = !!Language
            } else if (loaded.default?.Language) {
                Language = loaded.default.Language
                initialized = true
            } else if (loaded.Language) {
                Language = loaded.Language
                initialized = true
            }

            return initialized && !!Language
        } catch (err) {
            console.warn('[tree-sitter] Initialization failed:', err)
            return false
        }
    }

    const pending = initPromise
    if (pending) {
        // Wait for the attempt that is already recorded.
        await pending.catch(() => {})

        // NOTE(review): this retry only fires when `Language` is set while `initialized`
        // is still false. Every assignment above sets the two together, so the branch
        // looks unreachable and a failed attempt is effectively cached. Confirm whether
        // `!initialized || !Language` was intended.
        if (!initialized && Language) {
            initPromise = attemptInit()
            return await initPromise
        }
        return initialized && !!Language
    }

    initPromise = attemptInit()
    return await initPromise
}
|
|
32
120
|
|
|
33
121
|
function isExportedByLanguage(ext: string, name: string, nodeText: string): boolean {
|
|
34
122
|
switch (ext) {
|
|
@@ -119,7 +207,35 @@ function findAllChildren(node: any, predicate: (n: any) => boolean): any[] {
|
|
|
119
207
|
return results
|
|
120
208
|
}
|
|
121
209
|
|
|
122
|
-
function
|
|
210
|
+
/**
 * Collects decorator-like names attached to a definition node.
 *
 * Each direct child of `defNode` is inspected:
 *   - a `decorator`, `attribute` or `annotation` child contributes the text of the first
 *     node `findFirstChild` yields with type `identifier`, `attribute` or `decorator_target`;
 *   - an `expression_statement` child is searched for a `decorator`/`attribute` node, and
 *     that node's first `identifier` contributes its text.
 *
 * @param defNode Syntax node of the definition; may be nullish or lack `children`.
 * @returns Names in child order; an empty array when nothing is found.
 */
function extractDecoratorsFromNode(defNode: any): string[] {
    const children = defNode?.children
    if (!children) return []

    const wrapperTypes = ['decorator', 'attribute', 'annotation']
    const nameTypes = ['identifier', 'attribute', 'decorator_target']
    const names: string[] = []

    for (const node of children) {
        let nameNode: any = null

        if (wrapperTypes.includes(node.type)) {
            nameNode = findFirstChild(node, n => nameTypes.includes(n.type))
        } else if (node.type === 'expression_statement') {
            // Some grammars wrap the decorator in an expression statement.
            const wrapped = findFirstChild(node, n => n.type === 'decorator' || n.type === 'attribute')
            if (wrapped) {
                nameNode = findFirstChild(wrapped, n => n.type === 'identifier')
            }
        }

        // Nodes without text are ignored.
        if (nameNode?.text) {
            names.push(nameNode.text)
        }
    }

    return names
}
|
|
237
|
+
|
|
238
|
+
function _extractGenericsFromNode(defNode: any, filePath: string): ParsedGeneric[] {
|
|
123
239
|
const generics: ParsedGeneric[] = []
|
|
124
240
|
if (!defNode) return generics
|
|
125
241
|
|
|
@@ -179,7 +295,7 @@ function assignCallsToFunctions(
|
|
|
179
295
|
return unassigned
|
|
180
296
|
}
|
|
181
297
|
|
|
182
|
-
export class TreeSitterParser extends
|
|
298
|
+
export class TreeSitterParser extends BaseExtractor {
|
|
183
299
|
private parser: any = null
|
|
184
300
|
private languages = new Map<string, any>()
|
|
185
301
|
private nameCounter = new Map<string, number>()
|
|
@@ -190,10 +306,9 @@ export class TreeSitterParser extends BaseParser {
|
|
|
190
306
|
}
|
|
191
307
|
|
|
192
308
|
/**
 * Creates this instance's parser once the shared tree-sitter runtime is ready.
 * Does nothing when a parser already exists; leaves `this.parser` unset when
 * `ensureInitialized()` reports failure or `Parser` is not available.
 */
private async init() {
    if (!this.parser) {
        const runtimeReady = await ensureInitialized()
        if (runtimeReady && Parser) this.parser = new Parser()
    }
}
|
|
@@ -203,39 +318,57 @@ export class TreeSitterParser extends BaseParser {
|
|
|
203
318
|
return Boolean(this.parser)
|
|
204
319
|
}
|
|
205
320
|
|
|
206
|
-
async
|
|
207
|
-
this.nameCounter.clear()
|
|
321
|
+
/**
 * Parses one source file with the tree-sitter grammar selected by its extension.
 *
 * Falls back to `buildEmptyFile(...)` (after a `console.warn`) when the parser could
 * not be initialised or no language configuration is available for the extension.
 *
 * @param filePath Path of the file; only its extension is used to pick the language.
 * @param content  Full text of the file.
 * @returns The result of `parseWithConfig`, or the empty-file placeholder.
 */
async extract(filePath: string, content: string): Promise<ParsedFile> {
    // Normalise the extension once so every path below, including the
    // "parser not initialised" fallback, reports the same lower-cased value.
    const ext = path.extname(filePath).toLowerCase()

    await this.init()
    if (!this.parser) {
        console.warn('[tree-sitter] Parser not initialized')
        return this.buildEmptyFile(filePath, content, ext)
    }

    const config = await this.getLanguageConfig(ext)
    if (!config || !config.lang) {
        console.warn('[tree-sitter] Language not available for', ext)
        return this.buildEmptyFile(filePath, content, ext)
    }

    return this.parseWithConfig(filePath, content, ext, config)
}
|
|
228
341
|
|
|
229
342
|
private async parseWithConfig(filePath: string, content: string, ext: string, config: any): Promise<ParsedFile> {
|
|
230
343
|
this.parser!.setLanguage(config.lang)
|
|
231
|
-
const tree = this.parser!.parse(content)
|
|
232
|
-
const query = config.lang.query(config.query)
|
|
233
344
|
|
|
345
|
+
let tree: any = null
|
|
346
|
+
try {
|
|
347
|
+
tree = this.parser!.parse(content)
|
|
348
|
+
} catch (parseErr) {
|
|
349
|
+
console.warn(`[tree-sitter] Parse failed for ${ext}:`, parseErr instanceof Error ? parseErr.message : String(parseErr))
|
|
350
|
+
return this.buildEmptyFile(filePath, content, ext)
|
|
351
|
+
}
|
|
352
|
+
|
|
353
|
+
let query: any = null
|
|
354
|
+
try {
|
|
355
|
+
query = config.lang.query(config.query)
|
|
356
|
+
} catch (queryErr) {
|
|
357
|
+
console.warn(`[tree-sitter] Query compilation failed for ${ext}:`, queryErr instanceof Error ? queryErr.message : String(queryErr))
|
|
358
|
+
return this.buildEmptyFile(filePath, content, ext)
|
|
359
|
+
}
|
|
360
|
+
|
|
234
361
|
if (!query) {
|
|
235
362
|
return this.buildEmptyFile(filePath, content, ext)
|
|
236
363
|
}
|
|
237
364
|
|
|
238
|
-
|
|
365
|
+
let matches: any[] = []
|
|
366
|
+
try {
|
|
367
|
+
matches = query.matches(tree.rootNode)
|
|
368
|
+
} catch (matchErr) {
|
|
369
|
+
console.warn(`[tree-sitter] Query execution failed for ${ext}:`, matchErr instanceof Error ? matchErr.message : String(matchErr))
|
|
370
|
+
return this.buildEmptyFile(filePath, content, ext)
|
|
371
|
+
}
|
|
239
372
|
|
|
240
373
|
const functions: ParsedFunction[] = []
|
|
241
374
|
const classesMap = new Map<string, ParsedClass>()
|
|
@@ -321,7 +454,6 @@ export class TreeSitterParser extends BaseParser {
|
|
|
321
454
|
|
|
322
455
|
// --- Generic types ---
|
|
323
456
|
if (captures['generic.name'] || captures['generic.arg']) {
|
|
324
|
-
const genName = captures['generic.name']?.text || ''
|
|
325
457
|
const genArg = captures['generic.arg']?.text || ''
|
|
326
458
|
if (genArg && !generics.some(g => g.name === genArg)) {
|
|
327
459
|
generics.push({
|
|
@@ -361,6 +493,7 @@ export class TreeSitterParser extends BaseParser {
|
|
|
361
493
|
|
|
362
494
|
const returnType = extractReturnType(ext, defNode, nodeText)
|
|
363
495
|
const params = extractParamsFromNode(defNode)
|
|
496
|
+
const decorators = extractDecoratorsFromNode(defNode)
|
|
364
497
|
|
|
365
498
|
functions.push({
|
|
366
499
|
id: fnId,
|
|
@@ -373,6 +506,7 @@ export class TreeSitterParser extends BaseParser {
|
|
|
373
506
|
isExported: exported,
|
|
374
507
|
isAsync,
|
|
375
508
|
calls: [],
|
|
509
|
+
decorators: decorators.length > 0 ? decorators : undefined,
|
|
376
510
|
hash: hashContent(nodeText),
|
|
377
511
|
purpose: extractDocComment(content, startLine),
|
|
378
512
|
edgeCasesHandled: [],
|
|
@@ -402,10 +536,6 @@ export class TreeSitterParser extends BaseParser {
|
|
|
402
536
|
const clsId = `class:${filePath}:${clsName}`
|
|
403
537
|
|
|
404
538
|
if (!classesMap.has(clsId)) {
|
|
405
|
-
const isEnum = type === 'definition.enum'
|
|
406
|
-
const isStruct = type === 'definition.struct'
|
|
407
|
-
const isUnion = type === 'definition.union'
|
|
408
|
-
|
|
409
539
|
classesMap.set(clsId, {
|
|
410
540
|
id: clsId,
|
|
411
541
|
name: clsName,
|
|
@@ -519,13 +649,71 @@ export class TreeSitterParser extends BaseParser {
|
|
|
519
649
|
try {
|
|
520
650
|
const nameForFile = name.replace(/-/g, '_')
|
|
521
651
|
|
|
522
|
-
// Try multiple possible WASM locations
|
|
523
|
-
const
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
652
|
+
// Try multiple possible WASM locations, including parent directories and siblings for monorepos
|
|
653
|
+
const baseDirs = new Set<string>()
|
|
654
|
+
baseDirs.add(process.cwd())
|
|
655
|
+
|
|
656
|
+
// Add parent directories (up to 4 levels) for monorepo setups
|
|
657
|
+
let current = process.cwd()
|
|
658
|
+
let parentDir = ''
|
|
659
|
+
for (let i = 0; i < 4; i++) {
|
|
660
|
+
parentDir = path.dirname(current)
|
|
661
|
+
if (parentDir === current) break
|
|
662
|
+
baseDirs.add(parentDir)
|
|
663
|
+
baseDirs.add(path.join(parentDir, 'node_modules'))
|
|
664
|
+
|
|
665
|
+
// Also check sibling directories in the parent for monorepo setups
|
|
666
|
+
// Also check subdirs like Mesh/packages/core/node_modules
|
|
667
|
+
try {
|
|
668
|
+
const fs = await import('node:fs')
|
|
669
|
+
const entries = fs.readdirSync(parentDir, { withFileTypes: true })
|
|
670
|
+
for (const entry of entries) {
|
|
671
|
+
if (entry.isDirectory() && entry.name !== path.basename(current)) {
|
|
672
|
+
baseDirs.add(path.join(parentDir, entry.name, 'node_modules'))
|
|
673
|
+
// Also check for packages/*/node_modules patterns
|
|
674
|
+
const subPath = path.join(parentDir, entry.name, 'packages')
|
|
675
|
+
if (fs.existsSync(subPath)) {
|
|
676
|
+
try {
|
|
677
|
+
const pkgEntries = fs.readdirSync(subPath, { withFileTypes: true })
|
|
678
|
+
for (const pkg of pkgEntries) {
|
|
679
|
+
if (pkg.isDirectory()) {
|
|
680
|
+
baseDirs.add(path.join(subPath, pkg.name, 'node_modules'))
|
|
681
|
+
}
|
|
682
|
+
}
|
|
683
|
+
} catch { /* skip */ }
|
|
684
|
+
}
|
|
685
|
+
}
|
|
686
|
+
}
|
|
687
|
+
} catch { /* skip */ }
|
|
688
|
+
|
|
689
|
+
current = parentDir
|
|
690
|
+
}
|
|
691
|
+
|
|
692
|
+
const possiblePaths: string[] = []
|
|
693
|
+
for (const baseDir of baseDirs) {
|
|
694
|
+
if (!baseDir) continue
|
|
695
|
+
const p = path.join(baseDir, 'node_modules/tree-sitter-wasms/out', `tree-sitter-${nameForFile}.wasm`)
|
|
696
|
+
possiblePaths.push(p)
|
|
697
|
+
|
|
698
|
+
// Also check Mesh/packages/*/node_modules directly
|
|
699
|
+
const meshBase = path.join(baseDir, '..', 'Mesh', 'packages')
|
|
700
|
+
try {
|
|
701
|
+
const fs = await import('node:fs')
|
|
702
|
+
if (fs.existsSync(meshBase)) {
|
|
703
|
+
const entries = fs.readdirSync(meshBase, { withFileTypes: true })
|
|
704
|
+
for (const entry of entries) {
|
|
705
|
+
if (entry.isDirectory()) {
|
|
706
|
+
possiblePaths.push(path.join(meshBase, entry.name, 'node_modules', 'tree-sitter-wasms', 'out', `tree-sitter-${nameForFile}.wasm`))
|
|
707
|
+
}
|
|
708
|
+
}
|
|
709
|
+
}
|
|
710
|
+
} catch { /* skip */ }
|
|
711
|
+
}
|
|
712
|
+
|
|
713
|
+
if (process.env.MIKK_DEBUG) {
|
|
714
|
+
console.log('[tree-sitter] Searching for:', name)
|
|
715
|
+
console.log('[tree-sitter] Paths checked:', possiblePaths.slice(0, 5).map(p => p.slice(-50)))
|
|
716
|
+
}
|
|
529
717
|
|
|
530
718
|
let wasmPath = ''
|
|
531
719
|
for (const p of possiblePaths) {
|
|
@@ -563,7 +751,6 @@ export class TreeSitterParser extends BaseParser {
|
|
|
563
751
|
|
|
564
752
|
if (!wasmPath) {
|
|
565
753
|
// WASM not found - but don't mark as permanent error, just skip this language
|
|
566
|
-
console.warn(`Tree-sitter WASM not found for ${name}`)
|
|
567
754
|
return null
|
|
568
755
|
}
|
|
569
756
|
|
|
@@ -596,6 +783,14 @@ export class TreeSitterParser extends BaseParser {
|
|
|
596
783
|
}
|
|
597
784
|
|
|
598
785
|
private async getLanguageConfig(ext: string) {
|
|
786
|
+
// Ensure parser is initialized first
|
|
787
|
+
await this.init()
|
|
788
|
+
|
|
789
|
+
if (!this.parser) {
|
|
790
|
+
console.warn('[tree-sitter] Parser not initialized, returning null for', ext)
|
|
791
|
+
return null
|
|
792
|
+
}
|
|
793
|
+
|
|
599
794
|
switch (ext) {
|
|
600
795
|
case '.py':
|
|
601
796
|
return { lang: await this.loadLang('python'), query: Queries.PYTHON_QUERIES }
|
|
@@ -604,8 +799,13 @@ export class TreeSitterParser extends BaseParser {
|
|
|
604
799
|
case '.kt':
|
|
605
800
|
case '.kts':
|
|
606
801
|
return { lang: await this.loadLang('kotlin'), query: Queries.KOTLIN_QUERIES }
|
|
802
|
+
case '.scala':
|
|
803
|
+
case '.sc':
|
|
804
|
+
return { lang: await this.loadLang('scala'), query: Queries.SCALA_QUERIES }
|
|
607
805
|
case '.swift':
|
|
608
806
|
return { lang: await this.loadLang('swift'), query: Queries.SWIFT_QUERIES }
|
|
807
|
+
case '.dart':
|
|
808
|
+
return { lang: await this.loadLang('dart'), query: Queries.SWIFT_QUERIES }
|
|
609
809
|
case '.c':
|
|
610
810
|
case '.h':
|
|
611
811
|
return { lang: await this.loadLang('c'), query: Queries.C_QUERIES }
|
|
@@ -622,35 +822,169 @@ export class TreeSitterParser extends BaseParser {
|
|
|
622
822
|
return { lang: await this.loadLang('go'), query: Queries.GO_QUERIES }
|
|
623
823
|
case '.rs':
|
|
624
824
|
return { lang: await this.loadLang('rust'), query: Queries.RUST_QUERIES }
|
|
825
|
+
case '.zig':
|
|
826
|
+
return { lang: await this.loadLang('zig'), query: Queries.ZIG_QUERIES }
|
|
625
827
|
case '.php':
|
|
626
828
|
return { lang: await this.loadLang('php'), query: Queries.PHP_QUERIES }
|
|
627
829
|
case '.rb':
|
|
628
830
|
return { lang: await this.loadLang('ruby'), query: Queries.RUBY_QUERIES }
|
|
831
|
+
case '.hs':
|
|
832
|
+
return { lang: await this.loadLang('haskell'), query: Queries.HASKELL_QUERIES }
|
|
833
|
+
case '.ex':
|
|
834
|
+
case '.exs':
|
|
835
|
+
return { lang: await this.loadLang('elixir'), query: Queries.ELIXIR_QUERIES }
|
|
836
|
+
case '.clj':
|
|
837
|
+
case '.cljs':
|
|
838
|
+
case '.cljc':
|
|
839
|
+
return { lang: await this.loadLang('clojure'), query: Queries.CLOJURE_QUERIES }
|
|
840
|
+
case '.fs':
|
|
841
|
+
case '.fsx':
|
|
842
|
+
case '.fsi':
|
|
843
|
+
return { lang: await this.loadLang('fsharp'), query: Queries.FSHARP_QUERIES }
|
|
844
|
+
case '.ml':
|
|
845
|
+
case '.mli':
|
|
846
|
+
return { lang: await this.loadLang('ocaml'), query: Queries.OCAML_QUERIES }
|
|
847
|
+
case '.pl':
|
|
848
|
+
case '.pm':
|
|
849
|
+
return { lang: await this.loadLang('perl'), query: Queries.PERL_QUERIES }
|
|
850
|
+
case '.r':
|
|
851
|
+
case '.R':
|
|
852
|
+
return { lang: await this.loadLang('r'), query: Queries.R_QUERIES }
|
|
853
|
+
case '.jl':
|
|
854
|
+
return { lang: await this.loadLang('julia'), query: Queries.JULIA_QUERIES }
|
|
855
|
+
case '.lua':
|
|
856
|
+
return { lang: await this.loadLang('lua'), query: Queries.LUA_QUERIES }
|
|
857
|
+
case '.sql':
|
|
858
|
+
return { lang: await this.loadLang('sql'), query: Queries.SQL_QUERIES }
|
|
859
|
+
case '.tf':
|
|
860
|
+
return { lang: await this.loadLang('hcl'), query: Queries.HCL_QUERIES }
|
|
861
|
+
case '.sh':
|
|
862
|
+
case '.bash':
|
|
863
|
+
case '.zsh':
|
|
864
|
+
return { lang: await this.loadLang('bash'), query: Queries.BASH_QUERIES }
|
|
629
865
|
default:
|
|
630
866
|
return null
|
|
631
867
|
}
|
|
632
868
|
}
|
|
633
869
|
}
|
|
634
870
|
|
|
871
|
+
// Module-load side effect: one shared TreeSitterParser instance is registered with
// the LanguageRegistry singleton as the extractor for every language listed below.
const tsParser = new TreeSitterParser();
const registry = LanguageRegistry.getInstance();

// Feature profile used by most statically typed languages in the table.
const standardFeatures = {
    hasTypeSystem: true,
    hasGenerics: true,
    hasMacros: false,
    hasAnnotations: false,
    hasPatternMatching: true,
};

// Feature profile used by the functional-language entries.
const functionalFeatures = {
    hasTypeSystem: true,
    hasGenerics: true,
    hasMacros: false,
    hasAnnotations: false,
    hasPatternMatching: false,
};

// Feature profile used by scripting, query and configuration languages.
const scriptingFeatures = {
    hasTypeSystem: false,
    hasGenerics: false,
    hasMacros: false,
    hasAnnotations: false,
    hasPatternMatching: false,
};

// Registration table (25 entries); array order is registration order.
const languages: Array<{ name: string; extensions: string[]; features: typeof standardFeatures }> = (() => {
    const entry = (name: string, extensions: string[], features: typeof standardFeatures) =>
        ({ name, extensions, features });

    return [
        // JVM
        entry('java', ['.java'], standardFeatures),
        entry('kotlin', ['.kt', '.kts'], standardFeatures),
        entry('scala', ['.scala', '.sc'], standardFeatures),

        // Mobile / app platforms
        entry('swift', ['.swift'], standardFeatures),
        entry('dart', ['.dart'], standardFeatures),

        // C family
        entry('c', ['.c', '.h'], standardFeatures),
        entry('cpp', ['.cpp', '.cc', '.cxx', '.hpp', '.hxx', '.hh'], standardFeatures),
        entry('csharp', ['.cs'], standardFeatures),

        // Systems
        entry('rust', ['.rs'], standardFeatures),
        entry('zig', ['.zig'], standardFeatures),

        // Web
        entry('php', ['.php'], standardFeatures),
        entry('ruby', ['.rb'], scriptingFeatures),

        // Scripting
        entry('python', ['.py', '.pyw'], scriptingFeatures),
        entry('lua', ['.lua'], scriptingFeatures),

        // Functional
        entry('haskell', ['.hs'], functionalFeatures),
        entry('elixir', ['.ex', '.exs'], functionalFeatures),
        entry('clojure', ['.clj', '.cljs', '.cljc'], functionalFeatures),

        // .NET
        entry('fsharp', ['.fs', '.fsx', '.fsi'], standardFeatures),

        // ML family
        entry('ocaml', ['.ml', '.mli'], functionalFeatures),

        // Other
        entry('perl', ['.pl', '.pm'], scriptingFeatures),
        entry('r', ['.r', '.R'], scriptingFeatures),
        entry('julia', ['.jl'], scriptingFeatures),

        // Config / special purpose
        entry('sql', ['.sql'], scriptingFeatures),
        entry('terraform', ['.tf'], scriptingFeatures),
        entry('shell', ['.sh', '.bash', '.zsh'], scriptingFeatures),
    ];
})();

// Every entry shares the same extractor; the grammar id is derived from the entry name.
languages.forEach(lang => {
    const { name, features } = lang;
    registry.register({
        ...lang,
        treeSitterGrammar: `tree-sitter-${name}`,
        extractor: tsParser,
        semanticFeatures: features
    });
});
|
|
957
|
+
|
|
635
958
|
/**
 * Maps a file extension (with leading dot) to the language tag stored on `ParsedFile`.
 *
 * The lookup is case-insensitive and only consults the table's own keys, so
 * inputs such as `'constructor'` resolve to `'unknown'` instead of an inherited
 * `Object.prototype` member.
 *
 * @param ext Extension such as `'.py'` or `'.R'`.
 * @returns The mapped language, or `'unknown'` when the extension is not listed.
 */
function extensionToLanguage(ext: string): ParsedFile['language'] {
    const langMap: Record<string, ParsedFile['language']> = {
        '.py': 'python', '.pyw': 'python',
        '.java': 'java',
        '.kt': 'kotlin', '.kts': 'kotlin',
        '.scala': 'scala', '.sc': 'scala',
        '.swift': 'swift',
        '.dart': 'dart',
        '.c': 'c', '.h': 'c',
        '.cpp': 'cpp', '.cc': 'cpp', '.cxx': 'cpp', '.hpp': 'cpp', '.hxx': 'cpp', '.hh': 'cpp',
        '.cs': 'csharp',
        '.rs': 'rust',
        // NOTE(review): Zig is reported as 'rust' — presumably because the
        // ParsedFile['language'] union has no 'zig' member; confirm and correct if it does.
        '.zig': 'rust',
        '.php': 'php',
        '.rb': 'ruby',
        '.go': 'go',
        '.hs': 'haskell',
        '.ex': 'elixir', '.exs': 'elixir',
        '.clj': 'clojure', '.cljs': 'clojure', '.cljc': 'clojure',
        // NOTE(review): F# is reported as 'csharp' — same caveat as '.zig' above.
        '.fs': 'csharp', '.fsx': 'csharp', '.fsi': 'csharp',
        '.ml': 'ocaml', '.mli': 'ocaml',
        '.pl': 'perl', '.pm': 'perl',
        // '.R' is covered by the lower-casing below.
        '.r': 'r',
        '.jl': 'julia',
        '.lua': 'lua',
        '.sql': 'sql',
        '.tf': 'terraform',
        '.sh': 'shell', '.bash': 'shell', '.zsh': 'shell',
    };

    const key = ext.toLowerCase();
    // Own-property check: a bare `langMap[key]` would also find inherited members.
    return Object.prototype.hasOwnProperty.call(langMap, key) ? langMap[key] : 'unknown';
}
|
|
655
989
|
|
|
656
990
|
function extractReturnType(ext: string, defNode: any, nodeText: string): string {
|