repomap-cli 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- repomap/__init__.py +320 -0
- repomap/ai.py +1108 -0
- repomap/check.py +1212 -0
- repomap/cli/__init__.py +3 -0
- repomap/cli/__main__.py +12 -0
- repomap/cli/cli.py +2475 -0
- repomap/core.py +730 -0
- repomap/lsp.py +753 -0
- repomap/parser.py +1697 -0
- repomap/ranking.py +639 -0
- repomap/resolver.py +906 -0
- repomap/toolkit.py +850 -0
- repomap/topic.py +600 -0
- repomap_cli-1.0.0.dist-info/METADATA +284 -0
- repomap_cli-1.0.0.dist-info/RECORD +18 -0
- repomap_cli-1.0.0.dist-info/WHEEL +4 -0
- repomap_cli-1.0.0.dist-info/entry_points.txt +2 -0
- repomap_cli-1.0.0.dist-info/licenses/LICENSE +21 -0
repomap/parser.py
ADDED
|
@@ -0,0 +1,1697 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Repo Map Parser — Tree-sitter Analysis Layer
|
|
4
|
+
==============================================
|
|
5
|
+
负责代码解析、符号提取、import/export 绑定提取。
|
|
6
|
+
|
|
7
|
+
此模块独立于引擎层,可被单独使用进行代码分析。
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import logging
|
|
13
|
+
import re
|
|
14
|
+
from collections import defaultdict
|
|
15
|
+
from typing import Any
|
|
16
|
+
|
|
17
|
+
from . import JSImportBinding, JSExportBinding, Symbol
|
|
18
|
+
|
|
19
|
+
# Module-level logger. It is looked up under the fixed name "repomap" (not
# __name__), so every message emitted by this module goes to that logger.
logger = logging.getLogger("repomap")
|
|
20
|
+
|
|
21
|
+
# ═══════════════════════════════════════════════════════════════════════════════
|
|
22
|
+
# Tree-sitter Queries(内嵌,无需外部 .scm 文件)
|
|
23
|
+
# ═══════════════════════════════════════════════════════════════════════════════
|
|
24
|
+
|
|
25
|
+
# Embedded tree-sitter query sources, keyed language -> query type -> source.
# Query types used below: "function", "class", "import", "call", plus the
# optional "anonymous_function" and "http_route". Languages mapped to an empty
# dict (html, css, json) have no queries here.
#
# NOTE(review): in every "http_route" query and in the Ruby "import" query the
# (#match? ...) / (#eq? ...) predicates are written AFTER the pattern's closing
# parenthesis, i.e. at the top level of the query rather than inside the
# pattern whose captures they name. Confirm that tree-sitter applies them to
# the preceding pattern as intended.
QUERIES: dict[str, dict[str, str]] = {
    "python": {
        # NOTE(review): in the third pattern the @definition.method capture
        # follows the closing parenthesis of class_definition, so it tags the
        # class node rather than the inner function_definition — confirm.
        "function": """
            (function_definition name: (identifier) @name) @definition.function
            (decorated_definition (function_definition name: (identifier) @name)) @definition.function
            (class_definition body: (block (function_definition name: (identifier) @name))) @definition.method
            (assignment left: (identifier) @name right: (lambda)) @definition.lambda
        """,
        "class": """
            (class_definition name: (identifier) @name) @definition.class
            (decorated_definition (class_definition name: (identifier) @name)) @definition.class
        """,
        "import": """
            (import_statement name: (dotted_name) @name)
            (import_statement name: (aliased_import name: (dotted_name) @name))
            (import_from_statement module_name: (dotted_name) @name)
            (import_from_statement module_name: (relative_import) @name)
        """,
        "call": """
            (call function: (identifier) @name) @reference.call
            (call function: (attribute attribute: (identifier) @name)) @reference.call
        """,
        "http_route": """
            ;; FastAPI: @app.get("/path") or @router.post("/path")
            (decorated_definition
              (decorator
                (call
                  function: (attribute
                    object: (identifier) @_obj
                    attribute: (identifier) @method)
                  arguments: (argument_list (string) @path)))
              definition: (function_definition name: (identifier) @handler))
            (#match? @_obj "^(app|router|api)$")
            (#match? @method "^(get|post|put|delete|patch|head|options)$")
        """,
    },
    "javascript": {
        "function": """
            (function_declaration name: (identifier) @name) @definition.function
            (variable_declarator name: (identifier) @name value: (arrow_function)) @definition.function
            (variable_declarator name: (identifier) @name value: (function_expression)) @definition.function
            (method_definition name: (property_identifier) @name) @definition.method
        """,
        "anonymous_function": """
            (arrow_function) @definition.anonymous_function
            (function_expression) @definition.anonymous_function
        """,
        "class": """
            (class_declaration name: (identifier) @name) @definition.class
        """,
        "import": """
            (import_statement source: (string) @source)
            (import_specifier name: (identifier) @name)
            (import_clause (identifier) @name)
        """,
        "call": """
            (call_expression function: (identifier) @name) @reference.call
            (call_expression function: (member_expression property: (property_identifier) @name)) @reference.call
        """,
        "http_route": """
            ;; Express: app.get("/path", handler) / router.post("/path", handler)
            (call_expression
              function: (member_expression
                object: (identifier) @_router
                property: (property_identifier) @method)
              arguments: (arguments
                (string) @path
                .
                [(identifier) @handler (arrow_function) @handler (function_expression) @handler]))
            (#match? @_router "^(app|router)$")
            (#match? @method "^(get|post|put|delete|patch|use|all)$")
        """,
    },
    # TypeScript: node names differ when the dedicated binding is used, and when
    # falling back to the JavaScript parser TS-specific syntax produces ERROR
    # nodes, so only the common patterns supported by both parsers are kept here.
    "typescript": {
        "function": """
            (function_declaration name: (identifier) @name) @definition.function
            (variable_declarator name: (identifier) @name value: (arrow_function)) @definition.function
            (method_definition name: (property_identifier) @name) @definition.method
        """,
        "anonymous_function": """
            (arrow_function) @definition.anonymous_function
            (function_expression) @definition.anonymous_function
        """,
        "class": """
            (class_declaration name: (_) @name) @definition.class
        """,
        "import": """
            (import_statement source: (string) @source)
            (import_specifier name: (identifier) @name)
            (import_clause (identifier) @name)
        """,
        "call": """
            (call_expression function: (identifier) @name) @reference.call
            (call_expression function: (member_expression property: (property_identifier) @name)) @reference.call
        """,
        "http_route": """
            ;; Express: app.get("/path", handler) / router.post("/path", handler)
            (call_expression
              function: (member_expression
                object: (identifier) @_router
                property: (property_identifier) @method)
              arguments: (arguments
                (string) @path
                .
                [(identifier) @handler (arrow_function) @handler (function_expression) @handler]))
            (#match? @_router "^(app|router)$")
            (#match? @method "^(get|post|put|delete|patch|use|all)$")
        """,
    },
    "go": {
        "function": """
            (function_declaration name: (identifier) @name) @definition.function
            (method_declaration name: (field_identifier) @name) @definition.method
        """,
        "class": """
            (type_spec name: (type_identifier) @name type: (struct_type)) @definition.struct
            (type_spec name: (type_identifier) @name type: (interface_type)) @definition.interface
        """,
        "import": """
            (import_spec path: (interpreted_string_literal) @path)
        """,
        "call": """
            (call_expression function: (identifier) @name) @reference.call
            (call_expression function: (selector_expression field: (field_identifier) @name)) @reference.call
        """,
    },
    "rust": {
        "function": """
            (function_item name: (identifier) @name) @definition.function
            (function_signature_item name: (identifier) @name) @definition.trait_method
        """,
        "class": """
            (struct_item name: (type_identifier) @name) @definition.struct
            (enum_item name: (type_identifier) @name) @definition.enum
            (trait_item name: (type_identifier) @name) @definition.trait
            (impl_item type: (type_identifier) @name) @definition.impl
            (type_item name: (type_identifier) @name) @definition.type
            (mod_item name: (identifier) @name) @definition.module
        """,
        # NOTE(review): the first and third use_declaration patterns below are
        # textually identical (scoped_identifier with identifier path + name),
        # although their in-query comments describe different `use` forms.
        # Presumably each matching `use a::b` is therefore captured twice —
        # confirm whether the duplicate is intentional.
        "import": """
            ; 捕获 use crate::module::Item 中的 module 部分
            (use_declaration
              argument: (scoped_identifier
                path: (identifier) @path
                name: (identifier) @name))
            ; 捕获 use crate::module::{A, B} 中的 module 部分
            (use_declaration
              argument: (scoped_use_list
                path: (identifier) @path))
            ; 捕获 use module::Item 中的 module
            (use_declaration
              argument: (scoped_identifier
                path: (identifier) @path
                name: (identifier) @name))
            ; 捕获 extern crate name;
            (extern_crate_declaration name: (identifier) @name)
            ; 捕获 use module;
            (use_declaration argument: (identifier) @name)
        """,
        "call": """
            (call_expression function: (identifier) @name) @reference.call
            (call_expression function: (field_expression field: (field_identifier) @name)) @reference.call
            (call_expression function: (scoped_identifier name: (identifier) @name)) @reference.call
        """,
        "http_route": """
            ;; Axum: .route("/path", get(handler))
            (call_expression
              function: (field_expression
                field: (field_identifier) @_method_name)
              arguments: (arguments
                (string_literal) @path
                (call_expression
                  function: (identifier) @http_method
                  arguments: (arguments (identifier) @handler))))
            (#eq? @_method_name "route")
            (#match? @http_method "^(get|post|put|delete|patch|head|options)$")
        """,
    },
    "c": {
        "function": """
            (function_definition
              declarator: (function_declarator
                declarator: (identifier) @name)) @definition.function
        """,
        "class": """
            (struct_specifier name: (type_identifier) @name) @definition.struct
            (union_specifier name: (type_identifier) @name) @definition.union
            (enum_specifier name: (type_identifier) @name) @definition.enum
        """,
        "import": """
            (preproc_include path: (_) @path)
        """,
        "call": """
            (call_expression function: (identifier) @name) @reference.call
        """,
    },
    "java": {
        "function": """
            (method_declaration name: (identifier) @name) @definition.method
            (constructor_declaration name: (identifier) @name) @definition.method
        """,
        "class": """
            (class_declaration name: (identifier) @name) @definition.class
            (interface_declaration name: (identifier) @name) @definition.interface
            (enum_declaration name: (identifier) @name) @definition.enum
        """,
        "import": """
            (import_declaration (scoped_identifier) @name)
            (import_declaration (identifier) @name)
        """,
        "call": """
            (method_invocation name: (identifier) @name) @reference.call
        """,
        "http_route": """
            ;; Spring Boot: @GetMapping("/path") / @PostMapping("/path")
            (annotation
              name: (identifier) @method
              arguments: (annotation_argument_list
                (element_value_pair
                  value: (string_literal) @path)))
            (#match? @method "^(GetMapping|PostMapping|PutMapping|DeleteMapping|PatchMapping|RequestMapping)$")
        """,
    },
    "kotlin": {
        "function": """
            (function_declaration name: (simple_identifier) @name) @definition.function
        """,
        "class": """
            (class_declaration name: (type_identifier) @name) @definition.class
            (object_declaration name: (type_identifier) @name) @definition.object
            (interface_declaration name: (type_identifier) @name) @definition.interface
        """,
        "import": """
            (import_header (identifier) @name)
        """,
        "call": """
            (call_expression (simple_identifier) @name) @reference.call
            (call_expression (navigation_expression (simple_identifier) @name)) @reference.call
        """,
    },
    "swift": {
        "function": """
            (function_declaration name: (simple_identifier) @name) @definition.function
        """,
        "class": """
            (class_declaration name: (type_identifier) @name) @definition.class
            (struct_declaration name: (type_identifier) @name) @definition.struct
            (enum_declaration name: (type_identifier) @name) @definition.enum
            (protocol_declaration name: (type_identifier) @name) @definition.protocol
        """,
        "import": """
            (import_declaration (identifier) @name)
        """,
        "call": """
            (call_expression (simple_identifier) @name) @reference.call
            (call_expression (navigation_expression (simple_identifier) @name)) @reference.call
        """,
    },
    "cpp": {
        "function": """
            (function_definition
              declarator: (function_declarator
                declarator: [(identifier) (qualified_identifier)] @name)) @definition.function
        """,
        "class": """
            (class_specifier name: (type_identifier) @name) @definition.class
            (struct_specifier name: (type_identifier) @name) @definition.struct
            (enum_specifier name: (type_identifier) @name) @definition.enum
        """,
        "import": """
            (preproc_include path: (_) @path)
        """,
        "call": """
            (call_expression function: [(identifier) (qualified_identifier)] @name) @reference.call
        """,
    },
    "c_sharp": {
        "function": """
            (method_declaration name: (identifier) @name) @definition.method
            (local_function_statement name: (identifier) @name) @definition.function
        """,
        "class": """
            (class_declaration name: (identifier) @name) @definition.class
            (interface_declaration name: (identifier) @name) @definition.interface
            (struct_declaration name: (identifier) @name) @definition.struct
            (enum_declaration name: (identifier) @name) @definition.enum
        """,
        "import": """
            (using_directive name: [(identifier) (qualified_name)] @name)
        """,
        "call": """
            (invocation_expression function: (identifier) @name) @reference.call
            (invocation_expression function: (member_access_expression name: (identifier) @name)) @reference.call
        """,
    },
    "php": {
        "function": """
            (function_definition name: (name) @name) @definition.function
            (method_declaration name: (name) @name) @definition.method
        """,
        "class": """
            (class_declaration name: (name) @name) @definition.class
            (interface_declaration name: (name) @name) @definition.interface
            (trait_declaration name: (name) @name) @definition.trait
            (enum_declaration name: (name) @name) @definition.enum
        """,
        "import": """
            (namespace_use_declaration (qualified_name) @name)
        """,
        "call": """
            (function_call_expression function: (name) @name) @reference.call
            (member_call_expression name: (name) @name) @reference.call
        """,
    },
    "ruby": {
        "function": """
            (method name: (identifier) @name) @definition.method
            (singleton_method name: (identifier) @name) @definition.method
        """,
        "class": """
            (class name: (constant) @name) @definition.class
            (module name: (constant) @name) @definition.module
        """,
        "import": """
            (call method: (identifier) @_method arguments: (argument_list (string) @path))
            (#match? @_method "^(require|require_relative|load)$")
        """,
        "call": """
            (call method: (identifier) @name) @reference.call
        """,
    },
    "html": {},
    "css": {},
    "json": {},
}
# "tsx" shares the very same dict object as "typescript" (an alias, not a copy).
QUERIES["tsx"] = QUERIES["typescript"]
|
|
363
|
+
|
|
364
|
+
# File extension (including the leading dot, lower-case) -> language key.
# The values are the same keys used by QUERIES above.
EXT_TO_LANG: dict[str, str] = {
    # Python (source and stub files)
    ".py": "python",
    ".pyi": "python",
    # JavaScript family
    ".js": "javascript",
    ".jsx": "javascript",
    ".mjs": "javascript",
    ".cjs": "javascript",
    # TypeScript family; only ".tsx" maps to the separate "tsx" key
    ".ts": "typescript",
    ".tsx": "tsx",
    ".mts": "typescript",
    ".cts": "typescript",
    ".go": "go",
    ".rs": "rust",
    # Markup / data languages (their QUERIES entries are empty)
    ".html": "html",
    ".htm": "html",
    ".css": "css",
    ".json": "json",
    # C and C++; note that ".h" is mapped to C, not C++
    ".c": "c",
    ".h": "c",
    ".cpp": "cpp",
    ".cc": "cpp",
    ".cxx": "cpp",
    ".hpp": "cpp",
    ".hh": "cpp",
    ".java": "java",
    ".kt": "kotlin",
    ".kts": "kotlin",
    ".swift": "swift",
    ".cs": "c_sharp",
    ".php": "php",
    ".phtml": "php",
    ".rb": "ruby",
}
|
|
397
|
+
|
|
398
|
+
# ═══════════════════════════════════════════════════════════════════════════════
|
|
399
|
+
# Tree-sitter 适配层
|
|
400
|
+
# ═══════════════════════════════════════════════════════════════════════════════
|
|
401
|
+
|
|
402
|
+
|
|
403
|
+
class TreeSitterAdapter:
|
|
404
|
+
"""
|
|
405
|
+
封装 tree-sitter 多语言解析。
|
|
406
|
+
‑ 兼容 tree-sitter 0.20 ~ 0.25+(捕获结果格式差异)
|
|
407
|
+
‑ 懒加载语言绑定,未安装的静默跳过
|
|
408
|
+
"""
|
|
409
|
+
|
|
410
|
+
def __init__(self) -> None:
    """Create the empty registries and immediately load all available parsers.

    Both registries start empty and are filled by ``_init_parsers``.
    """
    # Compiled queries, nested as lang -> query_type -> compiled Query.
    self._queries: dict[str, dict[str, Any]] = {}
    # Parser objects keyed by language name.
    self.parsers: dict[str, Any] = {}
    self._init_parsers()
|
|
415
|
+
|
|
416
|
+
# ── 初始化 ─────────────────────────────────────────────────────────────────
|
|
417
|
+
|
|
418
|
+
def _init_parsers(self) -> None:
    """Load a parser for every language whose binding is usable, then precompile queries.

    Every language binding is optional. A binding that cannot be imported or
    initialised is reported at debug level and skipped; no exception escapes.
    TypeScript prefers its dedicated binding and otherwise reuses the
    JavaScript parser; TSX is only registered from the dedicated binding.
    """
    # (language key, binding module, candidate factory names tried in order).
    # JavaScript must come before the TypeScript fallback further down.
    # tree_sitter_php: "language" is tried first for backward compatibility,
    # then "language_php", the name published by the PHP binding.
    bindings: tuple[tuple[str, str, tuple[str, ...]], ...] = (
        ("python", "tree_sitter_python", ("language",)),
        ("javascript", "tree_sitter_javascript", ("language",)),
        ("go", "tree_sitter_go", ("language",)),
        ("rust", "tree_sitter_rust", ("language",)),
        ("html", "tree_sitter_html", ("language",)),
        ("css", "tree_sitter_css", ("language",)),
        ("json", "tree_sitter_json", ("language",)),
        ("c", "tree_sitter_c", ("language",)),
        ("java", "tree_sitter_java", ("language",)),
        ("kotlin", "tree_sitter_kotlin", ("language",)),
        ("swift", "tree_sitter_swift", ("language",)),
        ("cpp", "tree_sitter_cpp", ("language",)),
        ("c_sharp", "tree_sitter_c_sharp", ("language",)),
        ("php", "tree_sitter_php", ("language", "language_php")),
        ("ruby", "tree_sitter_ruby", ("language",)),
    )
    for lang, module, factories in bindings:
        self._load_parser(lang, module, factories)

    # TypeScript / TSX: dedicated binding first. TypeScript falls back to the
    # JavaScript parser; TSX deliberately has no fallback so JSX is never
    # mis-parsed by a non-TSX grammar.
    try:
        import tree_sitter_typescript as ts_binding  # type: ignore
        from tree_sitter import Language, Parser  # type: ignore

        self.parsers["typescript"] = Parser(Language(ts_binding.language_typescript()))
        logger.debug("Parser loaded: typescript (dedicated)")
    except Exception:
        if "javascript" in self.parsers:
            self.parsers["typescript"] = self.parsers["javascript"]
            logger.debug("Parser loaded: typescript (fallback to javascript)")
    else:
        try:
            self.parsers["tsx"] = Parser(Language(ts_binding.language_tsx()))
            logger.debug("Parser loaded: tsx (dedicated)")
        except Exception as e:
            logger.debug(f"Parser unavailable [tsx]: {e}")

    # Precompile queries; only languages that actually have a parser are compiled.
    self._precompile_queries()

def _load_parser(self, lang: str, module: str, factories: tuple[str, ...]) -> bool:
    """Try to build and register the parser for *lang*.

    Args:
        lang: Key under which the parser is stored in ``self.parsers``.
        module: Name of the binding module to import.
        factories: Attribute names to look up on the module, in order; the
            first one present is called to obtain the grammar.

    Returns:
        True if the parser was registered, False if anything failed (the
        failure is logged at debug level).
    """
    try:
        import importlib

        from tree_sitter import Language, Parser  # type: ignore

        binding = importlib.import_module(module)
        for attr in factories:
            factory = getattr(binding, attr, None)
            if factory is not None:
                break
        else:
            raise AttributeError(f"{module} exposes none of {factories}")
        self.parsers[lang] = Parser(Language(factory()))
    except Exception as e:
        logger.debug(f"Parser unavailable [{lang}]: {e}")
        return False
    logger.debug(f"Parser loaded: {lang}")
    return True
|
|
534
|
+
|
|
535
|
+
def _precompile_queries(self) -> None:
    """Compile the embedded query sources for each language that has a parser.

    Results are stored in ``self._queries[lang][query_type]``. A query that
    fails to compile is logged as a warning and left out. If the ``Query``
    class cannot be imported from ``tree_sitter`` a warning is logged and
    nothing is compiled.
    """
    try:
        from tree_sitter import Query  # type: ignore
    except ImportError:
        logger.warning("tree-sitter Query class not available (requires >=0.21), queries disabled")
        return

    for lang, patterns in QUERIES.items():
        if lang not in self.parsers:
            continue
        # Register the (initially empty) per-language table before compiling,
        # so a language with a parser always has an entry.
        compiled: dict[str, Any] = {}
        self._queries[lang] = compiled
        lang_parser = self.parsers[lang]
        for qtype in patterns:
            try:
                compiled[qtype] = Query(lang_parser.language, patterns[qtype])
            except Exception as e:
                logger.warning(f"Query compile failed [{lang}/{qtype}]: {e}")
|
|
557
|
+
|
|
558
|
+
# ── 公开接口 ────────────────────────────────────────────────────────────────
|
|
559
|
+
|
|
560
|
+
def parse(self, content: bytes, lang: str) -> Any | None:
|
|
561
|
+
parser = self.parsers.get(lang)
|
|
562
|
+
if not parser:
|
|
563
|
+
return None
|
|
564
|
+
|
|
565
|
+
# 内容大小限制(防止内存溢出)
|
|
566
|
+
MAX_PARSE_SIZE = 10 * 1024 * 1024 # 10MB
|
|
567
|
+
if len(content) > MAX_PARSE_SIZE:
|
|
568
|
+
logger.warning(f"File too large for parsing ({len(content)} bytes > {MAX_PARSE_SIZE}), skipping")
|
|
569
|
+
return None
|
|
570
|
+
|
|
571
|
+
# 检测异常内容模式(可能导致解析器崩溃)
|
|
572
|
+
try:
|
|
573
|
+
# 检查是否包含可能导致解析器栈溢出的极端嵌套模式
|
|
574
|
+
content_str = content.decode('utf-8', errors='ignore')
|
|
575
|
+
# 检测极端深度的括号嵌套(可能导致递归溢出)
|
|
576
|
+
max_nesting = 0
|
|
577
|
+
current_nesting = 0
|
|
578
|
+
for char in content_str[:100000]: # 只检查前 100KB
|
|
579
|
+
if char in '({[<':
|
|
580
|
+
current_nesting += 1
|
|
581
|
+
max_nesting = max(max_nesting, current_nesting)
|
|
582
|
+
elif char in ')}]>':
|
|
583
|
+
current_nesting -= 1
|
|
584
|
+
# 如果嵌套深度超过 1000,可能触发解析器栈溢出
|
|
585
|
+
if max_nesting > 1000:
|
|
586
|
+
logger.warning(f"Extreme nesting detected ({max_nesting} levels), skipping file to prevent parser crash")
|
|
587
|
+
return None
|
|
588
|
+
except Exception:
|
|
589
|
+
pass # 解码失败继续尝试解析
|
|
590
|
+
|
|
591
|
+
try:
|
|
592
|
+
return parser.parse(content)
|
|
593
|
+
except RecursionError:
|
|
594
|
+
logger.warning(f"Parser recursion limit exceeded for {lang}, skipping file")
|
|
595
|
+
return None
|
|
596
|
+
except MemoryError:
|
|
597
|
+
logger.warning(f"Parser out of memory for {lang}, skipping file")
|
|
598
|
+
return None
|
|
599
|
+
except Exception as e:
|
|
600
|
+
logger.debug(f"Parse error [{lang}]: {e}")
|
|
601
|
+
return None
|
|
602
|
+
|
|
603
|
+
def extract_symbols(self, tree: Any, lang: str, file: str, content: bytes) -> list[Symbol]:
    """Collect function / class style symbol definitions from a parsed tree.

    html, css and json are delegated to their dedicated extractors. For every
    other language the "function" and "class" queries are run, each ``@name``
    capture is paired with the smallest enclosing definition capture, and the
    results are merged with three supplementary extractors.

    Args:
        tree: Parsed tree; only ``tree.root_node`` is read here.
        lang: Language key used to select the compiled queries.
        file: File identifier embedded in symbol ids.
        content: Not read by this method.

    Returns:
        Symbols sorted by (file, line, end_line, col, name, kind).
    """
    if lang == "html":
        return self._extract_html_symbols(tree, file)
    if lang == "css":
        return self._extract_css_symbols(tree, file)
    if lang == "json":
        return self._extract_json_symbols(tree, file)

    def _span_key(entry: tuple[Any, str]) -> tuple[tuple[int, int], int, int]:
        # Orders definition nodes by extent (rows, then columns), then position.
        node = entry[0]
        rows = node.end_point[0] - node.start_point[0]
        cols = node.end_point[1] - node.start_point[1]
        return ((rows, cols), node.start_point[0], node.start_point[1])

    def _output_key(sym: Symbol) -> tuple[Any, ...]:
        return (sym.file, sym.line, sym.end_line, sym.col, sym.name, sym.kind)

    collected: dict[str, Symbol] = {}
    root = tree.root_node
    lang_queries = self._queries.get(lang, {})

    for qtype in ("function", "class"):
        query = lang_queries.get(qtype)
        if not query:
            continue

        # Split the captures into identifier nodes and definition nodes.
        names: list[Any] = []
        definitions: list[tuple[Any, str]] = []
        for cap_name, node in self._run_query(query, root):
            if cap_name == "name":
                names.append(node)
            elif "definition" in cap_name or "export" in cap_name:
                definitions.append((node, cap_name))

        for name_node in names:
            enclosing = [pair for pair in definitions if self._within(name_node, pair[0])]
            if not enclosing:
                continue
            # The innermost (smallest) enclosing definition owns the name;
            # ties resolve to the earliest capture, as a stable sort would.
            def_node, def_cap = min(enclosing, key=_span_key)

            name = self._text(name_node)
            if not name:
                continue

            kind = def_cap.rsplit(".", 1)[-1]
            if lang == "python" and name.startswith("_") and not name.startswith("__"):
                # Python: a single leading underscore marks the symbol private.
                vis = "private"
            elif "export" in def_cap:
                vis = "exported"
            else:
                vis = "public"

            row = name_node.start_point[0] + 1
            sym_id = f"{file}::{name}::{row}"
            collected[sym_id] = Symbol(
                id=sym_id,
                name=name,
                kind=kind,
                file=file,
                line=row,
                end_line=def_node.end_point[0] + 1,
                col=name_node.start_point[1],
                visibility=vis,
                docstring=self._docstring(def_node, lang),
                signature=self._signature(def_node, lang),
            )

    # Supplementary extractors never override a symbol already collected.
    for extractor in (
        self._extract_exported_function_expression_symbols,
        self._extract_object_literal_method_symbols,
        self._extract_anonymous_symbols,
    ):
        for symbol in extractor(tree, lang, file):
            collected.setdefault(symbol.id, symbol)

    return sorted(collected.values(), key=_output_key)
|
|
687
|
+
|
|
688
|
+
def _extract_exported_function_expression_symbols(self, tree: Any, lang: str, file: str) -> list[Symbol]:
|
|
689
|
+
if lang not in ("javascript", "typescript", "tsx"):
|
|
690
|
+
return []
|
|
691
|
+
symbols_by_id: dict[str, Symbol] = {}
|
|
692
|
+
for node in self._walk_tree(tree.root_node):
|
|
693
|
+
if node.type not in {"function_expression", "arrow_function"}:
|
|
694
|
+
continue
|
|
695
|
+
if not self._is_exported_anonymous_expression(node):
|
|
696
|
+
continue
|
|
697
|
+
explicit_name = self._declaration_primary_name(node)
|
|
698
|
+
if explicit_name:
|
|
699
|
+
name = explicit_name
|
|
700
|
+
elif self._is_export_default(node):
|
|
701
|
+
name = self._export_default_name(node)
|
|
702
|
+
else:
|
|
703
|
+
name = self._anonymous_symbol_name(node)
|
|
704
|
+
line = node.start_point[0] + 1
|
|
705
|
+
symbol_id = f"{file}::{name}::{line}"
|
|
706
|
+
symbols_by_id[symbol_id] = Symbol(
|
|
707
|
+
id=symbol_id,
|
|
708
|
+
name=name,
|
|
709
|
+
kind="anonymous_function",
|
|
710
|
+
file=file,
|
|
711
|
+
line=line,
|
|
712
|
+
end_line=node.end_point[0] + 1,
|
|
713
|
+
col=node.start_point[1],
|
|
714
|
+
visibility="private",
|
|
715
|
+
signature=self._signature(node, lang),
|
|
716
|
+
)
|
|
717
|
+
return sorted(symbols_by_id.values(), key=lambda symbol: (symbol.file, symbol.line, symbol.col, symbol.name))
|
|
718
|
+
|
|
719
|
+
def _is_export_default(self, node: Any) -> bool:
|
|
720
|
+
current = getattr(node, "parent", None)
|
|
721
|
+
depth = 0
|
|
722
|
+
while current is not None and depth < 4:
|
|
723
|
+
if current.type == "export_statement":
|
|
724
|
+
return self._first_child_of_type(current, "default") is not None
|
|
725
|
+
current = getattr(current, "parent", None)
|
|
726
|
+
depth += 1
|
|
727
|
+
return False
|
|
728
|
+
|
|
729
|
+
def _extract_object_literal_method_symbols(self, tree: Any, lang: str, file: str) -> list[Symbol]:
|
|
730
|
+
if lang not in ("javascript", "typescript", "tsx"):
|
|
731
|
+
return []
|
|
732
|
+
symbols_by_id: dict[str, Symbol] = {}
|
|
733
|
+
for node in self._walk_tree(tree.root_node):
|
|
734
|
+
if node.type != "pair":
|
|
735
|
+
continue
|
|
736
|
+
value_node = node.child_by_field_name("value")
|
|
737
|
+
if value_node is None or value_node.type not in {"arrow_function", "function_expression"}:
|
|
738
|
+
continue
|
|
739
|
+
key_node = node.child_by_field_name("key")
|
|
740
|
+
if key_node is None:
|
|
741
|
+
for child in node.children:
|
|
742
|
+
if child.type in {"property_identifier", "identifier", "string"}:
|
|
743
|
+
key_node = child
|
|
744
|
+
break
|
|
745
|
+
name = self._identifier_text(key_node) or (self._string_literal_value(key_node) if key_node and key_node.type == "string" else "")
|
|
746
|
+
if not name:
|
|
747
|
+
continue
|
|
748
|
+
line = key_node.start_point[0] + 1
|
|
749
|
+
symbol_id = f"{file}::{name}::{line}"
|
|
750
|
+
symbols_by_id[symbol_id] = Symbol(
|
|
751
|
+
id=symbol_id,
|
|
752
|
+
name=name,
|
|
753
|
+
kind="method",
|
|
754
|
+
file=file,
|
|
755
|
+
line=line,
|
|
756
|
+
end_line=value_node.end_point[0] + 1,
|
|
757
|
+
col=key_node.start_point[1],
|
|
758
|
+
visibility="public",
|
|
759
|
+
signature=self._signature(value_node, lang),
|
|
760
|
+
)
|
|
761
|
+
return sorted(symbols_by_id.values(), key=lambda symbol: (symbol.file, symbol.line, symbol.col, symbol.name))
|
|
762
|
+
|
|
763
|
+
def _extract_anonymous_symbols(self, tree: Any, lang: str, file: str) -> list[Symbol]:
|
|
764
|
+
if lang not in ("javascript", "typescript", "tsx"):
|
|
765
|
+
return []
|
|
766
|
+
|
|
767
|
+
anonymous_symbols: dict[str, Symbol] = {}
|
|
768
|
+
for node in self._walk_tree(tree.root_node):
|
|
769
|
+
if node.type not in {"arrow_function", "function_expression"}:
|
|
770
|
+
continue
|
|
771
|
+
if self._has_named_owner(node) and not self._is_exported_anonymous_expression(node):
|
|
772
|
+
continue
|
|
773
|
+
if node.end_point[0] <= node.start_point[0] and not self._is_exported_anonymous_expression(node):
|
|
774
|
+
continue
|
|
775
|
+
|
|
776
|
+
explicit_name = self._declaration_primary_name(node)
|
|
777
|
+
if explicit_name is not None and not self._is_exported_anonymous_expression(node):
|
|
778
|
+
continue
|
|
779
|
+
line = node.start_point[0] + 1
|
|
780
|
+
|
|
781
|
+
# 尝试从上下文推断更有意义的名字
|
|
782
|
+
name = explicit_name or self._contextual_anonymous_name(node)
|
|
783
|
+
|
|
784
|
+
symbol_id = f"{file}::{name}::{line}"
|
|
785
|
+
anonymous_symbols[symbol_id] = Symbol(
|
|
786
|
+
id=symbol_id,
|
|
787
|
+
name=name,
|
|
788
|
+
kind="anonymous_function",
|
|
789
|
+
file=file,
|
|
790
|
+
line=line,
|
|
791
|
+
end_line=node.end_point[0] + 1,
|
|
792
|
+
col=node.start_point[1],
|
|
793
|
+
visibility="private",
|
|
794
|
+
signature=self._signature(node, lang),
|
|
795
|
+
)
|
|
796
|
+
|
|
797
|
+
return list(anonymous_symbols.values())
|
|
798
|
+
|
|
799
|
+
def _contextual_anonymous_name(self, node: Any) -> str:
|
|
800
|
+
"""从父节点上下文推断匿名函数名(JSX handler / Hook callback 等)。"""
|
|
801
|
+
parent = getattr(node, "parent", None)
|
|
802
|
+
if parent is None:
|
|
803
|
+
return self._anonymous_symbol_name(node)
|
|
804
|
+
|
|
805
|
+
# JSX 属性: onClick={() => ...} → onClick_handler@L24
|
|
806
|
+
if parent.type == "jsx_expression":
|
|
807
|
+
grandparent = getattr(parent, "parent", None)
|
|
808
|
+
if grandparent is not None and grandparent.type == "jsx_attribute":
|
|
809
|
+
prop_name = ""
|
|
810
|
+
for child in grandparent.children:
|
|
811
|
+
if child.type == "property_identifier":
|
|
812
|
+
prop_name = self._text(child)
|
|
813
|
+
break
|
|
814
|
+
if prop_name:
|
|
815
|
+
return f"<{prop_name}_handler@{node.start_point[0] + 1}>"
|
|
816
|
+
|
|
817
|
+
# 调用参数: useEffect(() => ...) → useEffect_callback@L24
|
|
818
|
+
if parent.type == "arguments":
|
|
819
|
+
grandparent = getattr(parent, "parent", None)
|
|
820
|
+
if grandparent is not None and grandparent.type == "call_expression":
|
|
821
|
+
func_node = grandparent.child_by_field_name("function")
|
|
822
|
+
if func_node is not None:
|
|
823
|
+
func_name = self._text(func_node)
|
|
824
|
+
if func_name and len(func_name) <= 40:
|
|
825
|
+
return f"<{func_name}_callback@{node.start_point[0] + 1}>"
|
|
826
|
+
|
|
827
|
+
# 数组方法回调: arr.map(() => ...) → map_callback@L24
|
|
828
|
+
if parent.type == "arguments":
|
|
829
|
+
grandparent = getattr(parent, "parent", None)
|
|
830
|
+
if grandparent is not None and grandparent.type == "call_expression":
|
|
831
|
+
func_node = grandparent.child_by_field_name("function")
|
|
832
|
+
if func_node is not None and func_node.type == "member_expression":
|
|
833
|
+
prop_node = func_node.child_by_field_name("property")
|
|
834
|
+
if prop_node is not None:
|
|
835
|
+
method_name = self._text(prop_node)
|
|
836
|
+
if method_name in {"map", "filter", "reduce", "forEach", "find", "some", "every", "sort", "flatMap"}:
|
|
837
|
+
return f"<{method_name}_callback@{node.start_point[0] + 1}>"
|
|
838
|
+
|
|
839
|
+
return self._anonymous_symbol_name(node)
|
|
840
|
+
|
|
841
|
+
def _is_exported_anonymous_expression(self, node: Any) -> bool:
|
|
842
|
+
current = getattr(node, "parent", None)
|
|
843
|
+
depth = 0
|
|
844
|
+
while current is not None and depth < 4:
|
|
845
|
+
if current.type == "export_statement" and self._first_child_of_type(current, "default") is not None:
|
|
846
|
+
return True
|
|
847
|
+
if current.type == "assignment_expression":
|
|
848
|
+
left_node = current.child_by_field_name("left")
|
|
849
|
+
if left_node is not None and self._commonjs_export_target(left_node) is not None:
|
|
850
|
+
return True
|
|
851
|
+
current = getattr(current, "parent", None)
|
|
852
|
+
depth += 1
|
|
853
|
+
return False
|
|
854
|
+
|
|
855
|
+
def _has_named_owner(self, node: Any) -> bool:
|
|
856
|
+
current = getattr(node, "parent", None)
|
|
857
|
+
depth = 0
|
|
858
|
+
while current is not None and depth < 4:
|
|
859
|
+
if current.type in {"function_declaration", "method_definition"}:
|
|
860
|
+
return True
|
|
861
|
+
if current.type == "pair":
|
|
862
|
+
value_node = current.child_by_field_name("value")
|
|
863
|
+
key_node = current.child_by_field_name("key")
|
|
864
|
+
if value_node is node and key_node is not None:
|
|
865
|
+
return True
|
|
866
|
+
if current.type == "variable_declarator":
|
|
867
|
+
for child in current.children:
|
|
868
|
+
if child.type == "identifier":
|
|
869
|
+
return True
|
|
870
|
+
current = getattr(current, "parent", None)
|
|
871
|
+
depth += 1
|
|
872
|
+
return False
|
|
873
|
+
|
|
874
|
+
def extract_imports(self, tree: Any, lang: str) -> list[tuple[str, int]]:
    """Return ``(module_text, line)`` pairs for the imports found in ``tree``.

    The ``import`` query registered for ``lang`` is run against the tree root;
    an empty list is returned when no such query exists.

    * Rust: for each line, the ``path`` capture is used when present,
      otherwise every ``name`` capture on that line is reported.
    * JavaScript / TypeScript / TSX: only ``source`` captures are reported.
    * All non-Rust languages: surrounding single and double quotes are
      stripped and empty results are dropped.

    Returns:
        Unique pairs sorted by line, then by text.
    """
    query = self._queries.get(lang, {}).get("import")
    if not query:
        return []

    found = set()
    if lang == "rust":
        # Rust needs special handling: prefer the path (module name) over the name.
        path_at: dict[int, str] = {}
        names_at: dict[int, list[str]] = defaultdict(list)
        for capture, node in self._run_query(query, tree.root_node):
            text = self._text(node)
            row = node.start_point[0] + 1
            if capture == "path":
                path_at[row] = text
            elif capture == "name":
                names_at[row].append(text)

        found.update((path, row) for row, path in path_at.items())
        for row, names in names_at.items():
            if row in path_at:
                continue
            found.update((name, row) for name in names)
    else:
        source_only = lang in ("javascript", "typescript", "tsx")
        for capture, node in self._run_query(query, tree.root_node):
            if source_only and capture != "source":
                continue
            text = self._text(node).strip("\"'")
            if text:
                found.add((text, node.start_point[0] + 1))

    return sorted(found, key=lambda pair: (pair[1], pair[0]))
|
|
908
|
+
|
|
909
|
+
@staticmethod
|
|
910
|
+
def _call_reference_kind(node: Any) -> str:
|
|
911
|
+
parent = getattr(node, "parent", None)
|
|
912
|
+
while parent is not None:
|
|
913
|
+
if parent.type in {"call_expression", "call"}:
|
|
914
|
+
function_node = parent.child_by_field_name("function")
|
|
915
|
+
if function_node is not None and function_node.type in {"member_expression", "field_expression", "selector_expression", "attribute"}:
|
|
916
|
+
return "member"
|
|
917
|
+
return "direct"
|
|
918
|
+
parent = getattr(parent, "parent", None)
|
|
919
|
+
return "direct"
|
|
920
|
+
|
|
921
|
+
def _extract_html_symbols(self, tree: Any, file: str) -> list[Symbol]:
|
|
922
|
+
symbols_by_id: dict[str, Symbol] = {}
|
|
923
|
+
seen_names: dict[tuple[str, int], int] = {}
|
|
924
|
+
for node in self._walk_tree(tree.root_node):
|
|
925
|
+
if node.type != "element":
|
|
926
|
+
continue
|
|
927
|
+
start_tag = self._first_child_of_type(node, "start_tag")
|
|
928
|
+
if start_tag is None:
|
|
929
|
+
continue
|
|
930
|
+
tag_name = None
|
|
931
|
+
for child in start_tag.children:
|
|
932
|
+
if child.type == "tag_name":
|
|
933
|
+
tag_name = self._text(child)
|
|
934
|
+
break
|
|
935
|
+
if not tag_name:
|
|
936
|
+
continue
|
|
937
|
+
line = node.start_point[0] + 1
|
|
938
|
+
visible_name = f"<{tag_name}>"
|
|
939
|
+
key = (visible_name, line)
|
|
940
|
+
seen_names[key] = seen_names.get(key, 0) + 1
|
|
941
|
+
if seen_names[key] > 1:
|
|
942
|
+
visible_name = f"{visible_name}#{seen_names[key]}"
|
|
943
|
+
symbol_id = f"{file}::{visible_name}::{line}"
|
|
944
|
+
symbols_by_id[symbol_id] = Symbol(
|
|
945
|
+
id=symbol_id,
|
|
946
|
+
name=visible_name,
|
|
947
|
+
kind="element",
|
|
948
|
+
file=file,
|
|
949
|
+
line=line,
|
|
950
|
+
end_line=node.end_point[0] + 1,
|
|
951
|
+
col=node.start_point[1],
|
|
952
|
+
visibility="public",
|
|
953
|
+
signature=visible_name,
|
|
954
|
+
)
|
|
955
|
+
return sorted(symbols_by_id.values(), key=lambda symbol: (symbol.file, symbol.line, symbol.col, symbol.name))
|
|
956
|
+
|
|
957
|
+
def _extract_css_symbols(self, tree: Any, file: str) -> list[Symbol]:
|
|
958
|
+
symbols_by_id: dict[str, Symbol] = {}
|
|
959
|
+
seen_names: dict[tuple[str, int], int] = {}
|
|
960
|
+
selector_types = {"class_selector", "id_selector", "tag_name", "nesting_selector"}
|
|
961
|
+
for node in self._walk_tree(tree.root_node):
|
|
962
|
+
if node.type not in selector_types:
|
|
963
|
+
continue
|
|
964
|
+
raw_name = self._text(node).strip()
|
|
965
|
+
if not raw_name:
|
|
966
|
+
continue
|
|
967
|
+
line = node.start_point[0] + 1
|
|
968
|
+
kind = "selector"
|
|
969
|
+
if raw_name.startswith("."):
|
|
970
|
+
kind = "class_selector"
|
|
971
|
+
elif raw_name.startswith("#"):
|
|
972
|
+
kind = "id_selector"
|
|
973
|
+
key = (raw_name, line)
|
|
974
|
+
seen_names[key] = seen_names.get(key, 0) + 1
|
|
975
|
+
visible_name = raw_name if seen_names[key] == 1 else f"{raw_name}#{seen_names[key]}"
|
|
976
|
+
symbol_id = f"{file}::{visible_name}::{line}"
|
|
977
|
+
symbols_by_id[symbol_id] = Symbol(
|
|
978
|
+
id=symbol_id,
|
|
979
|
+
name=visible_name,
|
|
980
|
+
kind=kind,
|
|
981
|
+
file=file,
|
|
982
|
+
line=line,
|
|
983
|
+
end_line=node.end_point[0] + 1,
|
|
984
|
+
col=node.start_point[1],
|
|
985
|
+
visibility="public",
|
|
986
|
+
signature=raw_name,
|
|
987
|
+
)
|
|
988
|
+
return sorted(symbols_by_id.values(), key=lambda symbol: (symbol.file, symbol.line, symbol.col, symbol.name))
|
|
989
|
+
|
|
990
|
+
def _extract_json_symbols(self, tree: Any, file: str) -> list[Symbol]:
|
|
991
|
+
symbols_by_id: dict[str, Symbol] = {}
|
|
992
|
+
seen_names: dict[tuple[str, int], int] = {}
|
|
993
|
+
for node in self._walk_tree(tree.root_node):
|
|
994
|
+
if node.type != "pair":
|
|
995
|
+
continue
|
|
996
|
+
key_node = node.child_by_field_name("key")
|
|
997
|
+
if key_node is None:
|
|
998
|
+
continue
|
|
999
|
+
key_name = self._string_literal_value(key_node)
|
|
1000
|
+
if not key_name:
|
|
1001
|
+
continue
|
|
1002
|
+
line = node.start_point[0] + 1
|
|
1003
|
+
key = (key_name, line)
|
|
1004
|
+
seen_names[key] = seen_names.get(key, 0) + 1
|
|
1005
|
+
visible_name = key_name if seen_names[key] == 1 else f"{key_name}#{seen_names[key]}"
|
|
1006
|
+
symbol_id = f"{file}::{visible_name}::{line}"
|
|
1007
|
+
symbols_by_id[symbol_id] = Symbol(
|
|
1008
|
+
id=symbol_id,
|
|
1009
|
+
name=visible_name,
|
|
1010
|
+
kind="json_key",
|
|
1011
|
+
file=file,
|
|
1012
|
+
line=line,
|
|
1013
|
+
end_line=node.end_point[0] + 1,
|
|
1014
|
+
col=node.start_point[1],
|
|
1015
|
+
visibility="public",
|
|
1016
|
+
signature=f'"{key_name}"',
|
|
1017
|
+
)
|
|
1018
|
+
return sorted(symbols_by_id.values(), key=lambda symbol: (symbol.file, symbol.line, symbol.col, symbol.name))
|
|
1019
|
+
|
|
1020
|
+
def extract_js_ts_import_bindings(
|
|
1021
|
+
self,
|
|
1022
|
+
content: bytes,
|
|
1023
|
+
lang: str,
|
|
1024
|
+
tree: Any | None = None,
|
|
1025
|
+
) -> list[JSImportBinding]:
|
|
1026
|
+
"""提取 JS/TS import 绑定信息。"""
|
|
1027
|
+
if lang not in ("javascript", "typescript", "tsx"):
|
|
1028
|
+
return []
|
|
1029
|
+
parsed_tree = tree or self.parse(content, lang)
|
|
1030
|
+
if not parsed_tree:
|
|
1031
|
+
return []
|
|
1032
|
+
bindings: dict[tuple[str, str, str, int, str], JSImportBinding] = {}
|
|
1033
|
+
for node in parsed_tree.root_node.children:
|
|
1034
|
+
if node.type == "import_statement":
|
|
1035
|
+
self._collect_es_import_bindings(node, bindings)
|
|
1036
|
+
for node in self._walk_tree(parsed_tree.root_node):
|
|
1037
|
+
if node.type == "variable_declarator":
|
|
1038
|
+
self._collect_commonjs_import_bindings(node, bindings)
|
|
1039
|
+
return sorted(
|
|
1040
|
+
bindings.values(),
|
|
1041
|
+
key=lambda item: (item.line, item.module, item.local_name, item.imported_name, item.kind),
|
|
1042
|
+
)
|
|
1043
|
+
|
|
1044
|
+
def extract_js_ts_export_bindings(
|
|
1045
|
+
self,
|
|
1046
|
+
content: bytes,
|
|
1047
|
+
lang: str,
|
|
1048
|
+
tree: Any | None = None,
|
|
1049
|
+
) -> list[JSExportBinding]:
|
|
1050
|
+
"""提取 JS/TS export 绑定信息。"""
|
|
1051
|
+
if lang not in ("javascript", "typescript", "tsx"):
|
|
1052
|
+
return []
|
|
1053
|
+
parsed_tree = tree or self.parse(content, lang)
|
|
1054
|
+
if not parsed_tree:
|
|
1055
|
+
return []
|
|
1056
|
+
bindings: dict[tuple[str, str | None, str | None, int, str], JSExportBinding] = {}
|
|
1057
|
+
|
|
1058
|
+
def add_binding(exported_name: str, source_name: str | None, module: str | None, line: int, kind: str) -> None:
|
|
1059
|
+
key = (exported_name, source_name, module, line, kind)
|
|
1060
|
+
bindings[key] = JSExportBinding(
|
|
1061
|
+
exported_name=exported_name,
|
|
1062
|
+
source_name=source_name,
|
|
1063
|
+
module=module,
|
|
1064
|
+
line=line,
|
|
1065
|
+
kind=kind,
|
|
1066
|
+
)
|
|
1067
|
+
|
|
1068
|
+
for node in parsed_tree.root_node.children:
|
|
1069
|
+
if node.type == "export_statement":
|
|
1070
|
+
self._collect_es_export_bindings(node, add_binding)
|
|
1071
|
+
for node in self._walk_tree(parsed_tree.root_node):
|
|
1072
|
+
if node.type == "assignment_expression":
|
|
1073
|
+
self._collect_commonjs_export_bindings(node, add_binding)
|
|
1074
|
+
return sorted(
|
|
1075
|
+
bindings.values(),
|
|
1076
|
+
key=lambda item: (
|
|
1077
|
+
item.line,
|
|
1078
|
+
item.exported_name,
|
|
1079
|
+
item.source_name or "",
|
|
1080
|
+
item.module or "",
|
|
1081
|
+
item.kind,
|
|
1082
|
+
),
|
|
1083
|
+
)
|
|
1084
|
+
|
|
1085
|
+
def _collect_es_import_bindings(
|
|
1086
|
+
self,
|
|
1087
|
+
node: Any,
|
|
1088
|
+
bindings: dict[tuple[str, str, str, int, str], JSImportBinding],
|
|
1089
|
+
) -> None:
|
|
1090
|
+
module = self._module_literal_from_statement(node)
|
|
1091
|
+
if not module:
|
|
1092
|
+
return
|
|
1093
|
+
line = node.start_point[0] + 1
|
|
1094
|
+
import_clause = self._first_child_of_type(node, "import_clause")
|
|
1095
|
+
if not import_clause:
|
|
1096
|
+
return
|
|
1097
|
+
for child in import_clause.children:
|
|
1098
|
+
if child.type == "identifier":
|
|
1099
|
+
self._add_import_binding(bindings, child.text.decode("utf-8"), "default", module, line, "default")
|
|
1100
|
+
elif child.type == "named_imports":
|
|
1101
|
+
for specifier in child.children:
|
|
1102
|
+
if specifier.type != "import_specifier":
|
|
1103
|
+
continue
|
|
1104
|
+
source_node = specifier.child_by_field_name("name")
|
|
1105
|
+
alias_node = specifier.child_by_field_name("alias")
|
|
1106
|
+
source_name = self._identifier_text(source_node)
|
|
1107
|
+
local_name = self._identifier_text(alias_node) or source_name
|
|
1108
|
+
if source_name and local_name:
|
|
1109
|
+
self._add_import_binding(bindings, local_name, source_name, module, line, "named")
|
|
1110
|
+
elif child.type == "namespace_import":
|
|
1111
|
+
local_name = self._last_identifier(child)
|
|
1112
|
+
if local_name:
|
|
1113
|
+
self._add_import_binding(bindings, local_name, "*", module, line, "namespace")
|
|
1114
|
+
|
|
1115
|
+
def _collect_commonjs_import_bindings(
|
|
1116
|
+
self,
|
|
1117
|
+
node: Any,
|
|
1118
|
+
bindings: dict[tuple[str, str, str, int, str], JSImportBinding],
|
|
1119
|
+
) -> None:
|
|
1120
|
+
value_node = node.child_by_field_name("value")
|
|
1121
|
+
module = self._require_call_module(value_node)
|
|
1122
|
+
if not module:
|
|
1123
|
+
return
|
|
1124
|
+
name_node = node.child_by_field_name("name")
|
|
1125
|
+
if not name_node:
|
|
1126
|
+
return
|
|
1127
|
+
line = node.start_point[0] + 1
|
|
1128
|
+
if name_node.type == "identifier":
|
|
1129
|
+
self._add_import_binding(bindings, name_node.text.decode("utf-8"), "default", module, line, "default")
|
|
1130
|
+
return
|
|
1131
|
+
if name_node.type != "object_pattern":
|
|
1132
|
+
return
|
|
1133
|
+
for child in name_node.children:
|
|
1134
|
+
if child.type in {"shorthand_property_identifier_pattern", "identifier"}:
|
|
1135
|
+
name = child.text.decode("utf-8")
|
|
1136
|
+
self._add_import_binding(bindings, name, name, module, line, "named")
|
|
1137
|
+
elif child.type == "pair_pattern":
|
|
1138
|
+
source_name = self._identifier_text(child.child_by_field_name("key"))
|
|
1139
|
+
local_name = self._identifier_text(child.child_by_field_name("value"))
|
|
1140
|
+
if source_name and local_name:
|
|
1141
|
+
self._add_import_binding(bindings, local_name, source_name, module, line, "named")
|
|
1142
|
+
|
|
1143
|
+
def _collect_es_export_bindings(
|
|
1144
|
+
self,
|
|
1145
|
+
node: Any,
|
|
1146
|
+
add_binding: Any,
|
|
1147
|
+
) -> None:
|
|
1148
|
+
line = node.start_point[0] + 1
|
|
1149
|
+
module = self._module_literal_from_statement(node)
|
|
1150
|
+
has_default = self._first_child_of_type(node, "default") is not None
|
|
1151
|
+
namespace_export = self._first_child_of_type(node, "namespace_export")
|
|
1152
|
+
export_clause = self._first_child_of_type(node, "export_clause")
|
|
1153
|
+
declaration = self._export_declaration_node(node)
|
|
1154
|
+
|
|
1155
|
+
if namespace_export is not None and module:
|
|
1156
|
+
exported_name = self._last_identifier(namespace_export)
|
|
1157
|
+
if exported_name:
|
|
1158
|
+
add_binding(exported_name, "*", module, line, "namespace")
|
|
1159
|
+
return
|
|
1160
|
+
|
|
1161
|
+
if self._first_child_of_type(node, "*") is not None and module:
|
|
1162
|
+
add_binding("*", "*", module, line, "wildcard")
|
|
1163
|
+
return
|
|
1164
|
+
|
|
1165
|
+
if export_clause is not None:
|
|
1166
|
+
kind = "reexport" if module else "local"
|
|
1167
|
+
for specifier in export_clause.children:
|
|
1168
|
+
if specifier.type != "export_specifier":
|
|
1169
|
+
continue
|
|
1170
|
+
source_name = self._identifier_text(specifier.child_by_field_name("name"))
|
|
1171
|
+
exported_name = self._identifier_text(specifier.child_by_field_name("alias")) or source_name
|
|
1172
|
+
if source_name and exported_name:
|
|
1173
|
+
add_binding(exported_name, source_name, module, line, kind)
|
|
1174
|
+
return
|
|
1175
|
+
|
|
1176
|
+
if has_default:
|
|
1177
|
+
source_name = self._export_default_source_name(node, declaration)
|
|
1178
|
+
if source_name:
|
|
1179
|
+
add_binding("default", source_name, None, line, "local")
|
|
1180
|
+
return
|
|
1181
|
+
|
|
1182
|
+
for exported_name in self._exported_names_from_declaration(declaration):
|
|
1183
|
+
add_binding(exported_name, exported_name, None, line, "local")
|
|
1184
|
+
|
|
1185
|
+
def _collect_commonjs_export_bindings(
|
|
1186
|
+
self,
|
|
1187
|
+
node: Any,
|
|
1188
|
+
add_binding: Any,
|
|
1189
|
+
) -> None:
|
|
1190
|
+
target_node = node.child_by_field_name("left")
|
|
1191
|
+
value_node = node.child_by_field_name("right")
|
|
1192
|
+
if target_node is None or value_node is None or target_node.type != "member_expression":
|
|
1193
|
+
return
|
|
1194
|
+
export_target = self._commonjs_export_target(target_node)
|
|
1195
|
+
if export_target is None:
|
|
1196
|
+
return
|
|
1197
|
+
line = node.start_point[0] + 1
|
|
1198
|
+
if export_target == "default":
|
|
1199
|
+
if value_node.type == "object":
|
|
1200
|
+
for child in value_node.children:
|
|
1201
|
+
if child.type == "shorthand_property_identifier":
|
|
1202
|
+
name = child.text.decode("utf-8")
|
|
1203
|
+
add_binding(name, name, None, line, "local")
|
|
1204
|
+
elif child.type == "pair":
|
|
1205
|
+
exported_name = self._identifier_text(child.child_by_field_name("key"))
|
|
1206
|
+
source_name = self._identifier_text(child.child_by_field_name("value")) or self._expression_binding_name(child.child_by_field_name("value"))
|
|
1207
|
+
if exported_name and source_name:
|
|
1208
|
+
add_binding(exported_name, source_name, None, line, "local")
|
|
1209
|
+
return
|
|
1210
|
+
source_name = self._expression_binding_name(value_node)
|
|
1211
|
+
if source_name:
|
|
1212
|
+
add_binding("default", source_name, None, line, "local")
|
|
1213
|
+
return
|
|
1214
|
+
source_name = self._expression_binding_name(value_node)
|
|
1215
|
+
if source_name:
|
|
1216
|
+
add_binding(export_target, source_name, None, line, "local")
|
|
1217
|
+
|
|
1218
|
+
def _add_import_binding(
    self,
    bindings: dict[tuple[str, str, str, int, str], JSImportBinding],
    local_name: str,
    imported_name: str,
    module: str,
    line: int,
    kind: str,
) -> None:
    """Store one import binding in ``bindings``, keyed by all five of its fields.

    An existing entry with the same key is overwritten, so repeated
    discoveries of the same binding collapse into one record.
    """
    record = JSImportBinding(local_name, imported_name, module, line, kind)
    bindings[(local_name, imported_name, module, line, kind)] = record
|
|
1229
|
+
|
|
1230
|
+
def _module_literal_from_statement(self, node: Any) -> str | None:
|
|
1231
|
+
for child in node.children:
|
|
1232
|
+
if child.type == "string":
|
|
1233
|
+
return self._string_literal_value(child)
|
|
1234
|
+
return None
|
|
1235
|
+
|
|
1236
|
+
def _require_call_module(self, node: Any | None) -> str | None:
|
|
1237
|
+
if node is None or node.type != "call_expression":
|
|
1238
|
+
return None
|
|
1239
|
+
function_node = node.child_by_field_name("function")
|
|
1240
|
+
arguments_node = node.child_by_field_name("arguments")
|
|
1241
|
+
if function_node is None or function_node.type != "identifier":
|
|
1242
|
+
return None
|
|
1243
|
+
if function_node.text.decode("utf-8") != "require" or arguments_node is None:
|
|
1244
|
+
return None
|
|
1245
|
+
for child in arguments_node.children:
|
|
1246
|
+
if child.type == "string":
|
|
1247
|
+
return self._string_literal_value(child)
|
|
1248
|
+
return None
|
|
1249
|
+
|
|
1250
|
+
def _commonjs_export_target(self, node: Any) -> str | None:
|
|
1251
|
+
object_node = node.child_by_field_name("object")
|
|
1252
|
+
property_node = node.child_by_field_name("property")
|
|
1253
|
+
if object_node is None or property_node is None:
|
|
1254
|
+
return None
|
|
1255
|
+
if object_node.type == "identifier" and object_node.text.decode("utf-8") == "exports":
|
|
1256
|
+
return property_node.text.decode("utf-8")
|
|
1257
|
+
if object_node.type == "member_expression":
|
|
1258
|
+
inner_object = object_node.child_by_field_name("object")
|
|
1259
|
+
inner_property = object_node.child_by_field_name("property")
|
|
1260
|
+
if (
|
|
1261
|
+
inner_object is not None
|
|
1262
|
+
and inner_property is not None
|
|
1263
|
+
and inner_object.type == "identifier"
|
|
1264
|
+
and inner_object.text.decode("utf-8") == "module"
|
|
1265
|
+
and inner_property.type == "property_identifier"
|
|
1266
|
+
and inner_property.text.decode("utf-8") == "exports"
|
|
1267
|
+
):
|
|
1268
|
+
return property_node.text.decode("utf-8")
|
|
1269
|
+
if (
|
|
1270
|
+
object_node.type == "identifier"
|
|
1271
|
+
and property_node.type == "property_identifier"
|
|
1272
|
+
and object_node.text.decode("utf-8") == "module"
|
|
1273
|
+
and property_node.text.decode("utf-8") == "exports"
|
|
1274
|
+
):
|
|
1275
|
+
return "default"
|
|
1276
|
+
return None
|
|
1277
|
+
|
|
1278
|
+
def _export_declaration_node(self, node: Any) -> Any | None:
|
|
1279
|
+
for child in node.children:
|
|
1280
|
+
if child.type in {
|
|
1281
|
+
"function_declaration",
|
|
1282
|
+
"class_declaration",
|
|
1283
|
+
"lexical_declaration",
|
|
1284
|
+
"interface_declaration",
|
|
1285
|
+
"type_alias_declaration",
|
|
1286
|
+
"enum_declaration",
|
|
1287
|
+
}:
|
|
1288
|
+
return child
|
|
1289
|
+
return None
|
|
1290
|
+
|
|
1291
|
+
def _export_default_source_name(self, node: Any, declaration: Any | None) -> str | None:
|
|
1292
|
+
if declaration is not None:
|
|
1293
|
+
return self._declaration_primary_name(declaration)
|
|
1294
|
+
for child in node.children:
|
|
1295
|
+
if child.type in {"export", "default", ";"}:
|
|
1296
|
+
continue
|
|
1297
|
+
source_name = self._expression_binding_name(child)
|
|
1298
|
+
if source_name:
|
|
1299
|
+
return source_name
|
|
1300
|
+
return None
|
|
1301
|
+
|
|
1302
|
+
def _exported_names_from_declaration(self, declaration: Any | None) -> list[str]:
|
|
1303
|
+
if declaration is None:
|
|
1304
|
+
return []
|
|
1305
|
+
if declaration.type == "lexical_declaration":
|
|
1306
|
+
names: list[str] = []
|
|
1307
|
+
for child in declaration.children:
|
|
1308
|
+
if child.type != "variable_declarator":
|
|
1309
|
+
continue
|
|
1310
|
+
name_node = child.child_by_field_name("name")
|
|
1311
|
+
if name_node is not None and name_node.type == "identifier":
|
|
1312
|
+
names.append(name_node.text.decode("utf-8"))
|
|
1313
|
+
return names
|
|
1314
|
+
primary_name = self._declaration_primary_name(declaration)
|
|
1315
|
+
return [primary_name] if primary_name else []
|
|
1316
|
+
|
|
1317
|
+
def _declaration_primary_name(self, declaration: Any) -> str | None:
|
|
1318
|
+
for field_name in ("name",):
|
|
1319
|
+
target = declaration.child_by_field_name(field_name)
|
|
1320
|
+
if target is not None:
|
|
1321
|
+
return target.text.decode("utf-8")
|
|
1322
|
+
for child in declaration.children:
|
|
1323
|
+
if child.type in {"identifier", "type_identifier"}:
|
|
1324
|
+
return child.text.decode("utf-8")
|
|
1325
|
+
return None
|
|
1326
|
+
|
|
1327
|
+
def _expression_binding_name(self, node: Any | None) -> str | None:
|
|
1328
|
+
if node is None:
|
|
1329
|
+
return None
|
|
1330
|
+
if node.type in {"identifier", "property_identifier", "type_identifier"}:
|
|
1331
|
+
return node.text.decode("utf-8")
|
|
1332
|
+
if node.type in {"function_declaration", "class_declaration", "function_expression"}:
|
|
1333
|
+
return self._declaration_primary_name(node) or self._anonymous_symbol_name(node)
|
|
1334
|
+
if node.type == "arrow_function":
|
|
1335
|
+
return self._anonymous_symbol_name(node)
|
|
1336
|
+
return None
|
|
1337
|
+
|
|
1338
|
+
@staticmethod
|
|
1339
|
+
def _anonymous_symbol_name(node: Any) -> str:
|
|
1340
|
+
return f"<anonymous@{node.start_point[0] + 1}>"
|
|
1341
|
+
|
|
1342
|
+
@staticmethod
|
|
1343
|
+
def _export_default_name(node: Any) -> str:
|
|
1344
|
+
"""为 export default 无名字的函数/类生成可读名。"""
|
|
1345
|
+
line = node.start_point[0] + 1
|
|
1346
|
+
kind = node.type.replace("_expression", "").replace("_declaration", "")
|
|
1347
|
+
return f"<default_export_{kind}@{line}>"
|
|
1348
|
+
|
|
1349
|
+
def _string_literal_value(self, node: Any) -> str:
|
|
1350
|
+
return self._text(node).strip("\"'`")
|
|
1351
|
+
|
|
1352
|
+
def _first_child_of_type(self, node: Any, node_type: str) -> Any | None:
|
|
1353
|
+
for child in node.children:
|
|
1354
|
+
if child.type == node_type:
|
|
1355
|
+
return child
|
|
1356
|
+
return None
|
|
1357
|
+
|
|
1358
|
+
def _last_identifier(self, node: Any) -> str | None:
|
|
1359
|
+
identifiers = [
|
|
1360
|
+
child.text.decode("utf-8")
|
|
1361
|
+
for child in node.children
|
|
1362
|
+
if child.type in {"identifier", "property_identifier", "type_identifier"}
|
|
1363
|
+
]
|
|
1364
|
+
return identifiers[-1] if identifiers else None
|
|
1365
|
+
|
|
1366
|
+
def _identifier_text(self, node: Any | None) -> str | None:
|
|
1367
|
+
if node is None:
|
|
1368
|
+
return None
|
|
1369
|
+
if node.type in {"identifier", "property_identifier", "type_identifier", "shorthand_property_identifier", "shorthand_property_identifier_pattern"}:
|
|
1370
|
+
return node.text.decode("utf-8")
|
|
1371
|
+
return None
|
|
1372
|
+
|
|
1373
|
+
def _walk_tree(self, root: Any) -> list[Any]:
|
|
1374
|
+
nodes = [root]
|
|
1375
|
+
result: list[Any] = []
|
|
1376
|
+
while nodes:
|
|
1377
|
+
current = nodes.pop()
|
|
1378
|
+
result.append(current)
|
|
1379
|
+
nodes.extend(reversed(current.children))
|
|
1380
|
+
return result
|
|
1381
|
+
|
|
1382
|
+
def extract_calls(self, tree: Any, lang: str) -> list[tuple[str, int, str]]:
    """Return ``(name, line, reference_kind)`` triples for the calls found in ``tree``.

    The ``call`` query registered for ``lang`` is run against the tree root and
    only its ``name`` captures with non-empty text are used. An empty list is
    returned when no such query exists.

    Returns:
        Unique triples sorted by line, then name, then reference kind.
    """
    query = self._queries.get(lang, {}).get("call")
    if not query:
        return []

    unique_calls = set()
    for capture, node in self._run_query(query, tree.root_node):
        if capture != "name":
            continue
        name = self._text(node)
        if not name:
            continue
        unique_calls.add((name, node.start_point[0] + 1, self._call_reference_kind(node)))

    return sorted(unique_calls, key=lambda call: (call[1], call[0], call[2]))
|
|
1394
|
+
|
|
1395
|
+
def extract_http_routes(self, tree: Any, lang: str, file: str) -> list[Any]:
    """Extract HTTP route definitions from the AST.

    Supported frameworks: FastAPI (Python), Express (JS/TS), Axum (Rust).
    The route inventory only reports strictly matched production route
    definitions, so that test DSLs, logging, and ordinary Array/Option calls
    are not mistaken for HTTP routes.

    Args:
        tree: Parsed tree exposing ``root_node``.
        lang: Language identifier used to look up the ``http_route`` query.
        file: File path; files rejected by ``_should_skip_route_file`` yield
            an empty list.

    Returns:
        Routes sorted by file, line, method, path and handler.
    """
    from . import HttpRoute

    if self._should_skip_route_file(file):
        return []

    query = self._queries.get(lang, {}).get("http_route")
    if not query:
        return []

    routes: list[HttpRoute] = []
    for captures in self._run_query_matches(query, tree.root_node):
        candidate = self._http_route_from_captures(captures, lang, file)
        if candidate is None:
            continue
        routes.append(candidate)

    routes.sort(key=lambda r: (r.file, r.line, r.method, r.path, r.handler))
    return routes
|
|
1421
|
+
|
|
1422
|
+
def _http_route_from_captures(self, captures: dict[str, list[Any]], lang: str, file: str) -> Any | None:
|
|
1423
|
+
from . import HttpRoute
|
|
1424
|
+
|
|
1425
|
+
path_node = self._first_capture(captures, "path")
|
|
1426
|
+
handler_node = self._first_capture(captures, "handler")
|
|
1427
|
+
if path_node is None or handler_node is None:
|
|
1428
|
+
return None
|
|
1429
|
+
|
|
1430
|
+
method_node = self._first_capture(captures, "method") or self._first_capture(captures, "http_method")
|
|
1431
|
+
method = (self._text(method_node) if method_node is not None else "").lower()
|
|
1432
|
+
if not method:
|
|
1433
|
+
return None
|
|
1434
|
+
|
|
1435
|
+
if lang == "python":
|
|
1436
|
+
obj = self._text(self._first_capture(captures, "_obj"))
|
|
1437
|
+
if obj not in {"app", "router", "api"} or method not in {"get", "post", "put", "delete", "patch", "head", "options"}:
|
|
1438
|
+
return None
|
|
1439
|
+
framework = "fastapi"
|
|
1440
|
+
elif lang in ("javascript", "typescript", "tsx"):
|
|
1441
|
+
router = self._text(self._first_capture(captures, "_router"))
|
|
1442
|
+
if router not in {"app", "router"} or method not in {"get", "post", "put", "delete", "patch", "use", "all"}:
|
|
1443
|
+
return None
|
|
1444
|
+
if method in {"describe", "test", "it", "expect", "log", "some", "map", "filter", "find", "reduce", "foreach"}:
|
|
1445
|
+
return None
|
|
1446
|
+
framework = "express"
|
|
1447
|
+
elif lang == "rust":
|
|
1448
|
+
method_name = self._text(self._first_capture(captures, "_method_name"))
|
|
1449
|
+
if method_name != "route" or method not in {"get", "post", "put", "delete", "patch", "head", "options"}:
|
|
1450
|
+
return None
|
|
1451
|
+
if method in {"some", "ok", "err", "is_some", "unwrap", "map", "filter"}:
|
|
1452
|
+
return None
|
|
1453
|
+
framework = "axum"
|
|
1454
|
+
else:
|
|
1455
|
+
return None
|
|
1456
|
+
|
|
1457
|
+
path = self._string_literal_value(path_node)
|
|
1458
|
+
if not path:
|
|
1459
|
+
return None
|
|
1460
|
+
handler_name = self._route_handler_name(handler_node)
|
|
1461
|
+
if not handler_name:
|
|
1462
|
+
return None
|
|
1463
|
+
|
|
1464
|
+
return HttpRoute(
|
|
1465
|
+
method=method.upper(),
|
|
1466
|
+
path=path,
|
|
1467
|
+
handler=handler_name,
|
|
1468
|
+
file=file,
|
|
1469
|
+
line=handler_node.start_point[0] + 1,
|
|
1470
|
+
framework=framework,
|
|
1471
|
+
)
|
|
1472
|
+
|
|
1473
|
+
def _run_query_matches(self, query: Any, root: Any) -> list[dict[str, list[Any]]]:
    """Return the query captures grouped per tree-sitter match.

    Grouping by match avoids stitching a route together from captures that
    belong to different matches.

    The ``QueryCursor.matches`` API is tried first; every usable match
    becomes one ``{capture_name: [nodes]}`` dict. If that API is missing,
    fails, or yields nothing usable, the flat capture list from
    ``_run_query`` is grouped by the start row of each captured node instead.
    """
    try:
        from tree_sitter import QueryCursor  # type: ignore
        cursor = QueryCursor(query)
        if hasattr(cursor, "matches"):
            grouped: list[dict[str, list[Any]]] = []
            for entry in cursor.matches(root):
                # Only (pattern_index, captures) pairs are usable.
                if not (isinstance(entry, (list, tuple)) and len(entry) == 2):
                    continue
                match_captures = entry[1]
                if not (isinstance(match_captures, dict) and match_captures):
                    continue
                grouped.append({
                    cap_name: nodes if isinstance(nodes, list) else [nodes]
                    for cap_name, nodes in match_captures.items()
                })
            if grouped:
                return grouped
    except Exception as e:
        logger.debug(f"Query match run error: {e}")

    # Compatibility path for older runtimes: only a flat capture list is
    # available, so captures are coarsely grouped by their start row and left
    # for the caller to validate strictly.
    by_row: dict[int, dict[str, list[Any]]] = {}
    for cap_name, node in self._run_query(query, root):
        by_row.setdefault(node.start_point[0], {}).setdefault(cap_name, []).append(node)
    return [by_row[row] for row in sorted(by_row)]
|
|
1502
|
+
|
|
1503
|
+
@staticmethod
|
|
1504
|
+
def _first_capture(captures: dict[str, list[Any]], name: str) -> Any | None:
|
|
1505
|
+
nodes = captures.get(name) or []
|
|
1506
|
+
return nodes[0] if nodes else None
|
|
1507
|
+
|
|
1508
|
+
@staticmethod
|
|
1509
|
+
def _should_skip_route_file(file: str) -> bool:
|
|
1510
|
+
normalized = file.replace("\\", "/")
|
|
1511
|
+
parts = {part.lower() for part in normalized.split("/")}
|
|
1512
|
+
if parts & {"e2e", "tests", "__tests__"}:
|
|
1513
|
+
return True
|
|
1514
|
+
name = normalized.rsplit("/", 1)[-1].lower()
|
|
1515
|
+
return bool(re.search(r"(_test\.rs|\.(test|spec)\.(js|jsx|ts|tsx|mjs|cjs|mts|cts))$", name))
|
|
1516
|
+
|
|
1517
|
+
def _route_handler_name(self, node: Any) -> str:
|
|
1518
|
+
explicit = self._identifier_text(node)
|
|
1519
|
+
if explicit:
|
|
1520
|
+
return explicit
|
|
1521
|
+
if node.type in {"arrow_function", "function_expression", "lambda"}:
|
|
1522
|
+
return self._anonymous_symbol_name(node)
|
|
1523
|
+
return self._text(node)
|
|
1524
|
+
|
|
1525
|
+
def _parse_import_specifiers(self, spec: str, module: str, line: int) -> list[JSImportBinding]:
|
|
1526
|
+
bindings: list[JSImportBinding] = []
|
|
1527
|
+
remaining = spec.strip()
|
|
1528
|
+
if not remaining:
|
|
1529
|
+
return bindings
|
|
1530
|
+
|
|
1531
|
+
if remaining.startswith("{"):
|
|
1532
|
+
named_text = remaining[1:remaining.rfind("}")]
|
|
1533
|
+
for imported_name, local_name in self._parse_named_clause(named_text):
|
|
1534
|
+
bindings.append(JSImportBinding(local_name, imported_name, module, line, "named"))
|
|
1535
|
+
return bindings
|
|
1536
|
+
|
|
1537
|
+
if remaining.startswith("*"):
|
|
1538
|
+
namespace_match = re.match(r"\*\s+as\s+([A-Za-z_$][\w$]*)", remaining)
|
|
1539
|
+
if namespace_match:
|
|
1540
|
+
bindings.append(
|
|
1541
|
+
JSImportBinding(
|
|
1542
|
+
local_name=namespace_match.group(1),
|
|
1543
|
+
imported_name="*",
|
|
1544
|
+
module=module,
|
|
1545
|
+
line=line,
|
|
1546
|
+
kind="namespace",
|
|
1547
|
+
)
|
|
1548
|
+
)
|
|
1549
|
+
return bindings
|
|
1550
|
+
|
|
1551
|
+
default_part = remaining
|
|
1552
|
+
rest = ""
|
|
1553
|
+
if "," in remaining:
|
|
1554
|
+
default_part, rest = remaining.split(",", 1)
|
|
1555
|
+
default_name = default_part.strip()
|
|
1556
|
+
if default_name and re.fullmatch(r"[A-Za-z_$][\w$]*", default_name):
|
|
1557
|
+
bindings.append(
|
|
1558
|
+
JSImportBinding(
|
|
1559
|
+
local_name=default_name,
|
|
1560
|
+
imported_name="default",
|
|
1561
|
+
module=module,
|
|
1562
|
+
line=line,
|
|
1563
|
+
kind="default",
|
|
1564
|
+
)
|
|
1565
|
+
)
|
|
1566
|
+
rest = rest.strip()
|
|
1567
|
+
if rest.startswith("{") and "}" in rest:
|
|
1568
|
+
named_text = rest[1:rest.rfind("}")]
|
|
1569
|
+
for imported_name, local_name in self._parse_named_clause(named_text):
|
|
1570
|
+
bindings.append(JSImportBinding(local_name, imported_name, module, line, "named"))
|
|
1571
|
+
elif rest.startswith("*"):
|
|
1572
|
+
namespace_match = re.match(r"\*\s+as\s+([A-Za-z_$][\w$]*)", rest)
|
|
1573
|
+
if namespace_match:
|
|
1574
|
+
bindings.append(
|
|
1575
|
+
JSImportBinding(
|
|
1576
|
+
local_name=namespace_match.group(1),
|
|
1577
|
+
imported_name="*",
|
|
1578
|
+
module=module,
|
|
1579
|
+
line=line,
|
|
1580
|
+
kind="namespace",
|
|
1581
|
+
)
|
|
1582
|
+
)
|
|
1583
|
+
return bindings
|
|
1584
|
+
|
|
1585
|
+
def _parse_named_clause(self, text: str) -> list[tuple[str, str]]:
|
|
1586
|
+
pairs: list[tuple[str, str]] = []
|
|
1587
|
+
for raw_item in text.split(","):
|
|
1588
|
+
item = raw_item.strip()
|
|
1589
|
+
if not item:
|
|
1590
|
+
continue
|
|
1591
|
+
item = re.sub(r"^type\s+", "", item)
|
|
1592
|
+
parts = re.split(r"\s+as\s+", item, maxsplit=1)
|
|
1593
|
+
if len(parts) == 2:
|
|
1594
|
+
source_name, exported_name = parts[0].strip(), parts[1].strip()
|
|
1595
|
+
else:
|
|
1596
|
+
source_name = exported_name = item
|
|
1597
|
+
if re.fullmatch(r"[A-Za-z_$][\w$]*", source_name) and re.fullmatch(r"[A-Za-z_$][\w$]*", exported_name):
|
|
1598
|
+
pairs.append((source_name, exported_name))
|
|
1599
|
+
return pairs
|
|
1600
|
+
|
|
1601
|
+
def _parse_commonjs_object_clause(self, text: str) -> list[tuple[str, str]]:
|
|
1602
|
+
pairs: list[tuple[str, str]] = []
|
|
1603
|
+
for raw_item in text.split(","):
|
|
1604
|
+
item = raw_item.strip()
|
|
1605
|
+
if not item:
|
|
1606
|
+
continue
|
|
1607
|
+
parts = [part.strip() for part in item.split(":", 1)]
|
|
1608
|
+
if len(parts) == 2:
|
|
1609
|
+
source_name, local_name = parts
|
|
1610
|
+
else:
|
|
1611
|
+
source_name = local_name = parts[0]
|
|
1612
|
+
if re.fullmatch(r"[A-Za-z_$][\w$]*", source_name) and re.fullmatch(r"[A-Za-z_$][\w$]*", local_name):
|
|
1613
|
+
pairs.append((source_name, local_name))
|
|
1614
|
+
return pairs
|
|
1615
|
+
|
|
1616
|
+
@staticmethod
|
|
1617
|
+
def _line_number(text: str, offset: int) -> int:
|
|
1618
|
+
return text.count("\n", 0, offset) + 1
|
|
1619
|
+
|
|
1620
|
+
# ── 内部辅助 ────────────────────────────────────────────────────────────────
|
|
1621
|
+
|
|
1622
|
+
def _run_query(self, query: Any, root: Any) -> list[tuple[str, Any]]:
    """Run a tree-sitter query and normalise the result.

    Requires tree-sitter >= 0.22 (uses ``QueryCursor``).

    Returns:
        ``(capture_name, node)`` pairs. Any failure is logged at debug level
        and produces an empty list.
    """
    try:
        from tree_sitter import QueryCursor  # type: ignore
        raw = QueryCursor(query).captures(root)

        if isinstance(raw, dict):
            # Newer format: dict[capture_name, list[Node]]
            return [
                (cap_name, n)
                for cap_name, nodes in raw.items()
                for n in (nodes if isinstance(nodes, list) else [nodes])
            ]
        # Older format: list[(Node, capture_name)]
        return [
            (entry[1], entry[0])
            for entry in raw
            if isinstance(entry, (list, tuple)) and len(entry) == 2
        ]
    except Exception as e:
        logger.debug(f"Query run error: {e}")
        return []
|
|
1650
|
+
|
|
1651
|
+
@staticmethod
|
|
1652
|
+
def _within(child: Any, parent: Any) -> bool:
|
|
1653
|
+
return (child.start_point >= parent.start_point and
|
|
1654
|
+
child.end_point <= parent.end_point)
|
|
1655
|
+
|
|
1656
|
+
@staticmethod
|
|
1657
|
+
def _text(node: Any) -> str:
|
|
1658
|
+
return node.text.decode("utf-8") if getattr(node, "text", None) else ""
|
|
1659
|
+
|
|
1660
|
+
def _docstring(self, node: Any, lang: str) -> str:
|
|
1661
|
+
if not node:
|
|
1662
|
+
return ""
|
|
1663
|
+
try:
|
|
1664
|
+
if lang == "python":
|
|
1665
|
+
for child in node.children:
|
|
1666
|
+
if child.type == "expression_statement":
|
|
1667
|
+
for sub in child.children:
|
|
1668
|
+
if sub.type == "string":
|
|
1669
|
+
return self._text(sub).strip("\"'` \n")
|
|
1670
|
+
elif lang in ("javascript", "typescript", "go", "rust"):
|
|
1671
|
+
prev = getattr(node, "prev_sibling", None)
|
|
1672
|
+
if prev and "comment" in prev.type:
|
|
1673
|
+
return self._text(prev).lstrip("/* \n").rstrip("*/ \n")
|
|
1674
|
+
except Exception:
|
|
1675
|
+
pass
|
|
1676
|
+
return ""
|
|
1677
|
+
|
|
1678
|
+
def _signature(self, node: Any, lang: str) -> str:
|
|
1679
|
+
if not node:
|
|
1680
|
+
return ""
|
|
1681
|
+
try:
|
|
1682
|
+
first_line = self._text(node).split("\n")[0]
|
|
1683
|
+
patterns = {
|
|
1684
|
+
"python": r"(?:async\s+)?def\s+\w+\s*\([^)]*\)(?:\s*->\s*[^:]+)?",
|
|
1685
|
+
"javascript": r"(?:async\s+)?(?:function\s+\w+|(?:const|let|var)\s+\w+\s*=\s*(?:async\s*)?\([^)]*\)\s*=>)",
|
|
1686
|
+
"typescript": r"(?:async\s+)?(?:function\s+\w+|(?:const|let|var)\s+\w+\s*=\s*(?:async\s*)?\([^)]*\)(?:\s*:\s*\S+)?\s*=>)",
|
|
1687
|
+
"rust": r"(?:pub\s+)?(?:async\s+)?fn\s+\w+(?:<[^>]*>)?\s*\([^)]*\)(?:\s*->\s*[^{]+)?",
|
|
1688
|
+
"go": r"func\s+(?:\([^)]+\)\s+)?\w+\s*\([^)]*\)(?:\s*\([^)]*\))?(?:\s*[^{]+)?",
|
|
1689
|
+
}
|
|
1690
|
+
pat = patterns.get(lang, "")
|
|
1691
|
+
if pat:
|
|
1692
|
+
m = re.search(pat, first_line)
|
|
1693
|
+
if m:
|
|
1694
|
+
return m.group(0).strip()
|
|
1695
|
+
except Exception:
|
|
1696
|
+
pass
|
|
1697
|
+
return ""
|