context-mcp-server 1.0.2 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +12 -16
- package/codegraph/__pycache__/config.cpython-313.pyc +0 -0
- package/codegraph/__pycache__/scanner.cpython-313.pyc +0 -0
- package/codegraph/__pycache__/server.cpython-313.pyc +0 -0
- package/codegraph/config.py +139 -22
- package/codegraph/extractors/__pycache__/ast_extractor.cpython-313.pyc +0 -0
- package/codegraph/extractors/__pycache__/build_extractor.cpython-313.pyc +0 -0
- package/codegraph/extractors/ast_extractor.py +392 -176
- package/codegraph/extractors/build_extractor.py +68 -0
- package/codegraph/scanner.py +5 -21
- package/codegraph/server.py +32 -229
- package/package.json +1 -1
- package/src/templates/AGENTS.md +8 -15
- package/src/templates/CLAUDE.md +15 -27
- package/src/templates/GEMINI.md +7 -14
|
@@ -1,222 +1,438 @@
|
|
|
1
1
|
"""
|
|
2
|
-
ast_extractor.py — extract nodes from code files
|
|
3
|
-
|
|
4
|
-
Falls back to regex if tree-sitter grammars aren't installed.
|
|
5
|
-
Each node: { id, name, type, file, line, docstring?, calls?, imports? }
|
|
2
|
+
ast_extractor.py — extract AST nodes from code files.
|
|
3
|
+
Tries tree-sitter first; falls back to regex if grammar not installed.
|
|
6
4
|
"""
|
|
7
5
|
|
|
6
|
+
from __future__ import annotations
|
|
8
7
|
import re
|
|
9
8
|
from pathlib import Path
|
|
9
|
+
from typing import Any
|
|
10
10
|
|
|
11
|
+
try:
|
|
12
|
+
from tree_sitter import Language, Parser, Node as TSNode
|
|
13
|
+
_TS_AVAILABLE = True
|
|
14
|
+
except ImportError:
|
|
15
|
+
_TS_AVAILABLE = False
|
|
11
16
|
|
|
12
|
-
# ──
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
"""
|
|
17
|
+
# ── Language registry ─────────────────────────────────────────────────────────
|
|
18
|
+
# pkg: importable package name fn: function returning Language object
|
|
19
|
+
_REGISTRY: dict[str, dict] = {
|
|
20
|
+
"python": {
|
|
21
|
+
"pkg": "tree_sitter_python", "fn": "language",
|
|
22
|
+
"ext": {".py", ".pyw"},
|
|
23
|
+
"function_types": {"function_definition"},
|
|
24
|
+
"class_types": {"class_definition"},
|
|
25
|
+
"import_types": {"import_statement", "import_from_statement"},
|
|
26
|
+
"call_types": {"call"},
|
|
27
|
+
"name_field": "name",
|
|
28
|
+
"call_field": "function",
|
|
29
|
+
},
|
|
30
|
+
"javascript": {
|
|
31
|
+
"pkg": "tree_sitter_javascript", "fn": "language",
|
|
32
|
+
"ext": {".js", ".mjs", ".cjs", ".jsx"},
|
|
33
|
+
"function_types": {
|
|
34
|
+
"function_declaration", "function_expression",
|
|
35
|
+
"arrow_function", "method_definition",
|
|
36
|
+
"generator_function_declaration",
|
|
37
|
+
},
|
|
38
|
+
"class_types": {"class_declaration", "class_expression"},
|
|
39
|
+
"import_types": {"import_statement"},
|
|
40
|
+
"call_types": {"call_expression"},
|
|
41
|
+
"name_field": "name",
|
|
42
|
+
"call_field": "function",
|
|
43
|
+
},
|
|
44
|
+
"typescript": {
|
|
45
|
+
"pkg": "tree_sitter_typescript", "fn": "language_typescript",
|
|
46
|
+
"ext": {".ts", ".mts", ".cts"},
|
|
47
|
+
"function_types": {
|
|
48
|
+
"function_declaration", "function_expression",
|
|
49
|
+
"arrow_function", "method_definition",
|
|
50
|
+
"method_signature", "abstract_method_signature",
|
|
51
|
+
},
|
|
52
|
+
"class_types": {
|
|
53
|
+
"class_declaration", "class_expression",
|
|
54
|
+
"interface_declaration", "type_alias_declaration",
|
|
55
|
+
},
|
|
56
|
+
"import_types": {"import_statement"},
|
|
57
|
+
"call_types": {"call_expression"},
|
|
58
|
+
"name_field": "name",
|
|
59
|
+
"call_field": "function",
|
|
60
|
+
},
|
|
61
|
+
"tsx": {
|
|
62
|
+
"pkg": "tree_sitter_typescript", "fn": "language_tsx",
|
|
63
|
+
"ext": {".tsx"},
|
|
64
|
+
"function_types": {
|
|
65
|
+
"function_declaration", "function_expression",
|
|
66
|
+
"arrow_function", "method_definition",
|
|
67
|
+
},
|
|
68
|
+
"class_types": {"class_declaration", "interface_declaration"},
|
|
69
|
+
"import_types": {"import_statement"},
|
|
70
|
+
"call_types": {"call_expression"},
|
|
71
|
+
"name_field": "name",
|
|
72
|
+
"call_field": "function",
|
|
73
|
+
},
|
|
74
|
+
"go": {
|
|
75
|
+
"pkg": "tree_sitter_go", "fn": "language",
|
|
76
|
+
"ext": {".go"},
|
|
77
|
+
"function_types": {"function_declaration", "method_declaration"},
|
|
78
|
+
"class_types": {"type_declaration"},
|
|
79
|
+
"import_types": {"import_declaration"},
|
|
80
|
+
"call_types": {"call_expression"},
|
|
81
|
+
"name_field": "name",
|
|
82
|
+
"call_field": "function",
|
|
83
|
+
},
|
|
84
|
+
"rust": {
|
|
85
|
+
"pkg": "tree_sitter_rust", "fn": "language",
|
|
86
|
+
"ext": {".rs"},
|
|
87
|
+
"function_types": {"function_item"},
|
|
88
|
+
"class_types": {"struct_item", "enum_item", "trait_item", "impl_item"},
|
|
89
|
+
"import_types": {"use_declaration"},
|
|
90
|
+
"call_types": {"call_expression"},
|
|
91
|
+
"name_field": "name",
|
|
92
|
+
"call_field": "function",
|
|
93
|
+
},
|
|
94
|
+
"java": {
|
|
95
|
+
"pkg": "tree_sitter_java", "fn": "language",
|
|
96
|
+
"ext": {".java"},
|
|
97
|
+
"function_types": {"method_declaration", "constructor_declaration"},
|
|
98
|
+
"class_types": {
|
|
99
|
+
"class_declaration", "interface_declaration",
|
|
100
|
+
"enum_declaration", "annotation_type_declaration",
|
|
101
|
+
},
|
|
102
|
+
"import_types": {"import_declaration"},
|
|
103
|
+
"call_types": {"method_invocation"},
|
|
104
|
+
"name_field": "name",
|
|
105
|
+
"call_field": "name",
|
|
106
|
+
},
|
|
107
|
+
"kotlin": {
|
|
108
|
+
"pkg": "tree_sitter_kotlin", "fn": "language",
|
|
109
|
+
"ext": {".kt"},
|
|
110
|
+
"function_types": {"function_declaration", "anonymous_function"},
|
|
111
|
+
"class_types": {"class_declaration", "interface_declaration", "object_declaration"},
|
|
112
|
+
"import_types": {"import_header"},
|
|
113
|
+
"call_types": {"call_expression"},
|
|
114
|
+
"name_field": "simple_identifier",
|
|
115
|
+
"call_field": "call_suffix",
|
|
116
|
+
},
|
|
117
|
+
"c": {
|
|
118
|
+
"pkg": "tree_sitter_c", "fn": "language",
|
|
119
|
+
"ext": {".c", ".h"},
|
|
120
|
+
"function_types": {"function_definition"},
|
|
121
|
+
"class_types": {"struct_specifier", "enum_specifier", "union_specifier"},
|
|
122
|
+
"import_types": {"preproc_include"},
|
|
123
|
+
"call_types": {"call_expression"},
|
|
124
|
+
"name_field": "declarator",
|
|
125
|
+
"call_field": "function",
|
|
126
|
+
},
|
|
127
|
+
"cpp": {
|
|
128
|
+
"pkg": "tree_sitter_cpp", "fn": "language",
|
|
129
|
+
"ext": {".cpp", ".cc", ".cxx", ".hpp", ".hh"},
|
|
130
|
+
"function_types": {"function_definition"},
|
|
131
|
+
"class_types": {
|
|
132
|
+
"class_specifier", "struct_specifier",
|
|
133
|
+
"enum_specifier", "namespace_definition",
|
|
134
|
+
},
|
|
135
|
+
"import_types": {"preproc_include"},
|
|
136
|
+
"call_types": {"call_expression"},
|
|
137
|
+
"name_field": "declarator",
|
|
138
|
+
"call_field": "function",
|
|
139
|
+
},
|
|
140
|
+
"csharp": {
|
|
141
|
+
"pkg": "tree_sitter_c_sharp", "fn": "language",
|
|
142
|
+
"ext": {".cs"},
|
|
143
|
+
"function_types": {"method_declaration", "constructor_declaration", "local_function_statement"},
|
|
144
|
+
"class_types": {
|
|
145
|
+
"class_declaration", "interface_declaration",
|
|
146
|
+
"struct_declaration", "enum_declaration", "record_declaration",
|
|
147
|
+
},
|
|
148
|
+
"import_types": {"using_directive"},
|
|
149
|
+
"call_types": {"invocation_expression"},
|
|
150
|
+
"name_field": "name",
|
|
151
|
+
"call_field": "expression",
|
|
152
|
+
},
|
|
153
|
+
"ruby": {
|
|
154
|
+
"pkg": "tree_sitter_ruby", "fn": "language",
|
|
155
|
+
"ext": {".rb", ".rake"},
|
|
156
|
+
"function_types": {"method", "singleton_method"},
|
|
157
|
+
"class_types": {"class", "module"},
|
|
158
|
+
"import_types": set(),
|
|
159
|
+
"call_types": {"call"},
|
|
160
|
+
"name_field": "name",
|
|
161
|
+
"call_field": "method",
|
|
162
|
+
},
|
|
163
|
+
"php": {
|
|
164
|
+
"pkg": "tree_sitter_php", "fn": "language",
|
|
165
|
+
"ext": {".php"},
|
|
166
|
+
"function_types": {"function_definition", "method_declaration"},
|
|
167
|
+
"class_types": {"class_declaration", "interface_declaration", "trait_declaration"},
|
|
168
|
+
"import_types": {"namespace_use_declaration"},
|
|
169
|
+
"call_types": {"function_call_expression", "member_call_expression"},
|
|
170
|
+
"name_field": "name",
|
|
171
|
+
"call_field": "function",
|
|
172
|
+
},
|
|
173
|
+
"swift": {
|
|
174
|
+
"pkg": "tree_sitter_swift", "fn": "language",
|
|
175
|
+
"ext": {".swift"},
|
|
176
|
+
"function_types": {"function_declaration"},
|
|
177
|
+
"class_types": {
|
|
178
|
+
"class_declaration", "struct_declaration",
|
|
179
|
+
"protocol_declaration", "extension_declaration",
|
|
180
|
+
},
|
|
181
|
+
"import_types": {"import_declaration"},
|
|
182
|
+
"call_types": {"call_expression"},
|
|
183
|
+
"name_field": "name",
|
|
184
|
+
"call_field": "function",
|
|
185
|
+
},
|
|
186
|
+
"lua": {
|
|
187
|
+
"pkg": "tree_sitter_lua", "fn": "language",
|
|
188
|
+
"ext": {".lua", ".luau"},
|
|
189
|
+
"function_types": {"function_declaration", "local_function"},
|
|
190
|
+
"class_types": set(),
|
|
191
|
+
"import_types": set(),
|
|
192
|
+
"call_types": {"function_call"},
|
|
193
|
+
"name_field": "name",
|
|
194
|
+
"call_field": "name",
|
|
195
|
+
},
|
|
196
|
+
"dart": {
|
|
197
|
+
"pkg": "tree_sitter_dart", "fn": "language",
|
|
198
|
+
"ext": {".dart"},
|
|
199
|
+
"function_types": {"function_signature", "method_signature"},
|
|
200
|
+
"class_types": {"class_definition", "mixin_declaration"},
|
|
201
|
+
"import_types": {"import_or_export"},
|
|
202
|
+
"call_types": {"invocation_expression"},
|
|
203
|
+
"name_field": "name",
|
|
204
|
+
"call_field": "function_expression",
|
|
205
|
+
},
|
|
206
|
+
}
|
|
37
207
|
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
208
|
+
# Extension → language config lookup
|
|
209
|
+
# Maps each file extension (e.g. ".py") to the registry entry of the language
# that declares it.  Entries are visited in registry order, so if two
# languages ever declared the same extension the later one would win.
_EXT_TO_LANG: dict[str, dict] = {
    ext: cfg
    for cfg in _REGISTRY.values()
    for ext in cfg["ext"]
}
|
|
41
213
|
|
|
42
|
-
|
|
43
|
-
[
|
|
44
|
-
(function_declaration name: (identifier) @name)
|
|
45
|
-
(method_definition name: (property_identifier) @name)
|
|
46
|
-
] @func
|
|
47
|
-
"""
|
|
214
|
+
# ── Grammar cache ─────────────────────────────────────────────────────────────
|
|
215
|
+
_GRAMMAR_CACHE: dict[str, Any] = {}
|
|
48
216
|
|
|
49
217
|
|
|
50
|
-
def
|
|
51
|
-
|
|
52
|
-
if
|
|
218
|
+
def _get_language(cfg: dict) -> "Language | None":
    """Return the tree-sitter ``Language`` for a registry entry, or ``None``.

    Args:
        cfg: A language entry from ``_REGISTRY``; only ``cfg["pkg"]`` (the
            importable grammar package) and ``cfg["fn"]`` (the name of the
            factory function inside that package) are read.

    Returns:
        The loaded ``Language``, or ``None`` when tree-sitter itself is not
        installed or the grammar could not be loaded.  Both outcomes of a
        load attempt are memoised in ``_GRAMMAR_CACHE`` so a failing grammar
        is only tried once per process.
    """
    if not _TS_AVAILABLE:
        # ``Language`` was never imported; say so explicitly instead of
        # relying on a swallowed NameError further down.
        return None

    key = f"{cfg['pkg']}.{cfg['fn']}"
    if key in _GRAMMAR_CACHE:
        return _GRAMMAR_CACHE[key]

    import importlib

    try:
        mod = importlib.import_module(cfg["pkg"])
        lang = Language(getattr(mod, cfg["fn"])())
    except Exception:
        # Deliberately broad: any failure while importing or constructing the
        # grammar simply means "not usable"; callers fall back to regex.
        lang = None

    _GRAMMAR_CACHE[key] = lang
    return lang
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
# ── Tree walker ───────────────────────────────────────────────────────────────
|
|
234
|
+
|
|
235
|
+
def _walk(node: "TSNode", target_types: set[str]):
    """Yield every node of the subtree rooted at ``node`` whose type matches.

    Nodes are produced in pre-order (a parent before its children, children
    left to right).  Matching nodes are still descended into, so nested
    matches are reported as well.

    The traversal uses an explicit stack rather than recursion so that very
    deeply nested syntax trees cannot hit Python's recursion limit.

    Args:
        node: Root of the subtree; must expose ``type`` and ``children``.
        target_types: Node type names to yield.

    Yields:
        Each matching node, including ``node`` itself when it matches.
    """
    pending = [node]
    while pending:
        current = pending.pop()
        if current.type in target_types:
            yield current
        # Push children reversed so the leftmost child is popped first,
        # which keeps the original left-to-right pre-order.
        pending.extend(reversed(current.children))
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
def _get_name(node: "TSNode", name_field: str) -> str | None:
|
|
243
|
+
named = node.child_by_field_name(name_field)
|
|
244
|
+
if named:
|
|
245
|
+
return named.text.decode("utf-8", errors="ignore").strip()
|
|
246
|
+
for child in node.children:
|
|
247
|
+
if child.type in {"identifier", "name", "simple_identifier",
|
|
248
|
+
"property_identifier", "type_identifier"}:
|
|
249
|
+
return child.text.decode("utf-8", errors="ignore").strip()
|
|
250
|
+
return None
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
def _get_call_name(node: "TSNode", call_field: str) -> str | None:
|
|
254
|
+
func = node.child_by_field_name(call_field)
|
|
255
|
+
if not func:
|
|
256
|
+
return None
|
|
257
|
+
text = func.text.decode("utf-8", errors="ignore").strip()
|
|
258
|
+
return text.split(".")[-1] if "." in text else text
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
def _find_enclosing_function(
|
|
262
|
+
node: "TSNode",
|
|
263
|
+
function_types: set[str],
|
|
264
|
+
name_field: str,
|
|
265
|
+
) -> str | None:
|
|
266
|
+
parent = node.parent
|
|
267
|
+
while parent:
|
|
268
|
+
if parent.type in function_types:
|
|
269
|
+
return _get_name(parent, name_field)
|
|
270
|
+
parent = parent.parent
|
|
271
|
+
return None
|
|
272
|
+
|
|
273
|
+
|
|
274
|
+
def _extract_with_treesitter(source: bytes, rel_path: str, cfg: dict) -> list[dict]:
    """Extract function and class nodes from ``source`` using tree-sitter.

    Args:
        source: Raw file contents.
        rel_path: Path recorded in each node's ``file`` and used as the
            prefix of its ``id``.
        cfg: The language's ``_REGISTRY`` entry; ``function_types``,
            ``class_types`` and ``name_field`` are read here.

    Returns:
        A list of ``{"id", "name", "type", "file", "line"}`` dicts, all
        functions first and then all classes, each group in tree order.
        ``line`` is 1-based.  Returns ``[]`` when the grammar is unavailable.

    De-duplication is done on the node id (``rel_path::type::name``).  A
    function and a class that share a name therefore both survive: for
    example a class together with its same-named constructor, when the
    grammar's constructor node type is listed in ``function_types``.
    Repeated definitions of the same kind and name still collapse to the
    first one, which keeps ids unique.
    """
    lang = _get_language(cfg)
    if lang is None:
        return []

    parser = Parser(lang)
    tree = parser.parse(source)
    root = tree.root_node
    name_field = cfg["name_field"]

    nodes: list[dict] = []
    seen_ids: set[str] = set()

    for ntype, types_key in (("function", "function_types"), ("class", "class_types")):
        for ts_node in _walk(root, cfg[types_key]):
            name = _get_name(ts_node, name_field)
            if not name:
                continue
            node_id = f"{rel_path}::{ntype}::{name}"
            if node_id in seen_ids:
                continue
            seen_ids.add(node_id)
            nodes.append({
                "id": node_id,
                "name": name,
                "type": ntype,
                "file": rel_path,
                # start_point rows are 0-based; report 1-based line numbers.
                "line": ts_node.start_point[0] + 1,
            })

    return nodes
|
|
99
309
|
|
|
100
310
|
|
|
101
|
-
# ── Regex
|
|
311
|
+
# ── Regex fallbacks ───────────────────────────────────────────────────────────
|
|
102
312
|
|
|
103
|
-
|
|
313
|
+
_REGEX_PATTERNS: dict[str, dict[str, str | None]] = {
|
|
104
314
|
"python": {
|
|
105
|
-
"
|
|
106
|
-
"
|
|
107
|
-
"import": re.compile(r"^(?:import|from)\s+([\w.]+)", re.MULTILINE),
|
|
315
|
+
"function": r"^(?:async\s+)?def\s+([a-zA-Z_]\w*)\s*\(",
|
|
316
|
+
"class": r"^class\s+([a-zA-Z_]\w*)\s*[:\(]",
|
|
108
317
|
},
|
|
109
318
|
"javascript": {
|
|
110
|
-
"
|
|
111
|
-
"
|
|
112
|
-
|
|
319
|
+
"function": r"(?:function\s+([a-zA-Z_$]\w*)|([a-zA-Z_$]\w*)\s*[:=]\s*(?:async\s+)?(?:function|\([^)]*\)\s*=>))",
|
|
320
|
+
"class": r"class\s+([a-zA-Z_$]\w*)",
|
|
321
|
+
},
|
|
322
|
+
"typescript": {
|
|
323
|
+
"function": r"(?:function\s+([a-zA-Z_$]\w*)|([a-zA-Z_$]\w*)\s*[:=]\s*(?:async\s+)?(?:function|\([^)]*\)\s*=>))",
|
|
324
|
+
"class": r"(?:class|interface)\s+([a-zA-Z_$]\w*)",
|
|
113
325
|
},
|
|
114
326
|
"go": {
|
|
115
|
-
"function":
|
|
116
|
-
"
|
|
117
|
-
"import": re.compile(r'"([\w./]+)"', re.MULTILINE),
|
|
327
|
+
"function": r"^func\s+(?:\([^)]*\)\s+)?([a-zA-Z_]\w*)\s*\(",
|
|
328
|
+
"class": r"^type\s+([a-zA-Z_]\w*)\s+(?:struct|interface)",
|
|
118
329
|
},
|
|
119
330
|
"rust": {
|
|
120
|
-
"function":
|
|
121
|
-
"
|
|
122
|
-
"import": re.compile(r"^use\s+([\w:]+)", re.MULTILINE),
|
|
331
|
+
"function": r"^(?:pub\s+)?(?:async\s+)?fn\s+([a-zA-Z_]\w*)",
|
|
332
|
+
"class": r"^(?:pub\s+)?(?:struct|enum|trait|impl)\s+([a-zA-Z_]\w*)",
|
|
123
333
|
},
|
|
124
334
|
"java": {
|
|
125
|
-
"
|
|
126
|
-
"
|
|
127
|
-
|
|
335
|
+
"function": r"(?:public|private|protected|static|\s)+[\w<>\[\]]+\s+([a-zA-Z_]\w*)\s*\(",
|
|
336
|
+
"class": r"(?:class|interface|enum)\s+([a-zA-Z_]\w*)",
|
|
337
|
+
},
|
|
338
|
+
"c": {
|
|
339
|
+
"function": r"^[a-zA-Z_][\w\s\*]+\s+([a-zA-Z_]\w*)\s*\([^;]*\)\s*\{",
|
|
340
|
+
"class": r"^(?:struct|enum|union)\s+([a-zA-Z_]\w*)",
|
|
341
|
+
},
|
|
342
|
+
"cpp": {
|
|
343
|
+
"function": r"(?:[\w:~]+\s+)+([a-zA-Z_]\w*)\s*\([^;]*\)\s*(?:const\s*)?\{",
|
|
344
|
+
"class": r"(?:class|struct|enum|namespace)\s+([a-zA-Z_]\w*)",
|
|
128
345
|
},
|
|
129
346
|
"ruby": {
|
|
130
|
-
"
|
|
131
|
-
"
|
|
347
|
+
"function": r"^\s*def\s+([a-zA-Z_]\w*[?!]?)",
|
|
348
|
+
"class": r"^\s*(?:class|module)\s+([A-Z]\w*)",
|
|
349
|
+
},
|
|
350
|
+
"csharp": {
|
|
351
|
+
"function": r"(?:public|private|protected|static|\s)+[\w<>\[\]]+\s+([a-zA-Z_]\w*)\s*\(",
|
|
352
|
+
"class": r"(?:class|interface|struct|enum|record)\s+([a-zA-Z_]\w*)",
|
|
353
|
+
},
|
|
354
|
+
"php": {
|
|
355
|
+
"function": r"^\s*(?:public|private|protected|static|\s)*function\s+([a-zA-Z_]\w*)",
|
|
356
|
+
"class": r"^\s*(?:abstract\s+)?(?:class|interface|trait)\s+([a-zA-Z_]\w*)",
|
|
132
357
|
},
|
|
133
|
-
"
|
|
134
|
-
"
|
|
135
|
-
"
|
|
358
|
+
"swift": {
|
|
359
|
+
"function": r"^\s*(?:public|private|internal|open|\s)*func\s+([a-zA-Z_]\w*)",
|
|
360
|
+
"class": r"^\s*(?:public|private|internal|open|\s)*(?:class|struct|protocol|extension|enum)\s+([a-zA-Z_]\w*)",
|
|
136
361
|
},
|
|
137
|
-
|
|
362
|
+
"lua": {
|
|
363
|
+
"function": r"(?:local\s+)?function\s+([a-zA-Z_]\w*)",
|
|
364
|
+
"class": None,
|
|
365
|
+
},
|
|
366
|
+
}
|
|
367
|
+
|
|
368
|
+
# File extension -> key into _REGEX_PATTERNS, used by the regex fallback.
# Every JavaScript/TypeScript extension declared in _REGISTRY is listed here
# too (.cjs, .mts, .cts included), so those files still get a fallback when
# their grammar is not installed.
# NOTE(review): ".kt" and ".tsx" intentionally reuse the Java / TypeScript
# patterns, presumably because no dedicated pattern set exists — confirm.
_EXT_TO_LANG_NAME: dict[str, str] = {
    ".py": "python", ".pyw": "python",
    ".js": "javascript", ".mjs": "javascript", ".cjs": "javascript",
    ".jsx": "javascript",
    ".ts": "typescript", ".mts": "typescript", ".cts": "typescript",
    ".tsx": "typescript",
    ".go": "go",
    ".rs": "rust",
    ".java": "java", ".kt": "java",
    ".c": "c", ".h": "c",
    ".cpp": "cpp", ".cc": "cpp", ".cxx": "cpp", ".hpp": "cpp", ".hh": "cpp",
    ".cs": "csharp",
    ".rb": "ruby", ".rake": "ruby",
    ".php": "php",
    ".swift": "swift",
    ".lua": "lua", ".luau": "lua",
}
|
|
139
383
|
|
|
140
384
|
|
|
141
|
-
def
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
return _wrap_in_module(rel_path, children, imports)
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
# ── Module wrapper ───────────────────────────────────────────────────────────
|
|
177
|
-
|
|
178
|
-
def _wrap_in_module(rel_path: str, children: list, imports: list) -> list:
|
|
385
|
+
def _extract_with_regex(source: str, rel_path: str, ext: str) -> list[dict]:
    """Extract function and class nodes line by line using regex patterns.

    Args:
        source: Decoded file contents.
        rel_path: Path recorded in each node's ``file`` and used as the
            prefix of its ``id``.
        ext: File extension including the dot; matched case-insensitively
            against ``_EXT_TO_LANG_NAME``.

    Returns:
        A list of ``{"id", "name", "type", "file", "line"}`` dicts in line
        order (``line`` is 1-based), or ``[]`` when the extension has no
        pattern set.

    Each line is tested against every pattern of the language; the node name
    is the first non-empty capture group.  De-duplication is done on the node
    id (``rel_path::type::name``), so a class and a function sharing a name
    are both kept while repeated definitions of the same kind collapse to the
    first occurrence.
    """
    lang = _EXT_TO_LANG_NAME.get(ext.lower())
    patterns = _REGEX_PATTERNS.get(lang) if lang else None
    if not patterns:
        return []

    # Compile once up front instead of resolving each pattern for every line;
    # entries set to None (no pattern for that node type) are skipped.
    compiled = [
        (ntype, re.compile(pattern))
        for ntype, pattern in patterns.items()
        if pattern
    ]

    nodes: list[dict] = []
    seen_ids: set[str] = set()

    for line_no, line in enumerate(source.splitlines(), 1):
        for ntype, regex in compiled:
            match = regex.search(line)
            if match is None:
                continue
            # Patterns with alternatives use several groups; take whichever
            # one actually captured.
            name = next((g for g in match.groups() if g), None)
            if not name:
                continue
            node_id = f"{rel_path}::{ntype}::{name}"
            if node_id in seen_ids:
                continue
            seen_ids.add(node_id)
            nodes.append({
                "id": node_id,
                "name": name,
                "type": ntype,
                "file": rel_path,
                "line": line_no,
            })
    return nodes
|
|
411
|
+
|
|
412
|
+
|
|
413
|
+
# ── Public interface ──────────────────────────────────────────────────────────
|
|
414
|
+
|
|
415
|
+
def extract(abs_path: str, rel_path: str) -> list[dict]:
    """Extract function/class nodes from a code file.

    Tree-sitter is tried first when it is installed and the extension has a
    registry entry; if that yields nothing (grammar missing, or no
    definitions found) the regex fallback is used instead.

    Args:
        abs_path: Location of the file on disk; its suffix selects the
            language.
        rel_path: Path recorded in the returned nodes.

    Returns:
        A list of node dicts, or ``[]`` when the extension is not handled by
        either extractor or the file cannot be read.
    """
    ext = Path(abs_path).suffix.lower()
    cfg = _EXT_TO_LANG.get(ext) if _TS_AVAILABLE else None

    # Neither extractor knows this extension: skip reading the file at all.
    if cfg is None and ext not in _EXT_TO_LANG_NAME:
        return []

    try:
        source_bytes = Path(abs_path).read_bytes()
    except (OSError, ValueError):
        # Unreadable or invalid path: best-effort, report no nodes.
        return []

    if cfg is not None:
        nodes = _extract_with_treesitter(source_bytes, rel_path, cfg)
        if nodes:
            return nodes

    # errors="ignore" drops undecodable bytes, so this decode cannot raise.
    source_text = source_bytes.decode("utf-8", errors="ignore")
    return _extract_with_regex(source_text, rel_path, ext)
|