codebase-mcp 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,303 @@
1
+ """TypeScript/JavaScript parser using tree-sitter."""
2
+
3
+ from __future__ import annotations
4
+ import logging
5
+ from ..models import ParseResult, ParsedSymbol, ParsedImport, ParsedCall
6
+ from .base import BaseParser, register, _load_language, _ts_captures, _make_query
7
+ from .generic import GenericParser
8
+
9
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Cached (language, parser) pairs as returned by _load_language().  They start
# out as None and are filled in by _get_lang() the first time the TypeScript or
# JavaScript grammar is requested.
_LANG_TS = None
_PARSER_TS = None
_LANG_JS = None
_PARSER_JS = None
15
+
16
+
17
def _get_lang(name: str):
    """Return the cached ``(language, parser)`` pair for *name*.

    ``"typescript"`` and ``"tsx"`` both resolve to the pair loaded via
    ``_load_language("typescript")``; every other name resolves to the pair
    loaded via ``_load_language("javascript")``.  A pair is loaded only while
    its cached language is still ``None`` and is then stored in the
    module-level cache variables.
    """
    global _LANG_TS, _PARSER_TS, _LANG_JS, _PARSER_JS

    wants_typescript = name in ("typescript", "tsx")

    # JavaScript is the catch-all for any name that is not a TypeScript flavour.
    if not wants_typescript:
        if _LANG_JS is None:
            _LANG_JS, _PARSER_JS = _load_language("javascript")
        return _LANG_JS, _PARSER_JS

    if _LANG_TS is None:
        _LANG_TS, _PARSER_TS = _load_language("typescript")
    return _LANG_TS, _PARSER_TS
27
+
28
+
29
# Tree-sitter query for function-like nodes.  Function declarations, function
# expressions and generator function declarations are captured as @func,
# method definitions as @method and arrow functions as @arrow.
_FUNC_Q = """
(function_declaration) @func
(function_expression) @func
(generator_function_declaration) @func
(method_definition) @method
(arrow_function) @arrow
"""

# Tree-sitter query capturing class declarations and abstract class
# declarations as @cls.
_CLASS_Q = """
(class_declaration) @cls
(abstract_class_declaration) @cls
"""

# Tree-sitter query capturing interface declarations as @iface.
_INTERFACE_Q = """
(interface_declaration) @iface
"""

# Tree-sitter query for module references.
#   * Pattern 1: an import statement (@imp) together with its source string
#     (@source).
#   * Pattern 2: a call expression (@require) whose callee is a bare identifier
#     (@req_fn) and whose argument list directly contains a string (@source).
#     The query itself does not constrain the callee's name, so any such call
#     matches here, not only `require(...)`.
_IMPORT_Q = """
(import_statement
source: (string) @source) @imp

(call_expression
function: (identifier) @req_fn
arguments: (arguments (string) @source)) @require
"""

# Tree-sitter query capturing call expressions as @call.  The callee (@callee)
# is either the bare identifier being called or, for a member expression, the
# property name only (the receiver object is not captured).
_CALL_Q = """
(call_expression
function: [(identifier) @callee
(member_expression property: (property_identifier) @callee)]) @call
"""
60
+
61
+
62
@register("javascript")
@register("typescript")
class TypeScriptParser(BaseParser):
    """Extract symbols, imports and calls from TypeScript/JavaScript sources.

    Sources are parsed with tree-sitter.  When the grammar cannot be loaded,
    or when parsing/extraction raises, the work is delegated to
    ``GenericParser`` so callers always receive a ``ParseResult``.
    """

    language_name = "typescript"

    # File suffixes parsed with the TypeScript grammar; every other path is
    # treated as JavaScript.
    _TS_SUFFIXES = (".ts", ".tsx", ".mts", ".cts")

    # Node types that open a class scope.  "class" is the name the tree-sitter
    # JavaScript/TypeScript grammars give to class *expressions*;
    # "class_expression" is kept for compatibility with the previous code.
    _CLASS_NODE_TYPES = (
        "class_declaration",
        "class_expression",
        "class",
        "abstract_class_declaration",
    )

    # Built-in globals whose calls are not recorded.  The set is checked both
    # against a bare callee (``require(...)``) and against the receiver of a
    # member call (``console.log(...)``).
    _SKIP_CALLS = frozenset({
        "console", "require", "import", "exports", "module",
        "Array", "Object", "String", "Number", "Boolean",
        "Promise", "Error", "JSON", "Math", "Symbol", "Date",
    })

    def parse(self, path: str, content: str) -> ParseResult:
        """Parse *content* (the text of the file at *path*) into a ``ParseResult``.

        Args:
            path: File path; only its suffix is used, to choose between the
                TypeScript and JavaScript grammars.
            content: Full source text of the file.

        Returns:
            The tree-sitter based result, or the ``GenericParser`` result
            (with ``language`` overridden, and ``parse_error`` set when an
            exception occurred) if tree-sitter is unavailable or fails.
        """
        # NOTE(review): ".tsx" files use whatever _load_language("typescript")
        # returns — confirm that grammar accepts JSX syntax.
        lang_name = "typescript" if path.endswith(self._TS_SUFFIXES) else "javascript"
        lang, parser = _get_lang(lang_name)
        if lang is None:
            log.debug("tree-sitter %s not available, falling back to generic parser", lang_name)
            r = GenericParser().parse(path, content)
            r.language = lang_name
            return r

        try:
            cb = content.encode("utf-8")
            tree = parser.parse(cb)
            return self._extract(content, cb, tree.root_node, lang, lang_name)
        except Exception as e:
            log.warning("TS/JS parse error in %s: %s", path, e)
            r = GenericParser().parse(path, content)
            r.language = lang_name
            r.parse_error = str(e)
            return r

    def _extract(self, content: str, cb: bytes, root, lang, lang_name: str) -> ParseResult:
        """Build a ``ParseResult`` from the parsed tree rooted at *root*.

        Extraction runs in independent stages (classes, interfaces, functions,
        imports, calls).  A stage that raises is logged at debug level and
        skipped so the remaining stages still contribute to the result.

        Args:
            content: Source text; used only for the line count.
            cb: UTF-8 encoding of *content*; node byte offsets index into it.
            root: Root node of the tree-sitter parse tree.
            lang: Tree-sitter language object used to compile queries.
            lang_name: ``"typescript"`` or ``"javascript"``.
        """
        result = ParseResult(language=lang_name, line_count=content.count("\n") + 1)

        # (start_byte, end_byte, name) of every named function-like node; the
        # call stage uses it to attribute each call to its enclosing function,
        # so the function stage must run before the call stage.
        func_byte_ranges: list[tuple[int, int, str]] = []

        stages = (
            ("class", self._collect_classes),
            ("interface", self._collect_interfaces),
            ("function", self._collect_functions),
            ("import", self._collect_imports),
            ("call", self._collect_calls),
        )
        for label, stage in stages:
            try:
                stage(result, cb, root, lang, func_byte_ranges)
            except Exception as exc:
                # Best-effort extraction: keep going, but leave a trace.
                log.debug("TS/JS %s extraction failed (%s): %s", label, lang_name, exc)

        return result

    # ── Extraction stages ──────────────────────────────────────────────────

    def _collect_classes(self, result, cb: bytes, root, lang, func_byte_ranges) -> None:
        """Append a ``class`` symbol for every named class declaration."""
        caps = self._captures_by_pattern(lang, _CLASS_Q, root, ("cls",))
        for cls_node in caps["cls"]:
            name = self._child_text(cls_node, cb, ("identifier", "type_identifier"))
            if not name:
                continue
            result.symbols.append(ParsedSymbol(
                kind="class",
                name=name,
                line_start=cls_node.start_point[0] + 1,
                line_end=cls_node.end_point[0] + 1,
                signature=self._first_line(cls_node, cb),
                is_exported=self._is_exported(cls_node, cb),
            ))

    def _collect_interfaces(self, result, cb: bytes, root, lang, func_byte_ranges) -> None:
        """Append an ``interface`` symbol for every named interface declaration."""
        caps = self._captures_by_pattern(lang, _INTERFACE_Q, root, ("iface",))
        for iface_node in caps["iface"]:
            name = self._child_text(iface_node, cb, ("type_identifier",))
            if not name:
                continue
            result.symbols.append(ParsedSymbol(
                kind="interface",
                name=name,
                line_start=iface_node.start_point[0] + 1,
                line_end=iface_node.end_point[0] + 1,
                signature=self._first_line(iface_node, cb),
                is_exported=self._is_exported(iface_node, cb),
            ))

    def _collect_functions(self, result, cb: bytes, root, lang, func_byte_ranges) -> None:
        """Append function/method symbols and record their byte ranges.

        Symbols are emitted in three passes, in this order: ``@func`` captures,
        ``@arrow`` captures, ``@method`` captures.  Nodes for which no name can
        be determined are skipped.
        """
        caps = self._captures_by_pattern(lang, _FUNC_Q, root, ("func", "arrow", "method"))

        for func_node in caps["func"]:
            # Anonymous function expressions take the name of the variable or
            # object key they are assigned to.
            name = (self._child_text(func_node, cb, ("identifier", "property_identifier"))
                    or self._binding_name(func_node, cb))
            if not name:
                continue
            parent_class = self._enclosing_class_name(func_node, cb)
            func_byte_ranges.append((func_node.start_byte, func_node.end_byte, name))
            result.symbols.append(ParsedSymbol(
                kind="method" if parent_class else "function",
                name=name,
                line_start=func_node.start_point[0] + 1,
                line_end=func_node.end_point[0] + 1,
                signature=self._first_line(func_node, cb),
                parent_name=parent_class,
                is_exported=self._is_exported(func_node, cb),
                is_async=self._is_async(func_node),
            ))

        # Arrow functions (const foo = () => ...) never carry their own name.
        for arrow_node in caps["arrow"]:
            name = self._binding_name(arrow_node, cb)
            if not name:
                continue
            func_byte_ranges.append((arrow_node.start_byte, arrow_node.end_byte, name))
            result.symbols.append(ParsedSymbol(
                kind="function",
                name=name,
                line_start=arrow_node.start_point[0] + 1,
                line_end=arrow_node.end_point[0] + 1,
                signature=self._first_line(arrow_node, cb),
                is_exported=self._is_exported(arrow_node, cb),
                is_async=self._is_async(arrow_node),
            ))

        for method_node in caps["method"]:
            name = self._child_text(
                method_node, cb,
                ("property_identifier", "identifier", "private_property_identifier"),
            )
            if not name:
                continue
            func_byte_ranges.append((method_node.start_byte, method_node.end_byte, name))
            result.symbols.append(ParsedSymbol(
                kind="method",
                name=name,
                line_start=method_node.start_point[0] + 1,
                line_end=method_node.end_point[0] + 1,
                signature=self._first_line(method_node, cb),
                parent_name=self._enclosing_class_name(method_node, cb),
                is_async=self._is_async(method_node),
            ))

    def _collect_imports(self, result, cb: bytes, root, lang, func_byte_ranges) -> None:
        """Append one ``ParsedImport`` per import statement and ``require`` call."""
        caps = _ts_captures(_make_query(lang, _IMPORT_Q), root)

        for imp_node in caps.get("imp", []):
            for child in imp_node.children:
                if child.type == "string":
                    result.imports.append(ParsedImport(
                        imported_name=self._text_of(child, cb).strip("\"'"),
                        line_number=imp_node.start_point[0] + 1,
                        import_kind="module",
                    ))

        # The query matches *any* call of a bare identifier with a string
        # argument, so the callee name has to be checked here.  A call may be
        # captured once per matching argument; handle each call node once.
        seen_calls: set[int] = set()
        for call_node in caps.get("require", []):
            if call_node.start_byte in seen_calls:
                continue
            seen_calls.add(call_node.start_byte)
            if self._child_text(call_node, cb, ("identifier",)) != "require":
                continue
            args = next((c for c in call_node.children if c.type == "arguments"), None)
            if args is None:
                continue
            for arg in args.children:
                if arg.type == "string":
                    result.imports.append(ParsedImport(
                        imported_name=self._text_of(arg, cb).strip("\"'"),
                        line_number=call_node.start_point[0] + 1,
                        import_kind="module",
                    ))
                    break  # only the first string argument names the module

    def _collect_calls(self, result, cb: bytes, root, lang, func_byte_ranges) -> None:
        """Append a ``ParsedCall`` for every call not targeting a skipped built-in."""
        caps = _ts_captures(_make_query(lang, _CALL_Q), root)
        for callee_node in caps.get("callee", []):
            callee = self._text_of(callee_node, cb)
            if callee in self._SKIP_CALLS or self._receiver_is_skipped(callee_node, cb):
                continue
            caller = self._smallest_enclosing(callee_node.start_byte, func_byte_ranges)
            result.calls.append(ParsedCall(
                callee_name=callee,
                caller_name=caller,
                line_number=callee_node.start_point[0] + 1,
            ))

    # ── Helpers ────────────────────────────────────────────────────────────

    def _captures_by_pattern(self, lang, query_src: str, root, capture_names) -> dict:
        """Run each line of *query_src* as its own query and merge the captures.

        *query_src* must hold one complete pattern per line.  Compiling the
        patterns separately means a node type unknown to the current grammar
        (for example a TypeScript-only node when parsing JavaScript) disables
        only its own pattern instead of the whole query.

        Returns:
            A dict with one list per name in *capture_names*, each sorted by
            the nodes' start byte.
        """
        merged: dict = {cap: [] for cap in capture_names}
        for line in query_src.splitlines():
            pattern = line.strip()
            if not pattern:
                continue
            try:
                caps = _ts_captures(_make_query(lang, pattern), root)
            except Exception as exc:
                log.debug("skipping tree-sitter pattern %r: %s", pattern, exc)
                continue
            for cap in capture_names:
                merged[cap].extend(caps.get(cap, []))
        # Restore document order across patterns.
        for nodes in merged.values():
            nodes.sort(key=lambda n: n.start_byte)
        return merged

    @staticmethod
    def _text_of(node, cb: bytes) -> str:
        """Return the source text covered by *node*."""
        return cb[node.start_byte:node.end_byte].decode()

    def _child_text(self, node, cb: bytes, types) -> str:
        """Return the text of the first direct child whose type is in *types*, or ``""``."""
        for child in node.children:
            if child.type in types:
                return self._text_of(child, cb)
        return ""

    @staticmethod
    def _is_async(node) -> bool:
        """Return True when *node* has a direct ``async`` child token."""
        return any(c.type == "async" for c in node.children)

    def _binding_name(self, node, cb: bytes) -> str:
        """Name an anonymous function after the variable or object key it is bound to.

        Handles ``const foo = <node>`` (variable declarator) and
        ``{ key: <node> }`` (object pair); returns ``""`` for anything else.
        """
        parent = node.parent
        if parent is None:
            return ""
        if parent.type == "variable_declarator":
            return self._child_text(parent, cb, ("identifier",))
        if parent.type == "pair":
            key = self._child_text(parent, cb, ("property_identifier", "string", "identifier"))
            return key.strip("\"'")
        return ""

    def _receiver_is_skipped(self, callee_node, cb: bytes) -> bool:
        """Return True for member calls whose receiver is a skipped built-in.

        For ``console.log(...)`` the captured callee is ``log``; the receiver
        ``console`` is the first child of the enclosing member expression.
        """
        if callee_node.type != "property_identifier":
            return False
        member = callee_node.parent
        if member is None or member.type != "member_expression" or not member.children:
            return False
        receiver = member.children[0]
        return receiver.type == "identifier" and self._text_of(receiver, cb) in self._SKIP_CALLS

    def _enclosing_class_name(self, node, cb: bytes) -> str | None:
        """Return the name of the nearest enclosing class, or ``None``.

        The walk stops at the first class-like ancestor.  If that class is
        anonymous the result is ``None`` rather than the name of some outer
        class the node does not directly belong to.
        """
        parent = node.parent
        while parent is not None:
            if parent.type in self._CLASS_NODE_TYPES:
                return self._child_text(parent, cb, ("identifier", "type_identifier")) or None
            parent = parent.parent
        return None

    def _is_exported(self, node, cb: bytes) -> bool:
        """Return True when *node* sits under an ``export`` statement.

        The upward walk ends at the first ``statement_block``: anything
        declared inside a block (such as a function body) is local to that
        block even if the surrounding declaration is exported.

        Args:
            node: Tree-sitter node to test.
            cb: Source bytes (unused; kept for interface compatibility).
        """
        parent = node.parent
        while parent is not None:
            if parent.type == "export_statement":
                return True
            if parent.type == "statement_block":
                return False
            parent = parent.parent
        return False