codegraph-ai 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,556 @@
1
+ """JavaScript / TypeScript source code adapter using tree-sitter.
2
+
3
+ Handles ``.js``, ``.mjs``, ``.cjs``, ``.ts``, ``.tsx`` and ``.jsx`` files.
4
+ Extracts:
5
+ - ``function`` declarations
6
+ - Arrow functions assigned to ``const`` / ``let`` / ``var``
7
+ - Class declarations and their methods
8
+ - Function calls
9
+ - ES module imports (``import … from``) and CommonJS ``require()``
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ from tree_sitter_language_pack import get_parser
15
+
16
+ from codegraph.adapters.base import BaseAdapter
17
+ from codegraph.models import (
18
+ CallInfo,
19
+ ParsedClass,
20
+ ParsedFunction,
21
+ ParsedImport,
22
+ ParseResult,
23
+ )
24
+
25
+
26
+ # ---------------------------------------------------------------------------
27
+ # Helpers
28
+ # ---------------------------------------------------------------------------
29
+
30
+
31
+ def _node_text(node) -> str:
32
+ """Return the UTF-8 text of a tree-sitter node."""
33
+ return node.text.decode("utf-8") if node and node.text else ""
34
+
35
+
36
+ def _extract_jsdoc(node) -> str:
37
+ """Extract a JSDoc comment preceding a node.
38
+
39
+ Looks at the previous sibling; if it is a ``comment`` whose text
40
+ starts with ``/**``, we strip the delimiters and leading ``*``'s.
41
+ """
42
+ prev = node.prev_sibling
43
+ if prev is None or prev.type != "comment":
44
+ # For method_definition inside class_body the comment is a sibling too
45
+ return ""
46
+ text = _node_text(prev)
47
+ if not text.startswith("/**"):
48
+ return ""
49
+ # Strip /** ... */
50
+ text = text[3:]
51
+ if text.endswith("*/"):
52
+ text = text[:-2]
53
+ lines = text.splitlines()
54
+ cleaned: list[str] = []
55
+ for line in lines:
56
+ line = line.strip()
57
+ if line.startswith("*"):
58
+ line = line[1:].strip()
59
+ if line.startswith("@"):
60
+ break # stop at first tag
61
+ if line:
62
+ cleaned.append(line)
63
+ return " ".join(cleaned)
64
+
65
+
66
+ def _build_js_signature(name: str, params_node, ret_node=None) -> str:
67
+ """Build a human-readable function signature."""
68
+ params = _node_text(params_node) if params_node else "()"
69
+ ret = ""
70
+ if ret_node:
71
+ ret = f": {_node_text(ret_node)}"
72
+ return f"function {name}{params}{ret}"
73
+
74
+
75
+ def _collect_calls(node, calls: list[CallInfo]) -> None:
76
+ """Recursively collect function calls with receiver context."""
77
+ if node.type == "call_expression":
78
+ func = node.child_by_field_name("function")
79
+ if func:
80
+ if func.type == "member_expression":
81
+ obj_node = func.child_by_field_name("object")
82
+ prop_node = func.child_by_field_name("property")
83
+ receiver = _node_text(obj_node) if obj_node else None
84
+ callee = _node_text(prop_node) if prop_node else _node_text(func)
85
+ if receiver and "." in receiver:
86
+ receiver = receiver.rsplit(".", 1)[-1]
87
+ if callee != "require":
88
+ calls.append(CallInfo(
89
+ callee_name=callee,
90
+ receiver=receiver,
91
+ raw_expression=_node_text(func),
92
+ ))
93
+ else:
94
+ callee = _node_text(func)
95
+ if callee != "require":
96
+ calls.append(CallInfo(
97
+ callee_name=callee,
98
+ receiver=None,
99
+ raw_expression=callee,
100
+ ))
101
+ for child in node.children:
102
+ _collect_calls(child, calls)
103
+
104
+
105
+ # ---------------------------------------------------------------------------
106
+ # Adapter
107
+ # ---------------------------------------------------------------------------
108
+
109
+
110
class JsAdapter(BaseAdapter):
    """Extract functions, classes, calls and imports from JS / TS files.

    Only top-level constructs (optionally wrapped in ``export``) are
    visited.  Nested function definitions are not emitted as separate
    entries; calls made inside them are attributed to the enclosing
    top-level function or method.
    """

    # File extension -> tree-sitter grammar name.
    _LANG_MAP = {
        ".js": "javascript",
        ".mjs": "javascript",
        ".cjs": "javascript",
        ".jsx": "javascript",
        ".ts": "typescript",
        ".tsx": "tsx",
    }

    def __init__(self) -> None:
        # Parsers are created lazily and cached per grammar name.
        self._parsers: dict[str, object] = {}

    def _get_parser(self, ext: str):
        """Return the cached parser for *ext*, creating it on first use.

        Extensions missing from ``_LANG_MAP`` fall back to the
        ``typescript`` grammar.
        """
        lang = self._LANG_MAP.get(ext, "typescript")
        if lang not in self._parsers:
            self._parsers[lang] = get_parser(lang)
        return self._parsers[lang]

    # -- BaseAdapter interface ------------------------------------------------

    def language_name(self) -> str:
        """Return the language identifier of this adapter."""
        return "javascript"

    def supported_extensions(self) -> list[str]:
        """Return the file extensions this adapter handles."""
        return list(self._LANG_MAP)

    def parse_file(self, source: bytes, file_path: str) -> ParseResult:
        """Parse *source* and return the functions, classes and imports found.

        Args:
            source: Raw file contents.
            file_path: Path of the file; used to pick the grammar and as
                the prefix of every qualified name.
        """
        # Match case-insensitively so "App.JS" / "Main.TSX" pick the right
        # grammar instead of silently falling back to TypeScript.
        lowered = file_path.lower()
        ext = next((e for e in self._LANG_MAP if lowered.endswith(e)), "")
        parser = self._get_parser(ext)
        tree = parser.parse(source)
        root = tree.root_node

        functions: list[ParsedFunction] = []
        classes: list[ParsedClass] = []
        imports: list[ParsedImport] = []

        self._walk(root, file_path, functions, classes, imports)
        return ParseResult(functions=functions, classes=classes, imports=imports)

    # -- Internal helpers -----------------------------------------------------

    def _walk(
        self,
        node,
        file_path: str,
        functions: list[ParsedFunction],
        classes: list[ParsedClass],
        imports: list[ParsedImport],
    ) -> None:
        """Walk top-level children and populate result lists."""
        for child in node.children:
            self._process_node(
                child, file_path, functions, classes, imports, class_name=None
            )

    def _process_node(
        self,
        child,
        file_path: str,
        functions: list[ParsedFunction],
        classes: list[ParsedClass],
        imports: list[ParsedImport],
        class_name: str | None,
    ) -> None:
        """Dispatch a single AST node to the matching extractor.

        Node types without a dedicated branch are ignored.
        """
        kind = child.type

        # --- function declaration ---
        if kind == "function_declaration":
            self._extract_function_decl(child, file_path, functions, class_name)

        # --- const/let/var = arrow / function expression / require() ---
        elif kind in ("lexical_declaration", "variable_declaration"):
            self._extract_lexical(child, file_path, functions, imports)

        # --- class declaration ---
        elif kind == "class_declaration":
            self._extract_class(child, file_path, functions, classes)

        # --- export statement: unwrap and recurse ---
        elif kind == "export_statement":
            for inner in child.children:
                self._process_node(
                    inner, file_path, functions, classes, imports, class_name
                )

        # --- imports ---
        elif kind == "import_statement":
            self._extract_import(child, file_path, imports)

        # --- expression_statement: may contain an exported assignment ---
        elif kind == "expression_statement":
            for inner in child.children:
                if inner.type == "assignment_expression":
                    self._extract_assignment_func(inner, file_path, functions)

    def _add_function(
        self,
        name: str,
        func_node,
        span_node,
        file_path: str,
        functions: list[ParsedFunction],
        class_name: str | None,
    ) -> None:
        """Build a ``ParsedFunction`` and append it to *functions*.

        Args:
            name: Name to record for the function.
            func_node: Node carrying the ``parameters`` / ``return_type`` /
                ``body`` fields.
            span_node: Node whose extent gives the start/end lines and
                whose preceding comment is used as documentation.  This is
                the surrounding declaration for arrow functions and
                function expressions.
            file_path: Path of the file being parsed.
            functions: Output list, extended in place.
            class_name: Owning class, or ``None`` for free functions.
        """
        params = func_node.child_by_field_name("parameters")
        ret = func_node.child_by_field_name("return_type")
        body = func_node.child_by_field_name("body")

        # An arrow function written without parentheses (``x => x * 2``)
        # stores its single argument in the ``parameter`` field and has no
        # ``parameters`` node; render it as "(x)" rather than "()".
        single = None if params else func_node.child_by_field_name("parameter")
        if single:
            sig = f"function {name}({_node_text(single)})"
        else:
            sig = _build_js_signature(name, params, ret)

        calls: list[CallInfo] = []
        if body:
            _collect_calls(body, calls)

        if class_name:
            qualified = f"{file_path}:{class_name}.{name}"
        else:
            qualified = f"{file_path}:{name}"

        functions.append(
            ParsedFunction(
                name=name,
                qualified_name=qualified,
                signature=sig,
                file_path=file_path,
                # tree-sitter rows are 0-based; stored lines are 1-based.
                start_line=span_node.start_point[0] + 1,
                end_line=span_node.end_point[0] + 1,
                doc_comment=_extract_jsdoc(span_node),
                call_names=[c.callee_name for c in calls],
                calls=calls,
                class_name=class_name,
            )
        )

    # -- Extractors -----------------------------------------------------------

    def _extract_function_decl(
        self,
        node,
        file_path: str,
        functions: list[ParsedFunction],
        class_name: str | None,
    ) -> None:
        """Extract a ``function foo(…) { … }`` declaration."""
        name_node = node.child_by_field_name("name")
        name = _node_text(name_node) if name_node else "anonymous"
        self._add_function(name, node, node, file_path, functions, class_name)

    def _extract_lexical(
        self,
        node,
        file_path: str,
        functions: list[ParsedFunction],
        imports: list[ParsedImport],
    ) -> None:
        """Handle ``const x = …`` — may be a function or a ``require()``.

        Each declarator with both a name and a value is inspected.  Arrow
        functions and function expressions are recorded as functions, and
        ``require("mod")`` calls as imports.  Any other initializer
        (including ``await import(…)``) is skipped.
        """
        for child in node.children:
            if child.type != "variable_declarator":
                continue
            name_node = child.child_by_field_name("name")
            value_node = child.child_by_field_name("value")
            if name_node is None or value_node is None:
                continue

            name = _node_text(name_node)
            value_kind = value_node.type

            # Arrow function: const foo = (…) => { … }
            if value_kind == "arrow_function":
                self._extract_arrow(name, value_node, node, file_path, functions)
            # Function expression: const foo = function(…) { … }
            elif value_kind == "function_expression":
                self._extract_func_expr(
                    name, value_node, node, file_path, functions
                )
            # require(): const x = require("mod")
            elif value_kind == "call_expression":
                callee = value_node.child_by_field_name("function")
                if callee and _node_text(callee) == "require":
                    self._extract_require(
                        name_node, value_node, file_path, imports
                    )

    def _extract_require(
        self,
        name_node,
        call_node,
        file_path: str,
        imports: list[ParsedImport],
    ) -> None:
        """Record ``const <binding> = require("mod")`` as an import.

        ``imported_names`` and ``is_relative`` are filled in the same way
        as for ES ``import`` statements so both import styles produce
        comparable records.
        """
        args = call_node.child_by_field_name("arguments")
        if not args:
            return
        for arg in args.children:
            if arg.type != "string":
                continue
            mod = _node_text(arg).strip("'\"")
            imports.append(
                ParsedImport(
                    source_path=file_path,
                    target_module=mod,
                    imported_names=self._binding_names(name_node),
                    is_relative=mod.startswith("."),
                )
            )

    @staticmethod
    def _binding_names(name_node) -> list[str]:
        """Return the names introduced by a declarator's binding target.

        A plain identifier yields itself.  For an object pattern
        (``const {a, b: c} = …``) the property names (``a``, ``b``) are
        returned, mirroring how ES named imports record the imported name
        rather than the local alias.  Other patterns yield an empty list.
        """
        if name_node.type == "identifier":
            return [_node_text(name_node)]
        names: list[str] = []
        if name_node.type == "object_pattern":
            for part in name_node.children:
                if part.type == "shorthand_property_identifier_pattern":
                    names.append(_node_text(part))
                elif part.type == "pair_pattern":
                    key = part.child_by_field_name("key")
                    if key:
                        names.append(_node_text(key))
        return names

    def _extract_arrow(
        self,
        name: str,
        arrow_node,
        decl_node,
        file_path: str,
        functions: list[ParsedFunction],
        class_name: str | None = None,
    ) -> None:
        """Extract an arrow function assigned to a variable or class field.

        *decl_node* is the enclosing declaration; it supplies the line span
        and the doc comment.  *class_name* is given for class-field arrows
        so the qualified name includes the class.
        """
        self._add_function(
            name, arrow_node, decl_node, file_path, functions, class_name
        )

    def _extract_func_expr(
        self,
        name: str,
        func_node,
        decl_node,
        file_path: str,
        functions: list[ParsedFunction],
        class_name: str | None = None,
    ) -> None:
        """Extract a function expression assigned to a variable.

        *decl_node* is the enclosing declaration; it supplies the line span
        and the doc comment.
        """
        self._add_function(
            name, func_node, decl_node, file_path, functions, class_name
        )

    def _extract_class(
        self,
        node,
        file_path: str,
        functions: list[ParsedFunction],
        classes: list[ParsedClass],
    ) -> None:
        """Extract a class, its base classes and its methods.

        Methods and arrow-function class fields are appended to
        *functions*; the class record itself is appended to *classes*.
        """
        name_node = node.child_by_field_name("name")
        cls_name = _node_text(name_node) if name_node else "AnonymousClass"

        base_classes: list[str] = []
        for child in node.children:
            if child.type == "class_heritage":
                base_classes.extend(self._heritage_names(child))
                break

        method_names: list[str] = []
        body = node.child_by_field_name("body")
        if body:
            for member in body.children:
                if member.type == "method_definition":
                    method_names.append(self._method_name(member))
                    self._extract_method(member, file_path, functions, cls_name)
                elif member.type in (
                    "public_field_definition",  # TypeScript grammar
                    "field_definition",  # JavaScript grammar
                ):
                    # Class field with arrow function initializer.  The
                    # field's name lives in ``name`` (TypeScript) or
                    # ``property`` (JavaScript).
                    field_name = member.child_by_field_name(
                        "name"
                    ) or member.child_by_field_name("property")
                    value = member.child_by_field_name("value")
                    if field_name and value and value.type == "arrow_function":
                        m_name = _node_text(field_name)
                        method_names.append(m_name)
                        self._extract_arrow(
                            m_name,
                            value,
                            member,
                            file_path,
                            functions,
                            class_name=cls_name,
                        )

        classes.append(
            ParsedClass(
                name=cls_name,
                qualified_name=f"{file_path}:{cls_name}",
                file_path=file_path,
                start_line=node.start_point[0] + 1,
                end_line=node.end_point[0] + 1,
                method_names=method_names,
                base_classes=base_classes,
            )
        )

    @staticmethod
    def _heritage_names(heritage_node) -> list[str]:
        """Return the type names listed in a ``class_heritage`` node.

        In the JavaScript grammar the base expression is a direct child of
        ``class_heritage``.  The TypeScript grammar wraps it in
        ``extends_clause`` / ``implements_clause`` nodes, whose own text
        includes the keyword, so those wrappers are unpacked to avoid
        recording names such as ``"extends Base"``.
        """
        names: list[str] = []
        for part in heritage_node.children:
            if not part.is_named:
                continue
            if part.type in ("extends_clause", "implements_clause"):
                names.extend(
                    _node_text(inner)
                    for inner in part.children
                    if inner.is_named
                    and inner.type not in ("type_arguments", "comment")
                )
            else:
                names.append(_node_text(part))
        return names

    def _extract_method(
        self,
        node,
        file_path: str,
        functions: list[ParsedFunction],
        class_name: str,
    ) -> None:
        """Extract a method_definition inside a class body."""
        self._add_function(
            self._method_name(node), node, node, file_path, functions, class_name
        )

    def _extract_import(
        self,
        node,
        file_path: str,
        imports: list[ParsedImport],
    ) -> None:
        """Extract an ES module import with its names and relative flag.

        Nothing is recorded when the statement has no ``source`` field.
        A module specifier starting with ``.`` is flagged as relative.
        """
        source_node = node.child_by_field_name("source")
        if not source_node:
            return

        mod = _node_text(source_node).strip("'\"")

        imported_names: list[str] = []
        for child in node.children:
            if child.type == "import_clause":
                self._extract_import_clause_names(child, imported_names)

        imports.append(
            ParsedImport(
                source_path=file_path,
                target_module=mod,
                imported_names=imported_names,
                is_relative=mod.startswith("."),
            )
        )

    @staticmethod
    def _extract_import_clause_names(
        clause_node, names: list[str]
    ) -> None:
        """Append the names bound by an import_clause node to *names*.

        Handles the default import (bare identifier), named imports (the
        specifier's ``name`` field) and namespace imports (the first
        identifier inside ``* as ns``).
        """
        for child in clause_node.children:
            if child.type == "identifier":
                names.append(_node_text(child))
            elif child.type == "named_imports":
                for spec in child.children:
                    if spec.type == "import_specifier":
                        name_node = spec.child_by_field_name("name")
                        if name_node:
                            names.append(_node_text(name_node))
            elif child.type == "namespace_import":
                for inner in child.children:
                    if inner.type == "identifier":
                        names.append(_node_text(inner))
                        break

    def _extract_assignment_func(
        self,
        node,
        file_path: str,
        functions: list[ParsedFunction],
    ) -> None:
        """Handle ``module.exports = function …`` or bare assignment arrows.

        The function is named after the last dotted segment of the
        assignment target (``module.exports`` -> ``exports``).  Assignments
        whose right-hand side is not a function are ignored.
        """
        right = node.child_by_field_name("right")
        left = node.child_by_field_name("left")
        if right is None or left is None:
            return

        name = _node_text(left)
        # Simplify: module.exports -> exports
        if "." in name:
            name = name.rsplit(".", 1)[-1]

        if right.type == "arrow_function":
            self._extract_arrow(name, right, node, file_path, functions)
        elif right.type == "function_expression":
            self._extract_func_expr(name, right, node, file_path, functions)

    # -- Tiny helpers ---------------------------------------------------------

    @staticmethod
    def _method_name(method_node) -> str:
        """Return the name of a method_definition node.

        Falls back to ``"anonymous"`` when the node has no ``name`` field.
        """
        name_node = method_node.child_by_field_name("name")
        return _node_text(name_node) if name_node else "anonymous"