nogic 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,674 @@
1
+ """
2
+ JavaScript/TypeScript code extractor using Tree-sitter.
3
+
4
+ Handles:
5
+ - Function declarations
6
+ - Arrow functions assigned to variables
7
+ - Class declarations with methods
8
+ - React components (functions returning JSX)
9
+ - ES6 and CommonJS imports
10
+ """
11
+
12
+ from tree_sitter import Node
13
+
14
+ from .types import (
15
+ ExtractedFunction,
16
+ ExtractedClass,
17
+ ExtractedCall,
18
+ ExtractedImport,
19
+ )
20
+
21
+
22
class JavaScriptExtractor:
    """Extract functions, classes, calls and imports from a JS/TS syntax tree.

    The extractor works on Tree-sitter nodes and on the raw ``source`` bytes
    those nodes index into.  Every qualified name it produces is prefixed
    with ``module_qn``.
    """

    # Value node types that turn ``name = <value>`` into a function definition.
    _FUNCTION_VALUE_TYPES = ("arrow_function", "function_expression", "function")

    # Declaration node types that hold ``variable_declarator`` children.
    _VARIABLE_DECLARATION_TYPES = ("lexical_declaration", "variable_declaration")

    # Class node types (named declarations and class expressions).
    _CLASS_TYPES = ("class_declaration", "class_expression")

    # Class member node types that may hold a function-valued field.  The
    # TypeScript grammar calls it ``public_field_definition``; the JavaScript
    # grammar calls it ``field_definition``.
    _FIELD_TYPES = ("public_field_definition", "field_definition")

    # Subtrees whose calls belong to another caller; call extraction does not
    # descend into them.
    _NESTED_SCOPE_TYPES = (
        "function_declaration",
        "function_expression",
        "arrow_function",
        "class_declaration",
        "class_expression",
    )

    def __init__(self, source: bytes, module_qn: str, is_typescript: bool = False):
        """Store the parsed file's bytes and naming context.

        Args:
            source: Raw bytes of the file the syntax tree was parsed from.
            module_qn: Qualified name of the module; used as the prefix of
                every qualified name produced by this extractor.
            is_typescript: Whether the file is TypeScript.  Only stored; no
                method of this class reads it.
        """
        self.source = source
        self.module_qn = module_qn
        self.is_typescript = is_typescript

    # ------------------------------------------------------------------
    # Node helpers
    # ------------------------------------------------------------------

    def get_node_text(self, node: Node) -> str:
        """Return the UTF-8 decoded source text covered by ``node``."""
        return self.source[node.start_byte : node.end_byte].decode("utf-8")

    def get_node_lines(self, node: Node) -> tuple[int, int]:
        """Return ``(start_line, end_line)`` of ``node``, both 1-indexed."""
        return node.start_point[0] + 1, node.end_point[0] + 1

    def _get_declarators(self, decl_node: Node) -> list[Node]:
        """Return the ``variable_declarator`` children of a declaration."""
        return [c for c in decl_node.children if c.type == "variable_declarator"]

    def _is_inside_class(self, node: Node) -> bool:
        """Return True when any ancestor of ``node`` is a ``class_body``."""
        parent = node.parent
        while parent:
            if parent.type == "class_body":
                return True
            parent = parent.parent
        return False

    def _declaration_keyword(self, decl_node: Node | None) -> str:
        """Return the ``const``/``let``/``var`` keyword of a declaration.

        Falls back to ``"const"`` when the declaration is missing or carries
        no such keyword child.
        """
        if decl_node:
            for child in decl_node.children:
                if child.type in ("const", "let", "var"):
                    return child.type
        return "const"

    # ------------------------------------------------------------------
    # Functions
    # ------------------------------------------------------------------

    def extract_functions(self, root: Node) -> list[ExtractedFunction]:
        """Extract every function that is not a class member.

        Covers function declarations and function values assigned in
        ``const``/``let``/``var`` declarations, including ones nested inside
        other functions.  Class subtrees are skipped entirely.
        """
        functions: list[ExtractedFunction] = []
        self._extract_functions_recursive(root, functions)
        return functions

    def _extract_functions_recursive(
        self, node: Node, functions: list[ExtractedFunction]
    ) -> None:
        """Walk ``node`` depth-first and append each function found.

        Export statements need no special case: the walk reaches the
        declaration they wrap.  Handling them explicitly as well (as an
        earlier revision did) reported every exported function twice.
        """
        if node.type == "function_declaration":
            if not self._is_inside_class(node):
                func = self._extract_function_declaration(node)
                if func:
                    functions.append(func)
        elif node.type in self._VARIABLE_DECLARATION_TYPES:
            # const foo = () => {} / var foo = function () {}
            for declarator in self._get_declarators(node):
                func = self._extract_arrow_function(declarator)
                if func:
                    functions.append(func)

        # Class members are reported through extract_classes instead.
        if node.type not in ("class_body", "class_declaration", "class_expression"):
            for child in node.children:
                self._extract_functions_recursive(child, functions)

    def _extract_function_declaration(self, node: Node) -> ExtractedFunction | None:
        """Build an ``ExtractedFunction`` from a ``function_declaration``.

        Returns None when the declaration has no ``name`` field.
        """
        name_node = node.child_by_field_name("name")
        if not name_node:
            return None

        name = self.get_node_text(name_node)
        start_line, end_line = self.get_node_lines(node)
        is_async = self._is_async(node)
        prefix = "async " if is_async else ""

        return ExtractedFunction(
            name=name,
            qualified_name=f"{self.module_qn}.{name}",
            start_line=start_line,
            end_line=end_line,
            signature=f"{prefix}function {name}{self._get_parameters_text(node)}",
            docstring=self._extract_jsdoc(node),
            decorators=[],
            is_async=is_async,
            is_method=False,
            parameters=self._extract_parameters(node),
            source_code=self.get_node_text(node),
            node=node,
        )

    def _extract_arrow_function(self, declarator: Node) -> ExtractedFunction | None:
        """Build an ``ExtractedFunction`` from a ``variable_declarator``.

        Returns None unless the declarator has both a name and a value and
        the value is an arrow function or function expression.  Line range,
        source text and JSDoc are taken from the enclosing declaration when
        there is one; the returned ``node`` is the function value itself.
        """
        name_node = declarator.child_by_field_name("name")
        value_node = declarator.child_by_field_name("value")
        if not name_node or not value_node:
            return None
        if value_node.type not in self._FUNCTION_VALUE_TYPES:
            return None

        name = self.get_node_text(name_node)
        parent = declarator.parent
        span_node = parent if parent else declarator
        start_line, end_line = self.get_node_lines(span_node)

        params_text = self._get_parameters_text(value_node)
        is_async = self._is_async(value_node)
        prefix = "async " if is_async else ""

        # Report the keyword actually used instead of always claiming const.
        keyword = self._declaration_keyword(parent)
        if value_node.type == "arrow_function":
            signature = f"{keyword} {name} = {prefix}{params_text} =>"
        else:
            signature = f"{keyword} {name} = {prefix}function{params_text}"

        return ExtractedFunction(
            name=name,
            qualified_name=f"{self.module_qn}.{name}",
            start_line=start_line,
            end_line=end_line,
            signature=signature,
            docstring=self._extract_jsdoc(parent) if parent else None,
            decorators=[],
            is_async=is_async,
            is_method=False,
            parameters=self._extract_parameters(value_node),
            source_code=self.get_node_text(span_node),
            node=value_node,
        )

    def _extract_parameters(self, func_node: Node) -> list[str]:
        """Return the parameter names of a function-like node.

        Plain identifiers, TypeScript typed parameters, defaulted parameters
        and rest parameters (reported with a leading ``...``) are recognised.
        Destructured parameters are not reported.
        """
        params_node = func_node.child_by_field_name("parameters")
        if not params_node:
            # Arrow function with a single bare parameter: x => x + 1
            single = func_node.child_by_field_name("parameter")
            if single and single.type == "identifier":
                return [self.get_node_text(single)]
            return []

        parameters: list[str] = []
        for child in params_node.children:
            if child.type == "identifier":
                parameters.append(self.get_node_text(child))
            elif child.type in ("required_parameter", "optional_parameter"):
                # TypeScript typed parameters
                pattern = child.child_by_field_name("pattern")
                if pattern and pattern.type == "identifier":
                    parameters.append(self.get_node_text(pattern))
            elif child.type == "rest_pattern":
                # ...args
                parameters.extend(
                    "..." + self.get_node_text(c)
                    for c in child.children
                    if c.type == "identifier"
                )
            elif child.type == "assignment_pattern":
                # Default parameters: x = value
                left = child.child_by_field_name("left")
                if left and left.type == "identifier":
                    parameters.append(self.get_node_text(left))
        return parameters

    def _get_parameters_text(self, func_node: Node) -> str:
        """Return the parameter list as source text, always parenthesised."""
        params_node = func_node.child_by_field_name("parameters")
        if params_node:
            return self.get_node_text(params_node)
        # Single parameter arrow function without parens
        param = func_node.child_by_field_name("parameter")
        if param:
            return f"({self.get_node_text(param)})"
        return "()"

    def _is_async(self, node: Node) -> bool:
        """Return True when ``node`` has an ``async`` keyword child.

        Only the node type is inspected.  Comparing child *text* to
        ``"async"`` would also match a method or parameter that is merely
        named ``async``.
        """
        return any(child.type == "async" for child in node.children)

    def _extract_jsdoc(self, node: Node) -> str | None:
        """Return the body of the ``/** ... */`` comment preceding ``node``.

        For an exported declaration the comment sits before the enclosing
        ``export_statement`` rather than before the declaration itself, so
        that position is checked as a fallback.  Returns None when there is
        no immediately preceding JSDoc-style comment.
        """
        previous = node.prev_sibling
        if not previous or previous.type != "comment":
            parent = node.parent
            if parent and parent.type == "export_statement":
                previous = parent.prev_sibling

        if previous and previous.type == "comment":
            comment = self.get_node_text(previous)
            if comment.startswith("/**"):
                # Strip /** and */
                return comment[3:-2].strip()
        return None

    # ------------------------------------------------------------------
    # Classes
    # ------------------------------------------------------------------

    def extract_classes(self, root: Node) -> list[ExtractedClass]:
        """Extract every named class reachable without entering a class body."""
        classes: list[ExtractedClass] = []
        self._extract_classes_recursive(root, classes)
        return classes

    def _extract_classes_recursive(
        self, node: Node, classes: list[ExtractedClass]
    ) -> None:
        """Walk ``node`` depth-first and append each named class found.

        Export statements need no special case: the walk reaches the class
        they wrap.  Handling them explicitly as well (as an earlier revision
        did) reported every exported class twice.
        """
        if node.type in self._CLASS_TYPES:
            cls = self._extract_class(node)
            if cls:
                classes.append(cls)

        for child in node.children:
            if child.type not in ("class_body",):
                self._extract_classes_recursive(child, classes)

    def _extract_class(self, node: Node) -> ExtractedClass | None:
        """Build an ``ExtractedClass``; returns None for anonymous classes."""
        name_node = node.child_by_field_name("name")
        if not name_node:
            return None

        name = self.get_node_text(name_node)
        start_line, end_line = self.get_node_lines(node)

        return ExtractedClass(
            name=name,
            qualified_name=f"{self.module_qn}.{name}",
            start_line=start_line,
            end_line=end_line,
            docstring=self._extract_jsdoc(node),
            decorators=self._extract_decorators(node),
            bases=self._extract_class_heritage(node),
            methods=self._extract_methods(node, name),
            source_code=self.get_node_text(node),
            node=node,
        )

    def _extract_class_heritage(self, class_node: Node) -> list[str]:
        """Return the source text of each class named in ``extends``.

        Two tree shapes are handled: the TypeScript grammar wraps the
        superclass in an ``extends_clause`` inside ``class_heritage``, while
        the JavaScript grammar places the superclass expression directly
        under ``class_heritage``.  Only identifiers and member expressions
        are reported.
        """
        base_types = ("identifier", "member_expression")
        bases: list[str] = []
        for child in class_node.children:
            if child.type != "class_heritage":
                continue
            for clause in child.children:
                if clause.type == "extends_clause":
                    bases.extend(
                        self.get_node_text(target)
                        for target in clause.children
                        if target.type in base_types
                    )
                elif clause.type in base_types:
                    bases.append(self.get_node_text(clause))
        return bases

    def _extract_decorators(self, node: Node) -> list[str]:
        """Return the source text of each ``decorator`` child of ``node``."""
        return [
            self.get_node_text(child)
            for child in node.children
            if child.type == "decorator"
        ]

    def _extract_methods(
        self, class_node: Node, class_name: str
    ) -> list[ExtractedFunction]:
        """Return the methods and function-valued fields of a class."""
        methods: list[ExtractedFunction] = []
        body_node = class_node.child_by_field_name("body")
        if not body_node:
            return methods

        for child in body_node.children:
            if child.type == "method_definition":
                method = self._extract_method(child, class_name)
            elif child.type in self._FIELD_TYPES:
                # Function as class field: foo = () => {}
                method = self._extract_field_method(child, class_name)
            else:
                continue
            if method:
                methods.append(method)
        return methods

    def _extract_method(
        self, node: Node, class_name: str
    ) -> ExtractedFunction | None:
        """Build an ``ExtractedFunction`` from a ``method_definition``.

        Returns None when the method has no ``name`` field.  The signature is
        prefixed with ``static``, ``get``/``set`` and ``async`` as present.
        """
        name_node = node.child_by_field_name("name")
        if not name_node:
            return None

        name = self.get_node_text(name_node)
        start_line, end_line = self.get_node_lines(node)
        is_async = self._is_async(node)

        # Track the modifiers independently so that ``static get x()`` keeps
        # both of them regardless of the order the keywords appear in.
        is_static = False
        accessor = ""
        for child in node.children:
            if child.type == "static":
                is_static = True
            elif child.type == "get":
                accessor = "get "
            elif child.type == "set":
                accessor = "set "

        prefix = ("static " if is_static else "") + accessor
        if is_async:
            prefix += "async "

        return ExtractedFunction(
            name=name,
            qualified_name=f"{self.module_qn}.{class_name}.{name}",
            start_line=start_line,
            end_line=end_line,
            signature=f"{prefix}{name}{self._get_parameters_text(node)}",
            docstring=self._extract_jsdoc(node),
            decorators=self._extract_decorators(node),
            is_async=is_async,
            is_method=True,
            class_name=class_name,
            parameters=self._extract_parameters(node),
            source_code=self.get_node_text(node),
            node=node,
        )

    def _extract_field_method(
        self, node: Node, class_name: str
    ) -> ExtractedFunction | None:
        """Build an ``ExtractedFunction`` from a function-valued class field.

        Returns None unless the field has a name and its value is an arrow
        function or function expression.  The field name is read from the
        ``name`` field (TypeScript grammar) or, failing that, from the
        ``property`` field (JavaScript grammar).
        """
        name_node = node.child_by_field_name("name") or node.child_by_field_name(
            "property"
        )
        value_node = node.child_by_field_name("value")
        if not name_node or not value_node:
            return None
        if value_node.type not in self._FUNCTION_VALUE_TYPES:
            return None

        name = self.get_node_text(name_node)
        start_line, end_line = self.get_node_lines(node)

        params_text = self._get_parameters_text(value_node)
        is_async = self._is_async(value_node)
        prefix = "async " if is_async else ""

        # Render the form that was actually written, as is done for
        # variable-assigned functions.
        if value_node.type == "arrow_function":
            signature = f"{name} = {prefix}{params_text} =>"
        else:
            signature = f"{name} = {prefix}function{params_text}"

        return ExtractedFunction(
            name=name,
            qualified_name=f"{self.module_qn}.{class_name}.{name}",
            start_line=start_line,
            end_line=end_line,
            signature=signature,
            docstring=self._extract_jsdoc(node),
            decorators=[],
            is_async=is_async,
            is_method=True,
            class_name=class_name,
            parameters=self._extract_parameters(value_node),
            source_code=self.get_node_text(node),
            node=value_node,
        )

    # ------------------------------------------------------------------
    # Calls
    # ------------------------------------------------------------------

    def extract_calls(self, node: Node, caller_qn: str) -> list[ExtractedCall]:
        """Extract the calls made inside ``node`` on behalf of ``caller_qn``.

        Calls inside nested functions and classes are left out.
        """
        calls: list[ExtractedCall] = []
        self._extract_calls_recursive(node, caller_qn, calls)
        return calls

    def _extract_calls_recursive(
        self, node: Node, caller_qn: str, calls: list[ExtractedCall]
    ) -> None:
        """Walk ``node`` depth-first and append each ``call_expression``."""
        if node.type == "call_expression":
            call = self._extract_single_call(node, caller_qn)
            if call:
                calls.append(call)

        for child in node.children:
            # Don't recurse into nested functions/classes
            if child.type not in self._NESTED_SCOPE_TYPES:
                self._extract_calls_recursive(child, caller_qn, calls)

    def _extract_single_call(
        self, call_node: Node, caller_qn: str
    ) -> ExtractedCall | None:
        """Build an ``ExtractedCall`` from one ``call_expression``.

        For ``obj.method()`` the receiver is the text of ``obj`` and the name
        is ``method``; otherwise the name is the full callee text and the
        receiver is None.  Returns None when the node has no callee.
        """
        func_node = call_node.child_by_field_name("function")
        if not func_node:
            return None

        receiver = None
        name = self.get_node_text(func_node)

        if func_node.type == "member_expression":
            # obj.method() or obj?.method()
            obj_node = func_node.child_by_field_name("object")
            prop_node = func_node.child_by_field_name("property")
            if obj_node and prop_node:
                receiver = self.get_node_text(obj_node)
                name = self.get_node_text(prop_node)

        # Argument source text, punctuation excluded.
        arguments: list[str] = []
        args_node = call_node.child_by_field_name("arguments")
        if args_node:
            arguments = [
                self.get_node_text(child)
                for child in args_node.children
                if child.type not in ("(", ")", ",")
            ]

        return ExtractedCall(
            name=name,
            line=call_node.start_point[0] + 1,
            caller_qualified_name=caller_qn,
            receiver=receiver,
            arguments=arguments,
        )

    # ------------------------------------------------------------------
    # Imports
    # ------------------------------------------------------------------

    def extract_imports(self, root: Node) -> list[ExtractedImport]:
        """Extract ES6 ``import`` statements and CommonJS ``require`` bindings."""
        imports: list[ExtractedImport] = []
        self._extract_imports_recursive(root, imports)
        return imports

    def _extract_imports_recursive(
        self, node: Node, imports: list[ExtractedImport]
    ) -> None:
        """Walk ``node`` depth-first and append each import found.

        The walk stops at import statements and at variable declarations, so
        a ``require`` nested inside a declaration's value is not visited.
        """
        if node.type == "import_statement":
            self._extract_es6_import(node, imports)
        elif node.type in self._VARIABLE_DECLARATION_TYPES:
            # Check for require() calls
            self._extract_require_import(node, imports)
        else:
            for child in node.children:
                self._extract_imports_recursive(child, imports)

    def _append_namespace_import(
        self,
        namespace_node: Node,
        module: str,
        line: int,
        imports: list[ExtractedImport],
    ) -> None:
        """Append a wildcard import for ``import * as name``."""
        for child in namespace_node.children:
            if child.type == "identifier":
                imports.append(
                    ExtractedImport(
                        module=module,
                        alias=self.get_node_text(child),
                        is_wildcard=True,
                        line=line,
                    )
                )

    def _extract_es6_import(
        self, node: Node, imports: list[ExtractedImport]
    ) -> None:
        """Append the bindings of one ES6 ``import_statement``.

        Statements without a ``source`` field are ignored.
        """
        source_node = node.child_by_field_name("source")
        if not source_node:
            return

        line = node.start_point[0] + 1
        # Remove quotes from module path
        module = self.get_node_text(source_node).strip("'\"")

        for child in node.children:
            if child.type == "import_clause":
                self._extract_import_clause(child, module, line, imports)
            elif child.type == "namespace_import":
                # import * as name from 'module'
                self._append_namespace_import(child, module, line, imports)

    def _extract_import_clause(
        self, clause: Node, module: str, line: int, imports: list[ExtractedImport]
    ) -> None:
        """Append default, named and namespace imports of an import clause."""
        for child in clause.children:
            if child.type == "identifier":
                # Default import: import foo from 'module'
                imports.append(
                    ExtractedImport(
                        module=module,
                        name="default",
                        alias=self.get_node_text(child),
                        line=line,
                    )
                )
            elif child.type == "named_imports":
                # Named imports: import { foo, bar as baz } from 'module'
                for spec in child.children:
                    if spec.type != "import_specifier":
                        continue
                    name_node = spec.child_by_field_name("name")
                    if not name_node:
                        continue
                    alias_node = spec.child_by_field_name("alias")
                    imports.append(
                        ExtractedImport(
                            module=module,
                            name=self.get_node_text(name_node),
                            alias=(
                                self.get_node_text(alias_node) if alias_node else None
                            ),
                            line=line,
                        )
                    )
            elif child.type == "namespace_import":
                # import * as name
                self._append_namespace_import(child, module, line, imports)

    def _extract_require_import(
        self, decl_node: Node, imports: list[ExtractedImport]
    ) -> None:
        """Append the bindings of ``const x = require('module')`` declarators.

        Only calls whose callee text is exactly ``require`` are considered,
        and only the first string argument is used as the module path.
        """
        line = decl_node.start_point[0] + 1

        for declarator in self._get_declarators(decl_node):
            value_node = declarator.child_by_field_name("value")
            if not value_node or value_node.type != "call_expression":
                continue

            func_node = value_node.child_by_field_name("function")
            if not func_node or self.get_node_text(func_node) != "require":
                continue

            args_node = value_node.child_by_field_name("arguments")
            if not args_node:
                continue

            name_node = declarator.child_by_field_name("name")
            for arg in args_node.children:
                if arg.type != "string":
                    continue
                module = self.get_node_text(arg).strip("'\"")
                if name_node:
                    self._append_require_bindings(name_node, module, line, imports)
                # Only the first string argument names the module.
                break

    def _append_require_bindings(
        self,
        name_node: Node,
        module: str,
        line: int,
        imports: list[ExtractedImport],
    ) -> None:
        """Append one import per name bound by a ``require`` declarator."""
        if name_node.type == "identifier":
            # const foo = require('module')
            imports.append(
                ExtractedImport(
                    module=module,
                    alias=self.get_node_text(name_node),
                    line=line,
                )
            )
        elif name_node.type == "object_pattern":
            # const { foo, bar: baz } = require('module')
            for prop in name_node.children:
                if prop.type == "shorthand_property_identifier_pattern":
                    imports.append(
                        ExtractedImport(
                            module=module,
                            name=self.get_node_text(prop),
                            line=line,
                        )
                    )
                elif prop.type == "pair_pattern":
                    key_node = prop.child_by_field_name("key")
                    val_node = prop.child_by_field_name("value")
                    if key_node and val_node:
                        imports.append(
                            ExtractedImport(
                                module=module,
                                name=self.get_node_text(key_node),
                                alias=self.get_node_text(val_node),
                                line=line,
                            )
                        )