codeanalyzer-python 0.1.1__py3-none-any.whl → 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27) hide show
  1. codeanalyzer/__init__.py +0 -0
  2. codeanalyzer/__main__.py +84 -0
  3. codeanalyzer/core.py +321 -0
  4. codeanalyzer/jedi/__init__.py +0 -0
  5. codeanalyzer/jedi/jedi.py +0 -0
  6. codeanalyzer/py.typed +0 -0
  7. codeanalyzer/schema/__init__.py +23 -0
  8. codeanalyzer/schema/py_schema.py +360 -0
  9. codeanalyzer/semantic_analysis/__init__.py +0 -0
  10. codeanalyzer/semantic_analysis/codeql/__init__.py +26 -0
  11. codeanalyzer/semantic_analysis/codeql/codeql_analysis.py +133 -0
  12. codeanalyzer/semantic_analysis/codeql/codeql_exceptions.py +12 -0
  13. codeanalyzer/semantic_analysis/codeql/codeql_loader.py +74 -0
  14. codeanalyzer/semantic_analysis/codeql/codeql_query_runner.py +164 -0
  15. codeanalyzer/semantic_analysis/wala/__init__.py +15 -0
  16. codeanalyzer/syntactic_analysis/__init__.py +0 -0
  17. codeanalyzer/syntactic_analysis/symbol_table_builder.py +903 -0
  18. codeanalyzer/utils/__init__.py +5 -0
  19. codeanalyzer/utils/logging.py +18 -0
  20. codeanalyzer/utils/progress_bar.py +69 -0
  21. {codeanalyzer_python-0.1.1.dist-info → codeanalyzer_python-0.1.2.dist-info}/METADATA +1 -1
  22. codeanalyzer_python-0.1.2.dist-info/RECORD +26 -0
  23. codeanalyzer_python-0.1.1.dist-info/RECORD +0 -6
  24. {codeanalyzer_python-0.1.1.dist-info → codeanalyzer_python-0.1.2.dist-info}/WHEEL +0 -0
  25. {codeanalyzer_python-0.1.1.dist-info → codeanalyzer_python-0.1.2.dist-info}/entry_points.txt +0 -0
  26. {codeanalyzer_python-0.1.1.dist-info → codeanalyzer_python-0.1.2.dist-info}/licenses/LICENSE +0 -0
  27. {codeanalyzer_python-0.1.1.dist-info → codeanalyzer_python-0.1.2.dist-info}/licenses/NOTICE +0 -0
@@ -0,0 +1,903 @@
1
+ from io import StringIO
2
+ from pathlib import Path
3
+ import tokenize
4
+ from typing import Dict, List, Optional
5
+ import astor
6
+ import jedi
7
+ from codeanalyzer.utils import logger
8
+ from jedi.api.project import Project
9
+ from jedi.api import Script
10
+ from rich.progress import track
11
+ from codeanalyzer.schema.py_schema import (
12
+ PyCallable,
13
+ PyCallableParameter,
14
+ PyCallsite,
15
+ PyClass,
16
+ PyClassAttribute,
17
+ PyComment,
18
+ PyImport,
19
+ PyModule,
20
+ PySymbol,
21
+ PyVariableDeclaration,
22
+ )
23
+ import ast
24
+ from ast import AST, ClassDef
25
+
26
+ from codeanalyzer.utils.progress_bar import ProgressBar
27
+
28
+
29
+ class SymbolTableBuilder:
30
+ """A class for building a symbol table for a Python project."""
31
+
32
def __init__(self, project_dir: Path | str, virtualenv: Path | str | None) -> None:
    """Creates the Jedi project used for all name and type inference.

    Args:
        project_dir (Path | str): Root directory of the project to analyze.
        virtualenv (Path | str | None): Root directory of a virtual environment.
            When given, the path of its Python interpreter is handed to Jedi as
            ``environment_path``; when None, no environment is specified.
    """
    self.project_dir = Path(project_dir)
    if virtualenv is None:
        # If no virtual environment is provided, create a jedi project without an environment.
        self.jedi_project: Project = jedi.Project(path=self.project_dir)
        return

    venv_root = Path(virtualenv)
    # POSIX virtual environments keep the interpreter in bin/, Windows ones in Scripts/.
    posix_python = venv_root / "bin" / "python"
    windows_python = venv_root / "Scripts" / "python.exe"
    if not posix_python.exists() and windows_python.exists():
        interpreter = windows_python
    else:
        # Historical default, also used when neither layout is present on disk.
        interpreter = posix_python

    self.jedi_project = jedi.Project(
        path=self.project_dir,
        environment_path=interpreter,
    )
43
+
44
+ @staticmethod
45
+ def _infer_type(script: Script, line: int, column: int) -> str:
46
+ """Tries to infer the type at a given position using Jedi."""
47
+ try:
48
+ inference = script.infer(line=line, column=column)
49
+ if inference:
50
+ return inference[0].name # or .full_name
51
+ except Exception:
52
+ pass
53
+ return None
54
+
55
+ @staticmethod
56
+ def _infer_qualified_name(script: Script, line: int, column: int) -> Optional[str]:
57
+ """
58
+ Tries to infer the fully qualified name (e.g., os.path.join) at the given position using Jedi.
59
+
60
+ Args:
61
+ script (jedi.Script): The Jedi script object.
62
+ line (int): Line number of the expression.
63
+ column (int): Column offset of the expression.
64
+
65
+ Returns:
66
+ Optional[str]: The fully qualified name if available, else None.
67
+ """
68
+ try:
69
+ definitions = script.infer(line=line, column=column)
70
+ if definitions:
71
+ return definitions[0].full_name
72
+ except Exception:
73
+ pass
74
+ return None
75
+
76
def _module(self, py_file: Path) -> PyModule:
    """Builds a PyModule from a Python file.

    Args:
        py_file (Path): Path to the python file.

    Returns:
        PyModule object for the input file, holding its comments, imports,
        module-level variables, top-level classes and top-level functions.
    """
    # Get the raw source code from the file
    source = py_file.read_text(encoding="utf-8")
    # Create a Jedi script for the file, handing it the text that was just read
    # so Jedi and `ast` work on the same source.
    script: Script = Script(
        code=source, path=str(py_file), project=self.jedi_project
    )
    module = ast.parse(source, filename=str(py_file))

    classes: Dict[str, PyClass] = {}
    functions: Dict[str, PyCallable] = {}
    for node in ast.iter_child_nodes(module):
        if isinstance(node, ClassDef):
            classes.update(self._add_class(node, script))
        elif isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            # `_callables` records the function definitions found among the
            # children of the node it receives, never that node itself. Wrapping
            # the function in a throwaway module makes the top-level function
            # (and anything nested in it) show up in the result.
            holder = ast.Module(body=[node], type_ignores=[])
            functions.update(self._callables(holder, script))

    return (
        PyModule.builder()
        .with_file_path(str(py_file))
        .with_module_name(py_file.stem)
        .with_comments(self._pycomments(module, source))
        .with_imports(self._imports(module))
        .with_variables(self._module_variables(module, script))
        .with_classes(classes)
        .with_functions(functions)
        .build()
    )
110
+
111
def _imports(self, module: ast.Module) -> List[PyImport]:
    """
    Collects every ``import`` and ``from ... import`` statement found anywhere in the module.

    Args:
        module (ast.Module): The AST node representing the module.

    Returns:
        List[PyImport]: One PyImport per imported name, in ``ast.walk`` order.
    """

    def make_import(stmt: ast.AST, source_module: str, alias: ast.alias) -> PyImport:
        # Both statement kinds share the same name/alias/location bookkeeping.
        return (
            PyImport.builder()
            .with_module(source_module)
            .with_name(alias.asname or alias.name)  # name in local scope
            .with_alias(alias.name if alias.asname else None)
            .with_start_line(getattr(stmt, "lineno", -1))
            .with_end_line(getattr(stmt, "end_lineno", stmt.lineno))
            .with_start_column(getattr(stmt, "col_offset", -1))
            .with_end_column(getattr(stmt, "end_col_offset", -1))
            .build()
        )

    collected: List[PyImport] = []
    for stmt in ast.walk(module):
        if isinstance(stmt, ast.Import):
            # for "import os", alias.name = "os" doubles as the module name
            collected.extend(
                make_import(stmt, alias.name, alias) for alias in stmt.names
            )
        elif isinstance(stmt, ast.ImportFrom):
            base_module = stmt.module or ""  # e.g., from . import x
            if stmt.level:
                # Relative import: keep one leading dot per level
                source_module = "." * stmt.level + base_module
            else:
                source_module = base_module
            collected.extend(
                make_import(stmt, source_module, alias) for alias in stmt.names
            )

    return collected
159
+
160
def _add_class(
    self, class_node: ast.ClassDef, script: Script
) -> Dict[str, PyClass]:
    """Builds a PyClass from a class definition node.

    Args:
        class_node (ast.ClassDef): The AST node representing the class.
        script (Script): The Jedi script object for the module.

    Returns:
        Dict[str, PyClass]: Single-entry mapping of class signature to PyClass object.
    """
    # Path-derived signature used whenever Jedi cannot name the class.
    fallback_signature = (
        f"{str(script.path).replace('/', '.').replace('.py', '')}.{class_node.name}"
    )

    # Try resolving full signature with Jedi
    try:
        definitions = script.goto(
            line=class_node.lineno, column=class_node.col_offset
        )
        signature = next(
            (
                d.full_name
                for d in definitions
                if d.type == "class" and d.full_name
            ),
            fallback_signature,
        )
    except Exception:
        # Must be a plain string: it is used both as the PyClass signature and
        # as the key of the returned dictionary.
        signature = fallback_signature

    # NOTE(review): `code` is regenerated from the AST, so its line numbers
    # presumably differ from the original file - confirm that the comment
    # extraction below behaves as intended.
    code: str = astor.to_source(class_node).strip()

    inner_classes: Dict[str, PyClass] = {}
    for child in class_node.body:
        if isinstance(child, ast.ClassDef):
            inner_classes.update(self._add_class(child, script))

    py_class = (
        PyClass.builder()
        .with_name(class_node.name)
        .with_signature(signature)
        .with_start_line(class_node.lineno)
        .with_end_line(
            getattr(
                class_node, "end_lineno", class_node.lineno + len(class_node.body)
            )
        )
        .with_comments(self._pycomments(class_node, code))
        .with_code(code)
        .with_base_classes(
            [
                ast.unparse(base)
                for base in class_node.bases
                if isinstance(base, ast.expr)
            ]
        )
        .with_methods(self._callables(class_node, script))
        .with_attributes(self._class_attributes(class_node, script))
        .with_inner_classes(inner_classes)
        .build()
    )

    return {signature: py_class}
221
+
222
def _callables(self, node: AST, script: Script) -> Dict[str, PyCallable]:
    """
    Builds PyCallable objects for the functions found beneath an AST node.

    Only descendants of ``node`` are inspected; ``node`` itself is never recorded.
    Both ``def`` and ``async def`` definitions are collected, including those
    nested inside other functions, classes, or statements that have a body.

    Args:
        node (AST): The AST node to process (e.g., Module, ClassDef, FunctionDef).
        script (Script): The Jedi script object for the module.

    Returns:
        Dict[str, PyCallable]: A dictionary mapping function/method names to PyCallable
        objects. Keys are bare names, so a later definition with the same name
        replaces an earlier one.
    """
    callables: Dict[str, PyCallable] = {}
    module_name: str = Path(script.path or "<unknown_module>").stem
    function_nodes = (ast.FunctionDef, ast.AsyncFunctionDef)

    def visit(n: AST, class_prefix: str = ""):
        for child in ast.iter_child_nodes(n):
            if isinstance(child, function_nodes):
                method_name = child.name
                start_line = child.lineno
                end_line = getattr(
                    child, "end_lineno", start_line + len(child.body)
                )
                code_start_line = child.body[0].lineno if child.body else start_line
                code = astor.to_source(child).strip()
                decorators = [ast.unparse(d) for d in child.decorator_list]

                try:
                    definitions = script.goto(
                        line=start_line, column=child.col_offset
                    )
                except Exception:
                    # Jedi lookups are best-effort; fall back to the derived name.
                    definitions = []

                signature = next(
                    (d.full_name for d in definitions if d.type == "function"),
                    f"{module_name}.{class_prefix}{method_name}",
                )

                callables[method_name] = (
                    PyCallable.builder()
                    .with_name(method_name)
                    .with_path(str(script.path))
                    .with_signature(signature)
                    .with_decorators(decorators)
                    .with_code(code)
                    .with_start_line(start_line)
                    .with_end_line(end_line)
                    .with_code_start_line(code_start_line)
                    .with_accessed_symbols(self._accessed_symbols(child, script))
                    .with_call_sites(self._call_sites(child, script))
                    .with_local_variables(self._local_variables(child, script))
                    .with_cyclomatic_complexity(self._cyclomatic_complexity(child))
                    .with_parameters(self._callable_parameters(child, script))
                    .with_return_type(
                        ast.unparse(child.returns)
                        if child.returns
                        else self._infer_type(
                            script, child.lineno, child.col_offset
                        )
                    )
                    .with_comments(self._pycomments(child, code))
                    .build()
                )

                # Nested definitions get the enclosing function in their prefix.
                visit(child, class_prefix + method_name + ".")

            elif isinstance(child, ast.ClassDef):
                visit(child, class_prefix + child.name + ".")

            elif hasattr(child, "body"):
                # Compound statements (if/for/with/try, ...) may hide definitions.
                visit(child, class_prefix)

    visit(node)
    return callables
297
+
298
def _pycomments(self, node: ast.AST, source: str) -> List[PyComment]:
    """
    Extracts all PyComment instances (docstring and # comments) from within a specific AST node's body.

    ``#`` comments are selected by comparing the line of each comment token in
    ``source`` with the node's ``lineno``/``end_lineno``. A module node carries
    no position, so for ``ast.Module`` every comment in ``source`` is included.

    Args:
        node (AST): The AST node (e.g., Module, ClassDef, FunctionDef).
        source (str): Source code that is tokenized to find ``#`` comments.

    Returns:
        List[PyComment]: List of PyComment instances, docstring first when present.
    """
    comments: List[PyComment] = []

    # 1. Extract docstring (if any)
    docstring_content = ast.get_docstring(node, clean=False)
    if docstring_content:
        try:
            string_node = node.body[0].value  # type: ignore
            start_line = getattr(string_node, "lineno", getattr(node, "lineno", -1))
            end_line = getattr(string_node, "end_lineno", start_line)
            start_column = getattr(string_node, "col_offset", -1)
            end_column = getattr(
                string_node, "end_col_offset", start_column + len(docstring_content)
            )
        except Exception:
            # Fall back to the position of the enclosing node.
            start_line = getattr(node, "lineno", -1)
            end_line = getattr(node, "end_lineno", start_line)
            start_column = getattr(node, "col_offset", -1)
            end_column = start_column + len(docstring_content)

        comments.append(
            PyComment.builder()
            .with_content(docstring_content)
            .with_start_line(start_line)
            .with_end_line(end_line)
            .with_start_column(start_column)
            .with_end_column(end_column)
            .with_is_docstring(True)
            .build()
        )

    # 2. Extract # comments scoped within the node's line range
    # A module has no lineno/end_lineno; without this flag the range would be
    # -1..-1 and every module-level comment would be skipped.
    spans_whole_source = isinstance(node, ast.Module)
    node_start = getattr(node, "lineno", -1)
    node_end = getattr(node, "end_lineno", node_start)

    for tok in tokenize.generate_tokens(StringIO(source).readline):
        if tok.type != tokenize.COMMENT:
            continue
        tok_line, tok_col = tok.start
        if not spans_whole_source and not (node_start <= tok_line <= node_end):
            continue
        comment_text = tok.string.lstrip("#").strip()
        comments.append(
            PyComment.builder()
            .with_content(comment_text)
            .with_start_line(tok_line)
            .with_end_line(tok_line)
            .with_start_column(tok_col)
            .with_end_column(tok_col + len(tok.string))
            .with_is_docstring(False)
            .build()
        )

    return comments
361
+
362
def _class_attributes(
    self, ast_node: ast.AST, script: Script
) -> Dict[str, PyClassAttribute]:
    """
    Extracts class attributes from the class definition.

    Plain assignments and annotated assignments directly in the class body are
    recorded. Names listed in ``__slots__`` are recorded as well, with the
    type ``"slot"``.

    Args:
        ast_node (AST): The AST node representing the class.
        script (Script): The Jedi script object for the module.

    Returns:
        Dict[str, PyClassAttribute]: A dictionary mapping attribute names to their metadata.
    """
    attributes: Dict[str, PyClassAttribute] = {}

    def slot_names(value: ast.AST) -> List[str]:
        # `__slots__` may be a list/tuple of strings or a single string.
        if isinstance(value, (ast.List, ast.Tuple)):
            candidates = value.elts
        else:
            candidates = [value]
        return [
            elt.value
            for elt in candidates
            if isinstance(elt, ast.Constant) and isinstance(elt.value, str)
        ]

    for stmt in ast_node.body:
        if isinstance(stmt, ast.Assign):
            for target in stmt.targets:
                if not isinstance(target, ast.Name):
                    continue
                attributes[target.id] = (
                    PyClassAttribute.builder()
                    .with_name(target.id)
                    .with_type(
                        self._infer_type(
                            script, target.lineno, target.col_offset
                        )
                    )
                    .with_start_line(getattr(target, "lineno", -1))
                    .with_end_line(getattr(stmt, "end_lineno", stmt.lineno))
                    .build()
                )
                # We may also encounter `__slots__` in class definitions.
                # This is a special case where attributes are defined in a list or tuple.
                #   class Foo:
                #       __slots__ = ('x', 'y')
                # It has to be handled here: a separate `elif` on ast.Assign
                # after this branch could never be reached.
                if target.id == "__slots__":
                    for slot in slot_names(stmt.value):
                        attributes[slot] = (
                            PyClassAttribute.builder()
                            .with_name(slot)
                            .with_type("slot")
                            .with_start_line(getattr(stmt, "lineno", -1))
                            .with_end_line(getattr(stmt, "end_lineno", stmt.lineno))
                            .build()
                        )

        elif isinstance(stmt, ast.AnnAssign):
            target = stmt.target
            if isinstance(target, ast.Name):
                attributes[target.id] = (
                    PyClassAttribute.builder()
                    .with_name(target.id)
                    .with_type(
                        ast.unparse(stmt.annotation)
                        if stmt.annotation
                        else self._infer_type(
                            script, target.lineno, target.col_offset
                        )
                    )
                    .with_start_line(getattr(target, "lineno", -1))
                    .with_end_line(getattr(stmt, "end_lineno", stmt.lineno))
                    .build()
                )

    return attributes
439
+
440
def _callable_parameters(
    self, fn_node: ast.FunctionDef, script: Script
) -> List[PyCallableParameter]:
    """
    Extracts callable parameters from the function definition.

    Parameters are returned in declaration order: positional-only, regular
    positional, ``*args``, keyword-only, then ``**kwargs``.

    Args:
        fn_node (ast.FunctionDef): The AST node representing the function.
        script (Script): The Jedi script object for the module.

    Returns:
        List[PyCallableParameter]: One entry per declared parameter.
    """

    # Pull full name from Jedi (e.g., mypkg.module.MyClass.my_func)
    try:
        definitions = script.goto(line=fn_node.lineno, column=fn_node.col_offset)
        full_name = next(
            (d.full_name for d in definitions if d.type == "function"), None
        )
    except Exception:
        full_name = None

    # The second-to-last component of the dotted name is used as the type of
    # unannotated `self`/`cls` parameters.
    class_name = (
        full_name.split(".")[-2] if full_name and "." in full_name else None
    )

    params: List[PyCallableParameter] = []
    args = fn_node.args

    def resolve_type(arg_node: ast.arg) -> Optional[str]:
        if arg_node.annotation:
            return ast.unparse(arg_node.annotation)
        if arg_node.arg in {"self", "cls"} and class_name:
            return class_name
        return self._infer_type(script, arg_node.lineno, arg_node.col_offset)

    def build_param(
        arg_node: ast.arg, default: Optional[ast.expr]
    ) -> PyCallableParameter:
        return (
            PyCallableParameter.builder()
            .with_name(arg_node.arg)
            .with_type(resolve_type(arg_node))
            .with_default_value(
                ast.unparse(default) if default is not None else None
            )
            .with_start_line(getattr(arg_node, "lineno", -1))
            .with_end_line(
                getattr(arg_node, "end_lineno", getattr(arg_node, "lineno", -1))
            )
            .with_start_column(getattr(arg_node, "col_offset", -1))
            .with_end_column(getattr(arg_node, "end_col_offset", -1))
            .build()
        )

    # `args.defaults` holds the defaults of the *last* positional parameters,
    # counting positional-only and regular ones together, so both groups must
    # be aligned against it as a single sequence.
    positional = list(getattr(args, "posonlyargs", [])) + list(args.args)
    first_default = len(positional) - len(args.defaults)
    for index, arg in enumerate(positional):
        default = args.defaults[index - first_default] if index >= first_default else None
        params.append(build_param(arg, default))

    if args.vararg:
        params.append(build_param(args.vararg, None))

    # `kw_defaults` runs parallel to `kwonlyargs`, with None for "no default".
    for arg, default in zip(args.kwonlyargs, args.kw_defaults):
        params.append(build_param(arg, default))

    if args.kwarg:
        params.append(build_param(args.kwarg, None))

    return params
506
+
507
+ def _accessed_symbols(self, fn_node: ast.FunctionDef, script: Script) -> List[str]:
508
+ """Analyzes the function body to extract all accessed symbols."""
509
+ symbols = []
510
+ for node in ast.walk(fn_node):
511
+ if isinstance(node, ast.Name) and isinstance(node.ctx, ast.Load):
512
+ symbol = self._symbol_from_name_node(
513
+ node, script, enclosing_scope="local"
514
+ )
515
+ symbols.append(symbol)
516
+ return symbols
517
+
518
def _call_sites(self, fn_node: ast.FunctionDef, script: Script) -> List[PyCallsite]:
    """
    Finds all call sites made from within the function using Jedi for type inference.

    Args:
        fn_node (ast.FunctionDef): The AST node representing the function.
        script (jedi.Script): The Jedi script object.

    Returns:
        List[PyCallsite]: A list of PyCallsite objects representing each call.
    """
    call_sites: List[PyCallsite] = []

    for node in ast.walk(fn_node):
        if not isinstance(node, ast.Call):
            continue

        func_expr = node.func

        method_name = "<unknown>"
        receiver_expr = None
        receiver_type = None
        # Position at which the callee is resolved. For a plain name this is
        # the start of the call expression.
        callee_line, callee_column = node.lineno, node.col_offset

        if isinstance(func_expr, ast.Attribute):
            receiver_expr = ast.unparse(func_expr.value)
            receiver_type = self._infer_type(
                script, func_expr.value.lineno, func_expr.value.col_offset
            )
            method_name = func_expr.attr
            # For `recv.method(...)` the call starts at `recv`, so resolving
            # there would name the receiver. The attribute name is the last
            # token of the Attribute node: step back from the node's end.
            attr_end_line = getattr(func_expr, "end_lineno", None)
            attr_end_column = getattr(func_expr, "end_col_offset", None)
            if attr_end_line is not None and attr_end_column is not None:
                callee_line = attr_end_line
                # ast column offsets count UTF-8 bytes, so measure the name the same way.
                callee_column = attr_end_column - len(func_expr.attr.encode("utf-8"))
        elif isinstance(func_expr, ast.Name):
            method_name = func_expr.id

        callee_signature = self._infer_qualified_name(
            script, callee_line, callee_column
        )
        return_type = self._infer_type(script, node.lineno, node.col_offset)

        # Fall back to the AST node class name when Jedi infers nothing.
        argument_types = [
            self._infer_type(script, arg.lineno, arg.col_offset)
            or type(arg).__name__
            for arg in node.args
        ]

        call_sites.append(
            PyCallsite.builder()
            .with_method_name(method_name)
            .with_receiver_expr(receiver_expr)
            .with_receiver_type(receiver_type)
            .with_argument_types(argument_types)
            .with_return_type(return_type)
            .with_callee_signature(callee_signature)
            # NOTE(review): only explicit `__init__(...)` calls are flagged here;
            # `Foo()` instantiations are not - confirm this is intended.
            .with_is_constructor_call(method_name == "__init__")
            .with_start_line(getattr(node, "lineno", -1))
            .with_start_column(getattr(node, "col_offset", -1))
            .with_end_line(getattr(node, "end_lineno", -1))
            .with_end_column(getattr(node, "end_col_offset", -1))
            .build()
        )

    return call_sites
577
+
578
def _module_variables(
    self, module: ast.Module, script: Script
) -> List[PyVariableDeclaration]:
    """
    Collects variable declarations that live at module level.

    Assignments nested in functions or classes are skipped; assignments inside
    other module-level statements (such as an `if __name__ == "__main__"` block)
    are kept. Only simple-name targets are recorded.

    Args:
        module (ast.Module): The root module AST. Every node below it receives
            a ``parent`` attribute as a side effect.
        script (jedi.Script): For type inference.

    Returns:
        List[PyVariableDeclaration]
    """
    scope_nodes = (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)

    def inside_function_or_class(start: ast.AST) -> bool:
        current = start
        while hasattr(current, "parent"):
            current = current.parent
            if isinstance(current, scope_nodes):
                return True
        return False

    def declaration(
        target: ast.Name, type_name: Optional[str], stmt: ast.AST
    ) -> PyVariableDeclaration:
        initializer = ast.unparse(stmt.value) if stmt.value else None
        return (
            PyVariableDeclaration.builder()
            .with_name(target.id)
            .with_type(type_name)
            .with_initializer(initializer)
            .with_value(None)
            .with_scope("module")
            .with_start_line(getattr(target, "lineno", -1))
            .with_end_line(
                getattr(stmt, "end_lineno", getattr(stmt, "lineno", -1))
            )
            .with_start_column(getattr(target, "col_offset", -1))
            .with_end_column(getattr(target, "end_col_offset", -1))
            .build()
        )

    # Add parent pointers (needed for scope check)
    for owner in ast.walk(module):
        for child in ast.iter_child_nodes(owner):
            child.parent = owner  # type: ignore

    found: List[PyVariableDeclaration] = []
    for stmt in ast.walk(module):
        if not isinstance(stmt, (ast.Assign, ast.AnnAssign)):
            continue
        if inside_function_or_class(stmt):
            continue

        if isinstance(stmt, ast.Assign):
            for target in stmt.targets:
                if isinstance(target, ast.Name):
                    inferred = self._infer_type(
                        script, target.lineno, target.col_offset
                    )
                    found.append(declaration(target, inferred, stmt))
        elif isinstance(stmt.target, ast.Name):
            # Annotated assignment: the written annotation wins over inference.
            if stmt.annotation:
                type_name = ast.unparse(stmt.annotation)
            else:
                type_name = self._infer_type(script, stmt.lineno, stmt.col_offset)
            found.append(declaration(stmt.target, type_name, stmt))

    return found
662
+
663
def _local_variables(
    self, fn_node: ast.FunctionDef, script: Script
) -> List[PyVariableDeclaration]:
    """
    Collects the variables and ``self.<attr>`` attributes assigned anywhere inside the function.

    Simple names are reported with scope ``"function"``, attributes assigned on
    ``self`` with scope ``"class"``. Other assignment targets are ignored.

    Args:
        fn_node (ast.FunctionDef): The function AST node.
        script (jedi.Script): Jedi script for type inference.

    Returns:
        List[PyVariableDeclaration]: All variables assigned inside this function.
    """

    def classify(target: ast.AST):
        """Returns (name, scope) for supported targets, otherwise None."""
        if isinstance(target, ast.Name):
            return target.id, "function"
        if (
            isinstance(target, ast.Attribute)
            and isinstance(target.value, ast.Name)
            and target.value.id == "self"
        ):
            return target.attr, "class"
        return None

    def declaration(
        name: str,
        scope: str,
        type_name: Optional[str],
        initializer: Optional[str],
        target: ast.AST,
        stmt: ast.AST,
    ) -> PyVariableDeclaration:
        return (
            PyVariableDeclaration.builder()
            .with_name(name)
            .with_type(type_name)
            .with_initializer(initializer)
            .with_value(None)
            .with_scope(scope)
            .with_start_line(getattr(target, "lineno", -1))
            .with_end_line(
                getattr(stmt, "end_lineno", getattr(stmt, "lineno", -1))
            )
            .with_start_column(getattr(target, "col_offset", -1))
            .with_end_column(getattr(target, "end_col_offset", -1))
            .build()
        )

    found: List[PyVariableDeclaration] = []

    for stmt in ast.walk(fn_node):
        if isinstance(stmt, ast.Assign):
            for target in stmt.targets:
                kind = classify(target)
                if kind is None:
                    continue
                name, scope = kind
                inferred = self._infer_type(
                    script, target.lineno, target.col_offset
                )
                initializer = ast.unparse(stmt.value) if stmt.value else None
                found.append(
                    declaration(name, scope, inferred, initializer, target, stmt)
                )

        elif isinstance(stmt, ast.AnnAssign):
            # Annotated forms: `x: int = SOME_VALUE` and `self.attr: int = SOME_VALUE`
            if stmt.annotation:
                annotation_str = ast.unparse(stmt.annotation)
            else:
                annotation_str = self._infer_type(
                    script, stmt.lineno, stmt.col_offset
                )
            initializer_str = ast.unparse(stmt.value) if stmt.value else None
            kind = classify(stmt.target)
            if kind is not None:
                name, scope = kind
                found.append(
                    declaration(
                        name, scope, annotation_str, initializer_str, stmt.target, stmt
                    )
                )

    return found
780
+
781
+ def _cyclomatic_complexity(self, fn_node: ast.FunctionDef) -> int:
782
+ """
783
+ Computes the cyclomatic complexity of a function based on its control flow constructs.
784
+
785
+ Args:
786
+ fn_node (ast.FunctionDef): AST node representing the function.
787
+
788
+ Returns:
789
+ int: Cyclomatic complexity score (>= 1).
790
+ """
791
+ complexity = 1 # Base path
792
+
793
+ for node in ast.walk(fn_node):
794
+ if isinstance(node, (ast.If, ast.For, ast.While, ast.With, ast.Try)):
795
+ complexity += 1
796
+
797
+ elif isinstance(node, ast.BoolOp):
798
+ # Count 'and' / 'or' as individual decision points
799
+ complexity += len(node.values) - 1
800
+
801
+ elif isinstance(node, ast.IfExp):
802
+ # Ternary conditional: x if cond else y
803
+ complexity += 1
804
+
805
+ elif isinstance(node, ast.ExceptHandler):
806
+ # Try and catch statement
807
+ complexity += 1
808
+
809
+ # TODO: I am also counting 'assert' or 'return' or 'yield' as complexity bumps
810
+ elif isinstance(node, (ast.Assert, ast.Return, ast.Yield, ast.YieldFrom)):
811
+ complexity += 1
812
+
813
+ return complexity
814
+
815
def _symbol_from_name_node(
    self,
    name_node: ast.Name,
    script: Optional[Script] = None,
    enclosing_scope: Optional[str] = None,  # e.g. "function", "class", "module"
) -> PySymbol:
    """
    Builds a PySymbol object from a given ast.Name node.

    Args:
        name_node (ast.Name): The AST node representing the variable.
        script (Optional[jedi.Script]): Jedi script for type/scope inference.
        enclosing_scope (Optional[str]): The logical scope the name is inside of.
            Defaults to "local" when not given.

    Returns:
        PySymbol: A fully built symbol object. Type, qualified name and kind are
        filled from the first definition Jedi infers; without a script, or when
        inference fails, the kind stays "variable" and the other two stay None.
    """
    # `__builtins__` is a module in `__main__` but a plain dict in imported
    # modules, so `dir(__builtins__)` is unreliable. The `builtins` module is
    # the same everywhere. Imported locally to keep this method self-contained.
    import builtins

    # Jedi definition type -> symbol kind; anything else stays "variable".
    kind_by_definition_type = {
        "function": "function",
        "module": "module",
        "class": "class",
        "param": "parameter",
    }

    name = name_node.id
    lineno = getattr(name_node, "lineno", -1)
    col_offset = getattr(name_node, "col_offset", -1)
    is_builtin = hasattr(builtins, name)
    qname = None
    inferred_type = None
    kind = "variable"
    scope = enclosing_scope or "local"

    if script:
        try:
            definitions = script.infer(line=lineno, column=col_offset)
            if definitions:
                d = definitions[0]
                inferred_type = d.name
                qname = d.full_name
                kind = kind_by_definition_type.get(d.type, kind)
                if d.type == "module":
                    scope = "global"
        except Exception:
            # Inference is best-effort; keep the defaults set above.
            pass

    return (
        PySymbol.builder()
        .with_name(name)
        .with_scope(scope)
        .with_kind(kind)
        .with_type(inferred_type)
        .with_qualified_name(qname)
        .with_is_builtin(is_builtin)
        .with_lineno(lineno)
        .with_col_offset(col_offset)
        .build()
    )
872
+
873
def build(self) -> Dict[str, PyModule]:
    """Builds the symbol table for the project.

    Every ``*.py`` file below the project directory is processed, except files
    whose resolved path contains ``site-packages``, ``.venv`` or
    ``.codeanalyzer``. A file that fails to process is logged and left out of
    the result.

    Returns:
        Dict[str, PyModule]: Mapping of file path (as a string) to its PyModule.
    """
    excluded_markers = (
        "site-packages",  # exclude site-packages
        ".venv",  # exclude virtual environments
        ".codeanalyzer",  # exclude internal cache directories
    )

    def is_project_file(candidate: Path) -> bool:
        resolved = candidate.resolve().__str__()
        return not any(marker in resolved for marker in excluded_markers)

    # Get all Python files first to show accurate progress
    py_files = [
        candidate
        for candidate in self.project_dir.rglob("*.py")
        if is_project_file(candidate)
    ]

    symbol_table: Dict[str, PyModule] = {}
    with ProgressBar(len(py_files), "Building symbol table") as progress:
        for py_file in py_files:
            try:
                symbol_table[str(py_file)] = self._module(py_file)
            except Exception as e:
                logger.error(f"Failed to process {py_file}: {e}")
            progress.advance()
        progress.finish("✅ Symbol table generation complete.")

    return symbol_table