codeanalyzer-python 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codeanalyzer/__init__.py +0 -0
- codeanalyzer/__main__.py +84 -0
- codeanalyzer/core.py +321 -0
- codeanalyzer/jedi/__init__.py +0 -0
- codeanalyzer/jedi/jedi.py +0 -0
- codeanalyzer/py.typed +0 -0
- codeanalyzer/schema/__init__.py +23 -0
- codeanalyzer/schema/py_schema.py +360 -0
- codeanalyzer/semantic_analysis/__init__.py +0 -0
- codeanalyzer/semantic_analysis/codeql/__init__.py +26 -0
- codeanalyzer/semantic_analysis/codeql/codeql_analysis.py +133 -0
- codeanalyzer/semantic_analysis/codeql/codeql_exceptions.py +12 -0
- codeanalyzer/semantic_analysis/codeql/codeql_loader.py +74 -0
- codeanalyzer/semantic_analysis/codeql/codeql_query_runner.py +164 -0
- codeanalyzer/semantic_analysis/wala/__init__.py +15 -0
- codeanalyzer/syntactic_analysis/__init__.py +0 -0
- codeanalyzer/syntactic_analysis/symbol_table_builder.py +903 -0
- codeanalyzer/utils/__init__.py +5 -0
- codeanalyzer/utils/logging.py +18 -0
- codeanalyzer/utils/progress_bar.py +69 -0
- {codeanalyzer_python-0.1.1.dist-info → codeanalyzer_python-0.1.3.dist-info}/METADATA +3 -3
- codeanalyzer_python-0.1.3.dist-info/RECORD +26 -0
- codeanalyzer_python-0.1.1.dist-info/RECORD +0 -6
- {codeanalyzer_python-0.1.1.dist-info → codeanalyzer_python-0.1.3.dist-info}/WHEEL +0 -0
- {codeanalyzer_python-0.1.1.dist-info → codeanalyzer_python-0.1.3.dist-info}/entry_points.txt +0 -0
- {codeanalyzer_python-0.1.1.dist-info → codeanalyzer_python-0.1.3.dist-info}/licenses/LICENSE +0 -0
- {codeanalyzer_python-0.1.1.dist-info → codeanalyzer_python-0.1.3.dist-info}/licenses/NOTICE +0 -0
|
@@ -0,0 +1,903 @@
|
|
|
1
|
+
from io import StringIO
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
import tokenize
|
|
4
|
+
from typing import Dict, List, Optional
|
|
5
|
+
import astor
|
|
6
|
+
import jedi
|
|
7
|
+
from codeanalyzer.utils import logger
|
|
8
|
+
from jedi.api.project import Project
|
|
9
|
+
from jedi.api import Script
|
|
10
|
+
from rich.progress import track
|
|
11
|
+
from codeanalyzer.schema.py_schema import (
|
|
12
|
+
PyCallable,
|
|
13
|
+
PyCallableParameter,
|
|
14
|
+
PyCallsite,
|
|
15
|
+
PyClass,
|
|
16
|
+
PyClassAttribute,
|
|
17
|
+
PyComment,
|
|
18
|
+
PyImport,
|
|
19
|
+
PyModule,
|
|
20
|
+
PySymbol,
|
|
21
|
+
PyVariableDeclaration,
|
|
22
|
+
)
|
|
23
|
+
import ast
|
|
24
|
+
from ast import AST, ClassDef
|
|
25
|
+
|
|
26
|
+
from codeanalyzer.utils.progress_bar import ProgressBar
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class SymbolTableBuilder:
|
|
30
|
+
"""A class for building a symbol table for a Python project."""
|
|
31
|
+
|
|
32
|
+
def __init__(self, project_dir: Path | str, virtualenv: Path | str | None) -> None:
    """Create the builder and the Jedi project it uses for inference.

    Args:
        project_dir (Path | str): Root directory of the project to analyze.
        virtualenv (Path | str | None): Root directory of a virtual
            environment, or None to create the Jedi project without an
            explicit environment.
    """
    import os  # local import: only used to choose the virtualenv layout

    self.project_dir = Path(project_dir)

    if virtualenv is None:
        # No virtual environment: create the Jedi project without one.
        self.jedi_project: Project = jedi.Project(path=self.project_dir)
        return

    # Point Jedi at the environment's interpreter so it can find the
    # packages installed there. Windows virtual environments keep the
    # interpreter in "Scripts/python.exe" instead of "bin/python".
    venv_root = Path(virtualenv)
    if os.name == "nt":
        interpreter = venv_root / "Scripts" / "python.exe"
    else:
        interpreter = venv_root / "bin" / "python"

    self.jedi_project = jedi.Project(
        path=self.project_dir,
        environment_path=interpreter,
    )
|
|
43
|
+
|
|
44
|
+
@staticmethod
|
|
45
|
+
def _infer_type(script: Script, line: int, column: int) -> str:
|
|
46
|
+
"""Tries to infer the type at a given position using Jedi."""
|
|
47
|
+
try:
|
|
48
|
+
inference = script.infer(line=line, column=column)
|
|
49
|
+
if inference:
|
|
50
|
+
return inference[0].name # or .full_name
|
|
51
|
+
except Exception:
|
|
52
|
+
pass
|
|
53
|
+
return None
|
|
54
|
+
|
|
55
|
+
@staticmethod
|
|
56
|
+
def _infer_qualified_name(script: Script, line: int, column: int) -> Optional[str]:
|
|
57
|
+
"""
|
|
58
|
+
Tries to infer the fully qualified name (e.g., os.path.join) at the given position using Jedi.
|
|
59
|
+
|
|
60
|
+
Args:
|
|
61
|
+
script (jedi.Script): The Jedi script object.
|
|
62
|
+
line (int): Line number of the expression.
|
|
63
|
+
column (int): Column offset of the expression.
|
|
64
|
+
|
|
65
|
+
Returns:
|
|
66
|
+
Optional[str]: The fully qualified name if available, else None.
|
|
67
|
+
"""
|
|
68
|
+
try:
|
|
69
|
+
definitions = script.infer(line=line, column=column)
|
|
70
|
+
if definitions:
|
|
71
|
+
return definitions[0].full_name
|
|
72
|
+
except Exception:
|
|
73
|
+
pass
|
|
74
|
+
return None
|
|
75
|
+
|
|
76
|
+
def _module(self, py_file: Path) -> PyModule:
    """Builds a PyModule from a Python file.

    Args:
        py_file (Path): Path to the python file.

    Returns:
        PyModule object for the input file.
    """
    # Get the raw source code from the file
    source = py_file.read_text(encoding="utf-8")
    # Create a Jedi script for the file
    script: Script = Script(path=str(py_file), project=self.jedi_project)
    module = ast.parse(source, filename=str(py_file))

    classes = {}
    functions = {}
    for node in ast.iter_child_nodes(module):
        if isinstance(node, ClassDef):
            classes.update(self._add_class(node, script))
        elif isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            # _callables() walks the *children* of the node it receives, so
            # handing it the definition directly would skip the definition
            # itself. Wrap it in a synthetic single-statement module so the
            # top-level function is visited like any other child.
            holder = ast.Module(body=[node], type_ignores=[])
            functions.update(self._callables(holder, script))

    return (
        PyModule.builder()
        .with_file_path(str(py_file))
        .with_module_name(py_file.stem)
        .with_comments(self._pycomments(module, source))
        .with_imports(self._imports(module))
        .with_variables(self._module_variables(module, script))
        .with_classes(classes)
        .with_functions(functions)
        .build()
    )
|
|
110
|
+
|
|
111
|
+
def _imports(self, module: ast.Module) -> List[PyImport]:
    """
    Collects every ``import`` and ``from ... import`` statement found in the module.

    Args:
        module (ast.Module): The AST node representing the module.

    Returns:
        List[PyImport]: One PyImport per imported name, in ``ast.walk`` order.
    """

    def describe(stmt: ast.stmt, origin: str, alias: ast.alias) -> PyImport:
        # One PyImport per alias; positions are those of the whole statement.
        return (
            PyImport.builder()
            .with_module(origin)
            .with_name(alias.asname or alias.name)  # name in local scope
            .with_alias(alias.name if alias.asname else None)
            .with_start_line(getattr(stmt, "lineno", -1))
            .with_end_line(getattr(stmt, "end_lineno", stmt.lineno))
            .with_start_column(getattr(stmt, "col_offset", -1))
            .with_end_column(getattr(stmt, "end_col_offset", -1))
            .build()
        )

    found: List[PyImport] = []
    for stmt in ast.walk(module):
        if isinstance(stmt, ast.Import):
            # for "import os", alias.name = "os"
            found.extend(describe(stmt, alias.name, alias) for alias in stmt.names)
        elif isinstance(stmt, ast.ImportFrom):
            base = stmt.module or ""  # e.g., from . import x
            # Relative imports keep their leading dots in the module name.
            origin = "." * stmt.level + base if stmt.level else base
            found.extend(describe(stmt, origin, alias) for alias in stmt.names)

    return found
|
|
159
|
+
|
|
160
|
+
def _add_class(
    self, class_node: ast.ClassDef, script: Script
) -> Dict[str, PyClass]:
    """Builds a PyClass from a class definition node.

    Args:
        class_node (ast.ClassDef): The AST node representing the class.
        script (Script): The Jedi script object for the module.

    Returns:
        Dict[str, PyClass]: Mapping of class signature to PyClass object.
    """
    # Path-derived signature used when Jedi cannot name the class.
    fallback_signature: str = (
        f"{script.path.__str__().replace('/', '.').replace('.py', '')}.{class_node.name}"
    )

    # Try resolving full signature with Jedi
    try:
        definitions = script.goto(
            line=class_node.lineno, column=class_node.col_offset
        )
        signature = next(
            (
                d.full_name
                for d in definitions
                if d.type == "class" and d.full_name
            ),
            fallback_signature,
        )
    except Exception:
        # Must stay a plain string: it is used both as the PyClass signature
        # and as the key of the returned mapping.
        signature = fallback_signature

    code: str = astor.to_source(class_node).strip()

    # NOTE(review): `code` is regenerated from the AST by astor, so its line
    # numbers need not match class_node's positions in the original file;
    # confirm that _pycomments() finds the intended `#` comments here.
    py_class = (
        PyClass.builder()
        .with_name(class_node.name)
        .with_signature(signature)
        .with_start_line(class_node.lineno)
        .with_end_line(
            getattr(
                class_node, "end_lineno", class_node.lineno + len(class_node.body)
            )
        )
        .with_comments(self._pycomments(class_node, code))
        .with_code(code)
        .with_base_classes(
            [
                ast.unparse(base)
                for base in class_node.bases
                if isinstance(base, ast.expr)
            ]
        )
        .with_methods(self._callables(class_node, script))
        .with_attributes(self._class_attributes(class_node, script))
        .with_inner_classes(
            {
                k: v
                for child in class_node.body
                if isinstance(child, ast.ClassDef)
                for k, v in self._add_class(child, script).items()
            }
        )
        .build()
    )

    return {signature: py_class}
|
|
221
|
+
|
|
222
|
+
def _callables(self, node: AST, script: Script) -> Dict[str, PyCallable]:
    """
    Builds PyCallable objects for the functions found beneath an AST node.

    Only the descendants of ``node`` are inspected; ``node`` itself is never
    recorded. Both ``def`` and ``async def`` definitions are collected, and
    definitions nested in other functions, classes or compound statements are
    visited recursively.

    Args:
        node (AST): The AST node to process (e.g., Module, ClassDef, FunctionDef).
        script (Script): The Jedi script object for the module.

    Returns:
        Dict[str, PyCallable]: A dictionary mapping function/method names to
        PyCallable objects. Keys are bare names, so a later definition with
        the same name replaces an earlier one.
    """
    function_nodes = (ast.FunctionDef, ast.AsyncFunctionDef)
    callables: Dict[str, PyCallable] = {}
    # A placeholder path is used when script.path is falsy.
    module_name: str = Path(script.path or "<unknown_module>").stem

    def visit(n: AST, class_prefix: str = "") -> None:
        for child in ast.iter_child_nodes(n):
            if isinstance(child, function_nodes):
                method_name = child.name
                start_line = child.lineno
                end_line = getattr(
                    child, "end_lineno", start_line + len(child.body)
                )
                code_start_line = child.body[0].lineno if child.body else start_line
                code = astor.to_source(child).strip()
                decorators = [ast.unparse(d) for d in child.decorator_list]

                # NOTE(review): the lookup is issued at the start of the
                # definition statement rather than at the function name;
                # confirm Jedi resolves the definition from that position.
                try:
                    definitions = script.goto(
                        line=start_line, column=child.col_offset
                    )
                except Exception:
                    definitions = []

                # Fall back to a name built from the module and the
                # enclosing definitions when Jedi has no answer.
                signature = next(
                    (d.full_name for d in definitions if d.type == "function"),
                    f"{module_name}.{class_prefix}{method_name}",
                )

                callables[method_name] = (
                    PyCallable.builder()
                    .with_name(method_name)
                    .with_path(str(script.path))
                    .with_signature(signature)
                    .with_decorators(decorators)
                    .with_code(code)
                    .with_start_line(start_line)
                    .with_end_line(end_line)
                    .with_code_start_line(code_start_line)
                    .with_accessed_symbols(self._accessed_symbols(child, script))
                    .with_call_sites(self._call_sites(child, script))
                    .with_local_variables(self._local_variables(child, script))
                    .with_cyclomatic_complexity(self._cyclomatic_complexity(child))
                    .with_parameters(self._callable_parameters(child, script))
                    .with_return_type(
                        ast.unparse(child.returns)
                        if child.returns
                        else self._infer_type(
                            script, child.lineno, child.col_offset
                        )
                    )
                    .with_comments(self._pycomments(child, code))
                    .build()
                )

                # Nested definitions are qualified by the enclosing function.
                visit(child, class_prefix + method_name + ".")

            elif isinstance(child, ast.ClassDef):
                visit(child, class_prefix + child.name + ".")

            elif hasattr(child, "body"):
                # Compound statements (if/for/with/...) do not add a prefix.
                visit(child, class_prefix)

    visit(node)
    return callables
|
|
297
|
+
|
|
298
|
+
def _pycomments(self, node: ast.AST, source: str) -> List[PyComment]:
    """
    Extracts all PyComment instances (docstring and # comments) from within a specific AST node's body.

    Args:
        node (AST): The AST node (e.g., Module, ClassDef, FunctionDef).
        source (str): Source code that is tokenized to find ``#`` comments.

    Returns:
        List[PyComment]: The docstring (if any) followed by the ``#`` comments
        whose line falls inside the node's line range. A node without
        position information (an ``ast.Module``) spans the whole source.
    """
    comments: List[PyComment] = []

    # 1. Extract docstring (if any)
    docstring_content = ast.get_docstring(node, clean=False)
    if docstring_content:
        try:
            string_node = node.body[0].value  # type: ignore
            start_line = getattr(string_node, "lineno", getattr(node, "lineno", -1))
            end_line = getattr(string_node, "end_lineno", start_line)
            start_column = getattr(string_node, "col_offset", -1)
            end_column = getattr(
                string_node, "end_col_offset", start_column + len(docstring_content)
            )
        except (AttributeError, IndexError):
            # Fall back to the position of the enclosing node.
            start_line = getattr(node, "lineno", -1)
            end_line = getattr(node, "end_lineno", start_line)
            start_column = getattr(node, "col_offset", -1)
            end_column = start_column + len(docstring_content)

        comments.append(
            PyComment.builder()
            .with_content(docstring_content)
            .with_start_line(start_line)
            .with_end_line(end_line)
            .with_start_column(start_column)
            .with_end_column(end_column)
            .with_is_docstring(True)
            .build()
        )

    # 2. Extract # comments scoped within the node's line range.
    # ast.Module has no lineno/end_lineno; treating that as "line -1" would
    # reject every token, so a missing position means "no restriction".
    node_start = getattr(node, "lineno", None)
    node_end = getattr(node, "end_lineno", node_start)

    for tok in tokenize.generate_tokens(StringIO(source).readline):
        if tok.type != tokenize.COMMENT:
            continue
        tok_line, tok_col = tok.start
        if node_start is not None and not (node_start <= tok_line <= node_end):
            continue
        comments.append(
            PyComment.builder()
            .with_content(tok.string.lstrip("#").strip())
            .with_start_line(tok_line)
            .with_end_line(tok_line)
            .with_start_column(tok_col)
            .with_end_column(tok_col + len(tok.string))
            .with_is_docstring(False)
            .build()
        )

    return comments
|
|
361
|
+
|
|
362
|
+
def _class_attributes(
    self, ast_node: ast.AST, script: Script
) -> Dict[str, PyClassAttribute]:
    """
    Extracts class attributes from the class definition.

    Plain assignments and annotated assignments to simple names in the class
    body are recorded. String entries of a ``__slots__`` list or tuple are
    additionally recorded with the type ``"slot"``.

    Args:
        ast_node (AST): The AST node representing the class.
        script (Script): The Jedi script object for the module.

    Returns:
        Dict[str, PyClassAttribute]: A dictionary mapping attribute names to their metadata.
    """
    attributes: Dict[str, PyClassAttribute] = {}

    def attribute(name: str, type_name, start_line: int, stmt: ast.stmt) -> PyClassAttribute:
        # Every attribute ends on the last line of the defining statement.
        return (
            PyClassAttribute.builder()
            .with_name(name)
            .with_type(type_name)
            .with_start_line(start_line)
            .with_end_line(getattr(stmt, "end_lineno", stmt.lineno))
            .build()
        )

    for stmt in ast_node.body:
        if isinstance(stmt, ast.Assign):
            for target in stmt.targets:
                if not isinstance(target, ast.Name):
                    continue
                attributes[target.id] = attribute(
                    target.id,
                    self._infer_type(script, target.lineno, target.col_offset),
                    getattr(target, "lineno", -1),
                    stmt,
                )

                # `__slots__` is a special case where attributes are declared
                # in a list or tuple of names:
                #     class Foo:
                #         __slots__ = ('x', 'y')
                # This must be handled inside the Assign branch; a separate
                # `elif` on ast.Assign would never be reached.
                if target.id == "__slots__" and isinstance(
                    stmt.value, (ast.List, ast.Tuple)
                ):
                    for elt in stmt.value.elts:
                        if isinstance(elt, ast.Constant) and isinstance(elt.value, str):
                            attributes[elt.value] = attribute(
                                elt.value,
                                "slot",
                                getattr(stmt, "lineno", -1),
                                stmt,
                            )

        elif isinstance(stmt, ast.AnnAssign):
            target = stmt.target
            if isinstance(target, ast.Name):
                attributes[target.id] = attribute(
                    target.id,
                    ast.unparse(stmt.annotation)
                    if stmt.annotation
                    else self._infer_type(script, target.lineno, target.col_offset),
                    getattr(target, "lineno", -1),
                    stmt,
                )

    return attributes
|
|
439
|
+
|
|
440
|
+
def _callable_parameters(
    self, fn_node: ast.FunctionDef, script: Script
) -> List[PyCallableParameter]:
    """
    Extracts callable parameters from the function definition.

    Args:
        fn_node (ast.FunctionDef): The function AST node.
        script (Script): The Jedi script object for the module.

    Returns:
        List[PyCallableParameter]: Parameters in declaration order:
        positional-only, positional-or-keyword, ``*args``, keyword-only,
        ``**kwargs``.
    """

    # Pull full name from Jedi (e.g., mypkg.module.MyClass.my_func)
    try:
        definitions = script.goto(line=fn_node.lineno, column=fn_node.col_offset)
        full_name = next(
            (d.full_name for d in definitions if d.type == "function"), None
        )
    except Exception:
        full_name = None

    # The component before the function name is used as the type of
    # `self` / `cls` when those parameters carry no annotation.
    class_name = (
        full_name.split(".")[-2] if full_name and "." in full_name else None
    )

    params: List[PyCallableParameter] = []
    args = fn_node.args

    def resolve_type(arg_node: ast.arg) -> Optional[str]:
        if arg_node.annotation:
            return ast.unparse(arg_node.annotation)
        if arg_node.arg in {"self", "cls"} and class_name:
            return class_name
        return self._infer_type(script, arg_node.lineno, arg_node.col_offset)

    def build_param(
        arg_node: ast.arg, default: Optional[ast.expr]
    ) -> PyCallableParameter:
        return (
            PyCallableParameter.builder()
            .with_name(arg_node.arg)
            .with_type(resolve_type(arg_node))
            .with_default_value(ast.unparse(default) if default else None)
            .with_start_line(getattr(arg_node, "lineno", -1))
            .with_end_line(
                getattr(arg_node, "end_lineno", getattr(arg_node, "lineno", -1))
            )
            .with_start_column(getattr(arg_node, "col_offset", -1))
            .with_end_column(getattr(arg_node, "end_col_offset", -1))
            .build()
        )

    # `args.defaults` holds the defaults of the *last* positional parameters,
    # counted across positional-only and regular parameters together, so the
    # two lists have to be aligned as one sequence.
    positional = [*getattr(args, "posonlyargs", []), *args.args]
    first_default = len(positional) - len(args.defaults)
    for index, arg in enumerate(positional):
        default = args.defaults[index - first_default] if index >= first_default else None
        params.append(build_param(arg, default))

    if args.vararg:
        params.append(build_param(args.vararg, None))

    # kw_defaults is parallel to kwonlyargs; None marks "no default".
    for arg, default in zip(args.kwonlyargs, args.kw_defaults):
        params.append(build_param(arg, default))

    if args.kwarg:
        params.append(build_param(args.kwarg, None))

    return params
|
|
506
|
+
|
|
507
|
+
def _accessed_symbols(self, fn_node: ast.FunctionDef, script: Script) -> List[str]:
|
|
508
|
+
"""Analyzes the function body to extract all accessed symbols."""
|
|
509
|
+
symbols = []
|
|
510
|
+
for node in ast.walk(fn_node):
|
|
511
|
+
if isinstance(node, ast.Name) and isinstance(node.ctx, ast.Load):
|
|
512
|
+
symbol = self._symbol_from_name_node(
|
|
513
|
+
node, script, enclosing_scope="local"
|
|
514
|
+
)
|
|
515
|
+
symbols.append(symbol)
|
|
516
|
+
return symbols
|
|
517
|
+
|
|
518
|
+
def _call_sites(self, fn_node: ast.FunctionDef, script: Script) -> List[PyCallsite]:
    """
    Collects every call expression found beneath a function, using Jedi for type inference.

    Args:
        fn_node (ast.FunctionDef): The AST node representing the function.
        script (jedi.Script): The Jedi script object.

    Returns:
        List[PyCallsite]: One PyCallsite per ``ast.Call`` node, in ``ast.walk`` order.
    """
    sites: List[PyCallsite] = []

    for call in (n for n in ast.walk(fn_node) if isinstance(n, ast.Call)):
        callee = call.func

        # NOTE(review): both lookups use the start of the whole call
        # expression, not the position of the callee name.
        callee_signature = self._infer_qualified_name(
            script, call.lineno, call.col_offset
        )
        return_type = self._infer_type(script, call.lineno, call.col_offset)

        if isinstance(callee, ast.Attribute):
            # obj.method(...): the receiver is everything before the last dot.
            receiver_expr = ast.unparse(callee.value)
            receiver_type = self._infer_type(
                script, callee.value.lineno, callee.value.col_offset
            )
            method_name = callee.attr
        else:
            receiver_expr = None
            receiver_type = None
            method_name = callee.id if isinstance(callee, ast.Name) else "<unknown>"

        # Positional arguments only; the AST node class name stands in when
        # Jedi cannot infer a type.
        argument_types = []
        for arg in call.args:
            inferred = self._infer_type(script, arg.lineno, arg.col_offset)
            argument_types.append(inferred or type(arg).__name__)

        site = (
            PyCallsite.builder()
            .with_method_name(method_name)
            .with_receiver_expr(receiver_expr)
            .with_receiver_type(receiver_type)
            .with_argument_types(argument_types)
            .with_return_type(return_type)
            .with_callee_signature(callee_signature)
            .with_is_constructor_call(method_name == "__init__")
            .with_start_line(getattr(call, "lineno", -1))
            .with_start_column(getattr(call, "col_offset", -1))
            .with_end_line(getattr(call, "end_lineno", -1))
            .with_end_column(getattr(call, "end_col_offset", -1))
            .build()
        )
        sites.append(site)

    return sites
|
|
577
|
+
|
|
578
|
+
def _module_variables(
    self, module: ast.Module, script: Script
) -> List[PyVariableDeclaration]:
    """
    Collects the variables assigned outside of any function or class in the module.

    Assignments nested in other statements (for example an
    `if __name__ == "__main__"` block) are included as long as no enclosing
    node is a function or class definition.

    Args:
        module (ast.Module): The root module AST. Every node beneath it gets
            a ``parent`` attribute as a side effect.
        script (jedi.Script): For type inference.

    Returns:
        List[PyVariableDeclaration]
    """
    definition_nodes = (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)

    # Add parent pointers (needed for the scope check below).
    for parent in ast.walk(module):
        for child in ast.iter_child_nodes(parent):
            child.parent = parent  # type: ignore

    def ancestors(n: ast.AST):
        while hasattr(n, "parent"):
            n = n.parent
            yield n

    def inside_definition(n: ast.AST) -> bool:
        return any(isinstance(a, definition_nodes) for a in ancestors(n))

    def declaration(target: ast.Name, stmt: ast.stmt, type_name) -> PyVariableDeclaration:
        # Start position comes from the target, end line from the statement.
        return (
            PyVariableDeclaration.builder()
            .with_name(target.id)
            .with_type(type_name)
            .with_initializer(ast.unparse(stmt.value) if stmt.value else None)
            .with_value(None)
            .with_scope("module")
            .with_start_line(getattr(target, "lineno", -1))
            .with_end_line(getattr(stmt, "end_lineno", getattr(stmt, "lineno", -1)))
            .with_start_column(getattr(target, "col_offset", -1))
            .with_end_column(getattr(target, "end_col_offset", -1))
            .build()
        )

    found = []
    for stmt in ast.walk(module):
        if isinstance(stmt, ast.Assign):
            if inside_definition(stmt):
                continue
            for target in stmt.targets:
                if isinstance(target, ast.Name):
                    inferred = self._infer_type(script, target.lineno, target.col_offset)
                    found.append(declaration(target, stmt, inferred))

        elif isinstance(stmt, ast.AnnAssign):
            if inside_definition(stmt):
                continue
            target = stmt.target
            if isinstance(target, ast.Name):
                # The written annotation wins over inference.
                if stmt.annotation:
                    type_name = ast.unparse(stmt.annotation)
                else:
                    type_name = self._infer_type(script, stmt.lineno, stmt.col_offset)
                found.append(declaration(target, stmt, type_name))

    return found
|
|
662
|
+
|
|
663
|
+
def _local_variables(
    self, fn_node: ast.FunctionDef, script: Script
) -> List[PyVariableDeclaration]:
    """
    Collects the variables and ``self`` attributes assigned anywhere beneath a function.

    Args:
        fn_node (ast.FunctionDef): The function AST node.
        script (jedi.Script): Jedi script for type inference.

    Returns:
        List[PyVariableDeclaration]: Simple names are reported with scope
        "function", ``self.<attr>`` targets with scope "class". Other
        targets (tuples, subscripts, ...) are ignored.
    """

    def classify(target: ast.expr):
        """Return (name, scope) for a supported target, else None."""
        if isinstance(target, ast.Name):
            return target.id, "function"
        if (
            isinstance(target, ast.Attribute)
            and isinstance(target.value, ast.Name)
            and target.value.id == "self"
        ):
            # Instance attribute assignment such as self.attr = value
            return target.attr, "class"
        return None

    def declaration(name, scope, type_name, target, stmt) -> PyVariableDeclaration:
        # Start position comes from the target, end line from the statement.
        return (
            PyVariableDeclaration.builder()
            .with_name(name)
            .with_type(type_name)
            .with_initializer(ast.unparse(stmt.value) if stmt.value else None)
            .with_value(None)
            .with_scope(scope)
            .with_start_line(getattr(target, "lineno", -1))
            .with_end_line(getattr(stmt, "end_lineno", getattr(stmt, "lineno", -1)))
            .with_start_column(getattr(target, "col_offset", -1))
            .with_end_column(getattr(target, "end_col_offset", -1))
            .build()
        )

    found: List[PyVariableDeclaration] = []

    for stmt in ast.walk(fn_node):
        if isinstance(stmt, ast.Assign):
            for target in stmt.targets:
                kind = classify(target)
                if kind is None:
                    continue
                name, scope = kind
                inferred = self._infer_type(script, target.lineno, target.col_offset)
                found.append(declaration(name, scope, inferred, target, stmt))

        elif isinstance(stmt, ast.AnnAssign):
            # Annotated forms: `x: int = VALUE` and `self.attr: int = VALUE`.
            target = stmt.target
            if stmt.annotation:
                type_name = ast.unparse(stmt.annotation)
            else:
                type_name = self._infer_type(script, stmt.lineno, stmt.col_offset)
            kind = classify(target)
            if kind is not None:
                name, scope = kind
                found.append(declaration(name, scope, type_name, target, stmt))

    return found
|
|
780
|
+
|
|
781
|
+
def _cyclomatic_complexity(self, fn_node: ast.FunctionDef) -> int:
    """
    Computes the cyclomatic complexity of a function based on its control flow constructs.

    The score starts at 1 (the single straight-line path) and is incremented
    once for every node yielded by ``ast.walk(fn_node)`` that is one of:

    * ``if``, ``for``, ``async for``, ``while``, ``with``, ``async with``,
      ``try`` (and ``try ... except*`` on interpreters that define
      ``ast.TryStar``)
    * each additional operand of an ``and`` / ``or`` chain
    * a conditional expression (``x if cond else y``)
    * each ``except`` handler
    * ``assert``, ``return``, ``yield`` and ``yield from``

    ``ast.walk`` visits every descendant, so constructs inside nested
    functions, lambdas and classes count towards ``fn_node`` as well.

    Args:
        fn_node (ast.FunctionDef): AST node representing the function.

    Returns:
        int: Cyclomatic complexity score (>= 1).
    """
    # Statements that each add one path. The async variants are scored exactly
    # like their synchronous counterparts so coroutines are not under-counted.
    branching = [
        ast.If,
        ast.For,
        ast.AsyncFor,
        ast.While,
        ast.With,
        ast.AsyncWith,
        ast.Try,
    ]
    # ``try ... except*`` parses to a separate node type that only exists on
    # newer interpreters, hence the guarded lookup.
    try_star = getattr(ast, "TryStar", None)
    if try_star is not None:
        branching.append(try_star)
    branch_nodes = tuple(branching)

    complexity = 1  # Base path

    for node in ast.walk(fn_node):
        if isinstance(node, branch_nodes):
            complexity += 1

        elif isinstance(node, ast.BoolOp):
            # Count 'and' / 'or' as individual decision points:
            # n operands are joined by n - 1 operators.
            complexity += len(node.values) - 1

        elif isinstance(node, (ast.IfExp, ast.ExceptHandler)):
            # Ternary conditional (x if cond else y) or an except clause.
            complexity += 1

        # TODO: 'assert', 'return' and 'yield' are also counted as complexity
        # bumps; this goes beyond the classic McCabe definition.
        elif isinstance(node, (ast.Assert, ast.Return, ast.Yield, ast.YieldFrom)):
            complexity += 1

    return complexity
|
|
814
|
+
|
|
815
|
+
def _symbol_from_name_node(
    self,
    name_node: ast.Name,
    script: Optional[Script] = None,
    enclosing_scope: Optional[str] = None,  # e.g. "function", "class", "module"
) -> PySymbol:
    """
    Builds a PySymbol object from a given ast.Name node.

    Without a ``script`` the symbol gets kind ``"variable"``, no type, no
    qualified name, and scope ``enclosing_scope`` (or ``"local"`` when that is
    empty). With a ``script``, the first result of ``script.infer`` at the
    node's position supplies the type name and qualified name, and its
    ``type`` refines the kind: ``"function"``, ``"module"`` (which also forces
    scope ``"global"``), ``"class"`` or ``"param"`` (reported as
    ``"parameter"``). Any other inferred type leaves the kind as
    ``"variable"``.

    Args:
        name_node (ast.Name): The AST node representing the variable.
        script (Optional[jedi.Script]): Jedi script for type/scope inference.
        enclosing_scope (Optional[str]): The logical scope the name is inside of.

    Returns:
        PySymbol: A fully built symbol object. ``lineno`` / ``col_offset`` are
        ``-1`` when the node carries no position information.
    """
    # Imported locally so the block does not depend on a file-level import.
    # The ``__builtins__`` global must not be used here: CPython binds it to a
    # plain dict inside imported modules, so ``dir(__builtins__)`` would list
    # dict methods ("keys", "get", ...) instead of the builtin names.
    import builtins

    name = name_node.id
    lineno = getattr(name_node, "lineno", -1)
    col_offset = getattr(name_node, "col_offset", -1)
    is_builtin = name in vars(builtins)

    qname = None
    inferred_type = None
    kind = "variable"
    scope = enclosing_scope or "local"

    if script:
        try:
            definitions = script.infer(line=lineno, column=col_offset)
            if definitions:
                # Only the first candidate is used.
                first = definitions[0]
                inferred_type = first.name
                qname = first.full_name
                if first.type == "function":
                    kind = "function"
                elif first.type == "module":
                    kind = "module"
                    scope = "global"
                elif first.type == "class":
                    kind = "class"
                elif first.type == "param":
                    kind = "parameter"
        except Exception:
            # Inference is best-effort: on any failure keep whatever was
            # gathered so far and still return a symbol.
            pass

    return (
        PySymbol.builder()
        .with_name(name)
        .with_scope(scope)
        .with_kind(kind)
        .with_type(inferred_type)
        .with_qualified_name(qname)
        .with_is_builtin(is_builtin)
        .with_lineno(lineno)
        .with_col_offset(col_offset)
        .build()
    )
|
|
872
|
+
|
|
873
|
+
def build(self) -> Dict[str, PyModule]:
    """Builds the symbol table for the project.

    This method scans the project directory, identifies Python files,
    and constructs a symbol table containing information about classes,
    functions, and variables defined in those files.

    Files whose location *inside the project* contains ``site-packages``,
    ``.venv`` or ``.codeanalyzer`` are skipped. Directories above the project
    root are ignored for this check, so a project that itself lives under
    such a directory is still analyzed.

    A file that fails to process is logged and left out of the result; it
    does not abort the build.

    Returns:
        Dict[str, PyModule]: Maps ``str(py_file)`` to the module object
        returned by ``self._module`` for every file processed successfully.
    """
    excluded_markers = (
        "site-packages",  # installed third-party packages
        ".venv",  # virtual environments
        ".codeanalyzer",  # internal cache directories
    )
    project_root = self.project_dir.resolve()

    def _is_excluded(py_file) -> bool:
        """Return True when the file sits under an excluded directory."""
        resolved = py_file.resolve()  # resolve once; it hits the filesystem
        try:
            candidate = resolved.relative_to(project_root)
        except ValueError:
            # A symlink led outside the project: judge the full target path.
            candidate = resolved
        path_text = str(candidate)
        return any(marker in path_text for marker in excluded_markers)

    symbol_table: Dict[str, PyModule] = {}
    # Get all Python files first to show accurate progress
    py_files = [
        py_file
        for py_file in self.project_dir.rglob("*.py")
        if not _is_excluded(py_file)
    ]

    with ProgressBar(len(py_files), "Building symbol table") as progress:
        for py_file in py_files:
            try:
                py_module = self._module(py_file)
                symbol_table[str(py_file)] = py_module
            except Exception as e:
                logger.error(f"Failed to process {py_file}: {e}")
            # Advance for failures too, so the bar reaches its total.
            progress.advance()
        progress.finish("✅ Symbol table generation complete.")

    return symbol_table
|