codeanalyzer-python 0.1.3__py3-none-any.whl → 0.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codeanalyzer/__main__.py +53 -13
- codeanalyzer/core.py +5 -12
- codeanalyzer/schema/__init__.py +6 -6
- codeanalyzer/schema/py_schema.py +103 -125
- codeanalyzer/semantic_analysis/codeql/__init__.py +2 -2
- codeanalyzer/semantic_analysis/codeql/codeql_analysis.py +2 -2
- codeanalyzer/semantic_analysis/codeql/codeql_loader.py +3 -14
- codeanalyzer/semantic_analysis/codeql/codeql_query_runner.py +2 -1
- codeanalyzer/syntactic_analysis/symbol_table_builder.py +159 -162
- codeanalyzer/utils/__init__.py +1 -2
- codeanalyzer/utils/logging.py +2 -1
- codeanalyzer/utils/progress_bar.py +5 -4
- codeanalyzer_python-0.1.5.dist-info/METADATA +392 -0
- codeanalyzer_python-0.1.5.dist-info/RECORD +26 -0
- codeanalyzer_python-0.1.3.dist-info/METADATA +0 -198
- codeanalyzer_python-0.1.3.dist-info/RECORD +0 -26
- {codeanalyzer_python-0.1.3.dist-info → codeanalyzer_python-0.1.5.dist-info}/WHEEL +0 -0
- {codeanalyzer_python-0.1.3.dist-info → codeanalyzer_python-0.1.5.dist-info}/entry_points.txt +0 -0
- {codeanalyzer_python-0.1.3.dist-info → codeanalyzer_python-0.1.5.dist-info}/licenses/LICENSE +0 -0
- {codeanalyzer_python-0.1.3.dist-info → codeanalyzer_python-0.1.5.dist-info}/licenses/NOTICE +0 -0
codeanalyzer/__main__.py
CHANGED
|
@@ -1,11 +1,16 @@
|
|
|
1
|
-
from contextlib import nullcontext
|
|
2
|
-
import sys
|
|
3
|
-
import typer
|
|
4
|
-
from typing import Optional, Annotated
|
|
5
1
|
from pathlib import Path
|
|
6
|
-
from
|
|
7
|
-
from
|
|
2
|
+
from typing import Annotated, Optional
|
|
3
|
+
from enum import Enum
|
|
4
|
+
|
|
5
|
+
import typer
|
|
6
|
+
|
|
8
7
|
from codeanalyzer.core import AnalyzerCore
|
|
8
|
+
from codeanalyzer.utils import _set_log_level, logger
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class OutputFormat(str, Enum):
|
|
12
|
+
JSON = "json"
|
|
13
|
+
MSGPACK = "msgpack"
|
|
9
14
|
|
|
10
15
|
|
|
11
16
|
def main(
|
|
@@ -16,6 +21,15 @@ def main(
|
|
|
16
21
|
Optional[Path],
|
|
17
22
|
typer.Option("-o", "--output", help="Output directory for artifacts."),
|
|
18
23
|
] = None,
|
|
24
|
+
format: Annotated[
|
|
25
|
+
OutputFormat,
|
|
26
|
+
typer.Option(
|
|
27
|
+
"-f",
|
|
28
|
+
"--format",
|
|
29
|
+
help="Output format: json or msgpack.",
|
|
30
|
+
case_sensitive=False,
|
|
31
|
+
),
|
|
32
|
+
] = OutputFormat.JSON,
|
|
19
33
|
analysis_level: Annotated[
|
|
20
34
|
int,
|
|
21
35
|
typer.Option("-a", "--analysis-level", help="1: symbol table, 2: call graph."),
|
|
@@ -57,16 +71,42 @@ def main(
|
|
|
57
71
|
input, analysis_level, using_codeql, rebuild_analysis, cache_dir, clear_cache
|
|
58
72
|
) as analyzer:
|
|
59
73
|
artifacts = analyzer.analyze()
|
|
60
|
-
print_stream = sys.stdout
|
|
61
|
-
stream_context = nullcontext(print_stream)
|
|
62
74
|
|
|
63
|
-
|
|
75
|
+
# Handle output based on format
|
|
76
|
+
if output is None:
|
|
77
|
+
# Output to stdout (only for JSON)
|
|
78
|
+
if format == OutputFormat.JSON:
|
|
79
|
+
print(artifacts.model_dump_json(separators=(",", ":")))
|
|
80
|
+
else:
|
|
81
|
+
logger.error(
|
|
82
|
+
f"Format '{format.value}' requires an output directory (use -o/--output)"
|
|
83
|
+
)
|
|
84
|
+
raise typer.Exit(code=1)
|
|
85
|
+
else:
|
|
86
|
+
# Output to file
|
|
64
87
|
output.mkdir(parents=True, exist_ok=True)
|
|
65
|
-
|
|
66
|
-
|
|
88
|
+
_write_output(artifacts, output, format)
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def _write_output(artifacts, output_dir: Path, format: OutputFormat):
|
|
92
|
+
"""Write artifacts to file in the specified format."""
|
|
93
|
+
if format == OutputFormat.JSON:
|
|
94
|
+
output_file = output_dir / "analysis.json"
|
|
95
|
+
# Use Pydantic's model_dump_json() without indentation for compact output
|
|
96
|
+
json_str = artifacts.model_dump_json(indent=None)
|
|
97
|
+
with output_file.open("w") as f:
|
|
98
|
+
f.write(json_str)
|
|
99
|
+
logger.info(f"Analysis saved to {output_file}")
|
|
67
100
|
|
|
68
|
-
|
|
69
|
-
|
|
101
|
+
elif format == OutputFormat.MSGPACK:
|
|
102
|
+
output_file = output_dir / "analysis.msgpack"
|
|
103
|
+
msgpack_data = artifacts.to_msgpack_bytes()
|
|
104
|
+
with output_file.open("wb") as f:
|
|
105
|
+
f.write(msgpack_data)
|
|
106
|
+
logger.info(f"Analysis saved to {output_file}")
|
|
107
|
+
logger.info(
|
|
108
|
+
f"Compression ratio: {artifacts.get_compression_ratio():.1%} of JSON size"
|
|
109
|
+
)
|
|
70
110
|
|
|
71
111
|
|
|
72
112
|
app = typer.Typer(
|
codeanalyzer/core.py
CHANGED
|
@@ -1,19 +1,16 @@
|
|
|
1
1
|
import hashlib
|
|
2
2
|
import os
|
|
3
|
-
from pdb import set_trace
|
|
4
3
|
import shutil
|
|
5
4
|
import subprocess
|
|
6
|
-
from pathlib import Path
|
|
7
5
|
import sys
|
|
8
|
-
from
|
|
9
|
-
from
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Any, Dict, Optional, Union
|
|
10
8
|
|
|
11
9
|
from codeanalyzer.schema.py_schema import PyApplication, PyModule
|
|
12
10
|
from codeanalyzer.semantic_analysis.codeql import CodeQLLoader
|
|
13
|
-
from codeanalyzer.semantic_analysis.codeql.codeql_exceptions import
|
|
14
|
-
CodeQLExceptions,
|
|
15
|
-
)
|
|
11
|
+
from codeanalyzer.semantic_analysis.codeql.codeql_exceptions import CodeQLExceptions
|
|
16
12
|
from codeanalyzer.syntactic_analysis.symbol_table_builder import SymbolTableBuilder
|
|
13
|
+
from codeanalyzer.utils import logger
|
|
17
14
|
|
|
18
15
|
|
|
19
16
|
class AnalyzerCore:
|
|
@@ -290,11 +287,7 @@ class AnalyzerCore:
|
|
|
290
287
|
|
|
291
288
|
def analyze(self) -> PyApplication:
|
|
292
289
|
"""Build the symbol table and return it as a PyApplication."""
|
|
293
|
-
return (
|
|
294
|
-
PyApplication.builder()
|
|
295
|
-
.with_symbol_table(self._build_symbol_table())
|
|
296
|
-
.build()
|
|
297
|
-
)
|
|
290
|
+
return PyApplication.builder().symbol_table(self._build_symbol_table()).build()
|
|
298
291
|
|
|
299
292
|
def _compute_checksum(self, root: Path) -> str:
|
|
300
293
|
"""Compute SHA256 checksum of all Python source files in a project directory. If something changes, the
|
codeanalyzer/schema/__init__.py
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
from .py_schema import (
|
|
2
2
|
PyApplication,
|
|
3
|
-
|
|
3
|
+
PyCallable,
|
|
4
|
+
PyCallableParameter,
|
|
5
|
+
PyClass,
|
|
6
|
+
PyClassAttribute,
|
|
4
7
|
PyComment,
|
|
8
|
+
PyImport,
|
|
5
9
|
PyModule,
|
|
6
|
-
PyClass,
|
|
7
10
|
PyVariableDeclaration,
|
|
8
|
-
PyCallable,
|
|
9
|
-
PyClassAttribute,
|
|
10
|
-
PyCallableParameter
|
|
11
11
|
)
|
|
12
12
|
|
|
13
13
|
__all__ = [
|
|
@@ -19,5 +19,5 @@ __all__ = [
|
|
|
19
19
|
"PyVariableDeclaration",
|
|
20
20
|
"PyCallable",
|
|
21
21
|
"PyClassAttribute",
|
|
22
|
-
"PyCallableParameter"
|
|
22
|
+
"PyCallableParameter",
|
|
23
23
|
]
|
codeanalyzer/schema/py_schema.py
CHANGED
|
@@ -20,12 +20,88 @@ This module defines the data models used to represent Python code structures
|
|
|
20
20
|
for static analysis purposes.
|
|
21
21
|
"""
|
|
22
22
|
|
|
23
|
+
import inspect
|
|
23
24
|
from pathlib import Path
|
|
24
25
|
from typing import Any, Dict, List, Optional
|
|
25
|
-
|
|
26
|
+
import gzip
|
|
27
|
+
|
|
26
28
|
from pydantic import BaseModel
|
|
29
|
+
from typing_extensions import Literal
|
|
30
|
+
import msgpack
|
|
27
31
|
|
|
28
|
-
|
|
32
|
+
|
|
33
|
+
def msgpk(cls):
|
|
34
|
+
"""
|
|
35
|
+
Decorator that adds MessagePack serialization methods to Pydantic models.
|
|
36
|
+
|
|
37
|
+
Adds methods:
|
|
38
|
+
- to_msgpack_bytes() -> bytes: Serialize to compact binary format
|
|
39
|
+
- from_msgpack_bytes(data: bytes) -> cls: Deserialize from binary format
|
|
40
|
+
- to_msgpack_dict() -> dict: Convert to msgpack-compatible dict
|
|
41
|
+
- from_msgpack_dict(data: dict) -> cls: Create instance from msgpack dict
|
|
42
|
+
"""
|
|
43
|
+
|
|
44
|
+
def _prepare_for_serialization(obj: Any) -> Any:
|
|
45
|
+
"""Convert objects to serialization-friendly format."""
|
|
46
|
+
if isinstance(obj, Path):
|
|
47
|
+
return str(obj)
|
|
48
|
+
elif isinstance(obj, dict):
|
|
49
|
+
return {
|
|
50
|
+
_prepare_for_serialization(k): _prepare_for_serialization(v)
|
|
51
|
+
for k, v in obj.items()
|
|
52
|
+
}
|
|
53
|
+
elif isinstance(obj, list):
|
|
54
|
+
return [_prepare_for_serialization(item) for item in obj]
|
|
55
|
+
elif isinstance(obj, tuple):
|
|
56
|
+
return tuple(_prepare_for_serialization(item) for item in obj)
|
|
57
|
+
elif isinstance(obj, set):
|
|
58
|
+
return [_prepare_for_serialization(item) for item in obj]
|
|
59
|
+
elif hasattr(obj, "model_dump"): # Pydantic model
|
|
60
|
+
return _prepare_for_serialization(obj.model_dump())
|
|
61
|
+
else:
|
|
62
|
+
return obj
|
|
63
|
+
|
|
64
|
+
def to_msgpack_bytes(self) -> bytes:
|
|
65
|
+
"""Serialize the model to compact binary format using MessagePack + gzip."""
|
|
66
|
+
data = _prepare_for_serialization(self.model_dump())
|
|
67
|
+
msgpack_data = msgpack.packb(data, use_bin_type=True)
|
|
68
|
+
return gzip.compress(msgpack_data)
|
|
69
|
+
|
|
70
|
+
@classmethod
|
|
71
|
+
def from_msgpack_bytes(cls_obj, data: bytes):
|
|
72
|
+
"""Deserialize from MessagePack + gzip binary format."""
|
|
73
|
+
decompressed_data = gzip.decompress(data)
|
|
74
|
+
obj_dict = msgpack.unpackb(decompressed_data, raw=False)
|
|
75
|
+
return cls_obj.model_validate(obj_dict)
|
|
76
|
+
|
|
77
|
+
def to_msgpack_dict(self) -> dict:
|
|
78
|
+
"""Convert to msgpack-compatible dictionary format."""
|
|
79
|
+
return _prepare_for_serialization(self.model_dump())
|
|
80
|
+
|
|
81
|
+
@classmethod
|
|
82
|
+
def from_msgpack_dict(cls_obj, data: dict):
|
|
83
|
+
"""Create instance from msgpack-compatible dictionary."""
|
|
84
|
+
return cls_obj.model_validate(data)
|
|
85
|
+
|
|
86
|
+
def get_msgpack_size(self) -> int:
|
|
87
|
+
"""Get the size of the msgpack serialization in bytes."""
|
|
88
|
+
return len(self.to_msgpack_bytes())
|
|
89
|
+
|
|
90
|
+
def get_compression_ratio(self) -> float:
|
|
91
|
+
"""Get compression ratio compared to JSON."""
|
|
92
|
+
json_size = len(self.model_dump_json().encode("utf-8"))
|
|
93
|
+
msgpack_gzip_size = self.get_msgpack_size()
|
|
94
|
+
return msgpack_gzip_size / json_size if json_size > 0 else 1.0
|
|
95
|
+
|
|
96
|
+
# Add methods to the class
|
|
97
|
+
cls.to_msgpack_bytes = to_msgpack_bytes
|
|
98
|
+
cls.from_msgpack_bytes = from_msgpack_bytes
|
|
99
|
+
cls.to_msgpack_dict = to_msgpack_dict
|
|
100
|
+
cls.from_msgpack_dict = from_msgpack_dict
|
|
101
|
+
cls.get_msgpack_size = get_msgpack_size
|
|
102
|
+
cls.get_compression_ratio = get_compression_ratio
|
|
103
|
+
|
|
104
|
+
return cls
|
|
29
105
|
|
|
30
106
|
|
|
31
107
|
def builder(cls):
|
|
@@ -33,7 +109,7 @@ def builder(cls):
|
|
|
33
109
|
Decorator that generates a builder class for the Pydantic models defined below.
|
|
34
110
|
|
|
35
111
|
It creates methods like:
|
|
36
|
-
-
|
|
112
|
+
- <fieldname>(value)
|
|
37
113
|
- build() to instantiate the model
|
|
38
114
|
|
|
39
115
|
It supports nested builder patterns and is mypy-compatible.
|
|
@@ -70,12 +146,12 @@ def builder(cls):
|
|
|
70
146
|
setattr(self, f"_{f}", value)
|
|
71
147
|
return self
|
|
72
148
|
|
|
73
|
-
method.__name__ = f"
|
|
149
|
+
method.__name__ = f"{f}"
|
|
74
150
|
method.__annotations__ = {"value": t, "return": builder_name}
|
|
75
151
|
method.__doc__ = f"Set {f} ({t.__name__})"
|
|
76
152
|
return method
|
|
77
153
|
|
|
78
|
-
namespace[f"
|
|
154
|
+
namespace[f"{field}"] = make_method()
|
|
79
155
|
|
|
80
156
|
# Create a build method that constructs the model instance using the values set in the builder.
|
|
81
157
|
def build(self):
|
|
@@ -92,26 +168,9 @@ def builder(cls):
|
|
|
92
168
|
|
|
93
169
|
|
|
94
170
|
@builder
|
|
171
|
+
@msgpk
|
|
95
172
|
class PyImport(BaseModel):
|
|
96
|
-
"""Represents a Python import statement.
|
|
97
|
-
|
|
98
|
-
Attributes:
|
|
99
|
-
module (str): The name of the module being imported.
|
|
100
|
-
name (str): The name of the imported entity (e.g., function, class).
|
|
101
|
-
alias (Optional[str]): An optional alias for the imported entity.
|
|
102
|
-
start_line (int): The line number where the import statement starts.
|
|
103
|
-
end_line (int): The line number where the import statement ends.
|
|
104
|
-
start_column (int): The starting column of the import statement.
|
|
105
|
-
end_column (int): The ending column of the import statement.
|
|
106
|
-
|
|
107
|
-
Example:
|
|
108
|
-
- import numpy as np will be represented as:
|
|
109
|
-
PyImport(module="numpy", name="np", alias="np", start_line=1, end_line=1, start_column=0, end_column=16)
|
|
110
|
-
- from math import sqrt will be represented as:
|
|
111
|
-
PyImport(module="math", name="sqrt", alias=None, start_line=2, end_line=2, start_column=0, end_column=20
|
|
112
|
-
- from os.path import join as path_join will be represented as:
|
|
113
|
-
PyImport(module="os.path", name="path_join", alias="join", start_line=3, end_line=3, start_column=0, end_column=30)
|
|
114
|
-
"""
|
|
173
|
+
"""Represents a Python import statement."""
|
|
115
174
|
|
|
116
175
|
module: str
|
|
117
176
|
name: str
|
|
@@ -123,18 +182,9 @@ class PyImport(BaseModel):
|
|
|
123
182
|
|
|
124
183
|
|
|
125
184
|
@builder
|
|
185
|
+
@msgpk
|
|
126
186
|
class PyComment(BaseModel):
|
|
127
|
-
"""
|
|
128
|
-
Represents a Python comment.
|
|
129
|
-
|
|
130
|
-
Attributes:
|
|
131
|
-
content (str): The actual comment string (without the leading '#').
|
|
132
|
-
start_line (int): The line number where the comment starts.
|
|
133
|
-
end_line (int): The line number where the comment ends (same as start_line for single-line comments).
|
|
134
|
-
start_column (int): The starting column of the comment.
|
|
135
|
-
end_column (int): The ending column of the comment.
|
|
136
|
-
is_docstring (bool): Whether this comment is actually a docstring (triple-quoted string).
|
|
137
|
-
"""
|
|
187
|
+
"""Represents a Python comment."""
|
|
138
188
|
|
|
139
189
|
content: str
|
|
140
190
|
start_line: int = -1
|
|
@@ -145,20 +195,9 @@ class PyComment(BaseModel):
|
|
|
145
195
|
|
|
146
196
|
|
|
147
197
|
@builder
|
|
198
|
+
@msgpk
|
|
148
199
|
class PySymbol(BaseModel):
|
|
149
|
-
"""
|
|
150
|
-
Represents a symbol used or declared in Python code.
|
|
151
|
-
|
|
152
|
-
Attributes:
|
|
153
|
-
name (str): The name of the symbol (e.g., 'x', 'self.x', 'os.path').
|
|
154
|
-
scope (Literal['local', 'nonlocal', 'global', 'class', 'module']): The scope where the symbol is accessed.
|
|
155
|
-
kind (Literal['variable', 'parameter', 'attribute', 'function', 'class', 'module']): The kind of symbol.
|
|
156
|
-
type (Optional[str]): Inferred or annotated type, if available.
|
|
157
|
-
qualified_name (Optional[str]): Fully qualified name (e.g., 'self.x', 'os.path.join').
|
|
158
|
-
is_builtin (bool): Whether this is a Python builtin.
|
|
159
|
-
lineno (int): Line number where the symbol is accessed or declared.
|
|
160
|
-
col_offset (int): Column offset.
|
|
161
|
-
"""
|
|
200
|
+
"""Represents a symbol used or declared in Python code."""
|
|
162
201
|
|
|
163
202
|
name: str
|
|
164
203
|
scope: Literal["local", "nonlocal", "global", "class", "module"]
|
|
@@ -171,11 +210,9 @@ class PySymbol(BaseModel):
|
|
|
171
210
|
|
|
172
211
|
|
|
173
212
|
@builder
|
|
213
|
+
@msgpk
|
|
174
214
|
class PyVariableDeclaration(BaseModel):
|
|
175
|
-
"""Represents a Python variable declaration.
|
|
176
|
-
|
|
177
|
-
Attributes:
|
|
178
|
-
"""
|
|
215
|
+
"""Represents a Python variable declaration."""
|
|
179
216
|
|
|
180
217
|
name: str
|
|
181
218
|
type: Optional[str]
|
|
@@ -189,18 +226,9 @@ class PyVariableDeclaration(BaseModel):
|
|
|
189
226
|
|
|
190
227
|
|
|
191
228
|
@builder
|
|
229
|
+
@msgpk
|
|
192
230
|
class PyCallableParameter(BaseModel):
|
|
193
|
-
"""Represents a parameter of a Python callable (function/method).
|
|
194
|
-
|
|
195
|
-
Attributes:
|
|
196
|
-
name (str): The name of the parameter.
|
|
197
|
-
type (str): The type of the parameter.
|
|
198
|
-
default_value (str): The default value of the parameter, if any.
|
|
199
|
-
start_line (int): The line number where the parameter is defined.
|
|
200
|
-
end_line (int): The line number where the parameter definition ends.
|
|
201
|
-
start_column (int): The column number where the parameter starts.
|
|
202
|
-
end_column (int): The column number where the parameter ends.
|
|
203
|
-
"""
|
|
231
|
+
"""Represents a parameter of a Python callable (function/method)."""
|
|
204
232
|
|
|
205
233
|
name: str
|
|
206
234
|
type: Optional[str] = None
|
|
@@ -212,10 +240,9 @@ class PyCallableParameter(BaseModel):
|
|
|
212
240
|
|
|
213
241
|
|
|
214
242
|
@builder
|
|
243
|
+
@msgpk
|
|
215
244
|
class PyCallsite(BaseModel):
|
|
216
|
-
"""
|
|
217
|
-
Represents a Python call site (function or method invocation) with contextual metadata.
|
|
218
|
-
"""
|
|
245
|
+
"""Represents a Python call site (function or method invocation) with contextual metadata."""
|
|
219
246
|
|
|
220
247
|
method_name: str
|
|
221
248
|
receiver_expr: Optional[str] = None
|
|
@@ -231,26 +258,9 @@ class PyCallsite(BaseModel):
|
|
|
231
258
|
|
|
232
259
|
|
|
233
260
|
@builder
|
|
261
|
+
@msgpk
|
|
234
262
|
class PyCallable(BaseModel):
|
|
235
|
-
"""Represents a Python callable (function/method).
|
|
236
|
-
|
|
237
|
-
Attributes:
|
|
238
|
-
name (str): The name of the callable.
|
|
239
|
-
signature (str): The fully qualified name of the callable (e.g., module.function_name).
|
|
240
|
-
docstring (PyComment): The docstring of the callable.
|
|
241
|
-
decorators (List[str]): List of decorators applied to the callable.
|
|
242
|
-
parameters (List[PyCallableParameter]): List of parameters for the callable.
|
|
243
|
-
return_type (Optional[str]): The type of the return value, if specified.
|
|
244
|
-
code (str): The actual code of the callable.
|
|
245
|
-
start_line (int): The line number where the callable is defined.
|
|
246
|
-
end_line (int): The line number where the callable definition ends.
|
|
247
|
-
code_start_line (int): The line number where the code block starts.
|
|
248
|
-
accessed_symbols (List[str]): Symbols accessed within the callable.
|
|
249
|
-
call_sites (List[str]): Call sites of this callable.
|
|
250
|
-
is_entrypoint (bool): Whether this callable is an entry point.
|
|
251
|
-
local_variables (List[PyVariableDeclaration]): Local variables within the callable.
|
|
252
|
-
cyclomatic_complexity (int): Cyclomatic complexity of the callable.
|
|
253
|
-
"""
|
|
263
|
+
"""Represents a Python callable (function/method)."""
|
|
254
264
|
|
|
255
265
|
name: str
|
|
256
266
|
path: str
|
|
@@ -274,16 +284,9 @@ class PyCallable(BaseModel):
|
|
|
274
284
|
|
|
275
285
|
|
|
276
286
|
@builder
|
|
287
|
+
@msgpk
|
|
277
288
|
class PyClassAttribute(BaseModel):
|
|
278
|
-
"""Represents a Python class attribute.
|
|
279
|
-
|
|
280
|
-
Attributes:
|
|
281
|
-
name (str): The name of the attribute.
|
|
282
|
-
type (str): The type of the attribute.
|
|
283
|
-
docstring (PyComment): The docstring of the attribute.
|
|
284
|
-
start_line (int): The line number where the attribute is defined.
|
|
285
|
-
end_line (int): The line number where the attribute definition ends.
|
|
286
|
-
"""
|
|
289
|
+
"""Represents a Python class attribute."""
|
|
287
290
|
|
|
288
291
|
name: str
|
|
289
292
|
type: Optional[str] = None
|
|
@@ -293,20 +296,9 @@ class PyClassAttribute(BaseModel):
|
|
|
293
296
|
|
|
294
297
|
|
|
295
298
|
@builder
|
|
299
|
+
@msgpk
|
|
296
300
|
class PyClass(BaseModel):
|
|
297
|
-
"""Represents a Python class.
|
|
298
|
-
|
|
299
|
-
Attributes:
|
|
300
|
-
name (str): The name of the class.
|
|
301
|
-
signature (str): The fully qualified name of the class (e.g., module.class_name).
|
|
302
|
-
docstring (PyComment): The docstring of the class.
|
|
303
|
-
base_classes (List[str]): List of base class names.
|
|
304
|
-
methods (Dict[str, PyCallable]): Mapping of method names to their callable representations.
|
|
305
|
-
attributes (Dict[str, PyClassAttribute]): Mapping of attribute names to their variable declarations.
|
|
306
|
-
inner_classes (Dict[str, "PyClass"]): Mapping of inner class names to their class representations.
|
|
307
|
-
start_line (int): The line number where the class definition starts.
|
|
308
|
-
end_line (int): The line number where the class definition ends.
|
|
309
|
-
"""
|
|
301
|
+
"""Represents a Python class."""
|
|
310
302
|
|
|
311
303
|
name: str
|
|
312
304
|
signature: str # e.g., module.class_name
|
|
@@ -325,18 +317,9 @@ class PyClass(BaseModel):
|
|
|
325
317
|
|
|
326
318
|
|
|
327
319
|
@builder
|
|
320
|
+
@msgpk
|
|
328
321
|
class PyModule(BaseModel):
|
|
329
|
-
"""Represents a Python module.
|
|
330
|
-
|
|
331
|
-
Attributes:
|
|
332
|
-
file_path (str): The file path of the module.
|
|
333
|
-
module_name (str): The name of the module (e.g., module.submodule).
|
|
334
|
-
imports (List[PyImport]): List of import statements in the module.
|
|
335
|
-
comments (List[PyComment]): List of comments in the module.
|
|
336
|
-
classes (Dict[str, PyClass]): Mapping of class names to their class representations.
|
|
337
|
-
functions (Dict[str, PyCallable]): Mapping of function names to their callable representations.
|
|
338
|
-
variables (List[PyVariableDeclaration]): List of variable declarations in the module.
|
|
339
|
-
"""
|
|
322
|
+
"""Represents a Python module."""
|
|
340
323
|
|
|
341
324
|
file_path: str
|
|
342
325
|
module_name: str
|
|
@@ -348,13 +331,8 @@ class PyModule(BaseModel):
|
|
|
348
331
|
|
|
349
332
|
|
|
350
333
|
@builder
|
|
334
|
+
@msgpk
|
|
351
335
|
class PyApplication(BaseModel):
|
|
352
|
-
"""Represents a Python application.
|
|
353
|
-
|
|
354
|
-
Attributes:
|
|
355
|
-
name (str): The name of the application.
|
|
356
|
-
version (str): The version of the application.
|
|
357
|
-
description (str): A brief description of the application.
|
|
358
|
-
"""
|
|
336
|
+
"""Represents a Python application."""
|
|
359
337
|
|
|
360
338
|
symbol_table: dict[Path, PyModule]
|
|
@@ -19,8 +19,8 @@ CodeQL package
|
|
|
19
19
|
"""
|
|
20
20
|
|
|
21
21
|
from .codeql_analysis import CodeQL
|
|
22
|
-
from .codeql_query_runner import CodeQLQueryRunner
|
|
23
|
-
from .codeql_loader import CodeQLLoader
|
|
24
22
|
from .codeql_exceptions import CodeQLExceptions
|
|
23
|
+
from .codeql_loader import CodeQLLoader
|
|
24
|
+
from .codeql_query_runner import CodeQLQueryRunner
|
|
25
25
|
|
|
26
26
|
__all__ = ["CodeQL", "CodeQLQueryRunner", "CodeQLLoader", "CodeQLExceptions"]
|
|
@@ -21,9 +21,10 @@ for Python projects and execute queries against them.
|
|
|
21
21
|
"""
|
|
22
22
|
|
|
23
23
|
from pathlib import Path
|
|
24
|
+
from typing import Union
|
|
25
|
+
|
|
24
26
|
from networkx import DiGraph
|
|
25
27
|
from pandas import DataFrame
|
|
26
|
-
from typing import Union
|
|
27
28
|
|
|
28
29
|
from codeanalyzer.semantic_analysis.codeql.codeql_query_runner import CodeQLQueryRunner
|
|
29
30
|
|
|
@@ -130,4 +131,3 @@ class CodeQL:
|
|
|
130
131
|
Returns:
|
|
131
132
|
DiGraph: A directed graph representing the call graph of the application.
|
|
132
133
|
"""
|
|
133
|
-
pass
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
import platform
|
|
2
|
-
import requests
|
|
3
2
|
import zipfile
|
|
4
3
|
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
import requests
|
|
5
6
|
from codeanalyzer.utils import logger
|
|
6
|
-
from tqdm import tqdm
|
|
7
7
|
|
|
8
8
|
|
|
9
9
|
class CodeQLLoader:
|
|
@@ -43,22 +43,11 @@ class CodeQLLoader:
|
|
|
43
43
|
logger.info(f"Downloading CodeQL CLI from {download_url}")
|
|
44
44
|
with requests.get(download_url, stream=True) as r:
|
|
45
45
|
r.raise_for_status()
|
|
46
|
-
total_size = int(r.headers.get("content-length", 0))
|
|
47
46
|
block_size = 8192 # 8KB
|
|
48
47
|
|
|
49
|
-
with (
|
|
50
|
-
open(archive_path, "wb") as f,
|
|
51
|
-
tqdm(
|
|
52
|
-
total=total_size,
|
|
53
|
-
unit="B",
|
|
54
|
-
unit_scale=True,
|
|
55
|
-
unit_divisor=1024,
|
|
56
|
-
desc="Downloading CodeQL",
|
|
57
|
-
) as bar,
|
|
58
|
-
):
|
|
48
|
+
with open(archive_path, "wb") as f:
|
|
59
49
|
for chunk in r.iter_content(chunk_size=block_size):
|
|
60
50
|
f.write(chunk)
|
|
61
|
-
bar.update(len(chunk))
|
|
62
51
|
|
|
63
52
|
extract_dir = temp_dir / filename.replace(".zip", "")
|
|
64
53
|
extract_dir.mkdir(exist_ok=True)
|
|
@@ -20,11 +20,12 @@ This module provides functionality to run CodeQL queries against CodeQL database
|
|
|
20
20
|
and process the results.
|
|
21
21
|
"""
|
|
22
22
|
|
|
23
|
+
import shlex
|
|
23
24
|
import subprocess
|
|
24
25
|
import tempfile
|
|
25
26
|
from pathlib import Path
|
|
26
|
-
import shlex
|
|
27
27
|
from typing import List
|
|
28
|
+
|
|
28
29
|
import pandas as pd
|
|
29
30
|
from pandas import DataFrame
|
|
30
31
|
|