codeanalyzer-python 0.1.1__py3-none-any.whl → 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codeanalyzer/__init__.py +0 -0
- codeanalyzer/__main__.py +84 -0
- codeanalyzer/core.py +321 -0
- codeanalyzer/jedi/__init__.py +0 -0
- codeanalyzer/jedi/jedi.py +0 -0
- codeanalyzer/py.typed +0 -0
- codeanalyzer/schema/__init__.py +23 -0
- codeanalyzer/schema/py_schema.py +360 -0
- codeanalyzer/semantic_analysis/__init__.py +0 -0
- codeanalyzer/semantic_analysis/codeql/__init__.py +26 -0
- codeanalyzer/semantic_analysis/codeql/codeql_analysis.py +133 -0
- codeanalyzer/semantic_analysis/codeql/codeql_exceptions.py +12 -0
- codeanalyzer/semantic_analysis/codeql/codeql_loader.py +74 -0
- codeanalyzer/semantic_analysis/codeql/codeql_query_runner.py +164 -0
- codeanalyzer/semantic_analysis/wala/__init__.py +15 -0
- codeanalyzer/syntactic_analysis/__init__.py +0 -0
- codeanalyzer/syntactic_analysis/symbol_table_builder.py +903 -0
- codeanalyzer/utils/__init__.py +5 -0
- codeanalyzer/utils/logging.py +18 -0
- codeanalyzer/utils/progress_bar.py +69 -0
- {codeanalyzer_python-0.1.1.dist-info → codeanalyzer_python-0.1.2.dist-info}/METADATA +1 -1
- codeanalyzer_python-0.1.2.dist-info/RECORD +26 -0
- codeanalyzer_python-0.1.1.dist-info/RECORD +0 -6
- {codeanalyzer_python-0.1.1.dist-info → codeanalyzer_python-0.1.2.dist-info}/WHEEL +0 -0
- {codeanalyzer_python-0.1.1.dist-info → codeanalyzer_python-0.1.2.dist-info}/entry_points.txt +0 -0
- {codeanalyzer_python-0.1.1.dist-info → codeanalyzer_python-0.1.2.dist-info}/licenses/LICENSE +0 -0
- {codeanalyzer_python-0.1.1.dist-info → codeanalyzer_python-0.1.2.dist-info}/licenses/NOTICE +0 -0
|
@@ -0,0 +1,360 @@
|
|
|
1
|
+
################################################################################
|
|
2
|
+
# Copyright IBM Corporation 2025
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
################################################################################
|
|
16
|
+
|
|
17
|
+
"""Python schema models module.
|
|
18
|
+
|
|
19
|
+
This module defines the data models used to represent Python code structures
|
|
20
|
+
for static analysis purposes.
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
from pathlib import Path
|
|
24
|
+
from typing import Any, Dict, List, Optional
|
|
25
|
+
from typing_extensions import Literal
|
|
26
|
+
from pydantic import BaseModel
|
|
27
|
+
|
|
28
|
+
import inspect
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def builder(cls):
    """Class decorator that attaches a fluent builder to a model class.

    For a decorated class ``Model`` a new class named ``ModelBuilder`` is
    created and stored on the model as ``Model.builder``. The builder exposes:

    - ``with_<fieldname>(value)`` for every name in ``Model.__annotations__``;
      each call stores the value and returns the builder, so calls can be
      chained.
    - ``build()``, which instantiates ``Model`` passing every annotated field
      as a keyword argument.

    A field that was never set keeps the default found in the model's
    constructor signature, or ``None`` when the signature has no default
    for it.

    Args:
        cls: The class to decorate. Its own ``__annotations__`` define the
            builder fields and ``inspect.signature(cls)`` supplies defaults.

    Returns:
        The same class object, now carrying a ``builder`` attribute.
    """
    builder_name = f"{cls.__name__}Builder"

    # Field name -> annotation, e.g.
    # {"file_path": str, "module_name": str, "imports": List[PyImport], ...}
    annotations = cls.__annotations__

    # Defaults are read once, at decoration time, from the constructor signature.
    defaults = {
        param.name: param.default
        for param in inspect.signature(cls).parameters.values()
        if param.default is not inspect.Parameter.empty
    }

    def type_label(annotation):
        """Return a printable name for an annotation, for use in docstrings."""
        # Not every annotation object has ``__name__`` (string forward
        # references do not), so fall back to repr() instead of failing while
        # the class is being decorated.
        return getattr(annotation, "__name__", repr(annotation))

    namespace = {}

    def __init__(self):
        # Every field starts at its signature default (None when there is none).
        for field in annotations:
            setattr(self, f"_{field}", defaults.get(field, None))

    namespace["__init__"] = __init__

    # One chainable setter per field, named "with_<fieldname>".
    for field, field_type in annotations.items():

        # Bind the loop variables as defaults so each setter keeps its own
        # field name and type instead of the values from the last iteration.
        def make_method(f=field, t=field_type):
            def method(self, value):
                setattr(self, f"_{f}", value)
                return self

            method.__name__ = f"with_{f}"
            method.__annotations__ = {"value": t, "return": builder_name}
            method.__doc__ = f"Set {f} ({type_label(t)})"
            return method

        namespace[f"with_{field}"] = make_method()

    def build(self):
        """Instantiate the model from the values currently held by the builder."""
        return cls(**{k: getattr(self, f"_{k}") for k in annotations})

    namespace["build"] = build

    # Assemble the builder class dynamically and expose it on the model, so
    # callers can write `MyModel.builder().with_name(...).build()`.
    builder_cls = type(builder_name, (object,), namespace)
    setattr(cls, "builder", builder_cls)
    return cls
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
@builder
class PyImport(BaseModel):
    """Represents a Python import statement.

    Attributes:
        module (str): The name of the module being imported.
        name (str): The name of the imported entity (e.g., function, class).
        alias (Optional[str]): An optional alias for the imported entity.
            Defaults to None.
        start_line (int): The line number where the import statement starts.
            Defaults to -1.
        end_line (int): The line number where the import statement ends.
            Defaults to -1.
        start_column (int): The starting column of the import statement.
            Defaults to -1.
        end_column (int): The ending column of the import statement.
            Defaults to -1.

    Example:
        NOTE(review): the name/alias values below follow the attribute
        descriptions above; an earlier version of these examples had them
        swapped, so confirm against the code that populates this model.

        - import numpy as np will be represented as:
            PyImport(module="numpy", name="numpy", alias="np", start_line=1, end_line=1, start_column=0, end_column=16)
        - from math import sqrt will be represented as:
            PyImport(module="math", name="sqrt", alias=None, start_line=2, end_line=2, start_column=0, end_column=20)
        - from os.path import join as path_join will be represented as:
            PyImport(module="os.path", name="join", alias="path_join", start_line=3, end_line=3, start_column=0, end_column=30)
    """

    module: str
    name: str
    alias: Optional[str] = None
    start_line: int = -1
    end_line: int = -1
    start_column: int = -1
    end_column: int = -1
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
@builder
class PyComment(BaseModel):
    """
    Represents a Python comment.

    All position fields default to -1 and ``is_docstring`` defaults to False.

    Attributes:
        content (str): The actual comment string (without the leading '#').
        start_line (int): The line number where the comment starts.
        end_line (int): The line number where the comment ends (same as start_line for single-line comments).
        start_column (int): The starting column of the comment.
        end_column (int): The ending column of the comment.
        is_docstring (bool): Whether this comment is actually a docstring (triple-quoted string).
    """

    content: str
    start_line: int = -1
    end_line: int = -1
    start_column: int = -1
    end_column: int = -1
    is_docstring: bool = False
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
@builder
class PySymbol(BaseModel):
    """
    Represents a symbol used or declared in Python code.

    Attributes:
        name (str): The name of the symbol (e.g., 'x', 'self.x', 'os.path').
        scope (Literal['local', 'nonlocal', 'global', 'class', 'module']): The scope where the symbol is accessed.
        kind (Literal['variable', 'parameter', 'attribute', 'function', 'class', 'module']): The kind of symbol.
        type (Optional[str]): Inferred or annotated type, if available. Defaults to None.
        qualified_name (Optional[str]): Fully qualified name (e.g., 'self.x', 'os.path.join'). Defaults to None.
        is_builtin (bool): Whether this is a Python builtin. Defaults to False.
        lineno (int): Line number where the symbol is accessed or declared. Defaults to -1.
        col_offset (int): Column offset. Defaults to -1.
    """

    name: str
    scope: Literal["local", "nonlocal", "global", "class", "module"]
    kind: Literal["variable", "parameter", "attribute", "function", "class", "module"]
    type: Optional[str] = None
    qualified_name: Optional[str] = None
    is_builtin: bool = False
    lineno: int = -1
    col_offset: int = -1
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
@builder
class PyVariableDeclaration(BaseModel):
    """Represents a Python variable declaration.

    Attributes:
        name (str): The name of the declared variable.
        type (Optional[str]): The type of the variable, or None. Unlike the
            other optional fields of this model it has no default value.
        initializer (Optional[str]): Defaults to None. Presumably the source
            text of the initializing expression -- TODO confirm.
        value (Optional[Any]): Defaults to None. Presumably the evaluated
            value when it is known -- TODO confirm.
        scope (Literal['module', 'class', 'function']): The scope in which the
            variable is declared. Defaults to 'module'.
        start_line (int): The line number where the declaration starts. Defaults to -1.
        end_line (int): The line number where the declaration ends. Defaults to -1.
        start_column (int): The starting column of the declaration. Defaults to -1.
        end_column (int): The ending column of the declaration. Defaults to -1.
    """

    name: str
    type: Optional[str]
    initializer: Optional[str] = None
    value: Optional[Any] = None
    scope: Literal["module", "class", "function"] = "module"
    start_line: int = -1
    end_line: int = -1
    start_column: int = -1
    end_column: int = -1
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
@builder
class PyCallableParameter(BaseModel):
    """Represents a parameter of a Python callable (function/method).

    Attributes:
        name (str): The name of the parameter.
        type (Optional[str]): The type of the parameter, if any. Defaults to None.
        default_value (Optional[str]): The default value of the parameter, if any. Defaults to None.
        start_line (int): The line number where the parameter is defined. Defaults to -1.
        end_line (int): The line number where the parameter definition ends. Defaults to -1.
        start_column (int): The column number where the parameter starts. Defaults to -1.
        end_column (int): The column number where the parameter ends. Defaults to -1.
    """

    name: str
    type: Optional[str] = None
    default_value: Optional[str] = None
    start_line: int = -1
    end_line: int = -1
    start_column: int = -1
    end_column: int = -1
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
@builder
class PyCallsite(BaseModel):
    """
    Represents a Python call site (function or method invocation) with contextual metadata.

    NOTE(review): the field meanings below are inferred from the field names
    and types only -- confirm against the code that populates this model.

    Attributes:
        method_name (str): The name of the function or method being called.
        receiver_expr (Optional[str]): The receiver expression of the call, if any. Defaults to None.
        receiver_type (Optional[str]): The type of the receiver, if known. Defaults to None.
        argument_types (List[str]): The types of the call arguments. Defaults to an empty list.
        return_type (Optional[str]): The return type of the call, if known. Defaults to None.
        callee_signature (Optional[str]): The signature of the callee, if known. Defaults to None.
        is_constructor_call (bool): Whether the call constructs an object. Defaults to False.
        start_line (int): The line number where the call starts. Defaults to -1.
        start_column (int): The column where the call starts. Defaults to -1.
        end_line (int): The line number where the call ends. Defaults to -1.
        end_column (int): The column where the call ends. Defaults to -1.
    """

    method_name: str
    receiver_expr: Optional[str] = None
    receiver_type: Optional[str] = None
    argument_types: List[str] = []
    return_type: Optional[str] = None
    callee_signature: Optional[str] = None
    is_constructor_call: bool = False
    start_line: int = -1
    start_column: int = -1
    end_line: int = -1
    end_column: int = -1
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
@builder
class PyCallable(BaseModel):
    """Represents a Python callable (function/method).

    Instances hash by ``signature`` only.

    Attributes:
        name (str): The name of the callable.
        path (str): Path associated with the callable -- presumably the file
            that defines it; TODO confirm against the code that fills it in.
        signature (str): The fully qualified name of the callable
            (e.g., module.function_name or module.<class_name>.function_name).
        comments (List[PyComment]): Comments attached to the callable.
        decorators (List[str]): List of decorators applied to the callable.
        parameters (List[PyCallableParameter]): List of parameters for the callable.
        return_type (Optional[str]): The type of the return value, if specified.
        code (Optional[str]): The actual code of the callable, or None when it
            has not been recorded.
        start_line (int): The line number where the callable is defined.
        end_line (int): The line number where the callable definition ends.
        code_start_line (int): The line number where the code block starts.
        accessed_symbols (List[PySymbol]): Symbols accessed within the callable.
        call_sites (List[PyCallsite]): Call sites found in this callable.
        local_variables (List[PyVariableDeclaration]): Local variables within the callable.
        cyclomatic_complexity (int): Cyclomatic complexity of the callable.
    """

    name: str
    path: str
    signature: str  # e.g., module.<class_name>.function_name
    comments: List[PyComment] = []
    decorators: List[str] = []
    parameters: List[PyCallableParameter] = []
    return_type: Optional[str] = None
    # Optional because the default is None, and the generated builder passes
    # that None explicitly to the constructor when with_code() was never
    # called; a plain `str` annotation would reject it during validation.
    code: Optional[str] = None
    start_line: int = -1
    end_line: int = -1
    code_start_line: int = -1
    accessed_symbols: List[PySymbol] = []
    call_sites: List[PyCallsite] = []
    local_variables: List[PyVariableDeclaration] = []
    cyclomatic_complexity: int = 0

    def __hash__(self) -> int:
        """Generate a hash based on the callable's signature."""
        return hash(self.signature)
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
@builder
class PyClassAttribute(BaseModel):
    """Represents a Python class attribute.

    Attributes:
        name (str): The name of the attribute.
        type (Optional[str]): The type of the attribute, if any. Defaults to None.
        comments (List[PyComment]): Comments attached to the attribute. Defaults to an empty list.
        start_line (int): The line number where the attribute is defined. Defaults to -1.
        end_line (int): The line number where the attribute definition ends. Defaults to -1.
    """

    name: str
    type: Optional[str] = None
    comments: List[PyComment] = []
    start_line: int = -1
    end_line: int = -1
|
|
293
|
+
|
|
294
|
+
|
|
295
|
+
@builder
class PyClass(BaseModel):
    """Represents a Python class.

    Instances hash by ``signature`` only.

    Attributes:
        name (str): The name of the class.
        signature (str): The fully qualified name of the class (e.g., module.class_name).
        comments (List[PyComment]): Comments attached to the class.
        code (Optional[str]): The source code of the class, or None when it
            has not been recorded.
        base_classes (List[str]): List of base class names.
        methods (Dict[str, PyCallable]): Mapping of method names to their callable representations.
        attributes (Dict[str, PyClassAttribute]): Mapping of attribute names to their attribute representations.
        inner_classes (Dict[str, "PyClass"]): Mapping of inner class names to their class representations.
        start_line (int): The line number where the class definition starts.
        end_line (int): The line number where the class definition ends.
    """

    name: str
    signature: str  # e.g., module.class_name
    comments: List[PyComment] = []
    # Optional because the default is None, and the generated builder passes
    # that None explicitly to the constructor when with_code() was never
    # called; a plain `str` annotation would reject it during validation.
    code: Optional[str] = None
    base_classes: List[str] = []
    methods: Dict[str, PyCallable] = {}
    attributes: Dict[str, PyClassAttribute] = {}
    inner_classes: Dict[str, "PyClass"] = {}
    start_line: int = -1
    end_line: int = -1

    def __hash__(self):
        """Generate a hash based on the class's signature."""
        return hash(self.signature)
|
|
325
|
+
|
|
326
|
+
|
|
327
|
+
@builder
class PyModule(BaseModel):
    """Represents a Python module.

    Every collection field defaults to an empty list or dict.

    Attributes:
        file_path (str): The file path of the module.
        module_name (str): The name of the module (e.g., module.submodule).
        imports (List[PyImport]): List of import statements in the module.
        comments (List[PyComment]): List of comments in the module.
        classes (Dict[str, PyClass]): Mapping of class names to their class representations.
        functions (Dict[str, PyCallable]): Mapping of function names to their callable representations.
        variables (List[PyVariableDeclaration]): List of variable declarations in the module.
    """

    file_path: str
    module_name: str
    imports: List[PyImport] = []
    comments: List[PyComment] = []
    classes: Dict[str, PyClass] = {}
    functions: Dict[str, PyCallable] = {}
    variables: List[PyVariableDeclaration] = []
|
|
348
|
+
|
|
349
|
+
|
|
350
|
+
@builder
class PyApplication(BaseModel):
    """Represents a Python application.

    Attributes:
        symbol_table (dict[Path, PyModule]): Mapping of paths to the module
            representation for each path. Required; it has no default.
    """

    symbol_table: dict[Path, PyModule]
|
|
File without changes
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
################################################################################
|
|
2
|
+
# Copyright IBM Corporation 2025
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
################################################################################
|
|
16
|
+
|
|
17
|
+
"""
|
|
18
|
+
CodeQL package
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
from .codeql_analysis import CodeQL
|
|
22
|
+
from .codeql_query_runner import CodeQLQueryRunner
|
|
23
|
+
from .codeql_loader import CodeQLLoader
|
|
24
|
+
from .codeql_exceptions import CodeQLExceptions
|
|
25
|
+
|
|
26
|
+
__all__ = ["CodeQL", "CodeQLQueryRunner", "CodeQLLoader", "CodeQLExceptions"]
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
################################################################################
|
|
2
|
+
# Copyright IBM Corporation 2025
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
################################################################################
|
|
16
|
+
|
|
17
|
+
"""CodeQL module for analyzing Python code using CodeQL.
|
|
18
|
+
|
|
19
|
+
This module provides functionality to create and manage CodeQL databases
|
|
20
|
+
for Python projects and execute queries against them.
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
from pathlib import Path
|
|
24
|
+
from networkx import DiGraph
|
|
25
|
+
from pandas import DataFrame
|
|
26
|
+
from typing import Union
|
|
27
|
+
|
|
28
|
+
from codeanalyzer.semantic_analysis.codeql.codeql_query_runner import CodeQLQueryRunner
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class CodeQL:
    """Build an application-level view of a Python project using CodeQL.

    Args:
        project_dir (str or Path): The path to the root of the Python project.
        db_path (Path): The path to the CodeQL database to query.

    Attributes:
        project_dir (str or Path): The project root, stored as given.
        db_path (Path): The path to the CodeQL database, stored as given.
    """

    def __init__(self, project_dir: Union[str, Path], db_path: Path) -> None:
        self.db_path = db_path
        self.project_dir = project_dir

    def _build_call_graph(self) -> DiGraph:
        """Query the CodeQL database for call edges and turn them into a graph.

        The query text is assembled line by line, executed through a
        ``CodeQLQueryRunner`` opened on ``self.db_path``, and the resulting
        DataFrame is handed to ``__process_call_edges_to_callgraph``.

        NOTE(review): the predicates used in the query (``Method``,
        ``getAModifier``, ``getDeclaringType`` ...) look like they come from
        the CodeQL Java library even though the query imports ``python`` --
        confirm the query runs against a Python database.

        Returns:
            DiGraph: Whatever ``__process_call_edges_to_callgraph`` produces
            for the query result.
        """
        ql_lines = [
            # Library import
            "import python",
            # Caller/callee pairs, restricted to methods that come from source
            "from Method caller, Method callee",
            "where",
            "caller.fromSource() and",
            "callee.fromSource() and",
            "caller.calls(callee)",
            "select",
            # Caller metadata
            "caller.getFile().getAbsolutePath(),",
            '"[" + caller.getBody().getLocation().getStartLine() + ", " + caller.getBody().getLocation().getEndLine() + "]", //Caller body slice indices',
            "caller.getQualifiedName(), // Caller's fullsignature",
            "caller.getAModifier(), // caller's method modifier",
            "caller.paramsString(), // caller's method parameter types",
            "caller.getReturnType().toString(), // Caller's return type",
            "caller.getDeclaringType().getQualifiedName(), // Caller's class",
            "caller.getDeclaringType().getAModifier(), // Caller's class modifier",
            # Callee metadata (the last selected column carries no trailing comma)
            "callee.getFile().getAbsolutePath(),",
            '"[" + callee.getBody().getLocation().getStartLine() + ", " + callee.getBody().getLocation().getEndLine() + "]", //Caller body slice indices',
            "callee.getQualifiedName(), // Caller's fullsignature",
            "callee.getAModifier(), // callee's method modifier",
            "callee.paramsString(), // callee's method parameter types",
            "callee.getReturnType().toString(), // Caller's return type",
            "callee.getDeclaringType().getQualifiedName(), // Caller's class",
            "callee.getDeclaringType().getAModifier() // Caller's class modifier",
        ]
        query_string = "\n".join(ql_lines)

        # One column name per selected expression, in select order.
        result_columns = [
            # Caller columns
            "caller_file",
            "caller_body_slice_index",
            "caller_signature",
            "caller_modifier",
            "caller_params",
            "caller_return_type",
            "caller_class_signature",
            "caller_class_modifier",
            # Callee columns
            "callee_file",
            "callee_body_slice_index",
            "callee_signature",
            "callee_modifier",
            "callee_params",
            "callee_return_type",
            "callee_class_signature",
            "callee_class_modifier",
        ]

        # The runner is used as a context manager around the single execution.
        with CodeQLQueryRunner(self.db_path) as runner:
            call_edges: DataFrame = runner.execute(
                query_string,
                column_names=result_columns,
            )

        # Convert the tabular call edges into the call graph.
        return self.__process_call_edges_to_callgraph(call_edges)

    @staticmethod
    def __process_call_edges_to_callgraph(query_result: DataFrame) -> DiGraph:
        """Convert call-edge query results into a call graph.

        This is currently a placeholder: it ignores its input and returns
        None rather than a graph.

        Args:
            query_result (DataFrame): The DataFrame containing call edge information.

        Returns:
            DiGraph: Intended to be the call graph of the application; at the
            moment the method returns None.
        """
        return None
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
class CodeQLExceptions:
    """Namespace grouping the exception types raised by the CodeQL integration."""

    class CodeQLDatabaseBuildException(Exception):
        """Exception raised when there is an error building the CodeQL database."""

        def __init__(self, message: str) -> None:
            super().__init__(message)

    class CodeQLQueryExecutionException(Exception):
        """Exception raised when there is an error executing a CodeQL query."""

        def __init__(self, message: str) -> None:
            super().__init__(message)
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
import platform
|
|
2
|
+
import requests
|
|
3
|
+
import zipfile
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from codeanalyzer.utils import logger
|
|
6
|
+
from tqdm import tqdm
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class CodeQLLoader:
    """Download and unpack the CodeQL CLI release that matches this platform."""

    # Timeout in seconds handed to every `requests` call so that a stalled
    # connection raises instead of blocking forever.
    REQUEST_TIMEOUT = 30

    @classmethod
    def detect_platform_key(cls) -> str:
        """Return the release asset file name for the current OS and CPU.

        Returns:
            str: ``codeql-linux64.zip``, ``codeql-osx64.zip`` or
            ``codeql-win64.zip`` for the recognised system/architecture pairs,
            otherwise the generic ``codeql.zip``.
        """
        system = platform.system()
        arch = platform.machine().lower()

        if system == "Linux" and arch in {"x86_64", "amd64"}:
            return "codeql-linux64.zip"
        elif system == "Darwin" and arch in {"x86_64", "arm64"}:
            return "codeql-osx64.zip"
        elif system == "Windows" and arch in {"x86_64", "amd64"}:
            return "codeql-win64.zip"
        else:
            return "codeql.zip"  # fallback to generic binary if needed

    @classmethod
    def get_codeql_download_url(cls, expected_filename: str) -> str:
        """Look up the download URL of an asset in the latest CodeQL CLI release.

        Args:
            expected_filename (str): Exact asset name to look for.

        Returns:
            str: The ``browser_download_url`` of the matching asset.

        Raises:
            requests.HTTPError: If the release lookup returns an error status.
            RuntimeError: If no asset with that name exists in the release.
        """
        response = requests.get(
            "https://api.github.com/repos/github/codeql-cli-binaries/releases/latest",
            timeout=cls.REQUEST_TIMEOUT,
        )
        response.raise_for_status()
        for asset in response.json()["assets"]:
            if asset["name"] == expected_filename:
                return asset["browser_download_url"]
        raise RuntimeError(f"No asset found for filename: {expected_filename}")

    @classmethod
    def download_and_extract_codeql(cls, temp_dir: Path) -> Path:
        """Download the CodeQL CLI archive into ``temp_dir`` and unpack it.

        The archive is saved as ``temp_dir/<asset name>`` and unpacked into
        ``temp_dir/<asset name without .zip>``.

        Args:
            temp_dir (Path): Working directory; created if it does not exist.

        Returns:
            Path: Resolved path of the CodeQL launcher file found in the
            unpacked archive.

        Raises:
            requests.HTTPError: If the lookup or the download returns an error status.
            RuntimeError: If the release has no asset for this platform.
            FileNotFoundError: If the unpacked archive contains no launcher.
        """
        filename = cls.detect_platform_key()
        download_url = cls.get_codeql_download_url(filename)

        temp_dir.mkdir(parents=True, exist_ok=True)
        archive_path = temp_dir / filename

        logger.info(f"Downloading CodeQL CLI from {download_url}")
        with requests.get(
            download_url, stream=True, timeout=cls.REQUEST_TIMEOUT
        ) as r:
            r.raise_for_status()
            # A missing content-length header simply leaves the bar without a total.
            total_size = int(r.headers.get("content-length", 0))
            block_size = 8192  # 8KB

            with (
                open(archive_path, "wb") as f,
                tqdm(
                    total=total_size,
                    unit="B",
                    unit_scale=True,
                    unit_divisor=1024,
                    desc="Downloading CodeQL",
                ) as bar,
            ):
                for chunk in r.iter_content(chunk_size=block_size):
                    f.write(chunk)
                    bar.update(len(chunk))

        extract_dir = temp_dir / filename.replace(".zip", "")
        extract_dir.mkdir(exist_ok=True)

        logger.info(f"Extracting CodeQL CLI to {extract_dir}")
        cls._extract_archive(archive_path, extract_dir)

        codeql_bin = cls._find_codeql_binary(extract_dir)
        if codeql_bin is None:
            raise FileNotFoundError("CodeQL binary not found in extracted contents.")

        return codeql_bin.resolve()

    @staticmethod
    def _extract_archive(archive_path: Path, extract_dir: Path) -> None:
        """Unpack a zip archive, re-applying the execute bits stored in it."""
        with zipfile.ZipFile(archive_path, "r") as zip_ref:
            for member in zip_ref.infolist():
                extracted = Path(zip_ref.extract(member, extract_dir))
                # The high 16 bits of external_attr hold the Unix mode; zipfile
                # does not apply it on extraction, which would leave the
                # launcher non-executable.
                exec_bits = (member.external_attr >> 16) & 0o111
                if exec_bits and extracted.is_file():
                    extracted.chmod(extracted.stat().st_mode | exec_bits)

    @staticmethod
    def _find_codeql_binary(extract_dir: Path) -> "Path | None":
        """Return the shallowest CodeQL launcher file under ``extract_dir``, or None."""
        if platform.system() == "Windows":
            launcher_names = ("codeql.exe", "codeql")
        else:
            launcher_names = ("codeql",)

        for launcher_name in launcher_names:
            # Directories named "codeql" also match the pattern, so keep
            # regular files only.
            matches = [p for p in extract_dir.rglob(launcher_name) if p.is_file()]
            if matches:
                return min(matches, key=lambda p: len(p.parts))
        return None
|