codeanalyzer-python 0.1.12__tar.gz → 0.1.14__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {codeanalyzer_python-0.1.12 → codeanalyzer_python-0.1.14}/.gitignore +1 -0
- {codeanalyzer_python-0.1.12 → codeanalyzer_python-0.1.14}/PKG-INFO +20 -42
- {codeanalyzer_python-0.1.12 → codeanalyzer_python-0.1.14}/README.md +19 -41
- {codeanalyzer_python-0.1.12 → codeanalyzer_python-0.1.14}/codeanalyzer/__main__.py +71 -16
- {codeanalyzer_python-0.1.12 → codeanalyzer_python-0.1.14}/codeanalyzer/core.py +154 -19
- {codeanalyzer_python-0.1.12 → codeanalyzer_python-0.1.14}/codeanalyzer/options/options.py +0 -1
- {codeanalyzer_python-0.1.12 → codeanalyzer_python-0.1.14}/codeanalyzer/schema/py_schema.py +20 -0
- codeanalyzer_python-0.1.14/codeanalyzer/semantic_analysis/call_graph.py +266 -0
- codeanalyzer_python-0.1.14/codeanalyzer/semantic_analysis/codeql/codeql_analysis.py +300 -0
- {codeanalyzer_python-0.1.12 → codeanalyzer_python-0.1.14}/codeanalyzer/semantic_analysis/codeql/codeql_loader.py +32 -4
- {codeanalyzer_python-0.1.12 → codeanalyzer_python-0.1.14}/codeanalyzer/semantic_analysis/codeql/codeql_query_runner.py +51 -31
- {codeanalyzer_python-0.1.12 → codeanalyzer_python-0.1.14}/codeanalyzer/syntactic_analysis/symbol_table_builder.py +87 -4
- {codeanalyzer_python-0.1.12 → codeanalyzer_python-0.1.14}/pyproject.toml +1 -1
- codeanalyzer_python-0.1.12/codeanalyzer/semantic_analysis/codeql/codeql_analysis.py +0 -133
- codeanalyzer_python-0.1.12/codeanalyzer/semantic_analysis/wala/__init__.py +0 -15
- {codeanalyzer_python-0.1.12 → codeanalyzer_python-0.1.14}/LICENSE +0 -0
- {codeanalyzer_python-0.1.12 → codeanalyzer_python-0.1.14}/NOTICE +0 -0
- {codeanalyzer_python-0.1.12 → codeanalyzer_python-0.1.14}/codeanalyzer/__init__.py +0 -0
- {codeanalyzer_python-0.1.12 → codeanalyzer_python-0.1.14}/codeanalyzer/config/__init__.py +0 -0
- {codeanalyzer_python-0.1.12 → codeanalyzer_python-0.1.14}/codeanalyzer/config/config.py +0 -0
- {codeanalyzer_python-0.1.12 → codeanalyzer_python-0.1.14}/codeanalyzer/jedi/__init__.py +0 -0
- {codeanalyzer_python-0.1.12 → codeanalyzer_python-0.1.14}/codeanalyzer/jedi/jedi.py +0 -0
- {codeanalyzer_python-0.1.12 → codeanalyzer_python-0.1.14}/codeanalyzer/options/__init__.py +0 -0
- {codeanalyzer_python-0.1.12 → codeanalyzer_python-0.1.14}/codeanalyzer/py.typed +0 -0
- {codeanalyzer_python-0.1.12 → codeanalyzer_python-0.1.14}/codeanalyzer/schema/__init__.py +0 -0
- {codeanalyzer_python-0.1.12 → codeanalyzer_python-0.1.14}/codeanalyzer/semantic_analysis/__init__.py +0 -0
- {codeanalyzer_python-0.1.12 → codeanalyzer_python-0.1.14}/codeanalyzer/semantic_analysis/codeql/__init__.py +0 -0
- {codeanalyzer_python-0.1.12 → codeanalyzer_python-0.1.14}/codeanalyzer/semantic_analysis/codeql/codeql_exceptions.py +0 -0
- {codeanalyzer_python-0.1.12 → codeanalyzer_python-0.1.14}/codeanalyzer/syntactic_analysis/__init__.py +0 -0
- {codeanalyzer_python-0.1.12 → codeanalyzer_python-0.1.14}/codeanalyzer/syntactic_analysis/exceptions.py +0 -0
- {codeanalyzer_python-0.1.12 → codeanalyzer_python-0.1.14}/codeanalyzer/utils/__init__.py +0 -0
- {codeanalyzer_python-0.1.12 → codeanalyzer_python-0.1.14}/codeanalyzer/utils/logging.py +0 -0
- {codeanalyzer_python-0.1.12 → codeanalyzer_python-0.1.14}/codeanalyzer/utils/progress_bar.py +0 -0
- {codeanalyzer_python-0.1.12 → codeanalyzer_python-0.1.14}/test/fixtures/whole_applications/xarray/LICENSE +0 -0
- {codeanalyzer_python-0.1.12 → codeanalyzer_python-0.1.14}/test/fixtures/whole_applications/xarray/README.md +0 -0
- {codeanalyzer_python-0.1.12 → codeanalyzer_python-0.1.14}/test/fixtures/whole_applications/xarray/properties/README.md +0 -0
- {codeanalyzer_python-0.1.12 → codeanalyzer_python-0.1.14}/test/fixtures/whole_applications/xarray/xarray/datatree_/LICENSE +0 -0
- {codeanalyzer_python-0.1.12 → codeanalyzer_python-0.1.14}/test/fixtures/whole_applications/xarray/xarray/datatree_/README.md +0 -0
- {codeanalyzer_python-0.1.12 → codeanalyzer_python-0.1.14}/test/fixtures/whole_applications/xarray/xarray/datatree_/docs/README.md +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: codeanalyzer-python
|
|
3
|
-
Version: 0.1.12
|
|
3
|
+
Version: 0.1.14
|
|
4
4
|
Summary: Static Analysis on Python source code using Jedi, CodeQL and Treesitter.
|
|
5
5
|
Author-email: Rahul Krishna <i.m.ralk@gmail.com>
|
|
6
6
|
License-File: LICENSE
|
|
@@ -110,16 +110,15 @@ To view the available options and commands, run `codeanalyzer --help`. You shoul
|
|
|
110
110
|
|
|
111
111
|
|
|
112
112
|
╭─ Options ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
|
|
113
|
-
│ * --input -i PATH Path to the project root directory. [default: None] [required]
|
|
114
|
-
│ --output -o PATH Output directory for artifacts. [default: None]
|
|
115
|
-
│ --format -f [json|msgpack] Output format: json or msgpack. [default: json]
|
|
116
|
-
│ --
|
|
117
|
-
│ --
|
|
118
|
-
│ --
|
|
119
|
-
│ --cache-
|
|
120
|
-
│
|
|
121
|
-
│
|
|
122
|
-
│ --help Show this message and exit. │
|
|
113
|
+
│ * --input -i PATH Path to the project root directory. [default: None] [required] │
|
|
114
|
+
│ --output -o PATH Output directory for artifacts. [default: None] │
|
|
115
|
+
│ --format -f [json|msgpack] Output format: json or msgpack. [default: json] │
|
|
116
|
+
│ --codeql --no-codeql Enable CodeQL-based analysis. [default: no-codeql] │
|
|
117
|
+
│ --eager --lazy Enable eager or lazy analysis. Defaults to lazy. [default: lazy] │
|
|
118
|
+
│ --cache-dir -c PATH Directory to store analysis cache. [default: None] │
|
|
119
|
+
│ --clear-cache --keep-cache Clear cache after analysis. [default: clear-cache] │
|
|
120
|
+
│ -v INTEGER Increase verbosity: -v, -vv, -vvv [default: 0] │
|
|
121
|
+
│ --help Show this message and exit. │
|
|
123
122
|
╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
|
|
124
123
|
```
|
|
125
124
|
|
|
@@ -145,25 +144,15 @@ To view the available options and commands, run `codeanalyzer --help`. You shoul
|
|
|
145
144
|
|
|
146
145
|
This will save the analysis results in `analysis.msgpack` in the specified directory.
|
|
147
146
|
|
|
148
|
-
3. **
|
|
149
|
-
```bash
|
|
150
|
-
codeanalyzer --input ./my-python-project --analysis-level 1 # Symbol table only
|
|
151
|
-
```
|
|
152
|
-
Call graph analysis can be enabled by setting the level to `2`:
|
|
153
|
-
```bash
|
|
154
|
-
codeanalyzer --input ./my-python-project --analysis-level 2 # Symbol table + Call graph
|
|
155
|
-
```
|
|
156
|
-
***Note: The `--analysis-level=2` is not yet implemented in this version.***
|
|
157
|
-
|
|
158
|
-
4. **Analysis with CodeQL enabled:**
|
|
147
|
+
3. **Analysis with CodeQL enabled:**
|
|
159
148
|
```bash
|
|
160
149
|
codeanalyzer --input ./my-python-project --codeql
|
|
161
150
|
```
|
|
162
|
-
|
|
151
|
+
Every run produces a symbol table **and** a call graph. By default, edges come from Jedi's lexical analysis. Adding `--codeql` resolves additional edges (including RPC / third-party / dynamically-dispatched targets) and merges them with the Jedi-derived edges. CodeQL also backfills resolved callees on Jedi-emitted call sites where Jedi couldn't resolve them.
|
|
163
152
|
|
|
164
|
-
***Note:
|
|
153
|
+
***Note: CodeQL integration is experimental. The CLI is downloaded into `<cache_dir>/codeql/` on first use and reused thereafter.***
|
|
165
154
|
|
|
166
|
-
|
|
155
|
+
4. **Eager analysis with custom cache directory:**
|
|
167
156
|
```bash
|
|
168
157
|
codeanalyzer --input ./my-python-project --eager --cache-dir /path/to/custom-cache
|
|
169
158
|
```
|
|
@@ -171,7 +160,7 @@ To view the available options and commands, run `codeanalyzer --help`. You shoul
|
|
|
171
160
|
|
|
172
161
|
If you provide --cache-dir, the cache will be stored in that directory. If not specified, it defaults to `.codeanalyzer` in the current working directory (`$PWD`).
|
|
173
162
|
|
|
174
|
-
|
|
163
|
+
5. **Quiet mode (minimal output):**
|
|
175
164
|
```bash
|
|
176
165
|
codeanalyzer --input /path/to/my-python-project --quiet
|
|
177
166
|
```
|
|
@@ -269,7 +258,6 @@ To view the available options and commands, run `codeanalyzer --help`. You shoul
|
|
|
269
258
|
│ * --input -i PATH Path to the project root directory. [default: None] [required] │
|
|
270
259
|
│ --output -o PATH Output directory for artifacts. [default: None] │
|
|
271
260
|
│ --format -f [json|msgpack] Output format: json or msgpack. [default: json]. │
|
|
272
|
-
│ --analysis-level -a INTEGER 1: symbol table, 2: call graph. [default: 1] │
|
|
273
261
|
│ --codeql --no-codeql Enable CodeQL-based analysis. [default: no-codeql] │
|
|
274
262
|
│ --eager --lazy Enable eager or lazy analysis. Defaults to lazy. [default: lazy] │
|
|
275
263
|
│ --cache-dir -c PATH Directory to store analysis cache. [default: None] │
|
|
@@ -294,25 +282,15 @@ To view the available options and commands, run `codeanalyzer --help`. You shoul
|
|
|
294
282
|
|
|
295
283
|
Now, you can find the analysis results in `analysis.json` in the specified directory.
|
|
296
284
|
|
|
297
|
-
2. **
|
|
298
|
-
```bash
|
|
299
|
-
codeanalyzer --input ./my-python-project --analysis-level 1 # Symbol table only
|
|
300
|
-
```
|
|
301
|
-
Call graph analysis can be enabled by setting the level to `2`:
|
|
302
|
-
```bash
|
|
303
|
-
codeanalyzer --input ./my-python-project --analysis-level 2 # Symbol table + Call graph
|
|
304
|
-
```
|
|
305
|
-
***Note: The `--analysis-level=2` is not yet implemented in this version.***
|
|
306
|
-
|
|
307
|
-
3. **Analysis with CodeQL enabled:**
|
|
285
|
+
2. **Analysis with CodeQL enabled:**
|
|
308
286
|
```bash
|
|
309
287
|
codeanalyzer --input ./my-python-project --codeql
|
|
310
288
|
```
|
|
311
|
-
|
|
289
|
+
Every run produces a symbol table **and** a call graph. By default, edges come from Jedi's lexical analysis. Adding `--codeql` resolves additional edges (including RPC / third-party / dynamically-dispatched targets) and merges them with the Jedi-derived edges. CodeQL also backfills resolved callees on Jedi-emitted call sites where Jedi couldn't resolve them.
|
|
312
290
|
|
|
313
|
-
|
|
291
|
+
***Note: CodeQL integration is experimental. The CLI is downloaded into `<cache_dir>/codeql/` on first use and reused thereafter.***
|
|
314
292
|
|
|
315
|
-
|
|
293
|
+
3. **Eager analysis with custom cache directory:**
|
|
316
294
|
```bash
|
|
317
295
|
codeanalyzer --input ./my-python-project --eager --cache-dir /path/to/custom-cache
|
|
318
296
|
```
|
|
@@ -320,7 +298,7 @@ To view the available options and commands, run `codeanalyzer --help`. You shoul
|
|
|
320
298
|
|
|
321
299
|
If you provide --cache-dir, the cache will be stored in that directory. If not specified, it defaults to `.codeanalyzer` in the current working directory (`$PWD`).
|
|
322
300
|
|
|
323
|
-
|
|
301
|
+
4. **Save output in msgpack format:**
|
|
324
302
|
```bash
|
|
325
303
|
codeanalyzer --input ./my-python-project --output /path/to/analysis-results --format msgpack
|
|
326
304
|
```
|
|
@@ -77,16 +77,15 @@ To view the available options and commands, run `codeanalyzer --help`. You shoul
|
|
|
77
77
|
|
|
78
78
|
|
|
79
79
|
╭─ Options ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
|
|
80
|
-
│ * --input -i PATH Path to the project root directory. [default: None] [required]
|
|
81
|
-
│ --output -o PATH Output directory for artifacts. [default: None]
|
|
82
|
-
│ --format -f [json|msgpack] Output format: json or msgpack. [default: json]
|
|
83
|
-
│ --
|
|
84
|
-
│ --
|
|
85
|
-
│ --
|
|
86
|
-
│ --cache-
|
|
87
|
-
│
|
|
88
|
-
│
|
|
89
|
-
│ --help Show this message and exit. │
|
|
80
|
+
│ * --input -i PATH Path to the project root directory. [default: None] [required] │
|
|
81
|
+
│ --output -o PATH Output directory for artifacts. [default: None] │
|
|
82
|
+
│ --format -f [json|msgpack] Output format: json or msgpack. [default: json] │
|
|
83
|
+
│ --codeql --no-codeql Enable CodeQL-based analysis. [default: no-codeql] │
|
|
84
|
+
│ --eager --lazy Enable eager or lazy analysis. Defaults to lazy. [default: lazy] │
|
|
85
|
+
│ --cache-dir -c PATH Directory to store analysis cache. [default: None] │
|
|
86
|
+
│ --clear-cache --keep-cache Clear cache after analysis. [default: clear-cache] │
|
|
87
|
+
│ -v INTEGER Increase verbosity: -v, -vv, -vvv [default: 0] │
|
|
88
|
+
│ --help Show this message and exit. │
|
|
90
89
|
╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
|
|
91
90
|
```
|
|
92
91
|
|
|
@@ -112,25 +111,15 @@ To view the available options and commands, run `codeanalyzer --help`. You shoul
|
|
|
112
111
|
|
|
113
112
|
This will save the analysis results in `analysis.msgpack` in the specified directory.
|
|
114
113
|
|
|
115
|
-
3. **
|
|
116
|
-
```bash
|
|
117
|
-
codeanalyzer --input ./my-python-project --analysis-level 1 # Symbol table only
|
|
118
|
-
```
|
|
119
|
-
Call graph analysis can be enabled by setting the level to `2`:
|
|
120
|
-
```bash
|
|
121
|
-
codeanalyzer --input ./my-python-project --analysis-level 2 # Symbol table + Call graph
|
|
122
|
-
```
|
|
123
|
-
***Note: The `--analysis-level=2` is not yet implemented in this version.***
|
|
124
|
-
|
|
125
|
-
4. **Analysis with CodeQL enabled:**
|
|
114
|
+
3. **Analysis with CodeQL enabled:**
|
|
126
115
|
```bash
|
|
127
116
|
codeanalyzer --input ./my-python-project --codeql
|
|
128
117
|
```
|
|
129
|
-
|
|
118
|
+
Every run produces a symbol table **and** a call graph. By default, edges come from Jedi's lexical analysis. Adding `--codeql` resolves additional edges (including RPC / third-party / dynamically-dispatched targets) and merges them with the Jedi-derived edges. CodeQL also backfills resolved callees on Jedi-emitted call sites where Jedi couldn't resolve them.
|
|
130
119
|
|
|
131
|
-
***Note:
|
|
120
|
+
***Note: CodeQL integration is experimental. The CLI is downloaded into `<cache_dir>/codeql/` on first use and reused thereafter.***
|
|
132
121
|
|
|
133
|
-
|
|
122
|
+
4. **Eager analysis with custom cache directory:**
|
|
134
123
|
```bash
|
|
135
124
|
codeanalyzer --input ./my-python-project --eager --cache-dir /path/to/custom-cache
|
|
136
125
|
```
|
|
@@ -138,7 +127,7 @@ To view the available options and commands, run `codeanalyzer --help`. You shoul
|
|
|
138
127
|
|
|
139
128
|
If you provide --cache-dir, the cache will be stored in that directory. If not specified, it defaults to `.codeanalyzer` in the current working directory (`$PWD`).
|
|
140
129
|
|
|
141
|
-
|
|
130
|
+
5. **Quiet mode (minimal output):**
|
|
142
131
|
```bash
|
|
143
132
|
codeanalyzer --input /path/to/my-python-project --quiet
|
|
144
133
|
```
|
|
@@ -236,7 +225,6 @@ To view the available options and commands, run `codeanalyzer --help`. You shoul
|
|
|
236
225
|
│ * --input -i PATH Path to the project root directory. [default: None] [required] │
|
|
237
226
|
│ --output -o PATH Output directory for artifacts. [default: None] │
|
|
238
227
|
│ --format -f [json|msgpack] Output format: json or msgpack. [default: json]. │
|
|
239
|
-
│ --analysis-level -a INTEGER 1: symbol table, 2: call graph. [default: 1] │
|
|
240
228
|
│ --codeql --no-codeql Enable CodeQL-based analysis. [default: no-codeql] │
|
|
241
229
|
│ --eager --lazy Enable eager or lazy analysis. Defaults to lazy. [default: lazy] │
|
|
242
230
|
│ --cache-dir -c PATH Directory to store analysis cache. [default: None] │
|
|
@@ -261,25 +249,15 @@ To view the available options and commands, run `codeanalyzer --help`. You shoul
|
|
|
261
249
|
|
|
262
250
|
Now, you can find the analysis results in `analysis.json` in the specified directory.
|
|
263
251
|
|
|
264
|
-
2. **
|
|
265
|
-
```bash
|
|
266
|
-
codeanalyzer --input ./my-python-project --analysis-level 1 # Symbol table only
|
|
267
|
-
```
|
|
268
|
-
Call graph analysis can be enabled by setting the level to `2`:
|
|
269
|
-
```bash
|
|
270
|
-
codeanalyzer --input ./my-python-project --analysis-level 2 # Symbol table + Call graph
|
|
271
|
-
```
|
|
272
|
-
***Note: The `--analysis-level=2` is not yet implemented in this version.***
|
|
273
|
-
|
|
274
|
-
3. **Analysis with CodeQL enabled:**
|
|
252
|
+
2. **Analysis with CodeQL enabled:**
|
|
275
253
|
```bash
|
|
276
254
|
codeanalyzer --input ./my-python-project --codeql
|
|
277
255
|
```
|
|
278
|
-
|
|
256
|
+
Every run produces a symbol table **and** a call graph. By default, edges come from Jedi's lexical analysis. Adding `--codeql` resolves additional edges (including RPC / third-party / dynamically-dispatched targets) and merges them with the Jedi-derived edges. CodeQL also backfills resolved callees on Jedi-emitted call sites where Jedi couldn't resolve them.
|
|
279
257
|
|
|
280
|
-
|
|
258
|
+
***Note: CodeQL integration is experimental. The CLI is downloaded into `<cache_dir>/codeql/` on first use and reused thereafter.***
|
|
281
259
|
|
|
282
|
-
|
|
260
|
+
3. **Eager analysis with custom cache directory:**
|
|
283
261
|
```bash
|
|
284
262
|
codeanalyzer --input ./my-python-project --eager --cache-dir /path/to/custom-cache
|
|
285
263
|
```
|
|
@@ -287,7 +265,7 @@ To view the available options and commands, run `codeanalyzer --help`. You shoul
|
|
|
287
265
|
|
|
288
266
|
If you provide --cache-dir, the cache will be stored in that directory. If not specified, it defaults to `.codeanalyzer` in the current working directory (`$PWD`).
|
|
289
267
|
|
|
290
|
-
|
|
268
|
+
4. **Save output in msgpack format:**
|
|
291
269
|
```bash
|
|
292
270
|
codeanalyzer --input ./my-python-project --output /path/to/analysis-results --format msgpack
|
|
293
271
|
```
|
|
@@ -9,25 +9,75 @@ from codeanalyzer.config import OutputFormat
|
|
|
9
9
|
from codeanalyzer.schema import model_dump_json
|
|
10
10
|
from codeanalyzer.options import AnalysisOptions
|
|
11
11
|
|
|
12
|
+
|
|
12
13
|
def main(
|
|
13
|
-
input: Annotated[
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
14
|
+
input: Annotated[
|
|
15
|
+
Path, typer.Option("-i", "--input", help="Path to the project root directory.")
|
|
16
|
+
],
|
|
17
|
+
output: Annotated[
|
|
18
|
+
Optional[Path],
|
|
19
|
+
typer.Option("-o", "--output", help="Output directory for artifacts."),
|
|
20
|
+
] = None,
|
|
21
|
+
format: Annotated[
|
|
22
|
+
OutputFormat,
|
|
23
|
+
typer.Option(
|
|
24
|
+
"-f",
|
|
25
|
+
"--format",
|
|
26
|
+
help="Output format: json or msgpack.",
|
|
27
|
+
case_sensitive=False,
|
|
28
|
+
),
|
|
29
|
+
] = OutputFormat.JSON,
|
|
30
|
+
using_codeql: Annotated[
|
|
31
|
+
bool, typer.Option("--codeql/--no-codeql", help="Enable CodeQL-based analysis.")
|
|
32
|
+
] = False,
|
|
33
|
+
using_ray: Annotated[
|
|
34
|
+
bool,
|
|
35
|
+
typer.Option("--ray/--no-ray", help="Enable Ray for distributed analysis."),
|
|
36
|
+
] = False,
|
|
37
|
+
rebuild_analysis: Annotated[
|
|
38
|
+
bool,
|
|
39
|
+
typer.Option(
|
|
40
|
+
"--eager/--lazy",
|
|
41
|
+
help="Enable eager or lazy analysis. Defaults to lazy.",
|
|
42
|
+
),
|
|
43
|
+
] = False,
|
|
44
|
+
skip_tests: Annotated[
|
|
45
|
+
bool,
|
|
46
|
+
typer.Option(
|
|
47
|
+
"--skip-tests/--include-tests",
|
|
48
|
+
help="Skip test files in analysis.",
|
|
49
|
+
),
|
|
50
|
+
] = True,
|
|
51
|
+
file_name: Annotated[
|
|
52
|
+
Optional[Path],
|
|
53
|
+
typer.Option(
|
|
54
|
+
"--file-name",
|
|
55
|
+
help="Analyze only the specified file (relative to input directory).",
|
|
56
|
+
),
|
|
57
|
+
] = None,
|
|
58
|
+
cache_dir: Annotated[
|
|
59
|
+
Optional[Path],
|
|
60
|
+
typer.Option(
|
|
61
|
+
"-c",
|
|
62
|
+
"--cache-dir",
|
|
63
|
+
help="Directory to store analysis cache. Defaults to '.codeanalyzer' in the input directory.",
|
|
64
|
+
),
|
|
65
|
+
] = None,
|
|
66
|
+
clear_cache: Annotated[
|
|
67
|
+
bool,
|
|
68
|
+
typer.Option(
|
|
69
|
+
"--clear-cache/--keep-cache",
|
|
70
|
+
help="Clear cache after analysis. By default, cache is retained.",
|
|
71
|
+
),
|
|
72
|
+
] = False,
|
|
73
|
+
verbosity: Annotated[
|
|
74
|
+
int, typer.Option("-v", count=True, help="Increase verbosity: -v, -vv, -vvv")
|
|
75
|
+
] = 0,
|
|
25
76
|
):
|
|
26
77
|
options = AnalysisOptions(
|
|
27
78
|
input=input,
|
|
28
79
|
output=output,
|
|
29
80
|
format=format,
|
|
30
|
-
analysis_level=analysis_level,
|
|
31
81
|
using_codeql=using_codeql,
|
|
32
82
|
using_ray=using_ray,
|
|
33
83
|
rebuild_analysis=rebuild_analysis,
|
|
@@ -46,13 +96,17 @@ def main(
|
|
|
46
96
|
if options.file_name is not None:
|
|
47
97
|
full_file_path = options.input / options.file_name
|
|
48
98
|
if not full_file_path.exists():
|
|
49
|
-
logger.error(
|
|
99
|
+
logger.error(
|
|
100
|
+
f"Specified file '{options.file_name}' does not exist in '{options.input}'."
|
|
101
|
+
)
|
|
50
102
|
raise typer.Exit(code=1)
|
|
51
103
|
if not full_file_path.is_file():
|
|
52
104
|
logger.error(f"Specified path '{options.file_name}' is not a file.")
|
|
53
105
|
raise typer.Exit(code=1)
|
|
54
|
-
if not str(options.file_name).endswith(
|
|
55
|
-
logger.error(
|
|
106
|
+
if not str(options.file_name).endswith(".py"):
|
|
107
|
+
logger.error(
|
|
108
|
+
f"Specified file '{options.file_name}' is not a Python file (.py)."
|
|
109
|
+
)
|
|
56
110
|
raise typer.Exit(code=1)
|
|
57
111
|
|
|
58
112
|
with Codeanalyzer(options) as analyzer:
|
|
@@ -85,6 +139,7 @@ def _write_output(artifacts, output_dir: Path, format: OutputFormat):
|
|
|
85
139
|
f"Compression ratio: {artifacts.get_compression_ratio():.1%} of JSON size"
|
|
86
140
|
)
|
|
87
141
|
|
|
142
|
+
|
|
88
143
|
app = typer.Typer(
|
|
89
144
|
callback=main,
|
|
90
145
|
name="codeanalyzer",
|
|
@@ -9,7 +9,14 @@ from typing import Any, Dict, Optional, Union, List
|
|
|
9
9
|
import ray
|
|
10
10
|
from codeanalyzer.utils import logger
|
|
11
11
|
from codeanalyzer.schema import PyApplication, PyModule, model_dump_json, model_validate_json
|
|
12
|
+
from codeanalyzer.schema.py_schema import PyCallEdge
|
|
13
|
+
from codeanalyzer.semantic_analysis.call_graph import (
|
|
14
|
+
jedi_call_graph_edges,
|
|
15
|
+
merge_edges,
|
|
16
|
+
resolve_unresolved_constructors,
|
|
17
|
+
)
|
|
12
18
|
from codeanalyzer.semantic_analysis.codeql import CodeQLLoader
|
|
19
|
+
from codeanalyzer.semantic_analysis.codeql.codeql_analysis import CodeQL
|
|
13
20
|
from codeanalyzer.semantic_analysis.codeql.codeql_exceptions import CodeQLExceptions
|
|
14
21
|
from codeanalyzer.syntactic_analysis.exceptions import SymbolTableBuilderRayError
|
|
15
22
|
from codeanalyzer.syntactic_analysis.symbol_table_builder import SymbolTableBuilder
|
|
@@ -49,7 +56,6 @@ class Codeanalyzer:
|
|
|
49
56
|
|
|
50
57
|
def __init__(self, options: AnalysisOptions) -> None:
|
|
51
58
|
self.options = options
|
|
52
|
-
self.analysis_depth = options.analysis_level
|
|
53
59
|
self.project_dir = Path(options.input).resolve()
|
|
54
60
|
self.skip_tests = options.skip_tests
|
|
55
61
|
self.using_codeql = options.using_codeql
|
|
@@ -60,6 +66,7 @@ class Codeanalyzer:
|
|
|
60
66
|
self.clear_cache = options.clear_cache
|
|
61
67
|
self.db_path: Optional[Path] = None
|
|
62
68
|
self.codeql_bin: Optional[Path] = None
|
|
69
|
+
self.codeql_packs_dir: Optional[Path] = None
|
|
63
70
|
self.virtualenv: Optional[Path] = None
|
|
64
71
|
self.using_ray: bool = options.using_ray
|
|
65
72
|
self.file_name: Optional[Path] = options.file_name
|
|
@@ -292,6 +299,15 @@ class Codeanalyzer:
|
|
|
292
299
|
|
|
293
300
|
if self.using_codeql:
|
|
294
301
|
logger.info(f"(Re-)initializing CodeQL analysis for {self.project_dir}")
|
|
302
|
+
|
|
303
|
+
# Resolve the CLI binary before anything else uses it: DB build
|
|
304
|
+
# below needs it, and so does every subsequent query run.
|
|
305
|
+
self.codeql_bin = self._ensure_codeql_bin()
|
|
306
|
+
# Download the standard query library pack (idempotent). The
|
|
307
|
+
# CLI install ships only the language extractors; the
|
|
308
|
+
# ``codeql/python-all`` library pack must be fetched separately.
|
|
309
|
+
self.codeql_packs_dir = self._ensure_codeql_packs(self.codeql_bin)
|
|
310
|
+
|
|
295
311
|
cache_root = self.cache_dir / "codeql"
|
|
296
312
|
cache_root.mkdir(parents=True, exist_ok=True)
|
|
297
313
|
self.db_path = cache_root / f"{self.project_dir.name}-db"
|
|
@@ -310,19 +326,6 @@ class Codeanalyzer:
|
|
|
310
326
|
if self.rebuild_analysis or not is_cache_valid():
|
|
311
327
|
logger.info("Creating new CodeQL database...")
|
|
312
328
|
|
|
313
|
-
codeql_in_path = shutil.which("codeql")
|
|
314
|
-
if codeql_in_path:
|
|
315
|
-
self.codeql_bin = Path(codeql_in_path)
|
|
316
|
-
else:
|
|
317
|
-
self.codeql_bin = CodeQLLoader.download_and_extract_codeql(
|
|
318
|
-
self.cache_dir / "codeql" / "bin"
|
|
319
|
-
)
|
|
320
|
-
|
|
321
|
-
if not shutil.which(str(self.codeql_bin)):
|
|
322
|
-
raise FileNotFoundError(
|
|
323
|
-
f"CodeQL binary not executable: {self.codeql_bin}"
|
|
324
|
-
)
|
|
325
|
-
|
|
326
329
|
cmd = [
|
|
327
330
|
str(self.codeql_bin),
|
|
328
331
|
"database",
|
|
@@ -375,8 +378,27 @@ class Codeanalyzer:
|
|
|
375
378
|
# Build the symbol table from the cached application if available (if not available, then build a new one)
|
|
376
379
|
symbol_table = self._build_symbol_table(cached_pyapplication.symbol_table if cached_pyapplication else {})
|
|
377
380
|
|
|
381
|
+
# Build the call graph in four steps:
|
|
382
|
+
# 1. Run CodeQL (when enabled). Produces resolved edges with
|
|
383
|
+
# ``provenance=["codeql"]`` and augments ``PyCallsite``s
|
|
384
|
+
# in-place — filling ``callee_signature`` for sites Jedi
|
|
385
|
+
# couldn't resolve.
|
|
386
|
+
# 2. Heuristic fallback for constructor calls neither Jedi nor
|
|
387
|
+
# CodeQL could resolve (commonly classes nested inside
|
|
388
|
+
# functions). Walks the symbol table by class short-name +
|
|
389
|
+
# scope and writes ``<class>.__init__`` into the site.
|
|
390
|
+
# 3. Derive Jedi edges from the now-fully-augmented symbol
|
|
391
|
+
# table — these reflect every resolution the symbol table
|
|
392
|
+
# contains, regardless of which pass put it there.
|
|
393
|
+
# 4. Merge with CodeQL edges; provenance unions for edges both
|
|
394
|
+
# backends saw.
|
|
395
|
+
codeql_edges = self._get_call_graph(symbol_table, augment_sites=True)
|
|
396
|
+
resolve_unresolved_constructors(symbol_table)
|
|
397
|
+
jedi_edges = jedi_call_graph_edges(symbol_table)
|
|
398
|
+
call_graph = merge_edges(jedi_edges, codeql_edges)
|
|
399
|
+
|
|
378
400
|
# Recreate pyapplication
|
|
379
|
-
app = PyApplication.builder().symbol_table(symbol_table).build()
|
|
401
|
+
app = PyApplication.builder().symbol_table(symbol_table).call_graph(call_graph).build()
|
|
380
402
|
|
|
381
403
|
# Save to cache
|
|
382
404
|
self._save_analysis_cache(app, cache_file)
|
|
@@ -579,7 +601,120 @@ class Codeanalyzer:
|
|
|
579
601
|
logger.info("✅ Symbol table generation complete.")
|
|
580
602
|
return symbol_table
|
|
581
603
|
|
|
582
|
-
def
|
|
583
|
-
"""
|
|
584
|
-
|
|
585
|
-
|
|
604
|
+
def _ensure_codeql_packs(self, codeql_bin: Path) -> Path:
|
|
605
|
+
"""Materialize a qlpack that depends on ``codeql/python-all``.
|
|
606
|
+
|
|
607
|
+
The CodeQL CLI install ships only the language extractors — query
|
|
608
|
+
library packs (and their transitive dependencies like
|
|
609
|
+
``codeql/concepts``) must be resolved separately. The canonical
|
|
610
|
+
way is to declare the dependency in a ``qlpack.yml`` and run
|
|
611
|
+
``codeql pack install`` in that directory; CodeQL writes a
|
|
612
|
+
``codeql-pack.lock.yml`` and downloads everything needed.
|
|
613
|
+
|
|
614
|
+
We do this once per project under ``<cache_dir>/codeql/qlpack/``
|
|
615
|
+
and return that directory. The query runner then writes its
|
|
616
|
+
temporary ``.ql`` file inside this pack — colocation makes
|
|
617
|
+
``import python`` resolve without any ``--additional-packs`` or
|
|
618
|
+
``--search-path`` gymnastics.
|
|
619
|
+
"""
|
|
620
|
+
pack_dir = self.cache_dir / "codeql" / "qlpack"
|
|
621
|
+
pack_dir.mkdir(parents=True, exist_ok=True)
|
|
622
|
+
qlpack_yml = pack_dir / "qlpack.yml"
|
|
623
|
+
lock_file = pack_dir / "codeql-pack.lock.yml"
|
|
624
|
+
|
|
625
|
+
if not qlpack_yml.exists():
|
|
626
|
+
qlpack_yml.write_text(
|
|
627
|
+
"name: codeanalyzer-deps\n"
|
|
628
|
+
"version: 1.0.0\n"
|
|
629
|
+
"dependencies:\n"
|
|
630
|
+
' codeql/python-all: "*"\n'
|
|
631
|
+
)
|
|
632
|
+
|
|
633
|
+
if lock_file.exists():
|
|
634
|
+
logger.debug(f"CodeQL pack dependencies already installed in {pack_dir}")
|
|
635
|
+
return pack_dir
|
|
636
|
+
|
|
637
|
+
logger.info(f"Installing CodeQL pack dependencies in {pack_dir}.")
|
|
638
|
+
proc = subprocess.Popen(
|
|
639
|
+
[str(codeql_bin), "pack", "install", str(pack_dir)],
|
|
640
|
+
stdout=subprocess.PIPE,
|
|
641
|
+
stderr=subprocess.PIPE,
|
|
642
|
+
)
|
|
643
|
+
_, err = proc.communicate()
|
|
644
|
+
if proc.returncode != 0:
|
|
645
|
+
raise CodeQLExceptions.CodeQLDatabaseBuildException(
|
|
646
|
+
f"Failed to install CodeQL pack dependencies:\n"
|
|
647
|
+
f"{(err or b'').decode(errors='replace')}"
|
|
648
|
+
)
|
|
649
|
+
return pack_dir
|
|
650
|
+
|
|
651
|
+
def _ensure_codeql_bin(self) -> Path:
|
|
652
|
+
"""Locate (or download) the CodeQL CLI binary into the project cache.
|
|
653
|
+
|
|
654
|
+
Resolution order:
|
|
655
|
+
1. An existing binary inside ``<cache_dir>/codeql/bin/`` —
|
|
656
|
+
reused across runs on the same project.
|
|
657
|
+
2. ``codeql`` already on the user's PATH — picked up verbatim.
|
|
658
|
+
3. Otherwise, download into ``<cache_dir>/codeql/bin/``.
|
|
659
|
+
|
|
660
|
+
The project-local cache is preferred over PATH so the version we
|
|
661
|
+
installed earlier wins over whatever the OS ships — keeps behavior
|
|
662
|
+
deterministic when the user has both.
|
|
663
|
+
"""
|
|
664
|
+
bin_root = self.cache_dir / "codeql" / "bin"
|
|
665
|
+
bin_root.mkdir(parents=True, exist_ok=True)
|
|
666
|
+
|
|
667
|
+
existing = next(
|
|
668
|
+
(p for p in bin_root.rglob("codeql") if p.is_file()),
|
|
669
|
+
None,
|
|
670
|
+
)
|
|
671
|
+
if existing and os.access(existing, os.X_OK):
|
|
672
|
+
logger.debug(f"Reusing cached CodeQL CLI at {existing}")
|
|
673
|
+
return existing.resolve()
|
|
674
|
+
|
|
675
|
+
on_path = shutil.which("codeql")
|
|
676
|
+
if on_path:
|
|
677
|
+
logger.debug(f"Using CodeQL CLI from PATH at {on_path}")
|
|
678
|
+
return Path(on_path)
|
|
679
|
+
|
|
680
|
+
logger.info(f"CodeQL CLI not found; downloading into {bin_root}.")
|
|
681
|
+
downloaded = CodeQLLoader.download_and_extract_codeql(bin_root)
|
|
682
|
+
if not downloaded.exists() or not os.access(downloaded, os.X_OK):
|
|
683
|
+
raise FileNotFoundError(
|
|
684
|
+
f"CodeQL binary not executable after download: {downloaded}"
|
|
685
|
+
)
|
|
686
|
+
return downloaded
|
|
687
|
+
|
|
688
|
+
def _get_call_graph(
|
|
689
|
+
self,
|
|
690
|
+
symbol_table: Dict[str, PyModule],
|
|
691
|
+
augment_sites: bool = False,
|
|
692
|
+
) -> List[PyCallEdge]:
|
|
693
|
+
"""Build CodeQL-resolved call edges and optionally augment sites.
|
|
694
|
+
|
|
695
|
+
Returns an empty list when CodeQL isn't enabled or the database
|
|
696
|
+
isn't available. Edges carry ``provenance=["codeql"]`` — merge
|
|
697
|
+
with Jedi-derived edges via ``call_graph.merge_edges``.
|
|
698
|
+
|
|
699
|
+
When ``augment_sites`` is True, also mutates
|
|
700
|
+
``PyCallable.call_sites`` in the symbol table to backfill
|
|
701
|
+
``callee_signature`` for sites Jedi couldn't resolve. The single
|
|
702
|
+
CodeQL query is shared (cached on the ``CodeQL`` instance) so
|
|
703
|
+
this costs no extra DB work.
|
|
704
|
+
"""
|
|
705
|
+
if not self.using_codeql or self.db_path is None:
|
|
706
|
+
return []
|
|
707
|
+
try:
|
|
708
|
+
cq = CodeQL(
|
|
709
|
+
self.project_dir,
|
|
710
|
+
self.db_path,
|
|
711
|
+
codeql_bin=self.codeql_bin,
|
|
712
|
+
codeql_packs_dir=self.codeql_packs_dir,
|
|
713
|
+
)
|
|
714
|
+
edges = cq.build_call_graph_edges(symbol_table)
|
|
715
|
+
if augment_sites:
|
|
716
|
+
cq.augment_call_sites(symbol_table)
|
|
717
|
+
return edges
|
|
718
|
+
except Exception as exc:
|
|
719
|
+
logger.warning(f"CodeQL call-graph extraction failed: {exc}")
|
|
720
|
+
return []
|
|
@@ -339,9 +339,29 @@ class PyModule(BaseModel):
|
|
|
339
339
|
file_size: Optional[int] = None
|
|
340
340
|
|
|
341
341
|
|
|
342
|
+
@builder
|
|
343
|
+
@msgpk
|
|
344
|
+
class PyCallEdge(BaseModel):
|
|
345
|
+
"""Identity-only call-graph edge with weight.
|
|
346
|
+
|
|
347
|
+
Mirrors Java's ``CallDependency``. ``source`` and ``target`` are
|
|
348
|
+
``PyCallable.signature`` strings — nodes of the graph are the existing
|
|
349
|
+
``PyCallable`` entries in the symbol table, not a separate vertex type.
|
|
350
|
+
Rich per-call metadata (receiver, arguments, location, ...) lives on
|
|
351
|
+
``PyCallsite`` inside the source ``PyCallable.call_sites``.
|
|
352
|
+
"""
|
|
353
|
+
|
|
354
|
+
source: str # caller's PyCallable.signature
|
|
355
|
+
target: str # callee's PyCallable.signature
|
|
356
|
+
type: Literal["CALL_DEP"] = "CALL_DEP"
|
|
357
|
+
weight: int = 1
|
|
358
|
+
provenance: List[Literal["jedi", "codeql", "joern"]] = []
|
|
359
|
+
|
|
360
|
+
|
|
342
361
|
@builder
|
|
343
362
|
@msgpk
|
|
344
363
|
class PyApplication(BaseModel):
|
|
345
364
|
"""Represents a Python application."""
|
|
346
365
|
|
|
347
366
|
symbol_table: Dict[str, PyModule]
|
|
367
|
+
call_graph: List[PyCallEdge] = []
|