codeanalyzer-python 0.1.13__tar.gz → 0.1.14__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. {codeanalyzer_python-0.1.13 → codeanalyzer_python-0.1.14}/.gitignore +1 -0
  2. {codeanalyzer_python-0.1.13 → codeanalyzer_python-0.1.14}/PKG-INFO +20 -42
  3. {codeanalyzer_python-0.1.13 → codeanalyzer_python-0.1.14}/README.md +19 -41
  4. {codeanalyzer_python-0.1.13 → codeanalyzer_python-0.1.14}/codeanalyzer/__main__.py +0 -5
  5. {codeanalyzer_python-0.1.13 → codeanalyzer_python-0.1.14}/codeanalyzer/core.py +154 -19
  6. {codeanalyzer_python-0.1.13 → codeanalyzer_python-0.1.14}/codeanalyzer/options/options.py +0 -1
  7. {codeanalyzer_python-0.1.13 → codeanalyzer_python-0.1.14}/codeanalyzer/schema/py_schema.py +20 -0
  8. codeanalyzer_python-0.1.14/codeanalyzer/semantic_analysis/call_graph.py +266 -0
  9. codeanalyzer_python-0.1.14/codeanalyzer/semantic_analysis/codeql/codeql_analysis.py +300 -0
  10. {codeanalyzer_python-0.1.13 → codeanalyzer_python-0.1.14}/codeanalyzer/semantic_analysis/codeql/codeql_loader.py +32 -4
  11. {codeanalyzer_python-0.1.13 → codeanalyzer_python-0.1.14}/codeanalyzer/semantic_analysis/codeql/codeql_query_runner.py +51 -31
  12. {codeanalyzer_python-0.1.13 → codeanalyzer_python-0.1.14}/codeanalyzer/syntactic_analysis/symbol_table_builder.py +87 -4
  13. {codeanalyzer_python-0.1.13 → codeanalyzer_python-0.1.14}/pyproject.toml +1 -1
  14. codeanalyzer_python-0.1.13/codeanalyzer/semantic_analysis/codeql/codeql_analysis.py +0 -133
  15. codeanalyzer_python-0.1.13/codeanalyzer/semantic_analysis/wala/__init__.py +0 -15
  16. {codeanalyzer_python-0.1.13 → codeanalyzer_python-0.1.14}/LICENSE +0 -0
  17. {codeanalyzer_python-0.1.13 → codeanalyzer_python-0.1.14}/NOTICE +0 -0
  18. {codeanalyzer_python-0.1.13 → codeanalyzer_python-0.1.14}/codeanalyzer/__init__.py +0 -0
  19. {codeanalyzer_python-0.1.13 → codeanalyzer_python-0.1.14}/codeanalyzer/config/__init__.py +0 -0
  20. {codeanalyzer_python-0.1.13 → codeanalyzer_python-0.1.14}/codeanalyzer/config/config.py +0 -0
  21. {codeanalyzer_python-0.1.13 → codeanalyzer_python-0.1.14}/codeanalyzer/jedi/__init__.py +0 -0
  22. {codeanalyzer_python-0.1.13 → codeanalyzer_python-0.1.14}/codeanalyzer/jedi/jedi.py +0 -0
  23. {codeanalyzer_python-0.1.13 → codeanalyzer_python-0.1.14}/codeanalyzer/options/__init__.py +0 -0
  24. {codeanalyzer_python-0.1.13 → codeanalyzer_python-0.1.14}/codeanalyzer/py.typed +0 -0
  25. {codeanalyzer_python-0.1.13 → codeanalyzer_python-0.1.14}/codeanalyzer/schema/__init__.py +0 -0
  26. {codeanalyzer_python-0.1.13 → codeanalyzer_python-0.1.14}/codeanalyzer/semantic_analysis/__init__.py +0 -0
  27. {codeanalyzer_python-0.1.13 → codeanalyzer_python-0.1.14}/codeanalyzer/semantic_analysis/codeql/__init__.py +0 -0
  28. {codeanalyzer_python-0.1.13 → codeanalyzer_python-0.1.14}/codeanalyzer/semantic_analysis/codeql/codeql_exceptions.py +0 -0
  29. {codeanalyzer_python-0.1.13 → codeanalyzer_python-0.1.14}/codeanalyzer/syntactic_analysis/__init__.py +0 -0
  30. {codeanalyzer_python-0.1.13 → codeanalyzer_python-0.1.14}/codeanalyzer/syntactic_analysis/exceptions.py +0 -0
  31. {codeanalyzer_python-0.1.13 → codeanalyzer_python-0.1.14}/codeanalyzer/utils/__init__.py +0 -0
  32. {codeanalyzer_python-0.1.13 → codeanalyzer_python-0.1.14}/codeanalyzer/utils/logging.py +0 -0
  33. {codeanalyzer_python-0.1.13 → codeanalyzer_python-0.1.14}/codeanalyzer/utils/progress_bar.py +0 -0
  34. {codeanalyzer_python-0.1.13 → codeanalyzer_python-0.1.14}/test/fixtures/whole_applications/xarray/LICENSE +0 -0
  35. {codeanalyzer_python-0.1.13 → codeanalyzer_python-0.1.14}/test/fixtures/whole_applications/xarray/README.md +0 -0
  36. {codeanalyzer_python-0.1.13 → codeanalyzer_python-0.1.14}/test/fixtures/whole_applications/xarray/properties/README.md +0 -0
  37. {codeanalyzer_python-0.1.13 → codeanalyzer_python-0.1.14}/test/fixtures/whole_applications/xarray/xarray/datatree_/LICENSE +0 -0
  38. {codeanalyzer_python-0.1.13 → codeanalyzer_python-0.1.14}/test/fixtures/whole_applications/xarray/xarray/datatree_/README.md +0 -0
  39. {codeanalyzer_python-0.1.13 → codeanalyzer_python-0.1.14}/test/fixtures/whole_applications/xarray/xarray/datatree_/docs/README.md +0 -0
@@ -176,6 +176,7 @@ cython_debug/
176
176
  # Project-specific files
177
177
  .codeanalyzer
178
178
  .vscode/
179
+ analysis.json
179
180
 
180
181
  # UV
181
182
  uv.lock
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codeanalyzer-python
3
- Version: 0.1.13
3
+ Version: 0.1.14
4
4
  Summary: Static Analysis on Python source code using Jedi, CodeQL and Treesitter.
5
5
  Author-email: Rahul Krishna <i.m.ralk@gmail.com>
6
6
  License-File: LICENSE
@@ -110,16 +110,15 @@ To view the available options and commands, run `codeanalyzer --help`. You shoul
110
110
 
111
111
 
112
112
  ╭─ Options ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
113
- │ * --input -i PATH Path to the project root directory. [default: None] [required]
114
- │ --output -o PATH Output directory for artifacts. [default: None]
115
- │ --format -f [json|msgpack] Output format: json or msgpack. [default: json]
116
- │ --analysis-level -a INTEGER 1: symbol table, 2: call graph. [default: 1]
117
- │ --codeql --no-codeql Enable CodeQL-based analysis. [default: no-codeql]
118
- │ --eager --lazy Enable eager or lazy analysis. Defaults to lazy. [default: lazy]
119
- │ --cache-dir -c PATH Directory to store analysis cache. [default: None]
120
- --clear-cache --keep-cache Clear cache after analysis. [default: clear-cache]
121
- -v INTEGER Increase verbosity: -v, -vv, -vvv [default: 0]
122
- │ --help Show this message and exit. │
113
+ │ * --input -i PATH Path to the project root directory. [default: None] [required]
114
+ │ --output -o PATH Output directory for artifacts. [default: None]
115
+ │ --format -f [json|msgpack] Output format: json or msgpack. [default: json]
116
+ │ --codeql --no-codeql Enable CodeQL-based analysis. [default: no-codeql]
117
+ │ --eager --lazy Enable eager or lazy analysis. Defaults to lazy. [default: lazy]
118
+ │ --cache-dir -c PATH Directory to store analysis cache. [default: None]
119
+ │ --clear-cache --keep-cache Clear cache after analysis. [default: clear-cache]
120
+ │ -v INTEGER Increase verbosity: -v, -vv, -vvv [default: 0]
121
+ │ --help Show this message and exit.
123
122
  ╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
124
123
  ```
125
124
 
@@ -145,25 +144,15 @@ To view the available options and commands, run `codeanalyzer --help`. You shoul
145
144
 
146
145
  This will save the analysis results in `analysis.msgpack` in the specified directory.
147
146
 
148
- 3. **Toggle analysis levels with `--analysis-level`:**
149
- ```bash
150
- codeanalyzer --input ./my-python-project --analysis-level 1 # Symbol table only
151
- ```
152
- Call graph analysis can be enabled by setting the level to `2`:
153
- ```bash
154
- codeanalyzer --input ./my-python-project --analysis-level 2 # Symbol table + Call graph
155
- ```
156
- ***Note: The `--analysis-level=2` is not yet implemented in this version.***
157
-
158
- 4. **Analysis with CodeQL enabled:**
147
+ 3. **Analysis with CodeQL enabled:**
159
148
  ```bash
160
149
  codeanalyzer --input ./my-python-project --codeql
161
150
  ```
162
- This will perform CodeQL-based analysis in addition to the standard symbol table generation.
151
+ Every run produces a symbol table **and** a call graph. By default, edges come from Jedi's lexical analysis. Adding `--codeql` resolves additional edges (including RPC / third-party / dynamically-dispatched targets) and merges them with the Jedi-derived edges. CodeQL also backfills resolved callees on Jedi-emitted call sites where Jedi couldn't resolve them.
163
152
 
164
- ***Note: Not yet fully implemented. Please refrain from using this option until further notice.***
153
+ ***Note: CodeQL integration is experimental. The CLI is downloaded into `<cache_dir>/codeql/` on first use and reused thereafter.***
165
154
 
166
- 5. **Eager analysis with custom cache directory:**
155
+ 4. **Eager analysis with custom cache directory:**
167
156
  ```bash
168
157
  codeanalyzer --input ./my-python-project --eager --cache-dir /path/to/custom-cache
169
158
  ```
@@ -171,7 +160,7 @@ To view the available options and commands, run `codeanalyzer --help`. You shoul
171
160
 
172
161
  If you provide --cache-dir, the cache will be stored in that directory. If not specified, it defaults to `.codeanalyzer` in the current working directory (`$PWD`).
173
162
 
174
- 6. **Quiet mode (minimal output):**
163
+ 5. **Quiet mode (minimal output):**
175
164
  ```bash
176
165
  codeanalyzer --input /path/to/my-python-project --quiet
177
166
  ```
@@ -269,7 +258,6 @@ To view the available options and commands, run `codeanalyzer --help`. You shoul
269
258
  │ * --input -i PATH Path to the project root directory. [default: None] [required] │
270
259
  │ --output -o PATH Output directory for artifacts. [default: None] │
271
260
  │ --format -f [json|msgpack] Output format: json or msgpack. [default: json]. │
272
- │ --analysis-level -a INTEGER 1: symbol table, 2: call graph. [default: 1] │
273
261
  │ --codeql --no-codeql Enable CodeQL-based analysis. [default: no-codeql] │
274
262
  │ --eager --lazy Enable eager or lazy analysis. Defaults to lazy. [default: lazy] │
275
263
  │ --cache-dir -c PATH Directory to store analysis cache. [default: None] │
@@ -294,25 +282,15 @@ To view the available options and commands, run `codeanalyzer --help`. You shoul
294
282
 
295
283
  Now, you can find the analysis results in `analysis.json` in the specified directory.
296
284
 
297
- 2. **Toggle analysis levels with `--analysis-level`:**
298
- ```bash
299
- codeanalyzer --input ./my-python-project --analysis-level 1 # Symbol table only
300
- ```
301
- Call graph analysis can be enabled by setting the level to `2`:
302
- ```bash
303
- codeanalyzer --input ./my-python-project --analysis-level 2 # Symbol table + Call graph
304
- ```
305
- ***Note: The `--analysis-level=2` is not yet implemented in this version.***
306
-
307
- 3. **Analysis with CodeQL enabled:**
285
+ 2. **Analysis with CodeQL enabled:**
308
286
  ```bash
309
287
  codeanalyzer --input ./my-python-project --codeql
310
288
  ```
311
- This will perform CodeQL-based analysis in addition to the standard symbol table generation.
289
+ Every run produces a symbol table **and** a call graph. By default, edges come from Jedi's lexical analysis. Adding `--codeql` resolves additional edges (including RPC / third-party / dynamically-dispatched targets) and merges them with the Jedi-derived edges. CodeQL also backfills resolved callees on Jedi-emitted call sites where Jedi couldn't resolve them.
312
290
 
313
- ***Note: Not yet fully implemented. Please refrain from using this option until further notice.***
291
+ ***Note: CodeQL integration is experimental. The CLI is downloaded into `<cache_dir>/codeql/` on first use and reused thereafter.***
314
292
 
315
- 4. **Eager analysis with custom cache directory:**
293
+ 3. **Eager analysis with custom cache directory:**
316
294
  ```bash
317
295
  codeanalyzer --input ./my-python-project --eager --cache-dir /path/to/custom-cache
318
296
  ```
@@ -320,7 +298,7 @@ To view the available options and commands, run `codeanalyzer --help`. You shoul
320
298
 
321
299
  If you provide --cache-dir, the cache will be stored in that directory. If not specified, it defaults to `.codeanalyzer` in the current working directory (`$PWD`).
322
300
 
323
- 5. **Save output in msgpack format:**
301
+ 4. **Save output in msgpack format:**
324
302
  ```bash
325
303
  codeanalyzer --input ./my-python-project --output /path/to/analysis-results --format msgpack
326
304
  ```
@@ -77,16 +77,15 @@ To view the available options and commands, run `codeanalyzer --help`. You shoul
77
77
 
78
78
 
79
79
  ╭─ Options ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
80
- │ * --input -i PATH Path to the project root directory. [default: None] [required]
81
- │ --output -o PATH Output directory for artifacts. [default: None]
82
- │ --format -f [json|msgpack] Output format: json or msgpack. [default: json]
83
- │ --analysis-level -a INTEGER 1: symbol table, 2: call graph. [default: 1]
84
- │ --codeql --no-codeql Enable CodeQL-based analysis. [default: no-codeql]
85
- │ --eager --lazy Enable eager or lazy analysis. Defaults to lazy. [default: lazy]
86
- │ --cache-dir -c PATH Directory to store analysis cache. [default: None]
87
- --clear-cache --keep-cache Clear cache after analysis. [default: clear-cache]
88
- -v INTEGER Increase verbosity: -v, -vv, -vvv [default: 0]
89
- │ --help Show this message and exit. │
80
+ │ * --input -i PATH Path to the project root directory. [default: None] [required]
81
+ │ --output -o PATH Output directory for artifacts. [default: None]
82
+ │ --format -f [json|msgpack] Output format: json or msgpack. [default: json]
83
+ │ --codeql --no-codeql Enable CodeQL-based analysis. [default: no-codeql]
84
+ │ --eager --lazy Enable eager or lazy analysis. Defaults to lazy. [default: lazy]
85
+ │ --cache-dir -c PATH Directory to store analysis cache. [default: None]
86
+ │ --clear-cache --keep-cache Clear cache after analysis. [default: clear-cache]
87
+ │ -v INTEGER Increase verbosity: -v, -vv, -vvv [default: 0]
88
+ │ --help Show this message and exit.
90
89
  ╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
91
90
  ```
92
91
 
@@ -112,25 +111,15 @@ To view the available options and commands, run `codeanalyzer --help`. You shoul
112
111
 
113
112
  This will save the analysis results in `analysis.msgpack` in the specified directory.
114
113
 
115
- 3. **Toggle analysis levels with `--analysis-level`:**
116
- ```bash
117
- codeanalyzer --input ./my-python-project --analysis-level 1 # Symbol table only
118
- ```
119
- Call graph analysis can be enabled by setting the level to `2`:
120
- ```bash
121
- codeanalyzer --input ./my-python-project --analysis-level 2 # Symbol table + Call graph
122
- ```
123
- ***Note: The `--analysis-level=2` is not yet implemented in this version.***
124
-
125
- 4. **Analysis with CodeQL enabled:**
114
+ 3. **Analysis with CodeQL enabled:**
126
115
  ```bash
127
116
  codeanalyzer --input ./my-python-project --codeql
128
117
  ```
129
- This will perform CodeQL-based analysis in addition to the standard symbol table generation.
118
+ Every run produces a symbol table **and** a call graph. By default, edges come from Jedi's lexical analysis. Adding `--codeql` resolves additional edges (including RPC / third-party / dynamically-dispatched targets) and merges them with the Jedi-derived edges. CodeQL also backfills resolved callees on Jedi-emitted call sites where Jedi couldn't resolve them.
130
119
 
131
- ***Note: Not yet fully implemented. Please refrain from using this option until further notice.***
120
+ ***Note: CodeQL integration is experimental. The CLI is downloaded into `<cache_dir>/codeql/` on first use and reused thereafter.***
132
121
 
133
- 5. **Eager analysis with custom cache directory:**
122
+ 4. **Eager analysis with custom cache directory:**
134
123
  ```bash
135
124
  codeanalyzer --input ./my-python-project --eager --cache-dir /path/to/custom-cache
136
125
  ```
@@ -138,7 +127,7 @@ To view the available options and commands, run `codeanalyzer --help`. You shoul
138
127
 
139
128
  If you provide --cache-dir, the cache will be stored in that directory. If not specified, it defaults to `.codeanalyzer` in the current working directory (`$PWD`).
140
129
 
141
- 6. **Quiet mode (minimal output):**
130
+ 5. **Quiet mode (minimal output):**
142
131
  ```bash
143
132
  codeanalyzer --input /path/to/my-python-project --quiet
144
133
  ```
@@ -236,7 +225,6 @@ To view the available options and commands, run `codeanalyzer --help`. You shoul
236
225
  │ * --input -i PATH Path to the project root directory. [default: None] [required] │
237
226
  │ --output -o PATH Output directory for artifacts. [default: None] │
238
227
  │ --format -f [json|msgpack] Output format: json or msgpack. [default: json]. │
239
- │ --analysis-level -a INTEGER 1: symbol table, 2: call graph. [default: 1] │
240
228
  │ --codeql --no-codeql Enable CodeQL-based analysis. [default: no-codeql] │
241
229
  │ --eager --lazy Enable eager or lazy analysis. Defaults to lazy. [default: lazy] │
242
230
  │ --cache-dir -c PATH Directory to store analysis cache. [default: None] │
@@ -261,25 +249,15 @@ To view the available options and commands, run `codeanalyzer --help`. You shoul
261
249
 
262
250
  Now, you can find the analysis results in `analysis.json` in the specified directory.
263
251
 
264
- 2. **Toggle analysis levels with `--analysis-level`:**
265
- ```bash
266
- codeanalyzer --input ./my-python-project --analysis-level 1 # Symbol table only
267
- ```
268
- Call graph analysis can be enabled by setting the level to `2`:
269
- ```bash
270
- codeanalyzer --input ./my-python-project --analysis-level 2 # Symbol table + Call graph
271
- ```
272
- ***Note: The `--analysis-level=2` is not yet implemented in this version.***
273
-
274
- 3. **Analysis with CodeQL enabled:**
252
+ 2. **Analysis with CodeQL enabled:**
275
253
  ```bash
276
254
  codeanalyzer --input ./my-python-project --codeql
277
255
  ```
278
- This will perform CodeQL-based analysis in addition to the standard symbol table generation.
256
+ Every run produces a symbol table **and** a call graph. By default, edges come from Jedi's lexical analysis. Adding `--codeql` resolves additional edges (including RPC / third-party / dynamically-dispatched targets) and merges them with the Jedi-derived edges. CodeQL also backfills resolved callees on Jedi-emitted call sites where Jedi couldn't resolve them.
279
257
 
280
- ***Note: Not yet fully implemented. Please refrain from using this option until further notice.***
258
+ ***Note: CodeQL integration is experimental. The CLI is downloaded into `<cache_dir>/codeql/` on first use and reused thereafter.***
281
259
 
282
- 4. **Eager analysis with custom cache directory:**
260
+ 3. **Eager analysis with custom cache directory:**
283
261
  ```bash
284
262
  codeanalyzer --input ./my-python-project --eager --cache-dir /path/to/custom-cache
285
263
  ```
@@ -287,7 +265,7 @@ To view the available options and commands, run `codeanalyzer --help`. You shoul
287
265
 
288
266
  If you provide --cache-dir, the cache will be stored in that directory. If not specified, it defaults to `.codeanalyzer` in the current working directory (`$PWD`).
289
267
 
290
- 5. **Save output in msgpack format:**
268
+ 4. **Save output in msgpack format:**
291
269
  ```bash
292
270
  codeanalyzer --input ./my-python-project --output /path/to/analysis-results --format msgpack
293
271
  ```
@@ -27,10 +27,6 @@ def main(
27
27
  case_sensitive=False,
28
28
  ),
29
29
  ] = OutputFormat.JSON,
30
- analysis_level: Annotated[
31
- int,
32
- typer.Option("-a", "--analysis-level", help="1: symbol table, 2: call graph."),
33
- ] = 1,
34
30
  using_codeql: Annotated[
35
31
  bool, typer.Option("--codeql/--no-codeql", help="Enable CodeQL-based analysis.")
36
32
  ] = False,
@@ -82,7 +78,6 @@ def main(
82
78
  input=input,
83
79
  output=output,
84
80
  format=format,
85
- analysis_level=analysis_level,
86
81
  using_codeql=using_codeql,
87
82
  using_ray=using_ray,
88
83
  rebuild_analysis=rebuild_analysis,
@@ -9,7 +9,14 @@ from typing import Any, Dict, Optional, Union, List
9
9
  import ray
10
10
  from codeanalyzer.utils import logger
11
11
  from codeanalyzer.schema import PyApplication, PyModule, model_dump_json, model_validate_json
12
+ from codeanalyzer.schema.py_schema import PyCallEdge
13
+ from codeanalyzer.semantic_analysis.call_graph import (
14
+ jedi_call_graph_edges,
15
+ merge_edges,
16
+ resolve_unresolved_constructors,
17
+ )
12
18
  from codeanalyzer.semantic_analysis.codeql import CodeQLLoader
19
+ from codeanalyzer.semantic_analysis.codeql.codeql_analysis import CodeQL
13
20
  from codeanalyzer.semantic_analysis.codeql.codeql_exceptions import CodeQLExceptions
14
21
  from codeanalyzer.syntactic_analysis.exceptions import SymbolTableBuilderRayError
15
22
  from codeanalyzer.syntactic_analysis.symbol_table_builder import SymbolTableBuilder
@@ -49,7 +56,6 @@ class Codeanalyzer:
49
56
 
50
57
  def __init__(self, options: AnalysisOptions) -> None:
51
58
  self.options = options
52
- self.analysis_depth = options.analysis_level
53
59
  self.project_dir = Path(options.input).resolve()
54
60
  self.skip_tests = options.skip_tests
55
61
  self.using_codeql = options.using_codeql
@@ -60,6 +66,7 @@ class Codeanalyzer:
60
66
  self.clear_cache = options.clear_cache
61
67
  self.db_path: Optional[Path] = None
62
68
  self.codeql_bin: Optional[Path] = None
69
+ self.codeql_packs_dir: Optional[Path] = None
63
70
  self.virtualenv: Optional[Path] = None
64
71
  self.using_ray: bool = options.using_ray
65
72
  self.file_name: Optional[Path] = options.file_name
@@ -292,6 +299,15 @@ class Codeanalyzer:
292
299
 
293
300
  if self.using_codeql:
294
301
  logger.info(f"(Re-)initializing CodeQL analysis for {self.project_dir}")
302
+
303
+ # Resolve the CLI binary before anything else uses it: DB build
304
+ # below needs it, and so does every subsequent query run.
305
+ self.codeql_bin = self._ensure_codeql_bin()
306
+ # Download the standard query library pack (idempotent). The
307
+ # CLI install ships only the language extractors; the
308
+ # ``codeql/python-all`` library pack must be fetched separately.
309
+ self.codeql_packs_dir = self._ensure_codeql_packs(self.codeql_bin)
310
+
295
311
  cache_root = self.cache_dir / "codeql"
296
312
  cache_root.mkdir(parents=True, exist_ok=True)
297
313
  self.db_path = cache_root / f"{self.project_dir.name}-db"
@@ -310,19 +326,6 @@ class Codeanalyzer:
310
326
  if self.rebuild_analysis or not is_cache_valid():
311
327
  logger.info("Creating new CodeQL database...")
312
328
 
313
- codeql_in_path = shutil.which("codeql")
314
- if codeql_in_path:
315
- self.codeql_bin = Path(codeql_in_path)
316
- else:
317
- self.codeql_bin = CodeQLLoader.download_and_extract_codeql(
318
- self.cache_dir / "codeql" / "bin"
319
- )
320
-
321
- if not shutil.which(str(self.codeql_bin)):
322
- raise FileNotFoundError(
323
- f"CodeQL binary not executable: {self.codeql_bin}"
324
- )
325
-
326
329
  cmd = [
327
330
  str(self.codeql_bin),
328
331
  "database",
@@ -375,8 +378,27 @@ class Codeanalyzer:
375
378
  # Build symbol table from cached application if available (if not available, then build a new one)
376
379
  symbol_table = self._build_symbol_table(cached_pyapplication.symbol_table if cached_pyapplication else {})
377
380
 
381
+ # Build the call graph in four steps:
382
+ # 1. Run CodeQL (when enabled). Produces resolved edges with
383
+ # ``provenance=["codeql"]`` and augments ``PyCallsite``s
384
+ # in-place — filling ``callee_signature`` for sites Jedi
385
+ # couldn't resolve.
386
+ # 2. Heuristic fallback for constructor calls neither Jedi nor
387
+ # CodeQL could resolve (commonly classes nested inside
388
+ # functions). Walks the symbol table by class short-name +
389
+ # scope and writes ``<class>.__init__`` into the site.
390
+ # 3. Derive Jedi edges from the now-fully-augmented symbol
391
+ # table — these reflect every resolution the symbol table
392
+ # contains, regardless of which pass put it there.
393
+ # 4. Merge with CodeQL edges; provenance unions for edges both
394
+ # backends saw.
395
+ codeql_edges = self._get_call_graph(symbol_table, augment_sites=True)
396
+ resolve_unresolved_constructors(symbol_table)
397
+ jedi_edges = jedi_call_graph_edges(symbol_table)
398
+ call_graph = merge_edges(jedi_edges, codeql_edges)
399
+
378
400
  # Recreate pyapplication
379
- app = PyApplication.builder().symbol_table(symbol_table).build()
401
+ app = PyApplication.builder().symbol_table(symbol_table).call_graph(call_graph).build()
380
402
 
381
403
  # Save to cache
382
404
  self._save_analysis_cache(app, cache_file)
@@ -579,7 +601,120 @@ class Codeanalyzer:
579
601
  logger.info("✅ Symbol table generation complete.")
580
602
  return symbol_table
581
603
 
582
- def _get_call_graph(self) -> Dict[str, Any]:
583
- """Retrieve call graph from CodeQL database."""
584
- logger.warning("Call graph extraction not yet implemented.")
585
- return {}
604
+ def _ensure_codeql_packs(self, codeql_bin: Path) -> Path:
605
+ """Materialize a qlpack that depends on ``codeql/python-all``.
606
+
607
+ The CodeQL CLI install ships only the language extractors — query
608
+ library packs (and their transitive dependencies like
609
+ ``codeql/concepts``) must be resolved separately. The canonical
610
+ way is to declare the dependency in a ``qlpack.yml`` and run
611
+ ``codeql pack install`` in that directory; CodeQL writes a
612
+ ``codeql-pack.lock.yml`` and downloads everything needed.
613
+
614
+ We do this once per project under ``<cache_dir>/codeql/qlpack/``
615
+ and return that directory. The query runner then writes its
616
+ temporary ``.ql`` file inside this pack — colocation makes
617
+ ``import python`` resolve without any ``--additional-packs`` or
618
+ ``--search-path`` gymnastics.
619
+ """
620
+ pack_dir = self.cache_dir / "codeql" / "qlpack"
621
+ pack_dir.mkdir(parents=True, exist_ok=True)
622
+ qlpack_yml = pack_dir / "qlpack.yml"
623
+ lock_file = pack_dir / "codeql-pack.lock.yml"
624
+
625
+ if not qlpack_yml.exists():
626
+ qlpack_yml.write_text(
627
+ "name: codeanalyzer-deps\n"
628
+ "version: 1.0.0\n"
629
+ "dependencies:\n"
630
+ ' codeql/python-all: "*"\n'
631
+ )
632
+
633
+ if lock_file.exists():
634
+ logger.debug(f"CodeQL pack dependencies already installed in {pack_dir}")
635
+ return pack_dir
636
+
637
+ logger.info(f"Installing CodeQL pack dependencies in {pack_dir}.")
638
+ proc = subprocess.Popen(
639
+ [str(codeql_bin), "pack", "install", str(pack_dir)],
640
+ stdout=subprocess.PIPE,
641
+ stderr=subprocess.PIPE,
642
+ )
643
+ _, err = proc.communicate()
644
+ if proc.returncode != 0:
645
+ raise CodeQLExceptions.CodeQLDatabaseBuildException(
646
+ f"Failed to install CodeQL pack dependencies:\n"
647
+ f"{(err or b'').decode(errors='replace')}"
648
+ )
649
+ return pack_dir
650
+
651
+ def _ensure_codeql_bin(self) -> Path:
652
+ """Locate (or download) the CodeQL CLI binary into the project cache.
653
+
654
+ Resolution order:
655
+ 1. An existing binary inside ``<cache_dir>/codeql/bin/`` —
656
+ reused across runs on the same project.
657
+ 2. ``codeql`` already on the user's PATH — picked up verbatim.
658
+ 3. Otherwise, download into ``<cache_dir>/codeql/bin/``.
659
+
660
+ The project-local cache is preferred over PATH so the version we
661
+ installed earlier wins over whatever the OS ships — keeps behavior
662
+ deterministic when the user has both.
663
+ """
664
+ bin_root = self.cache_dir / "codeql" / "bin"
665
+ bin_root.mkdir(parents=True, exist_ok=True)
666
+
667
+ existing = next(
668
+ (p for p in bin_root.rglob("codeql") if p.is_file()),
669
+ None,
670
+ )
671
+ if existing and os.access(existing, os.X_OK):
672
+ logger.debug(f"Reusing cached CodeQL CLI at {existing}")
673
+ return existing.resolve()
674
+
675
+ on_path = shutil.which("codeql")
676
+ if on_path:
677
+ logger.debug(f"Using CodeQL CLI from PATH at {on_path}")
678
+ return Path(on_path)
679
+
680
+ logger.info(f"CodeQL CLI not found; downloading into {bin_root}.")
681
+ downloaded = CodeQLLoader.download_and_extract_codeql(bin_root)
682
+ if not downloaded.exists() or not os.access(downloaded, os.X_OK):
683
+ raise FileNotFoundError(
684
+ f"CodeQL binary not executable after download: {downloaded}"
685
+ )
686
+ return downloaded
687
+
688
+ def _get_call_graph(
689
+ self,
690
+ symbol_table: Dict[str, PyModule],
691
+ augment_sites: bool = False,
692
+ ) -> List[PyCallEdge]:
693
+ """Build CodeQL-resolved call edges and optionally augment sites.
694
+
695
+ Returns an empty list when CodeQL isn't enabled or the database
696
+ isn't available. Edges carry ``provenance=["codeql"]`` — merge
697
+ with Jedi-derived edges via ``call_graph.merge_edges``.
698
+
699
+ When ``augment_sites`` is True, also mutates
700
+ ``PyCallable.call_sites`` in the symbol table to backfill
701
+ ``callee_signature`` for sites Jedi couldn't resolve. The single
702
+ CodeQL query is shared (cached on the ``CodeQL`` instance) so
703
+ this costs no extra DB work.
704
+ """
705
+ if not self.using_codeql or self.db_path is None:
706
+ return []
707
+ try:
708
+ cq = CodeQL(
709
+ self.project_dir,
710
+ self.db_path,
711
+ codeql_bin=self.codeql_bin,
712
+ codeql_packs_dir=self.codeql_packs_dir,
713
+ )
714
+ edges = cq.build_call_graph_edges(symbol_table)
715
+ if augment_sites:
716
+ cq.augment_call_sites(symbol_table)
717
+ return edges
718
+ except Exception as exc:
719
+ logger.warning(f"CodeQL call-graph extraction failed: {exc}")
720
+ return []
@@ -14,7 +14,6 @@ class AnalysisOptions:
14
14
  input: Path
15
15
  output: Optional[Path] = None
16
16
  format: OutputFormat = OutputFormat.JSON
17
- analysis_level: int = 1
18
17
  using_codeql: bool = False
19
18
  using_ray: bool = False
20
19
  rebuild_analysis: bool = False
@@ -339,9 +339,29 @@ class PyModule(BaseModel):
339
339
  file_size: Optional[int] = None
340
340
 
341
341
 
342
+ @builder
343
+ @msgpk
344
+ class PyCallEdge(BaseModel):
345
+ """Identity-only call-graph edge with weight.
346
+
347
+ Mirrors Java's ``CallDependency``. ``source`` and ``target`` are
348
+ ``PyCallable.signature`` strings — nodes of the graph are the existing
349
+ ``PyCallable`` entries in the symbol table, not a separate vertex type.
350
+ Rich per-call metadata (receiver, arguments, location, ...) lives on
351
+ ``PyCallsite`` inside the source ``PyCallable.call_sites``.
352
+ """
353
+
354
+ source: str # caller's PyCallable.signature
355
+ target: str # callee's PyCallable.signature
356
+ type: Literal["CALL_DEP"] = "CALL_DEP"
357
+ weight: int = 1
358
+ provenance: List[Literal["jedi", "codeql", "joern"]] = []
359
+
360
+
342
361
  @builder
343
362
  @msgpk
344
363
  class PyApplication(BaseModel):
345
364
  """Represents a Python application."""
346
365
 
347
366
  symbol_table: Dict[str, PyModule]
367
+ call_graph: List[PyCallEdge] = []