codeanalyzer-python 0.1.14__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codeanalyzer/__main__.py +99 -6
- codeanalyzer/neo4j/__init__.py +46 -0
- codeanalyzer/neo4j/bolt.py +223 -0
- codeanalyzer/neo4j/catalog.py +245 -0
- codeanalyzer/neo4j/cypher.py +138 -0
- codeanalyzer/neo4j/emit.py +74 -0
- codeanalyzer/neo4j/project.py +322 -0
- codeanalyzer/neo4j/rows.py +176 -0
- codeanalyzer/neo4j/schema.py +39 -0
- codeanalyzer/options/__init__.py +2 -2
- codeanalyzer/options/options.py +20 -0
- codeanalyzer/semantic_analysis/codeql/codeql_analysis.py +109 -27
- codeanalyzer_python-0.2.0.dist-info/METADATA +393 -0
- {codeanalyzer_python-0.1.14.dist-info → codeanalyzer_python-0.2.0.dist-info}/RECORD +18 -10
- {codeanalyzer_python-0.1.14.dist-info → codeanalyzer_python-0.2.0.dist-info}/WHEEL +1 -1
- codeanalyzer_python-0.2.0.dist-info/entry_points.txt +3 -0
- codeanalyzer_python-0.1.14.dist-info/METADATA +0 -392
- codeanalyzer_python-0.1.14.dist-info/entry_points.txt +0 -2
- {codeanalyzer_python-0.1.14.dist-info → codeanalyzer_python-0.2.0.dist-info}/licenses/LICENSE +0 -0
- {codeanalyzer_python-0.1.14.dist-info → codeanalyzer_python-0.2.0.dist-info}/licenses/NOTICE +0 -0
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
################################################################################
|
|
2
|
+
# Copyright IBM Corporation 2025
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
################################################################################
|
|
16
|
+
|
|
17
|
+
"""The output-agnostic intermediate between :func:`project` and the two writers
|
|
18
|
+
(cypher snapshot / bolt incremental). Pure data — no I/O, no driver. A
|
|
19
|
+
:class:`GraphRows` is a deterministic, deduped bag of nodes and edges that both
|
|
20
|
+
writers consume identically.
|
|
21
|
+
|
|
22
|
+
Property values are restricted to Neo4j-legal shapes: primitives and homogeneous
|
|
23
|
+
arrays of primitives. ``None`` values are pruned (in Neo4j a null property is
|
|
24
|
+
simply absence).
|
|
25
|
+
"""
|
|
26
|
+
from __future__ import annotations
|
|
27
|
+
|
|
28
|
+
from dataclasses import dataclass, field
|
|
29
|
+
from typing import Dict, List, Optional, Union
|
|
30
|
+
|
|
31
|
+
# A property value: a primitive, or a homogeneous list of primitives.
# ``Scalar`` is one primitive value.
Scalar = Union[str, int, float, bool]
# ``Prop`` is a scalar or a list whose elements all share one primitive type.
Prop = Union[Scalar, List[str], List[int], List[float], List[bool]]
# ``Props`` maps a property name to its value.
Props = Dict[str, Prop]
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@dataclass(frozen=True)
class NodeRef:
    """How an edge addresses one of its endpoints: the label + key property to
    MATCH on, and the value.

    Declared ``frozen=True``, so fields cannot be reassigned after construction.
    """

    label: str  # the label carrying the uniqueness constraint (e.g. "PySymbol", "PyModule")
    key_prop: str  # "signature" | "file_key" | "name" | "id"
    value: str  # the value of ``key_prop`` that identifies the node
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
@dataclass
class NodeRow:
    """One node row: its labels, its identifying key property/value, and its
    property map."""

    labels: List[str]  # labels[0] is the constrained MERGE label; the rest are SET as extra labels
    key_prop: str  # name of the property that identifies the node
    value: str  # value of ``key_prop``
    props: Props  # the node's properties
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
@dataclass
class EdgeRow:
    """One relationship row: a type, two endpoint references, and a property
    map."""

    type: str  # relationship type
    from_ref: NodeRef  # how to address the source node
    to_ref: NodeRef  # how to address the target node
    props: Props  # the relationship's properties
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
@dataclass
class GraphRows:
    """A container of node rows and edge rows.

    Both lists default to a fresh empty list per instance (``default_factory``),
    so instances never share list objects by default.
    """

    nodes: List[NodeRow] = field(default_factory=list)
    edges: List[EdgeRow] = field(default_factory=list)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def prune(p: Dict[str, Optional[Prop]]) -> Props:
    """Return a copy of ``p`` without the entries whose value is ``None``.

    Neo4j treats a null property as an absent one, so nulls are never stored.
    Only ``None`` is removed: falsy values such as ``0``, ``False``, ``""`` and
    empty lists are kept (a present-but-empty array is legal). The input
    mapping is not modified and key order is preserved.
    """
    kept: Props = {}
    for name, value in p.items():
        if value is None:
            continue
        kept[name] = value
    return kept
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
class RowBuilder:
    """Accumulates nodes/edges with ``MERGE`` semantics in memory, so the same
    node touched many times (a hot external symbol, a canonical decorator)
    collapses to one row, and cross-reference edges to a target that never
    materialized are dropped (the "edge-only-when-resolved" rule).

    Typical use: call :meth:`node` / :meth:`edge` / :meth:`edge_to_symbol` any
    number of times, then :meth:`finish` to obtain the sorted
    :class:`GraphRows`.
    """

    def __init__(self) -> None:
        self._nodes: Dict[str, NodeRow] = {}  # key: f"{labels[0]} {value}"
        self._edges: List[EdgeRow] = []
        self._deferred: List[EdgeRow] = []  # edges gated against node existence at finish()
        self._keys: set = set()  # every node value seen (any label), backs has_key()
        # Every (label, value) pair carried by an emitted node. Deferred edges
        # are gated on this rather than on the bare value, so a node of another
        # label that happens to share the value cannot satisfy the gate.
        self._labelled: set = set()

    def node(self, labels: List[str], key_prop: str, value: str, props: Props) -> NodeRef:
        """Upsert a node. Re-seeing the same ``(labels[0], value)`` merges props
        (last write wins) and unions labels — the in-memory analog of
        ``MERGE (n:Label {key}) SET n += props``.

        Args:
            labels: Node labels; ``labels[0]`` is the identity (MERGE) label.
            key_prop: Name of the identifying property.
            value: Value of the identifying property.
            props: Properties to set; copied, so the caller's dict is not kept.

        Returns:
            A :class:`NodeRef` addressing the node by ``labels[0]``.
        """
        node_id = f"{labels[0]} {value}"
        existing = self._nodes.get(node_id)
        if existing is not None:
            existing.props.update(props)
            for label in labels:
                if label not in existing.labels:
                    existing.labels.append(label)
        else:
            self._nodes[node_id] = NodeRow(list(labels), key_prop, value, dict(props))
        self._keys.add(value)
        self._labelled.update((label, value) for label in labels)
        return NodeRef(labels[0], key_prop, value)

    def edge(self, type_: str, from_ref: NodeRef, to_ref: NodeRef, props: Optional[Props] = None) -> None:
        """An edge whose endpoints are known to exist (both ends emitted this run)."""
        self._edges.append(EdgeRow(type_, from_ref, to_ref, dict(props or {})))

    def edge_to_symbol(
        self, type_: str, from_ref: NodeRef, target_signature: str, props: Optional[Props] = None
    ) -> None:
        """An edge to a ``:PySymbol`` target that may be external/library code not
        present in the graph. Deferred and kept only if the target signature was
        actually emitted as a node — so PY_EXTENDS / PY_RESOLVES_TO never dangle (the
        string fallback lives on the source node's props)."""
        self._deferred.append(
            EdgeRow(
                type_,
                from_ref,
                NodeRef("PySymbol", "signature", target_signature),
                dict(props or {}),
            )
        )

    def has_key(self, value: str) -> bool:
        """Return whether any node, of any label, was emitted with this value."""
        return value in self._keys

    def finish(self) -> GraphRows:
        """Resolve deferred edges and return the sorted rows.

        A deferred edge is kept only when a node carrying the target's label
        *and* value was emitted. The builder's own state is left untouched, so
        calling this more than once yields the same result instead of
        accumulating duplicate copies of the deferred edges.
        """
        resolved = [
            e for e in self._deferred if (e.to_ref.label, e.to_ref.value) in self._labelled
        ]
        nodes = sorted(self._nodes.values(), key=lambda n: f"{n.labels[0]} {n.value}")
        # The sort is stable: direct edges precede resolved deferred edges on ties.
        edges = sorted(
            self._edges + resolved,
            key=lambda e: f"{e.type} {e.from_ref.value} {e.to_ref.value}",
        )
        return GraphRows(nodes, edges)
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
# ----------------------------------------------------------------------------------------------
|
|
137
|
+
# Cypher literal rendering (used by the snapshot writer; the bolt writer passes params instead).
|
|
138
|
+
# ----------------------------------------------------------------------------------------------
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
def cypher_value(v: Prop) -> str:
    """Render a property value as a Cypher literal.

    Booleans become ``true``/``false``, strings are quoted and escaped, ints
    and finite floats are written as numbers, and lists are rendered element
    by element. NaN, infinities and any unsupported type render as ``null``.
    """
    # bool is a subclass of int, so it has to be tested before the numbers.
    if isinstance(v, bool):
        if v:
            return "true"
        return "false"
    if isinstance(v, str):
        return _cypher_string(v)
    if isinstance(v, float):
        # NaN is the only float that differs from itself.
        not_finite = v != v or v == float("inf") or v == float("-inf")
        return "null" if not_finite else repr(v)
    if isinstance(v, int):
        return str(v)
    if isinstance(v, list):
        rendered = [cypher_value(item) for item in v]
        return "[" + ", ".join(rendered) + "]"
    return "null"
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def cypher_map(props: Props) -> str:
    """Render a props map as a Cypher map literal: ``{key: value, ...}``.

    Keys are written verbatim and must already be valid identifiers; values go
    through :func:`cypher_value`. Entries follow the mapping's iteration order.
    """
    entries = []
    for name, value in props.items():
        entries.append(f"{name}: {cypher_value(value)}")
    body = ", ".join(entries)
    return "{" + body + "}"
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def _cypher_string(s: str) -> str:
|
|
164
|
+
escaped = (
|
|
165
|
+
s.replace("\\", "\\\\")
|
|
166
|
+
.replace("'", "\\'")
|
|
167
|
+
.replace("\n", "\\n")
|
|
168
|
+
.replace("\r", "\\r")
|
|
169
|
+
.replace("\t", "\\t")
|
|
170
|
+
)
|
|
171
|
+
return f"'{escaped}'"
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def chunk(items: list, size: int) -> list:
    """Split a list into chunks of at most ``size`` (UNWIND batch sizing).

    Args:
        items: The list to split; it is sliced, not modified.
        size: Maximum chunk length; must be at least 1.

    Returns:
        A list of consecutive slices of ``items``; empty when ``items`` is
        empty. Only the last chunk may be shorter than ``size``.

    Raises:
        ValueError: If ``size`` is less than 1.
    """
    # A negative step makes range() empty, which would silently drop every
    # item; reject it explicitly, the same way a zero step already fails.
    if size < 1:
        raise ValueError(f"chunk size must be >= 1, got {size}")
    return [items[i : i + size] for i in range(0, len(items), size)]
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
################################################################################
|
|
2
|
+
# Copyright IBM Corporation 2025
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
################################################################################
|
|
16
|
+
|
|
17
|
+
"""The Cypher DDL — uniqueness constraints and indexes — shared by both writers.
|
|
18
|
+
Run BEFORE any load so MERGE uses an index seek (not a label scan) and the
|
|
19
|
+
identity invariant is enforced by the database. Every statement is idempotent
|
|
20
|
+
(``IF NOT EXISTS``).
|
|
21
|
+
"""
|
|
22
|
+
from typing import List
|
|
23
|
+
|
|
24
|
+
# Uniqueness constraints, one per (label, key property) pair. Each statement is
# named and carries ``IF NOT EXISTS``.
CONSTRAINTS: List[str] = [
    "CREATE CONSTRAINT py_symbol_sig IF NOT EXISTS FOR (s:PySymbol) REQUIRE s.signature IS UNIQUE",
    "CREATE CONSTRAINT py_app_name IF NOT EXISTS FOR (a:PyApplication) REQUIRE a.name IS UNIQUE",
    "CREATE CONSTRAINT py_module_key IF NOT EXISTS FOR (m:PyModule) REQUIRE m.file_key IS UNIQUE",
    "CREATE CONSTRAINT py_package_name IF NOT EXISTS FOR (p:PyPackage) REQUIRE p.name IS UNIQUE",
    "CREATE CONSTRAINT py_decorator_name IF NOT EXISTS FOR (d:PyDecorator) REQUIRE d.name IS UNIQUE",
    "CREATE CONSTRAINT py_callsite_id IF NOT EXISTS FOR (c:PyCallSite) REQUIRE c.id IS UNIQUE",
    "CREATE CONSTRAINT py_attribute_id IF NOT EXISTS FOR (a:PyAttribute) REQUIRE a.id IS UNIQUE",
    "CREATE CONSTRAINT py_variable_id IF NOT EXISTS FOR (v:PyVariable) REQUIRE v.id IS UNIQUE",
]
|
|
34
|
+
|
|
35
|
+
# Secondary indexes: ``name`` lookups on PyCallable and PyClass, plus a
# full-text index over PyCallable ``code`` and ``docstring``. Each statement is
# named and carries ``IF NOT EXISTS``.
INDEXES: List[str] = [
    "CREATE INDEX py_callable_name IF NOT EXISTS FOR (c:PyCallable) ON (c.name)",
    "CREATE INDEX py_class_name IF NOT EXISTS FOR (c:PyClass) ON (c.name)",
    "CREATE FULLTEXT INDEX py_code_fts IF NOT EXISTS FOR (c:PyCallable) ON EACH [c.code, c.docstring]",
]
|
codeanalyzer/options/__init__.py
CHANGED
|
@@ -1,3 +1,3 @@
|
|
|
1
|
-
from .options import AnalysisOptions
|
|
1
|
+
from .options import AnalysisOptions, EmitTarget, OutputFormat
|
|
2
2
|
|
|
3
|
-
__all__ = ["AnalysisOptions"]
|
|
3
|
+
__all__ = ["AnalysisOptions", "EmitTarget", "OutputFormat"]
|
codeanalyzer/options/options.py
CHANGED
|
@@ -9,11 +9,31 @@ class OutputFormat(str, Enum):
|
|
|
9
9
|
MSGPACK = "msgpack"
|
|
10
10
|
|
|
11
11
|
|
|
12
|
+
class EmitTarget(str, Enum):
    """Output target selected by ``--emit``.

    - ``json`` : the canonical ``analysis.json`` (symbol table + call graph).
    - ``neo4j`` : project the analysis into a labeled property graph — a
      ``graph.cypher`` snapshot, or a live Bolt push with ``--neo4j-uri``.
    - ``schema`` : the machine-readable, version-stamped Neo4j schema contract.

    The ``str`` mixin makes every member an instance of ``str``.
    """

    # Each value is the literal string listed in the docstring above.
    JSON = "json"
    NEO4J = "neo4j"
    SCHEMA = "schema"
|
|
24
|
+
|
|
25
|
+
|
|
12
26
|
@dataclass
|
|
13
27
|
class AnalysisOptions:
|
|
14
28
|
input: Path
|
|
15
29
|
output: Optional[Path] = None
|
|
16
30
|
format: OutputFormat = OutputFormat.JSON
|
|
31
|
+
emit: EmitTarget = EmitTarget.JSON
|
|
32
|
+
app_name: Optional[str] = None
|
|
33
|
+
neo4j_uri: Optional[str] = None
|
|
34
|
+
neo4j_user: str = "neo4j"
|
|
35
|
+
neo4j_password: str = "neo4j"
|
|
36
|
+
neo4j_database: Optional[str] = None
|
|
17
37
|
using_codeql: bool = False
|
|
18
38
|
using_ray: bool = False
|
|
19
39
|
rebuild_analysis: bool = False
|
|
@@ -32,6 +32,67 @@ from codeanalyzer.semantic_analysis.codeql.codeql_query_runner import CodeQLQuer
|
|
|
32
32
|
from codeanalyzer.utils import logger
|
|
33
33
|
|
|
34
34
|
|
|
35
|
+
class _CallableResolver:
    """Looks up the Jedi ``PyCallable`` for a CodeQL endpoint described by
    ``(file, start_line, name, arity)``.

    Lookup order:
      1. the callable indexed under exactly ``(abs_path, start_line)``;
      2. otherwise the callables indexed under ``(abs_path, short_name)``:
         a lone candidate wins outright; with several, those whose
         parameter count equals the CodeQL positional arity are preferred
         (falling back to all of them when none match), and the one whose
         ``start_line`` is nearest is returned;
      3. otherwise ``None`` (the caller row is skipped / the callee becomes
         a ghost node).

    The second step recovers edges that the ``(file, line)`` join would
    silently drop when CodeQL and Jedi disagree on where a definition
    starts (e.g. decorator handling). Jedi's ``parameters`` counts every
    declared slot (incl. ``*args``/``**kwargs``/keyword-only) whereas
    CodeQL's arity is positional only, so the arity filter is exact for
    plain signatures and otherwise yields to the nearest-line tiebreak.
    """

    def __init__(self) -> None:
        # (absolute path, start line) -> callable
        self._by_loc: Dict[Tuple[str, int], Any] = {}
        # (absolute path, short name) -> callables, in insertion order
        self._by_name: Dict[Tuple[str, str], List[Any]] = {}

    @staticmethod
    def _abs(path: str) -> str:
        """Return ``path`` resolved to an absolute path, or ``path`` itself
        when resolution raises ``OSError``/``RuntimeError``."""
        try:
            resolved = Path(path).resolve()
        except (OSError, RuntimeError):
            return path
        return str(resolved)

    @classmethod
    def from_symbol_table(
        cls, symbol_table: Dict[str, PyModule]
    ) -> "_CallableResolver":
        """Build a resolver indexing every callable in ``symbol_table``."""
        built = cls()
        for entry in iter_callables_in_symbol_table(symbol_table):
            location = cls._abs(entry.path)
            # A later callable at the same location replaces the earlier one.
            built._by_loc[(location, entry.start_line)] = entry
            same_name = built._by_name.setdefault((location, entry.name), [])
            same_name.append(entry)
        return built

    def resolve(
        self, file: str, start_line: int, name: str, arity: int
    ) -> Any:
        """Return the callable matching the endpoint, or ``None``."""
        located = self._by_loc.get((file, start_line))
        if located is not None:
            return located

        # Without a name there is nothing to fall back on.
        named = self._by_name.get((file, name)) if name else None
        if not named:
            return None
        if len(named) == 1:
            return named[0]

        same_arity = [c for c in named if len(c.parameters) == arity]
        pool = same_arity if same_arity else named
        return min(pool, key=lambda c: abs(c.start_line - start_line))
|
|
94
|
+
|
|
95
|
+
|
|
35
96
|
class CodeQL:
|
|
36
97
|
"""A class for building the application view of a Python application using CodeQL.
|
|
37
98
|
|
|
@@ -99,9 +160,14 @@ class CodeQL:
|
|
|
99
160
|
# codeql/python-all 7.x — it returns the ``CallNode`` (CFG)
|
|
100
161
|
# whose target was resolved to that ``Value``. Cleaner than
|
|
101
162
|
# poking at ``pointsTo`` directly.
|
|
102
|
-
|
|
163
|
+
# ``callee`` is bound to the FunctionValue's scope so the
|
|
164
|
+
# endpoint emits the same Function-level facts (name, arity,
|
|
165
|
+
# location) the post-processor needs for the name+arity
|
|
166
|
+
# fallback when the (file, start_line) join misses.
|
|
167
|
+
"from CallNode call, Function caller, FunctionValue calleeVal, Function callee",
|
|
103
168
|
"where",
|
|
104
169
|
" call.getScope() = caller and",
|
|
170
|
+
" callee = calleeVal.getScope() and",
|
|
105
171
|
" (",
|
|
106
172
|
# Direct function / bound-method call: foo() or obj.foo()
|
|
107
173
|
" call = calleeVal.getACall()",
|
|
@@ -115,15 +181,20 @@ class CodeQL:
|
|
|
115
181
|
" )",
|
|
116
182
|
" )",
|
|
117
183
|
"select",
|
|
118
|
-
# --- Caller endpoint --- (joins to PyCallable
|
|
184
|
+
# --- Caller endpoint --- (joins to PyCallable: exact by
|
|
185
|
+
# (file, start_line), else by (file, name) + arity)
|
|
119
186
|
" caller.getLocation().getFile().getAbsolutePath(),",
|
|
120
187
|
" caller.getLocation().getStartLine(),",
|
|
121
188
|
" caller.getQualifiedName(),",
|
|
189
|
+
" caller.getName(),",
|
|
190
|
+
" count(caller.getArg(_)),",
|
|
122
191
|
# --- Callee endpoint --- (file/line may live in a library stub;
|
|
123
192
|
# post-processor classifies as in-source or ghost)
|
|
124
|
-
"
|
|
125
|
-
"
|
|
193
|
+
" callee.getLocation().getFile().getAbsolutePath(),",
|
|
194
|
+
" callee.getLocation().getStartLine(),",
|
|
126
195
|
" calleeVal.getQualifiedName(),",
|
|
196
|
+
" callee.getName(),",
|
|
197
|
+
" count(callee.getArg(_)),",
|
|
127
198
|
# --- Call-site location --- (for PyCallsite augmentation)
|
|
128
199
|
" call.getLocation().getStartLine(),",
|
|
129
200
|
" call.getLocation().getStartColumn(),",
|
|
@@ -149,9 +220,13 @@ class CodeQL:
|
|
|
149
220
|
"caller_file",
|
|
150
221
|
"caller_start_line",
|
|
151
222
|
"caller_qname",
|
|
223
|
+
"caller_name",
|
|
224
|
+
"caller_arity",
|
|
152
225
|
"callee_file",
|
|
153
226
|
"callee_start_line",
|
|
154
227
|
"callee_qname",
|
|
228
|
+
"callee_name",
|
|
229
|
+
"callee_arity",
|
|
155
230
|
"call_start_line",
|
|
156
231
|
"call_start_column",
|
|
157
232
|
"call_end_line",
|
|
@@ -162,24 +237,15 @@ class CodeQL:
|
|
|
162
237
|
return df
|
|
163
238
|
|
|
164
239
|
@staticmethod
|
|
165
|
-
def
|
|
240
|
+
def _build_callable_resolver(
|
|
166
241
|
symbol_table: Dict[str, PyModule],
|
|
167
|
-
) ->
|
|
168
|
-
"""Build
|
|
242
|
+
) -> _CallableResolver:
|
|
243
|
+
"""Build the endpoint -> ``PyCallable`` resolver from Jedi.
|
|
169
244
|
|
|
170
245
|
Paths are resolved so they match CodeQL's ``getAbsolutePath()``
|
|
171
246
|
regardless of symlinks or the current working directory.
|
|
172
247
|
"""
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
index: Dict[Tuple[str, int], PyCallable] = {}
|
|
176
|
-
for c in iter_callables_in_symbol_table(symbol_table):
|
|
177
|
-
try:
|
|
178
|
-
abs_path = str(Path(c.path).resolve())
|
|
179
|
-
except (OSError, RuntimeError):
|
|
180
|
-
abs_path = c.path
|
|
181
|
-
index[(abs_path, c.start_line)] = c
|
|
182
|
-
return index
|
|
248
|
+
return _CallableResolver.from_symbol_table(symbol_table)
|
|
183
249
|
|
|
184
250
|
def _iter_resolved_rows(
|
|
185
251
|
self, symbol_table: Dict[str, PyModule]
|
|
@@ -194,19 +260,27 @@ class CodeQL:
|
|
|
194
260
|
df = self._query_call_edges()
|
|
195
261
|
if df.empty:
|
|
196
262
|
return
|
|
197
|
-
|
|
263
|
+
resolver = self._build_callable_resolver(symbol_table)
|
|
198
264
|
|
|
199
265
|
skipped_unknown_caller = 0
|
|
200
266
|
ghost_callees = 0
|
|
201
267
|
for row in df.itertuples(index=False):
|
|
202
|
-
|
|
203
|
-
|
|
268
|
+
caller = resolver.resolve(
|
|
269
|
+
row.caller_file,
|
|
270
|
+
int(row.caller_start_line),
|
|
271
|
+
row.caller_name,
|
|
272
|
+
int(row.caller_arity),
|
|
273
|
+
)
|
|
204
274
|
if caller is None:
|
|
205
275
|
skipped_unknown_caller += 1
|
|
206
276
|
continue
|
|
207
277
|
|
|
208
|
-
|
|
209
|
-
|
|
278
|
+
callee = resolver.resolve(
|
|
279
|
+
row.callee_file,
|
|
280
|
+
int(row.callee_start_line),
|
|
281
|
+
row.callee_name,
|
|
282
|
+
int(row.callee_arity),
|
|
283
|
+
)
|
|
210
284
|
if callee is not None:
|
|
211
285
|
target_sig = callee.signature
|
|
212
286
|
else:
|
|
@@ -267,20 +341,28 @@ class CodeQL:
|
|
|
267
341
|
Returns:
|
|
268
342
|
Number of ``PyCallsite`` entries augmented.
|
|
269
343
|
"""
|
|
270
|
-
|
|
344
|
+
resolver = self._build_callable_resolver(symbol_table)
|
|
271
345
|
df = self._query_call_edges()
|
|
272
346
|
if df.empty:
|
|
273
347
|
return 0
|
|
274
348
|
|
|
275
349
|
augmented = 0
|
|
276
350
|
for row in df.itertuples(index=False):
|
|
277
|
-
|
|
278
|
-
|
|
351
|
+
caller = resolver.resolve(
|
|
352
|
+
row.caller_file,
|
|
353
|
+
int(row.caller_start_line),
|
|
354
|
+
row.caller_name,
|
|
355
|
+
int(row.caller_arity),
|
|
356
|
+
)
|
|
279
357
|
if caller is None:
|
|
280
358
|
continue
|
|
281
359
|
|
|
282
|
-
|
|
283
|
-
|
|
360
|
+
callee = resolver.resolve(
|
|
361
|
+
row.callee_file,
|
|
362
|
+
int(row.callee_start_line),
|
|
363
|
+
row.callee_name,
|
|
364
|
+
int(row.callee_arity),
|
|
365
|
+
)
|
|
284
366
|
resolved_sig = callee.signature if callee is not None else row.callee_qname
|
|
285
367
|
|
|
286
368
|
call_start = int(row.call_start_line)
|