codeanalyzer-python 0.1.14__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codeanalyzer/__main__.py +99 -6
- codeanalyzer/neo4j/__init__.py +46 -0
- codeanalyzer/neo4j/bolt.py +223 -0
- codeanalyzer/neo4j/catalog.py +245 -0
- codeanalyzer/neo4j/cypher.py +138 -0
- codeanalyzer/neo4j/emit.py +74 -0
- codeanalyzer/neo4j/project.py +322 -0
- codeanalyzer/neo4j/rows.py +176 -0
- codeanalyzer/neo4j/schema.py +39 -0
- codeanalyzer/options/__init__.py +2 -2
- codeanalyzer/options/options.py +20 -0
- codeanalyzer/semantic_analysis/codeql/codeql_analysis.py +109 -27
- codeanalyzer_python-0.2.0.dist-info/METADATA +393 -0
- {codeanalyzer_python-0.1.14.dist-info → codeanalyzer_python-0.2.0.dist-info}/RECORD +18 -10
- {codeanalyzer_python-0.1.14.dist-info → codeanalyzer_python-0.2.0.dist-info}/WHEEL +1 -1
- codeanalyzer_python-0.2.0.dist-info/entry_points.txt +3 -0
- codeanalyzer_python-0.1.14.dist-info/METADATA +0 -392
- codeanalyzer_python-0.1.14.dist-info/entry_points.txt +0 -2
- {codeanalyzer_python-0.1.14.dist-info → codeanalyzer_python-0.2.0.dist-info}/licenses/LICENSE +0 -0
- {codeanalyzer_python-0.1.14.dist-info → codeanalyzer_python-0.2.0.dist-info}/licenses/NOTICE +0 -0
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
################################################################################
|
|
2
|
+
# Copyright IBM Corporation 2025
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
################################################################################
|
|
16
|
+
|
|
17
|
+
"""The snapshot writer: render :class:`GraphRows` to a self-contained ``.cypher``
|
|
18
|
+
script. Running it (e.g. ``cypher-shell < graph.cypher``) rebuilds this project's
|
|
19
|
+
subgraph from scratch — constraints, a scoped wipe of the prior version, then
|
|
20
|
+
batched ``UNWIND … MERGE`` for nodes and edges.
|
|
21
|
+
|
|
22
|
+
This artifact is intentionally NOT incremental: a static script has no view of
|
|
23
|
+
the live DB, so it expresses the full truth. Incremental updates are the bolt
|
|
24
|
+
writer's job.
|
|
25
|
+
"""
|
|
26
|
+
from __future__ import annotations
|
|
27
|
+
|
|
28
|
+
from typing import Dict, List
|
|
29
|
+
|
|
30
|
+
from codeanalyzer.neo4j.rows import (
|
|
31
|
+
EdgeRow,
|
|
32
|
+
GraphRows,
|
|
33
|
+
NodeRow,
|
|
34
|
+
chunk,
|
|
35
|
+
cypher_map,
|
|
36
|
+
cypher_value,
|
|
37
|
+
)
|
|
38
|
+
from codeanalyzer.neo4j.schema import CONSTRAINTS, INDEXES
|
|
39
|
+
|
|
40
|
+
# Size argument handed to ``chunk()`` when splitting node and edge groups into
# UNWIND statements (presumably the maximum rows per statement — confirm
# against ``chunk`` in rows.py).
BATCH = 500
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def render_cypher(rows: GraphRows, app_name: str) -> str:
    """Render ``rows`` as a single Cypher script for the application ``app_name``.

    The script is assembled from four sections, in this order: the schema
    statements from ``CONSTRAINTS`` and ``INDEXES``, the scoped wipe for
    ``app_name``, the node statements and the relationship statements.
    Sections are separated by an empty line and the script ends with a newline.
    """
    schema_section = ["// ── constraints & indexes ──"]
    schema_section += [f"{ddl};" for ddl in CONSTRAINTS]
    schema_section += [f"{ddl};" for ddl in INDEXES]

    wipe_section = [
        "",
        "// ── wipe this project's prior subgraph (externals/packages/decorators are shared) ──",
        _wipe(app_name),
    ]
    node_section = ["", "// ── nodes ──", *_node_statements(rows.nodes)]
    edge_section = ["", "// ── relationships ──", *_edge_statements(rows.edges)]

    # The trailing empty entry makes the joined script end with "\n".
    script_lines = schema_section + wipe_section + node_section + edge_section + [""]
    return "\n".join(script_lines)
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _wipe(app_name: str) -> str:
    """Return the Cypher statement that deletes the prior subgraph of ``app_name``.

    The statement matches the ``PyApplication`` node by name, its modules, and
    every node reachable from those modules over the declaration / method /
    attribute / variable / call-site relationships, then ``DETACH DELETE``s them.
    """
    owned_rels = "|".join(
        (
            "PY_DECLARES",
            "PY_HAS_METHOD",
            "PY_HAS_ATTRIBUTE",
            "PY_DECLARES_VAR",
            "PY_HAS_CALLSITE",
        )
    )
    statement = (
        f"MATCH (a:PyApplication {{name: {cypher_value(app_name)}}})\n"
        "OPTIONAL MATCH (a)-[:PY_HAS_MODULE]->(m:PyModule)\n"
        f"OPTIONAL MATCH (m)-[:{owned_rels}*1..]->(x)\n"
        "DETACH DELETE x, m, a;"
    )
    return statement
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
# ----------------------------------------------------------------------------------------------
|
|
81
|
+
# Nodes — grouped by their full label set + key property, batched into UNWIND lists.
|
|
82
|
+
# ----------------------------------------------------------------------------------------------
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def _node_statements(nodes: List[NodeRow]) -> List[str]:
    """Build the ``UNWIND … MERGE`` statements that create or update ``nodes``.

    Nodes are grouped by their joined label list plus key property, in first-seen
    order. Each group is split with ``chunk(group, BATCH)`` and every chunk
    becomes one statement that merges on the first label and key property, then
    sets the row's properties and any remaining labels.
    """
    by_shape: Dict[str, List[NodeRow]] = {}
    for node in nodes:
        shape = "{}|{}".format(":".join(node.labels), node.key_prop)
        if shape not in by_shape:
            by_shape[shape] = []
        by_shape[shape].append(node)

    statements: List[str] = []
    for members in by_shape.values():
        head = members[0]
        primary_label = head.labels[0]
        other_labels = head.labels[1:]

        # Only the first label takes part in the MERGE; the rest are added via SET.
        label_clause = ""
        if other_labels:
            label_clause = f", n:{':'.join(other_labels)}"
        merge_tail = (
            f"MERGE (n:{primary_label} {{{head.key_prop}: row.k}})\n"
            f"SET n += row.p{label_clause};"
        )

        for batch in chunk(members, BATCH):
            literals = [
                f" {{k: {cypher_value(item.value)}, p: {cypher_map(item.props)}}}"
                for item in batch
            ]
            joined = ",\n".join(literals)
            statements.append(f"UNWIND [\n{joined}\n] AS row\n{merge_tail}")
    return statements
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
# ----------------------------------------------------------------------------------------------
|
|
111
|
+
# Edges — grouped by (type, endpoint labels + key props), batched.
|
|
112
|
+
# ----------------------------------------------------------------------------------------------
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def _edge_statements(edges: List[EdgeRow]) -> List[str]:
    """Build the ``UNWIND … MATCH … MERGE`` statements that create ``edges``.

    Edges are grouped by relationship type plus the label and key property of
    both endpoints, in first-seen order. Each group is split with
    ``chunk(group, BATCH)``; every chunk becomes one statement that matches both
    endpoints by key, merges the relationship and sets its properties.
    """
    by_shape: Dict[str, List[EdgeRow]] = {}
    for edge in edges:
        src, dst = edge.from_ref, edge.to_ref
        shape = f"{edge.type}|{src.label}.{src.key_prop}|{dst.label}.{dst.key_prop}"
        if shape not in by_shape:
            by_shape[shape] = []
        by_shape[shape].append(edge)

    statements: List[str] = []
    for members in by_shape.values():
        head = members[0]
        src, dst = head.from_ref, head.to_ref
        # Every edge in the group shares this tail; only the row literals differ.
        merge_tail = "\n".join(
            [
                f"MATCH (a:{src.label} {{{src.key_prop}: row.f}})",
                f"MATCH (b:{dst.label} {{{dst.key_prop}: row.t}})",
                f"MERGE (a)-[r:{head.type}]->(b)",
                "SET r += row.p;",
            ]
        )
        for batch in chunk(members, BATCH):
            literals = []
            for item in batch:
                from_lit = cypher_value(item.from_ref.value)
                to_lit = cypher_value(item.to_ref.value)
                props_lit = cypher_map(item.props)
                literals.append(f" {{f: {from_lit}, t: {to_lit}, p: {props_lit}}}")
            joined = ",\n".join(literals)
            statements.append(f"UNWIND [\n{joined}\n] AS row\n{merge_tail}")
    return statements
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
################################################################################
|
|
2
|
+
# Copyright IBM Corporation 2025
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
################################################################################
|
|
16
|
+
|
|
17
|
+
"""The facade between the CLI and the Neo4j backend. Two entry points:
|
|
18
|
+
|
|
19
|
+
- :func:`emit_schema` — serialize the static, version-stamped schema contract
|
|
20
|
+
(``schema.json``). Needs no analyzed project.
|
|
21
|
+
- :func:`emit_neo4j` — project a :class:`PyApplication` to a graph and either
|
|
22
|
+
write a ``graph.cypher`` snapshot or push it to a live Neo4j over Bolt.
|
|
23
|
+
"""
|
|
24
|
+
from __future__ import annotations
|
|
25
|
+
|
|
26
|
+
import json
|
|
27
|
+
from pathlib import Path
|
|
28
|
+
from typing import Optional
|
|
29
|
+
|
|
30
|
+
from codeanalyzer.neo4j.bolt import BoltConfig, bolt_writer
|
|
31
|
+
from codeanalyzer.neo4j.catalog import build_schema_document
|
|
32
|
+
from codeanalyzer.neo4j.cypher import render_cypher
|
|
33
|
+
from codeanalyzer.neo4j.project import project
|
|
34
|
+
from codeanalyzer.options import AnalysisOptions
|
|
35
|
+
from codeanalyzer.schema import PyApplication
|
|
36
|
+
from codeanalyzer.utils import logger
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def emit_schema(output: Optional[Path]) -> None:
    """Serialize the schema document from ``build_schema_document()`` as JSON.

    When ``output`` is ``None`` the JSON is printed to stdout. Otherwise
    ``output`` is treated as a directory: it is created if missing and the
    document is written to ``schema.json`` inside it.
    """
    rendered = f"{json.dumps(build_schema_document(), indent=2)}\n"
    if output is not None:
        output.mkdir(parents=True, exist_ok=True)
        schema_path = output / "schema.json"
        schema_path.write_text(rendered)
        logger.info(f"Neo4j schema written to {schema_path}")
    else:
        # ``rendered`` already ends with a newline, so suppress print's own.
        print(rendered, end="")
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def emit_neo4j(app: PyApplication, options: AnalysisOptions) -> None:
    """Project ``app`` to graph rows and write them out.

    The application name is ``options.app_name`` when set, otherwise the name of
    the resolved ``options.input`` path.

    - When ``options.neo4j_uri`` is set, the rows are handed to ``bolt_writer``
      together with a ``BoltConfig`` built from the ``neo4j_*`` options.
    - Otherwise a ``graph.cypher`` script is written into ``options.output``
      (or the current working directory when no output is configured).
    """
    app_name = options.app_name or Path(options.input).resolve().name
    rows = project(app, app_name)

    if options.neo4j_uri:
        cfg = BoltConfig(
            uri=options.neo4j_uri,
            user=options.neo4j_user,
            password=options.neo4j_password,
            database=options.neo4j_database,
        )
        # A full run (no single-file restriction) makes orphan pruning safe.
        full_run = options.file_name is None
        bolt_writer(rows, cfg, full_run)
        return

    out_dir = options.output if options.output is not None else Path.cwd()
    out_dir.mkdir(parents=True, exist_ok=True)
    target = out_dir / "graph.cypher"
    # The rendered script always contains non-ASCII characters (the section
    # comments use box-drawing rules), so pin the encoding instead of relying on
    # the locale default, which cannot encode them on e.g. cp1252 systems.
    target.write_text(render_cypher(rows, app_name), encoding="utf-8")
    logger.info(f"Neo4j graph written to {target}")
|
|
@@ -0,0 +1,322 @@
|
|
|
1
|
+
################################################################################
|
|
2
|
+
# Copyright IBM Corporation 2025
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
################################################################################
|
|
16
|
+
|
|
17
|
+
"""``project()`` — the pure projection from the canonical :class:`PyApplication`
|
|
18
|
+
IR to graph rows. It walks the same recursive symbol table the call-graph builder
|
|
19
|
+
walks, but instead of collecting callables it emits nodes + edges. No I/O: the
|
|
20
|
+
writers (cypher snapshot / bolt incremental) consume the returned
|
|
21
|
+
:class:`GraphRows`.
|
|
22
|
+
|
|
23
|
+
Modelling decisions (mirror of the TypeScript backend):
|
|
24
|
+
- signature-keyed declarations (PyClass, PyCallable) carry a shared ``:PySymbol``
|
|
25
|
+
label (the global-identity / MERGE key).
|
|
26
|
+
- call sites, decorators, class attributes and variables are first-class nodes.
|
|
27
|
+
- call-graph endpoints absent from the symbol table become ``:PyExternal`` ghost
|
|
28
|
+
nodes, so RPC / third-party / framework edges are preserved (matching the
|
|
29
|
+
analyzer's own ghost-node behaviour).
|
|
30
|
+
- every project-owned node carries an internal ``_module`` provenance prop, so
|
|
31
|
+
the incremental writer can delete exactly what a re-analyzed module emitted.
|
|
32
|
+
"""
|
|
33
|
+
from __future__ import annotations
|
|
34
|
+
|
|
35
|
+
import json
|
|
36
|
+
from pathlib import Path
|
|
37
|
+
from typing import Any, List, Optional
|
|
38
|
+
|
|
39
|
+
from codeanalyzer.neo4j.catalog import SCHEMA_VERSION
|
|
40
|
+
from codeanalyzer.neo4j.rows import GraphRows, NodeRef, Props, RowBuilder, prune
|
|
41
|
+
from codeanalyzer.schema import (
|
|
42
|
+
PyApplication,
|
|
43
|
+
PyCallable,
|
|
44
|
+
PyClass,
|
|
45
|
+
PyClassAttribute,
|
|
46
|
+
PyComment,
|
|
47
|
+
PyModule,
|
|
48
|
+
PyVariableDeclaration,
|
|
49
|
+
)
|
|
50
|
+
from codeanalyzer.schema.py_schema import PyCallsite
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def project(app: PyApplication, app_name: str) -> GraphRows:
    """Project ``app`` into graph rows rooted at a ``PyApplication`` node.

    One ``PyModule`` node is emitted per symbol-table entry, linked to the
    application with ``PY_HAS_MODULE``, and the module body is projected beneath
    it. Afterwards every call-graph edge becomes a ``PY_CALLS`` relationship
    between the endpoints resolved by ``_call_endpoint``.
    """
    builder = RowBuilder()
    root = builder.node(
        ["PyApplication"], "name", app_name, {"schema_version": SCHEMA_VERSION}
    )

    for file_key, module in app.symbol_table.items():
        module_ref = builder.node(
            ["PyModule"], "file_key", file_key, _module_props(module, file_key)
        )
        builder.edge("PY_HAS_MODULE", root, module_ref)
        _project_module_body(builder, file_key, module_ref, module)

    # Call-graph edges come last, so endpoints declared in the symbol table have
    # already been emitted by the time _call_endpoint looks them up.
    for call in app.call_graph:
        caller = _call_endpoint(builder, call.source)
        callee = _call_endpoint(builder, call.target)
        edge_props = _call_edge_props(call.weight, list(call.provenance or []))
        builder.edge("PY_CALLS", caller, callee, edge_props)

    return builder.finish()
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def _sym(signature: str) -> NodeRef:
    """Return a reference to the ``PySymbol`` node keyed by ``signature``."""
    symbol_ref = NodeRef("PySymbol", "signature", signature)
    return symbol_ref
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def _call_endpoint(b: RowBuilder, signature: str) -> NodeRef:
    """Resolve a call-graph endpoint to a node reference.

    When the builder already has ``signature`` as a key, a plain ``PySymbol``
    reference is returned. Otherwise a ``PySymbol``/``PyExternal`` node is
    emitted on demand, named after the last dotted component of the signature.
    """
    if not b.has_key(signature):
        # rpartition yields the whole string when there is no "." to split on.
        short_name = signature.rpartition(".")[2]
        return b.node(
            ["PySymbol", "PyExternal"], "signature", signature, {"name": short_name}
        )
    return _sym(signature)
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
# ----------------------------------------------------------------------------------------------
|
|
87
|
+
# Module body
|
|
88
|
+
# ----------------------------------------------------------------------------------------------
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def _project_module_body(b: RowBuilder, file_key: str, mod_ref: NodeRef, mod: PyModule) -> None:
    """Project everything a module declares beneath ``mod_ref``.

    Functions and classes are attached with ``PY_DECLARES``, module-level
    variables are projected with the file key as their owner id, and finally
    the module's imports are projected.
    """
    declares = "PY_DECLARES"

    top_level_functions = mod.functions or {}
    for function in top_level_functions.values():
        _project_callable(b, file_key, mod_ref, declares, function)

    top_level_classes = mod.classes or {}
    for klass in top_level_classes.values():
        _project_class(b, file_key, mod_ref, declares, klass)

    for variable in mod.variables or []:
        # Module-level variables use the file key as their owner id.
        _project_variable(b, file_key, mod_ref, file_key, variable)

    _project_imports(b, mod_ref, mod)
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def _project_imports(b: RowBuilder, mod_ref: NodeRef, mod: PyModule) -> None:
    """Emit one ``PY_IMPORTS`` edge per imported module to a ``PyPackage`` node.

    All bindings for the same imported module are collapsed into a single edge
    that carries the sorted imported names and aliases. Imports without a
    module name are skipped.
    """
    # imported module -> (names, aliases), kept in first-seen order
    bindings: dict = {}
    for imp in mod.imports or []:
        target = imp.module
        if target:
            names, aliases = bindings.setdefault(target, (set(), set()))
            if imp.name:
                names.add(imp.name)
            if imp.alias:
                aliases.add(imp.alias)
        # else: relative `from . import x` — no resolvable package

    for target, (names, aliases) in bindings.items():
        package_ref = b.node(["PyPackage"], "name", target, {})
        edge_props = prune(
            {
                "imported_names": sorted(names) or None,
                "aliases": sorted(aliases) or None,
            }
        )
        b.edge("PY_IMPORTS", mod_ref, package_ref, edge_props)
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
# ----------------------------------------------------------------------------------------------
|
|
129
|
+
# Declarations
|
|
130
|
+
# ----------------------------------------------------------------------------------------------
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def _project_class(
    b: RowBuilder, file_key: str, parent: NodeRef, parent_rel: str, cl: PyClass
) -> None:
    """Emit the ``PySymbol``/``PyClass`` node for ``cl`` and everything inside it.

    The class is attached to ``parent`` with ``parent_rel``. Base classes become
    ``PY_EXTENDS`` edges, methods hang off ``PY_HAS_METHOD``, attributes are
    projected with the class signature as owner, and inner classes recurse with
    ``PY_DECLARES``.
    """
    class_ref = b.node(
        ["PySymbol", "PyClass"], "signature", cl.signature, _class_props(cl, file_key)
    )
    b.edge(parent_rel, parent, class_ref)

    for base_name in cl.base_classes or []:
        b.edge_to_symbol("PY_EXTENDS", class_ref, base_name)

    methods = cl.methods or {}
    for method in methods.values():
        _project_callable(b, file_key, class_ref, "PY_HAS_METHOD", method)

    attributes = cl.attributes or {}
    for attribute in attributes.values():
        _project_attribute(b, file_key, class_ref, cl.signature, attribute)

    nested = cl.inner_classes or {}
    for inner in nested.values():
        _project_class(b, file_key, class_ref, "PY_DECLARES", inner)
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def _project_callable(
    b: RowBuilder, file_key: str, owner: NodeRef, owner_rel: str, c: PyCallable
) -> None:
    """Emit the ``PySymbol``/``PyCallable`` node for ``c`` and everything inside it.

    The callable is attached to ``owner`` with ``owner_rel``. Its decorators,
    call sites, local variables, inner callables and inner classes are then
    projected beneath it, in that order.
    """
    callable_ref = b.node(
        ["PySymbol", "PyCallable"], "signature", c.signature, _callable_props(c, file_key)
    )
    b.edge(owner_rel, owner, callable_ref)

    for decorator in c.decorators or []:
        _project_decorator(b, callable_ref, decorator)

    for site in c.call_sites or []:
        # The id is built from the relative file key (a call site lives in its
        # callable's file) plus the source span, so ids stay portable.
        span = f"{site.start_line}:{site.start_column}-{site.end_line}:{site.end_column}"
        site_ref = b.node(
            ["PyCallSite"], "id", f"{file_key}#{span}", _call_site_props(site, file_key)
        )
        b.edge("PY_HAS_CALLSITE", callable_ref, site_ref)
        resolved = site.callee_signature
        if resolved:
            b.edge_to_symbol("PY_RESOLVES_TO", site_ref, resolved)

    for local in c.local_variables or []:
        _project_variable(b, file_key, callable_ref, c.signature, local)

    nested_callables = c.inner_callables or {}
    for inner in nested_callables.values():
        _project_callable(b, file_key, callable_ref, "PY_DECLARES", inner)

    nested_classes = c.inner_classes or {}
    for inner_class in nested_classes.values():
        _project_class(b, file_key, callable_ref, "PY_DECLARES", inner_class)
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def _project_attribute(
    b: RowBuilder, file_key: str, owner: NodeRef, owner_sig: str, a: PyClassAttribute
) -> None:
    """Emit a ``PyAttribute`` node for ``a`` and link it to ``owner``.

    The node id is the owner signature and the attribute name joined by a dot.
    """
    qualified_id = ".".join((owner_sig, f"{a.name}"))
    attribute_ref = b.node(
        ["PyAttribute"], "id", qualified_id, _attribute_props(a, qualified_id, file_key)
    )
    b.edge("PY_HAS_ATTRIBUTE", owner, attribute_ref)
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
def _project_variable(
    b: RowBuilder, file_key: str, owner: NodeRef, owner_id: str, v: PyVariableDeclaration
) -> None:
    """Emit a ``PyVariable`` node for ``v`` and link it to ``owner``.

    The node id combines the owner id, the variable name and its start line.
    """
    variable_id = "{}#{}@{}".format(owner_id, v.name, v.start_line)
    variable_ref = b.node(
        ["PyVariable"], "id", variable_id, _variable_props(v, variable_id, file_key)
    )
    b.edge("PY_DECLARES_VAR", owner, variable_ref)
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def _project_decorator(b: RowBuilder, on: NodeRef, decorator: str) -> None:
    """Emit a ``PyDecorator`` node keyed by name and link ``on`` to it."""
    decorator_props = {"name": decorator}
    decorator_ref = b.node(["PyDecorator"], "name", decorator, decorator_props)
    b.edge("PY_DECORATED_BY", on, decorator_ref)
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
# ----------------------------------------------------------------------------------------------
|
|
197
|
+
# Property flattening
|
|
198
|
+
# ----------------------------------------------------------------------------------------------
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
def _module_props(mod: PyModule, file_key: str) -> Props:
    """Flatten a module into node properties, tagged with its ``_module`` key."""
    copied_fields = ("module_name", "content_hash", "last_modified", "file_size")
    props = {field: getattr(mod, field) for field in copied_fields}
    props["_module"] = file_key
    return prune(props)
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
def _class_props(cl: PyClass, file_key: str) -> Props:
    """Flatten a class into node properties, tagged with its ``_module`` key."""
    props = {"name": cl.name, "code": cl.code}
    props["base_classes"] = list(cl.base_classes or [])
    props["docstring"] = _docstring_of(cl.comments)
    props["start_line"] = cl.start_line
    props["end_line"] = cl.end_line
    props["_module"] = file_key
    return prune(props)
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
def _callable_props(c: PyCallable, file_key: str) -> Props:
    """Flatten a callable into node properties, tagged with its ``_module`` key.

    Parameters and accessed symbols are stored as JSON strings
    (``parameters_json`` / ``accessed_symbols_json``).
    """
    copied_fields = (
        "name",
        "path",
        "return_type",
        "cyclomatic_complexity",
        "code",
        "code_start_line",
        "start_line",
        "end_line",
    )
    props = {field: getattr(c, field) for field in copied_fields}
    props["docstring"] = _docstring_of(c.comments)
    props["decorators"] = list(c.decorators or [])
    props["parameters_json"] = _stringify_if(c.parameters)
    props["accessed_symbols_json"] = _stringify_if(c.accessed_symbols)
    props["_module"] = file_key
    return prune(props)
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
def _attribute_props(a: PyClassAttribute, attr_id: str, file_key: str) -> Props:
    """Flatten a class attribute into node properties, tagged with ``_module``."""
    props = {"id": attr_id, "name": a.name, "type": a.type}
    props["docstring"] = _docstring_of(a.comments)
    props["start_line"] = a.start_line
    props["end_line"] = a.end_line
    props["_module"] = file_key
    return prune(props)
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
def _variable_props(v: PyVariableDeclaration, var_id: str, file_key: str) -> Props:
    """Flatten a variable declaration into node properties, tagged with ``_module``."""
    copied_fields = ("name", "type", "initializer", "scope", "start_line", "end_line")
    props = {"id": var_id}
    for field in copied_fields:
        props[field] = getattr(v, field)
    props["_module"] = file_key
    return prune(props)
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
def _call_site_props(s: PyCallsite, file_key: str) -> Props:
    """Flatten a call site into node properties, tagged with ``_module``.

    The ``id`` is the file key plus the call site's source span.
    """
    span = f"{s.start_line}:{s.start_column}-{s.end_line}:{s.end_column}"
    props = {"id": f"{file_key}#{span}"}
    for field in ("method_name", "receiver_expr", "receiver_type"):
        props[field] = getattr(s, field)
    props["argument_types"] = list(s.argument_types or [])
    trailing_fields = (
        "return_type",
        "callee_signature",
        "is_constructor_call",
        "start_line",
        "start_column",
        "end_line",
        "end_column",
    )
    for field in trailing_fields:
        props[field] = getattr(s, field)
    props["_module"] = file_key
    return prune(props)
|
|
295
|
+
|
|
296
|
+
|
|
297
|
+
def _call_edge_props(weight: int, provenance: List[str]) -> Props:
    """Build the properties of a ``PY_CALLS`` edge from its weight and provenance."""
    edge_props = {"weight": weight}
    # Copy the list so the stored properties do not alias the caller's list.
    edge_props["provenance"] = list(provenance)
    return prune(edge_props)
|
|
299
|
+
|
|
300
|
+
|
|
301
|
+
def _docstring_of(comments: Optional[List[PyComment]]) -> Optional[str]:
|
|
302
|
+
docs = [c.content for c in (comments or []) if c.is_docstring]
|
|
303
|
+
return "\n".join(docs) if docs else None
|
|
304
|
+
|
|
305
|
+
|
|
306
|
+
def _stringify_if(value: Any) -> Optional[str]:
|
|
307
|
+
"""JSON-encode a list/dict of pydantic models, or None when empty."""
|
|
308
|
+
if value is None:
|
|
309
|
+
return None
|
|
310
|
+
if isinstance(value, (list, dict)) and len(value) == 0:
|
|
311
|
+
return None
|
|
312
|
+
return json.dumps(value, default=_jsonable, sort_keys=True)
|
|
313
|
+
|
|
314
|
+
|
|
315
|
+
def _jsonable(o: Any) -> Any:
|
|
316
|
+
if hasattr(o, "model_dump"):
|
|
317
|
+
return o.model_dump()
|
|
318
|
+
if hasattr(o, "dict"):
|
|
319
|
+
return o.dict()
|
|
320
|
+
if isinstance(o, Path):
|
|
321
|
+
return str(o)
|
|
322
|
+
return str(o)
|