codeanalyzer-python 0.1.13__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,245 @@
1
+ ################################################################################
2
+ # Copyright IBM Corporation 2025
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ ################################################################################
16
+
17
+ """The declarative Neo4j schema catalog — the single in-repo source of truth for
18
+ the graph contract (node labels, their keys and typed properties, relationship
19
+ types and their endpoints). ``--emit schema`` serializes this (with the DDL from
20
+ :mod:`codeanalyzer.neo4j.schema`) to a machine-readable ``schema.json``, and the
21
+ conformance test (``test/test_neo4j_schema.py``) asserts the real emitter never
22
+ produces a label / relationship / property that isn't declared here — so this
23
+ file cannot silently drift from :mod:`codeanalyzer.neo4j.project`.
24
+
25
+ SCHEMA_VERSION is the contract version: bump MAJOR on a breaking change
26
+ (renamed/removed label, relationship or key), MINOR on an additive change (new
27
+ label/rel/property). It is stamped onto the ``:PyApplication`` node of every
28
+ emitted graph so any consumer can detect a producer/consumer mismatch at runtime.
29
+ """
30
+ from __future__ import annotations
31
+
32
+ from dataclasses import dataclass, field
33
+ from typing import Dict, List
34
+
35
+ from codeanalyzer.neo4j.schema import CONSTRAINTS, INDEXES
36
+
37
+ SCHEMA_VERSION = "1.0.0"
38
+
39
+ # PropType ∈ {"string", "integer", "float", "boolean", "string[]", "integer[]"}.
40
+
41
+
42
@dataclass
class NodeLabel:
    """One node-label entry of the schema catalog.

    ``properties`` maps each declared property name to the name of its
    property type (e.g. ``"string"``, ``"integer"``, ``"string[]"``).
    """

    # The specific label; it is also the key of this entry in the catalog.
    label: str
    # The label the uniqueness constraint / MERGE is declared on.
    merge_label: str
    # Name of the key property for this label.
    key: str
    # Property name -> property type name.
    properties: Dict[str, str]
48
+
49
+
50
@dataclass
class RelType:
    """One relationship-type entry of the schema catalog.

    ``from_labels`` and ``to_labels`` list the node labels declared as the
    start and end of the relationship; ``properties`` maps each relationship
    property name to its property type name and defaults to an empty dict.
    """

    # Relationship type name, e.g. "PY_CALLS".
    type: str
    # Node labels declared as the source endpoint.
    from_labels: List[str]
    # Node labels declared as the target endpoint.
    to_labels: List[str]
    # Property name -> property type name; a fresh dict per instance.
    properties: Dict[str, str] = field(default_factory=dict)
56
+
57
+
58
# Labels layered onto a node in addition to its primary/specific label.
# Currently none are declared.
MARKER_LABELS: List[str] = []

# Source-span properties shared by several node labels below.
_SPAN = {"start_line": "integer", "end_line": "integer"}


# The declared node labels. The order of entries and of the property keys is
# the order in which they appear in the emitted schema document.
NODE_LABELS: List[NodeLabel] = [
    NodeLabel(
        label="PyApplication",
        merge_label="PyApplication",
        key="name",
        properties={"name": "string", "schema_version": "string"},
    ),
    NodeLabel(
        label="PyModule",
        merge_label="PyModule",
        key="file_key",
        properties={
            "file_key": "string",
            "module_name": "string",
            "content_hash": "string",
            "last_modified": "float",
            "file_size": "integer",
            "_module": "string",
        },
    ),
    # PyClass, PyCallable and PyExternal all merge on the shared PySymbol label.
    NodeLabel(
        label="PyClass",
        merge_label="PySymbol",
        key="signature",
        properties={
            "signature": "string",
            "name": "string",
            "code": "string",
            "base_classes": "string[]",
            "docstring": "string",
            **_SPAN,
            "_module": "string",
        },
    ),
    NodeLabel(
        label="PyCallable",
        merge_label="PySymbol",
        key="signature",
        properties={
            "signature": "string",
            "name": "string",
            "path": "string",
            "return_type": "string",
            "cyclomatic_complexity": "integer",
            "code": "string",
            "code_start_line": "integer",
            **_SPAN,
            "docstring": "string",
            "decorators": "string[]",
            "parameters_json": "string",
            "accessed_symbols_json": "string",
            "_module": "string",
        },
    ),
    NodeLabel(
        label="PyExternal",
        merge_label="PySymbol",
        key="signature",
        properties={"signature": "string", "name": "string"},
    ),
    NodeLabel(
        label="PyPackage",
        merge_label="PyPackage",
        key="name",
        properties={"name": "string"},
    ),
    NodeLabel(
        label="PyDecorator",
        merge_label="PyDecorator",
        key="name",
        properties={"name": "string"},
    ),
    NodeLabel(
        label="PyCallSite",
        merge_label="PyCallSite",
        key="id",
        properties={
            "id": "string",
            "method_name": "string",
            "receiver_expr": "string",
            "receiver_type": "string",
            "argument_types": "string[]",
            "return_type": "string",
            "callee_signature": "string",
            "is_constructor_call": "boolean",
            "start_line": "integer",
            "start_column": "integer",
            "end_line": "integer",
            "end_column": "integer",
            "_module": "string",
        },
    ),
    NodeLabel(
        label="PyAttribute",
        merge_label="PyAttribute",
        key="id",
        properties={
            "id": "string",
            "name": "string",
            "type": "string",
            "docstring": "string",
            **_SPAN,
            "_module": "string",
        },
    ),
    NodeLabel(
        label="PyVariable",
        merge_label="PyVariable",
        key="id",
        properties={
            "id": "string",
            "name": "string",
            "type": "string",
            "initializer": "string",
            "scope": "string",
            **_SPAN,
            "_module": "string",
        },
    ),
]
179
+
180
# Labels that can be the target of a PY_DECLARES relationship.
_DECL_TARGETS = ["PyClass", "PyCallable"]


# The declared relationship types with their endpoint labels and, where
# present, their typed properties.
REL_TYPES: List[RelType] = [
    RelType(
        type="PY_HAS_MODULE",
        from_labels=["PyApplication"],
        to_labels=["PyModule"],
    ),
    RelType(
        type="PY_DECLARES",
        from_labels=["PyModule", "PyClass", "PyCallable"],
        to_labels=_DECL_TARGETS,
    ),
    RelType(
        type="PY_HAS_METHOD",
        from_labels=["PyClass"],
        to_labels=["PyCallable"],
    ),
    RelType(
        type="PY_HAS_ATTRIBUTE",
        from_labels=["PyClass"],
        to_labels=["PyAttribute"],
    ),
    RelType(
        type="PY_DECLARES_VAR",
        from_labels=["PyModule", "PyCallable"],
        to_labels=["PyVariable"],
    ),
    RelType(
        type="PY_HAS_CALLSITE",
        from_labels=["PyCallable"],
        to_labels=["PyCallSite"],
    ),
    RelType(
        type="PY_RESOLVES_TO",
        from_labels=["PyCallSite"],
        to_labels=["PyCallable", "PyExternal"],
    ),
    RelType(
        type="PY_CALLS",
        from_labels=["PyCallable", "PyExternal"],
        to_labels=["PyCallable", "PyExternal"],
        properties={"weight": "integer", "provenance": "string[]"},
    ),
    RelType(
        type="PY_EXTENDS",
        from_labels=["PyClass"],
        to_labels=["PyClass"],
    ),
    RelType(
        type="PY_IMPORTS",
        from_labels=["PyModule"],
        to_labels=["PyPackage"],
        properties={"imported_names": "string[]", "aliases": "string[]"},
    ),
    RelType(
        type="PY_DECORATED_BY",
        from_labels=["PyCallable"],
        to_labels=["PyDecorator"],
    ),
]
206
+
207
+
208
@dataclass
class SchemaDocument:
    """Typed shape of the schema document.

    The field names match the top-level keys of the dict returned by
    ``build_schema_document``.
    NOTE(review): no use of this class is visible in this module — confirm
    whether it is consumed elsewhere or is documentation-only.
    """

    # Contract version string (see SCHEMA_VERSION).
    schema_version: str
    # Name of the tool that produced the document.
    generator: str
    # Labels layered onto nodes in addition to their primary label.
    marker_labels: List[str]
    # Declared node labels.
    node_labels: List[NodeLabel]
    # Declared relationship types.
    relationship_types: List[RelType]
    # Constraint DDL statements.
    constraints: List[str]
    # Index DDL statements.
    indexes: List[str]
217
+
218
+
219
def build_schema_document() -> dict:
    """Build the full machine-readable schema document emitted by ``--emit schema``.

    Returns:
        A dict with the keys ``schema_version``, ``generator``,
        ``marker_labels``, ``node_labels``, ``relationship_types``,
        ``constraints`` and ``indexes``.

    Every list and dict in the result is a fresh copy, so a caller may
    post-process the document without mutating the module-level catalog
    (``NODE_LABELS`` / ``REL_TYPES``) it was built from.
    """
    node_labels = [
        {
            "label": node.label,
            "merge_label": node.merge_label,
            "key": node.key,
            # Copy: the catalog entry owns the original dict.
            "properties": dict(node.properties),
        }
        for node in NODE_LABELS
    ]
    relationship_types = [
        {
            "type": rel.type,
            # Copies: endpoint lists may be shared between catalog entries.
            "from": list(rel.from_labels),
            "to": list(rel.to_labels),
            "properties": dict(rel.properties),
        }
        for rel in REL_TYPES
    ]
    return {
        "schema_version": SCHEMA_VERSION,
        "generator": "codeanalyzer-python",
        "marker_labels": list(MARKER_LABELS),
        "node_labels": node_labels,
        "relationship_types": relationship_types,
        "constraints": list(CONSTRAINTS),
        "indexes": list(INDEXES),
    }
@@ -0,0 +1,138 @@
1
+ ################################################################################
2
+ # Copyright IBM Corporation 2025
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ ################################################################################
16
+
17
+ """The snapshot writer: render :class:`GraphRows` to a self-contained ``.cypher``
18
+ script. Running it (e.g. ``cypher-shell < graph.cypher``) rebuilds this project's
19
+ subgraph from scratch — constraints, a scoped wipe of the prior version, then
20
+ batched ``UNWIND … MERGE`` for nodes and edges.
21
+
22
+ This artifact is intentionally NOT incremental: a static script has no view of
23
+ the live DB, so it expresses the full truth. Incremental updates are the bolt
24
+ writer's job.
25
+ """
26
+ from __future__ import annotations
27
+
28
+ from typing import Dict, List
29
+
30
+ from codeanalyzer.neo4j.rows import (
31
+ EdgeRow,
32
+ GraphRows,
33
+ NodeRow,
34
+ chunk,
35
+ cypher_map,
36
+ cypher_value,
37
+ )
38
+ from codeanalyzer.neo4j.schema import CONSTRAINTS, INDEXES
39
+
40
+ BATCH = 500
41
+
42
+
43
def render_cypher(rows: GraphRows, app_name: str) -> str:
    """Render ``rows`` as one Cypher script and return it as a string.

    The script consists of, in order: the constraint and index statements,
    the wipe of the prior subgraph of ``app_name``, the node statements and
    the relationship statements. Sections are separated by an empty line and
    the script ends with a newline.
    """
    script: List[str] = [
        "// ── constraints & indexes ──",
        *(f"{ddl};" for ddl in CONSTRAINTS),
        *(f"{ddl};" for ddl in INDEXES),
        "",
        "// ── wipe this project's prior subgraph (externals/packages/decorators are shared) ──",
        _wipe(app_name),
        "",
        "// ── nodes ──",
        *_node_statements(rows.nodes),
        "",
        "// ── relationships ──",
        *_edge_statements(rows.edges),
        # Trailing empty entry so the joined script ends with a newline.
        "",
    ]
    return "\n".join(script)
66
+
67
+
68
def _wipe(app_name: str) -> str:
    """Return the Cypher statement that deletes the application's subgraph.

    The statement matches the ``PyApplication`` node named ``app_name``, its
    modules, and everything reachable from those modules over the
    relationship types listed below, then ``DETACH DELETE``s all of them.
    """
    app_literal = cypher_value(app_name)
    # Relationship types followed (one or more hops) from each module.
    owned = "PY_DECLARES|PY_HAS_METHOD|PY_HAS_ATTRIBUTE|PY_DECLARES_VAR|PY_HAS_CALLSITE"
    return (
        f"MATCH (a:PyApplication {{name: {app_literal}}})\n"
        "OPTIONAL MATCH (a)-[:PY_HAS_MODULE]->(m:PyModule)\n"
        f"OPTIONAL MATCH (m)-[:{owned}*1..]->(x)\n"
        "DETACH DELETE x, m, a;"
    )
78
+
79
+
80
+ # ----------------------------------------------------------------------------------------------
81
+ # Nodes — grouped by their full label set + key property, batched into UNWIND lists.
82
+ # ----------------------------------------------------------------------------------------------
83
+
84
+
85
def _node_statements(nodes: List[NodeRow]) -> List[str]:
    """Return the batched ``UNWIND … MERGE`` statements for ``nodes``.

    Nodes are grouped by their full label list plus key property, in
    first-seen order. Each group is split into batches of ``BATCH`` rows and
    every batch becomes one statement that MERGEs on the first label and the
    key property, sets the row's properties and adds any remaining labels.
    """
    buckets: Dict[str, List[NodeRow]] = {}
    for node in nodes:
        label_path = ":".join(node.labels)
        buckets.setdefault(f"{label_path}|{node.key_prop}", []).append(node)

    statements: List[str] = []
    for members in buckets.values():
        # All members share labels and key property, so the first one is
        # representative for the whole group.
        head = members[0]
        merge_label = head.labels[0]
        extra_labels = head.labels[1:]
        label_suffix = ""
        if extra_labels:
            label_suffix = f", n:{':'.join(extra_labels)}"
        merge_clause = f"MERGE (n:{merge_label} {{{head.key_prop}: row.k}})\n"
        set_clause = f"SET n += row.p{label_suffix};"
        for batch in chunk(members, BATCH):
            literals = [
                f" {{k: {cypher_value(item.value)}, p: {cypher_map(item.props)}}}"
                for item in batch
            ]
            body = ",\n".join(literals)
            statements.append(f"UNWIND [\n{body}\n] AS row\n" + merge_clause + set_clause)
    return statements
108
+
109
+
110
+ # ----------------------------------------------------------------------------------------------
111
+ # Edges — grouped by (type, endpoint labels + key props), batched.
112
+ # ----------------------------------------------------------------------------------------------
113
+
114
+
115
def _edge_statements(edges: List[EdgeRow]) -> List[str]:
    """Return the batched ``UNWIND … MATCH … MERGE`` statements for ``edges``.

    Edges are grouped by relationship type together with the label and key
    property of both endpoints, in first-seen order. Each group is split into
    batches of ``BATCH`` rows; every batch becomes one statement that matches
    both endpoints by key, MERGEs the relationship and sets its properties.
    """
    buckets: Dict[str, List[EdgeRow]] = {}
    for edge in edges:
        src, dst = edge.from_ref, edge.to_ref
        bucket_id = f"{edge.type}|{src.label}.{src.key_prop}|{dst.label}.{dst.key_prop}"
        buckets.setdefault(bucket_id, []).append(edge)

    statements: List[str] = []
    for members in buckets.values():
        # The first edge is representative: the group shares type and
        # endpoint label/key.
        head = members[0]
        src, dst = head.from_ref, head.to_ref
        tail = (
            f"MATCH (a:{src.label} {{{src.key_prop}: row.f}})\n"
            f"MATCH (b:{dst.label} {{{dst.key_prop}: row.t}})\n"
            f"MERGE (a)-[r:{head.type}]->(b)\n"
            f"SET r += row.p;"
        )
        for batch in chunk(members, BATCH):
            literals = [
                f" {{f: {cypher_value(item.from_ref.value)}, t: {cypher_value(item.to_ref.value)}, "
                f"p: {cypher_map(item.props)}}}"
                for item in batch
            ]
            body = ",\n".join(literals)
            statements.append(f"UNWIND [\n{body}\n] AS row\n" + tail)
    return statements
@@ -0,0 +1,74 @@
1
+ ################################################################################
2
+ # Copyright IBM Corporation 2025
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ ################################################################################
16
+
17
+ """The facade between the CLI and the Neo4j backend. Two entry points:
18
+
19
+ - :func:`emit_schema` — serialize the static, version-stamped schema contract
20
+ (``schema.json``). Needs no analyzed project.
21
+ - :func:`emit_neo4j` — project a :class:`PyApplication` to a graph and either
22
+ write a ``graph.cypher`` snapshot or push it to a live Neo4j over Bolt.
23
+ """
24
+ from __future__ import annotations
25
+
26
+ import json
27
+ from pathlib import Path
28
+ from typing import Optional
29
+
30
+ from codeanalyzer.neo4j.bolt import BoltConfig, bolt_writer
31
+ from codeanalyzer.neo4j.catalog import build_schema_document
32
+ from codeanalyzer.neo4j.cypher import render_cypher
33
+ from codeanalyzer.neo4j.project import project
34
+ from codeanalyzer.options import AnalysisOptions
35
+ from codeanalyzer.schema import PyApplication
36
+ from codeanalyzer.utils import logger
37
+
38
+
39
def emit_schema(output: Optional[Path]) -> None:
    """Emit the Neo4j schema contract as JSON.

    The document comes from ``build_schema_document`` and does not depend on
    any analyzed project.

    Args:
        output: Directory to write ``schema.json`` into; it is created when
            missing. When ``None`` the document is printed to stdout.
    """
    serialized = json.dumps(build_schema_document(), indent=2) + "\n"
    if output is not None:
        output.mkdir(parents=True, exist_ok=True)
        target = output / "schema.json"
        target.write_text(serialized)
        logger.info(f"Neo4j schema written to {target}")
        return
    # The document already ends with a newline, so suppress print's own.
    print(serialized, end="")
50
+
51
+
52
def emit_neo4j(app: PyApplication, options: AnalysisOptions) -> None:
    """Project the analysis to a graph and write it.

    A live Bolt push happens when ``--neo4j-uri`` is set; otherwise a
    self-contained ``graph.cypher`` snapshot is written.

    Args:
        app: The analyzed application to project.
        options: Analysis options. Reads ``app_name``, ``input``,
            ``neo4j_uri`` / ``neo4j_user`` / ``neo4j_password`` /
            ``neo4j_database``, ``file_name`` and ``output``.
    """
    # Fall back to the input directory's name when no app name was given.
    app_name = options.app_name or Path(options.input).resolve().name
    rows = project(app, app_name)

    if options.neo4j_uri:
        cfg = BoltConfig(
            uri=options.neo4j_uri,
            user=options.neo4j_user,
            password=options.neo4j_password,
            database=options.neo4j_database,
        )
        # A full run (no single-file restriction) makes orphan pruning safe.
        full_run = options.file_name is None
        bolt_writer(rows, cfg, full_run)
        return

    out_dir = options.output if options.output is not None else Path.cwd()
    out_dir.mkdir(parents=True, exist_ok=True)
    target = out_dir / "graph.cypher"
    # The rendered script always contains non-ASCII characters (its section
    # comments use box-drawing glyphs). Writing with the platform default
    # encoding can raise UnicodeEncodeError or yield a mis-encoded script on
    # non-UTF-8 locales, so the encoding is pinned to UTF-8.
    target.write_text(render_cypher(rows, app_name), encoding="utf-8")
    logger.info(f"Neo4j graph written to {target}")