codeanalyzer-python 0.1.14__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
codeanalyzer/__main__.py CHANGED
@@ -7,13 +7,18 @@ from codeanalyzer.core import Codeanalyzer
7
7
  from codeanalyzer.utils import _set_log_level, logger
8
8
  from codeanalyzer.config import OutputFormat
9
9
  from codeanalyzer.schema import model_dump_json
10
- from codeanalyzer.options import AnalysisOptions
10
+ from codeanalyzer.options import AnalysisOptions, EmitTarget
11
11
 
12
12
 
13
13
  def main(
14
14
  input: Annotated[
15
- Path, typer.Option("-i", "--input", help="Path to the project root directory.")
16
- ],
15
+ Optional[Path],
16
+ typer.Option(
17
+ "-i",
18
+ "--input",
19
+ help="Path to the project root directory (not required for --emit schema).",
20
+ ),
21
+ ] = None,
17
22
  output: Annotated[
18
23
  Optional[Path],
19
24
  typer.Option("-o", "--output", help="Output directory for artifacts."),
@@ -23,10 +28,61 @@ def main(
23
28
  typer.Option(
24
29
  "-f",
25
30
  "--format",
26
- help="Output format: json or msgpack.",
31
+ help="Output format for --emit json: json or msgpack.",
27
32
  case_sensitive=False,
28
33
  ),
29
34
  ] = OutputFormat.JSON,
35
+ emit: Annotated[
36
+ EmitTarget,
37
+ typer.Option(
38
+ "--emit",
39
+ help="Output target: json (analysis.json, default) | neo4j (graph.cypher or live "
40
+ "Bolt push) | schema (the Neo4j schema.json contract).",
41
+ case_sensitive=False,
42
+ ),
43
+ ] = EmitTarget.JSON,
44
+ app_name: Annotated[
45
+ Optional[str],
46
+ typer.Option(
47
+ "--app-name",
48
+ help="Logical application name for the graph :PyApplication anchor "
49
+ "(default: input dir name).",
50
+ ),
51
+ ] = None,
52
+ neo4j_uri: Annotated[
53
+ Optional[str],
54
+ typer.Option(
55
+ "--neo4j-uri",
56
+ envvar="NEO4J_URI",
57
+ help="Push the graph to a live Neo4j over Bolt (incremental); omit to write "
58
+ "graph.cypher. [env: NEO4J_URI]",
59
+ ),
60
+ ] = None,
61
+ neo4j_user: Annotated[
62
+ str,
63
+ typer.Option(
64
+ "--neo4j-user",
65
+ envvar="NEO4J_USERNAME",
66
+ help="Neo4j username. [env: NEO4J_USERNAME]",
67
+ ),
68
+ ] = "neo4j",
69
+ neo4j_password: Annotated[
70
+ str,
71
+ typer.Option(
72
+ "--neo4j-password",
73
+ envvar="NEO4J_PASSWORD",
74
+ help="Neo4j password. Prefer the env var over the flag (the flag is visible in shell "
75
+ "history / process list). [env: NEO4J_PASSWORD]",
76
+ ),
77
+ ] = "neo4j",
78
+ neo4j_database: Annotated[
79
+ Optional[str],
80
+ typer.Option(
81
+ "--neo4j-database",
82
+ envvar="NEO4J_DATABASE",
83
+ help="Neo4j database name (default: server default). [env: NEO4J_DATABASE]",
84
+ ),
85
+ ] = None,
30
86
  using_codeql: Annotated[
31
87
  bool, typer.Option("--codeql/--no-codeql", help="Enable CodeQL-based analysis.")
32
88
  ] = False,
@@ -78,6 +134,12 @@ def main(
78
134
  input=input,
79
135
  output=output,
80
136
  format=format,
137
+ emit=emit,
138
+ app_name=app_name,
139
+ neo4j_uri=neo4j_uri,
140
+ neo4j_user=neo4j_user,
141
+ neo4j_password=neo4j_password,
142
+ neo4j_database=neo4j_database,
81
143
  using_codeql=using_codeql,
82
144
  using_ray=using_ray,
83
145
  rebuild_analysis=rebuild_analysis,
@@ -89,6 +151,18 @@ def main(
89
151
  )
90
152
 
91
153
  _set_log_level(options.verbosity)
154
+
155
+ # The schema contract is a static artifact — no project analysis required.
156
+ if options.emit == EmitTarget.SCHEMA:
157
+ from codeanalyzer.neo4j.emit import emit_schema
158
+
159
+ emit_schema(options.output)
160
+ return
161
+
162
+ # Every other target requires an input project.
163
+ if options.input is None:
164
+ logger.error("Missing option '-i' / '--input' (required for --emit json | neo4j).")
165
+ raise typer.Exit(code=1)
92
166
  if not options.input.exists():
93
167
  logger.error(f"Input path '{options.input}' does not exist.")
94
168
  raise typer.Exit(code=1)
@@ -112,7 +186,11 @@ def main(
112
186
  with Codeanalyzer(options) as analyzer:
113
187
  artifacts = analyzer.analyze()
114
188
 
115
- if options.output is None:
189
+ if options.emit == EmitTarget.NEO4J:
190
+ from codeanalyzer.neo4j.emit import emit_neo4j
191
+
192
+ emit_neo4j(artifacts, options)
193
+ elif options.output is None:
116
194
  print(model_dump_json(artifacts, separators=(",", ":")))
117
195
  else:
118
196
  options.output.mkdir(parents=True, exist_ok=True)
@@ -142,7 +220,7 @@ def _write_output(artifacts, output_dir: Path, format: OutputFormat):
142
220
 
143
221
  app = typer.Typer(
144
222
  callback=main,
145
- name="codeanalyzer",
223
+ name="canpy",
146
224
  help="Static Analysis on Python source code using Jedi, CodeQL and Tree sitter.",
147
225
  invoke_without_command=True,
148
226
  no_args_is_help=True,
@@ -151,5 +229,20 @@ app = typer.Typer(
151
229
  pretty_exceptions_show_locals=False,
152
230
  )
153
231
 
232
def deprecated_main() -> None:
    """Run the CLI under its legacy ``codeanalyzer`` command name.

    A single deprecation line is written to stderr first, which leaves stdout
    untouched for callers that pipe the command's output (``--emit schema``, for
    instance). The CLI itself is then invoked unchanged. This shim exists only for
    backwards compatibility and is slated for removal in a future release.
    """
    from sys import stderr

    notice = (
        "codeanalyzer: this command has been renamed to `canpy`. The `codeanalyzer` "
        "alias is deprecated and will be removed in a future release — please use `canpy`."
    )
    print(notice, file=stderr)
    app()
245
+
246
+
154
247
  if __name__ == "__main__":
155
248
  app()
@@ -0,0 +1,46 @@
1
+ ################################################################################
2
+ # Copyright IBM Corporation 2025
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ ################################################################################
16
+
17
+ """Neo4j output: a pure projection of the :class:`PyApplication` IR to graph rows,
18
+ plus the two writers (cypher snapshot / bolt incremental). Nothing here runs
19
+ unless ``--emit neo4j`` (or ``--emit schema``) is selected.
20
+ """
21
+ from codeanalyzer.neo4j.bolt import BoltConfig, bolt_writer
22
+ from codeanalyzer.neo4j.catalog import (
23
+ MARKER_LABELS,
24
+ NODE_LABELS,
25
+ REL_TYPES,
26
+ SCHEMA_VERSION,
27
+ build_schema_document,
28
+ )
29
+ from codeanalyzer.neo4j.cypher import render_cypher
30
+ from codeanalyzer.neo4j.project import project
31
+ from codeanalyzer.neo4j.rows import EdgeRow, GraphRows, NodeRow
32
+
33
+ __all__ = [
34
+ "project",
35
+ "render_cypher",
36
+ "bolt_writer",
37
+ "BoltConfig",
38
+ "build_schema_document",
39
+ "SCHEMA_VERSION",
40
+ "NODE_LABELS",
41
+ "REL_TYPES",
42
+ "MARKER_LABELS",
43
+ "GraphRows",
44
+ "NodeRow",
45
+ "EdgeRow",
46
+ ]
@@ -0,0 +1,223 @@
1
+ ################################################################################
2
+ # Copyright IBM Corporation 2025
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ ################################################################################
16
+
17
+ """The incremental writer: push :class:`GraphRows` into a live Neo4j over Bolt.
18
+ Unlike the snapshot writer, this one reads the DB's current state and updates
19
+ only what changed.
20
+
21
+ Algorithm (the module subgraph is the unit of idempotent replacement):
22
+ 1. ensure constraints + indexes.
23
+ 2. diff each module's ``content_hash`` against the DB → the set of changed modules.
24
+ 3. per changed module: in one transaction, delete the edges it owned (edges out of
25
+ its nodes) and detach-delete the declarations it no longer emits; then upsert
26
+ its current nodes in batches, outside that transaction.
27
+ 4. upsert edges owned by changed modules (+ the shared edges).
28
+ 5. on a FULL run only, prune modules whose source file vanished.
29
+
30
+ Nodes are MERGE-upserted, never blindly deleted, so a declaration another
31
+ (unchanged) module still references survives and its incoming edges stay valid.
32
+ ``:PyExternal`` / ``:PyPackage`` / ``:PyDecorator`` are shared (no ``_module``) and are
33
+ MERGE-only.
34
+
35
+ The ``neo4j`` driver is imported lazily so it stays an optional dependency and
36
+ off the default (json) output path entirely.
37
+ """
38
+ from __future__ import annotations
39
+
40
+ from dataclasses import dataclass
41
+ from typing import Dict, List, Optional
42
+
43
+ from codeanalyzer.neo4j.rows import EdgeRow, GraphRows, NodeRow, chunk
44
+ from codeanalyzer.neo4j.schema import CONSTRAINTS, INDEXES
45
+ from codeanalyzer.utils import logger
46
+
47
+ DESCENDANTS = "[:PY_DECLARES|PY_HAS_METHOD|PY_HAS_ATTRIBUTE|PY_DECLARES_VAR|PY_HAS_CALLSITE*1..]"
48
+ BATCH = 1000
49
+
50
+
51
@dataclass
class BoltConfig:
    """Connection settings for the live (Bolt) Neo4j writer.

    The password is deliberately masked in ``repr()`` so that logging or printing
    a config object (or rendering it in a traceback) does not disclose the
    credential. Field access and equality are unaffected.
    """

    uri: str  # connection URI handed to the driver
    user: str  # first element of the driver's auth tuple
    password: str  # second element of the driver's auth tuple
    database: Optional[str] = None  # falsy -> sessions are opened without a database argument

    def __repr__(self) -> str:
        # An explicitly defined __repr__ is kept by @dataclass instead of the generated one.
        return (
            f"{type(self).__name__}(uri={self.uri!r}, user={self.user!r}, "
            f"password='***', database={self.database!r})"
        )
57
+
58
+
59
def bolt_writer(rows: GraphRows, cfg: BoltConfig, full_run: bool) -> None:
    """Incrementally sync ``rows`` into a live Neo4j database over Bolt.

    Sequence of operations:

    1. run every statement in ``CONSTRAINTS`` and ``INDEXES``;
    2. partition nodes by their ``_module`` prop (nodes without a string
       ``_module`` are "shared") and compare each module's ``content_hash``
       with the one stored on the matching ``:PyModule`` node;
    3. upsert all shared nodes;
    4. for each changed module, delete its outgoing edges and detach-delete the
       nodes it no longer emits (one write transaction), then upsert its nodes;
    5. upsert the edges whose source node is shared or belongs to a changed module;
    6. when ``full_run`` is true, delete ``:PyModule`` nodes (and their
       descendants) whose ``file_key`` is absent from ``rows``.

    Args:
        rows: The projected graph (nodes and edges) to write.
        cfg: Connection settings (URI, credentials, optional database name).
        full_run: Whether ``rows`` covers the whole project; orphan pruning is
            skipped when false.

    Raises:
        RuntimeError: If the optional ``neo4j`` driver cannot be imported.
    """
    try:
        import neo4j  # noqa: WPS433 (lazy, optional dependency)
    except ImportError as exc:  # pragma: no cover - exercised only without the extra
        raise RuntimeError(
            "The 'neo4j' driver is required for '--emit neo4j --neo4j-uri'. "
            "Install it with: pip install 'codeanalyzer-python[neo4j]'"
        ) from exc

    driver = neo4j.GraphDatabase.driver(cfg.uri, auth=(cfg.user, cfg.password))
    session_kwargs = {"database": cfg.database} if cfg.database else {}

    def session():
        return driver.session(**session_kwargs)

    try:
        # 1. schema (DDL runs in its own autocommit transactions).
        with session() as s:
            for stmt in [*CONSTRAINTS, *INDEXES]:
                s.run(stmt)

        # Partition nodes by owning module; shared nodes have no _module.
        by_module: Dict[str, List[NodeRow]] = {}
        shared: List[NodeRow] = []
        module_of: Dict[str, str] = {}  # node value → owning module
        for n in rows.nodes:
            m = n.props.get("_module")
            if isinstance(m, str):
                by_module.setdefault(m, []).append(n)
                module_of[n.value] = m
            else:
                shared.append(n)

        # 2. diff content_hash.
        db_hash: Dict[str, Optional[str]] = {}
        with session() as s:
            res = s.run("MATCH (m:PyModule) RETURN m.file_key AS k, m.content_hash AS h")
            for rec in res:
                db_hash[rec["k"]] = rec["h"]
        changed = set()
        for m, nodes in by_module.items():
            row_hash = _hash_of(nodes, m)
            # A module is (re)written when it is new to the DB, when it carries no
            # usable hash, or when the stored hash differs.
            if m not in db_hash or row_hash is None or row_hash != db_hash.get(m):
                changed.add(m)
        logger.info(
            f"neo4j(bolt): {len(by_module)} modules ({len(changed)} changed), "
            f"{len(shared)} shared nodes, {len(rows.edges)} edges"
        )

        # 3. shared nodes are always upserted (MERGE-only).
        _upsert_nodes(session, neo4j, shared)

        # 4. per changed module: purge owned edges + vanished decls, then upsert its nodes.
        for m in changed:
            nodes = by_module[m]
            keys = [n.value for n in nodes]
            with session() as s:

                # Defaults bind the current module/keys to this iteration's closure.
                def _purge(tx, module=m, node_keys=keys):
                    tx.run("MATCH (x {_module: $m})-[r]->() DELETE r", m=module)
                    tx.run(
                        "MATCH (x {_module: $m}) "
                        "WHERE NOT coalesce(x.signature, x.id, x.file_key) IN $keys "
                        "DETACH DELETE x",
                        m=module,
                        keys=node_keys,
                    )

                s.execute_write(_purge)
            _upsert_nodes(session, neo4j, nodes)

        # 5. upsert edges owned by a changed module (owner = source node's module) or shared.
        edges = [
            e
            for e in rows.edges
            if module_of.get(e.from_ref.value) is None or module_of.get(e.from_ref.value) in changed
        ]
        _upsert_edges(session, neo4j, edges)

        # 6. orphan prune — only safe on a full run (a targeted run can't tell deleted from untargeted).
        if full_run:
            present = list(by_module.keys())
            with session() as s:
                # count(DISTINCT m): the OPTIONAL MATCH yields one row per descendant
                # path, so a plain count(m) would over-report the pruned modules.
                res = s.run(
                    "MATCH (m:PyModule) WHERE NOT m.file_key IN $present "
                    f"OPTIONAL MATCH (m)-{DESCENDANTS}->(x) DETACH DELETE x, m "
                    "RETURN count(DISTINCT m) AS pruned",
                    present=present,
                )
                pruned = res.single()
                pruned_count = pruned["pruned"] if pruned else 0
            logger.info(f"neo4j(bolt): pruned {pruned_count} vanished module(s)")
        else:
            logger.info(
                "neo4j(bolt): targeted run — orphan pruning skipped (deleted files not removed)"
            )
    finally:
        driver.close()
156
+
157
+
158
+ # ----------------------------------------------------------------------------------------------
159
+ # Batched upserts
160
+ # ----------------------------------------------------------------------------------------------
161
+
162
+
163
def _upsert_nodes(session, neo4j, nodes: List[NodeRow]) -> None:
    """MERGE-upsert ``nodes`` in batches of at most ``BATCH`` rows per statement.

    Nodes are bucketed by their full label list plus key property so that every
    bucket shares one Cypher statement: MERGE on the first label and key, then
    ``SET`` the props and any remaining labels. Each batch is sent through its
    own session obtained from ``session()``.
    """
    buckets: Dict[str, List[NodeRow]] = {}
    for node in nodes:
        bucket_id = f"{':'.join(node.labels)}|{node.key_prop}"
        buckets.setdefault(bucket_id, []).append(node)

    for members in buckets.values():
        exemplar = members[0]
        all_labels = exemplar.labels
        merge_key = exemplar.key_prop

        # Labels beyond the first are attached in the SET clause.
        extra_label_clause = ""
        if len(all_labels) > 1:
            extra_label_clause = f", n:{':'.join(all_labels[1:])}"

        statement = (
            f"UNWIND $rows AS row MERGE (n:{all_labels[0]} {{{merge_key}: row.k}}) "
            f"SET n += row.p{extra_label_clause}"
        )

        for piece in chunk(members, BATCH):
            payload = [
                {"k": item.value, "p": _to_params(item.props, neo4j)} for item in piece
            ]
            with session() as s:
                s.run(statement, rows=payload)
180
+
181
+
182
def _upsert_edges(session, neo4j, edges: List[EdgeRow]) -> None:
    """MERGE-upsert ``edges`` in batches of at most ``BATCH`` rows per statement.

    Edges are bucketed by relationship type and by the label/key property of both
    endpoints, so one Cypher statement serves a whole bucket: MATCH both endpoint
    nodes, MERGE the relationship, then ``SET`` its props. Each batch is sent
    through its own session obtained from ``session()``.
    """
    buckets: Dict[str, List[EdgeRow]] = {}
    for edge in edges:
        src, dst = edge.from_ref, edge.to_ref
        bucket_id = f"{edge.type}|{src.label}.{src.key_prop}|{dst.label}.{dst.key_prop}"
        buckets.setdefault(bucket_id, []).append(edge)

    for members in buckets.values():
        exemplar = members[0]
        src = exemplar.from_ref
        dst = exemplar.to_ref
        statement = (
            f"UNWIND $rows AS row "
            f"MATCH (a:{src.label} {{{src.key_prop}: row.f}}) "
            f"MATCH (b:{dst.label} {{{dst.key_prop}: row.t}}) "
            f"MERGE (a)-[r:{exemplar.type}]->(b) SET r += row.p"
        )

        for piece in chunk(members, BATCH):
            payload = []
            for item in piece:
                payload.append(
                    {
                        "f": item.from_ref.value,
                        "t": item.to_ref.value,
                        "p": _to_params(item.props, neo4j),
                    }
                )
            with session() as s:
                s.run(statement, rows=payload)
204
+
205
+
206
+ # ----------------------------------------------------------------------------------------------
207
+ # Helpers
208
+ # ----------------------------------------------------------------------------------------------
209
+
210
+
211
+ def _hash_of(nodes: List[NodeRow], file_key: str) -> Optional[str]:
212
+ for n in nodes:
213
+ if n.labels[0] == "PyModule" and n.value == file_key:
214
+ h = n.props.get("content_hash")
215
+ return h if isinstance(h, str) else None
216
+ return None
217
+
218
+
219
+ def _to_params(props, neo4j) -> dict:
220
+ """Map props to driver params. The Python driver already distinguishes int
221
+ from float, so unlike the JS driver no integer coercion is needed — this is a
222
+ straight passthrough kept symmetric with the snapshot writer's shape."""
223
+ return dict(props)
@@ -0,0 +1,245 @@
1
+ ################################################################################
2
+ # Copyright IBM Corporation 2025
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ ################################################################################
16
+
17
+ """The declarative Neo4j schema catalog — the single in-repo source of truth for
18
+ the graph contract (node labels, their keys and typed properties, relationship
19
+ types and their endpoints). ``--emit schema`` serializes this (with the DDL from
20
+ :mod:`codeanalyzer.neo4j.schema`) to a machine-readable ``schema.json``, and the
21
+ conformance test (``test/test_neo4j_schema.py``) asserts the real emitter never
22
+ produces a label / relationship / property that isn't declared here — so this
23
+ file cannot silently drift from :mod:`codeanalyzer.neo4j.project`.
24
+
25
+ SCHEMA_VERSION is the contract version: bump MAJOR on a breaking change
26
+ (renamed/removed label, relationship or key), MINOR on an additive change (new
27
+ label/rel/property). It is stamped onto the ``:PyApplication`` node of every
28
+ emitted graph so any consumer can detect a producer/consumer mismatch at runtime.
29
+ """
30
+ from __future__ import annotations
31
+
32
+ from dataclasses import dataclass, field
33
+ from typing import Dict, List
34
+
35
+ from codeanalyzer.neo4j.schema import CONSTRAINTS, INDEXES
36
+
37
+ SCHEMA_VERSION = "1.0.0"
38
+
39
+ # PropType ∈ {"string", "integer", "float", "boolean", "string[]", "integer[]"}.
40
+
41
+
42
@dataclass
class NodeLabel:
    """Catalog entry describing one node label of the graph contract."""

    label: str  # the specific label (also the catalog key)
    merge_label: str  # the label the uniqueness constraint / MERGE is on
    key: str  # name of the key property for this label
    properties: Dict[str, str]  # property name -> PropType string (e.g. "string", "integer[]")
48
+
49
+
50
@dataclass
class RelType:
    """Catalog entry describing one relationship type of the graph contract."""

    type: str  # relationship type name, e.g. "PY_CALLS"
    from_labels: List[str]  # node labels allowed at the source end
    to_labels: List[str]  # node labels allowed at the target end
    # property name -> PropType string; a fresh empty dict per instance by default
    properties: Dict[str, str] = field(default_factory=dict)
56
+
57
+
58
+ # Labels layered onto a node in addition to its primary/specific label.
59
+ MARKER_LABELS: List[str] = []
60
+
61
+ _SPAN = {"start_line": "integer", "end_line": "integer"}
62
+
63
+
64
# Every node label in the contract. Property order inside each entry is kept as
# declared, since it is what the serialized schema document will show.
NODE_LABELS: List[NodeLabel] = [
    NodeLabel(
        label="PyApplication",
        merge_label="PyApplication",
        key="name",
        properties={"name": "string", "schema_version": "string"},
    ),
    NodeLabel(
        label="PyModule",
        merge_label="PyModule",
        key="file_key",
        properties={
            "file_key": "string",
            "module_name": "string",
            "content_hash": "string",
            "last_modified": "float",
            "file_size": "integer",
            "_module": "string",
        },
    ),
    # PyClass / PyCallable / PyExternal all merge on the common PySymbol label.
    NodeLabel(
        label="PyClass",
        merge_label="PySymbol",
        key="signature",
        properties={
            "signature": "string",
            "name": "string",
            "code": "string",
            "base_classes": "string[]",
            "docstring": "string",
            **_SPAN,
            "_module": "string",
        },
    ),
    NodeLabel(
        label="PyCallable",
        merge_label="PySymbol",
        key="signature",
        properties={
            "signature": "string",
            "name": "string",
            "path": "string",
            "return_type": "string",
            "cyclomatic_complexity": "integer",
            "code": "string",
            "code_start_line": "integer",
            **_SPAN,
            "docstring": "string",
            "decorators": "string[]",
            "parameters_json": "string",
            "accessed_symbols_json": "string",
            "_module": "string",
        },
    ),
    NodeLabel(
        label="PyExternal",
        merge_label="PySymbol",
        key="signature",
        properties={"signature": "string", "name": "string"},
    ),
    NodeLabel(
        label="PyPackage",
        merge_label="PyPackage",
        key="name",
        properties={"name": "string"},
    ),
    NodeLabel(
        label="PyDecorator",
        merge_label="PyDecorator",
        key="name",
        properties={"name": "string"},
    ),
    NodeLabel(
        label="PyCallSite",
        merge_label="PyCallSite",
        key="id",
        properties={
            "id": "string",
            "method_name": "string",
            "receiver_expr": "string",
            "receiver_type": "string",
            "argument_types": "string[]",
            "return_type": "string",
            "callee_signature": "string",
            "is_constructor_call": "boolean",
            "start_line": "integer",
            "start_column": "integer",
            "end_line": "integer",
            "end_column": "integer",
            "_module": "string",
        },
    ),
    NodeLabel(
        label="PyAttribute",
        merge_label="PyAttribute",
        key="id",
        properties={
            "id": "string",
            "name": "string",
            "type": "string",
            "docstring": "string",
            **_SPAN,
            "_module": "string",
        },
    ),
    NodeLabel(
        label="PyVariable",
        merge_label="PyVariable",
        key="id",
        properties={
            "id": "string",
            "name": "string",
            "type": "string",
            "initializer": "string",
            "scope": "string",
            **_SPAN,
            "_module": "string",
        },
    ),
]
179
+
180
+ _DECL_TARGETS = ["PyClass", "PyCallable"]
181
+
182
+
183
# Every relationship type in the contract, with the labels allowed at each end
# and (where present) the typed relationship properties.
REL_TYPES: List[RelType] = [
    RelType(type="PY_HAS_MODULE", from_labels=["PyApplication"], to_labels=["PyModule"]),
    RelType(
        type="PY_DECLARES",
        from_labels=["PyModule", "PyClass", "PyCallable"],
        to_labels=_DECL_TARGETS,
    ),
    RelType(type="PY_HAS_METHOD", from_labels=["PyClass"], to_labels=["PyCallable"]),
    RelType(type="PY_HAS_ATTRIBUTE", from_labels=["PyClass"], to_labels=["PyAttribute"]),
    RelType(
        type="PY_DECLARES_VAR",
        from_labels=["PyModule", "PyCallable"],
        to_labels=["PyVariable"],
    ),
    RelType(type="PY_HAS_CALLSITE", from_labels=["PyCallable"], to_labels=["PyCallSite"]),
    RelType(
        type="PY_RESOLVES_TO",
        from_labels=["PyCallSite"],
        to_labels=["PyCallable", "PyExternal"],
    ),
    RelType(
        type="PY_CALLS",
        from_labels=["PyCallable", "PyExternal"],
        to_labels=["PyCallable", "PyExternal"],
        properties={"weight": "integer", "provenance": "string[]"},
    ),
    RelType(type="PY_EXTENDS", from_labels=["PyClass"], to_labels=["PyClass"]),
    RelType(
        type="PY_IMPORTS",
        from_labels=["PyModule"],
        to_labels=["PyPackage"],
        properties={"imported_names": "string[]", "aliases": "string[]"},
    ),
    RelType(type="PY_DECORATED_BY", from_labels=["PyCallable"], to_labels=["PyDecorator"]),
]
206
+
207
+
208
@dataclass
class SchemaDocument:
    """Typed shape of the schema document; field names match the top-level keys
    of the dict returned by ``build_schema_document``."""

    schema_version: str  # contract version string
    generator: str  # name of the producing tool
    marker_labels: List[str]  # labels layered onto nodes in addition to their primary label
    node_labels: List[NodeLabel]  # node label catalog entries
    relationship_types: List[RelType]  # relationship type catalog entries
    constraints: List[str]  # constraint statements
    indexes: List[str]  # index statements
217
+
218
+
219
def build_schema_document() -> dict:
    """Build the full machine-readable schema document emitted by ``--emit schema``.

    Returns:
        A dict with the keys ``schema_version``, ``generator``, ``marker_labels``,
        ``node_labels``, ``relationship_types``, ``constraints`` and ``indexes``.
        Every list and property mapping in it is a copy, so mutating the returned
        document never alters the module-level catalog.
    """
    node_entries = [
        {
            "label": n.label,
            "merge_label": n.merge_label,
            "key": n.key,
            # Copy: the catalog's dict must not be shared with the caller.
            "properties": dict(n.properties),
        }
        for n in NODE_LABELS
    ]
    relationship_entries = [
        {
            "type": r.type,
            # Copies: endpoint lists may be shared between catalog entries.
            "from": list(r.from_labels),
            "to": list(r.to_labels),
            "properties": dict(r.properties),
        }
        for r in REL_TYPES
    ]
    return {
        "schema_version": SCHEMA_VERSION,
        "generator": "codeanalyzer-python",
        "marker_labels": list(MARKER_LABELS),
        "node_labels": node_entries,
        "relationship_types": relationship_entries,
        "constraints": list(CONSTRAINTS),
        "indexes": list(INDEXES),
    }