codeanalyzer-python 0.1.13__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,322 @@
1
+ ################################################################################
2
+ # Copyright IBM Corporation 2025
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ ################################################################################
16
+
17
+ """``project()`` — the pure projection from the canonical :class:`PyApplication`
18
+ IR to graph rows. It walks the same recursive symbol table the call-graph builder
19
+ walks, but instead of collecting callables it emits nodes + edges. No I/O: the
20
+ writers (cypher snapshot / bolt incremental) consume the returned
21
+ :class:`GraphRows`.
22
+
23
+ Modelling decisions (mirror of the TypeScript backend):
24
+ - signature-keyed declarations (PyClass, PyCallable) carry a shared ``:PySymbol``
25
+ label (the global-identity / MERGE key).
26
+ - call sites, decorators, class attributes and variables are first-class nodes.
27
+ - call-graph endpoints absent from the symbol table become ``:PyExternal`` ghost
28
+ nodes, so RPC / third-party / framework edges are preserved (matching the
29
+ analyzer's own ghost-node behaviour).
30
+ - every project-owned node carries an internal ``_module`` provenance prop, so
31
+ the incremental writer can delete exactly what a re-analyzed module emitted.
32
+ """
33
+ from __future__ import annotations
34
+
35
+ import json
36
+ from pathlib import Path
37
+ from typing import Any, List, Optional
38
+
39
+ from codeanalyzer.neo4j.catalog import SCHEMA_VERSION
40
+ from codeanalyzer.neo4j.rows import GraphRows, NodeRef, Props, RowBuilder, prune
41
+ from codeanalyzer.schema import (
42
+ PyApplication,
43
+ PyCallable,
44
+ PyClass,
45
+ PyClassAttribute,
46
+ PyComment,
47
+ PyModule,
48
+ PyVariableDeclaration,
49
+ )
50
+ from codeanalyzer.schema.py_schema import PyCallsite
51
+
52
+
53
def project(app: PyApplication, app_name: str) -> GraphRows:
    """Project an analyzed application into graph rows.

    Emits one ``PyApplication`` node keyed by ``app_name``, one ``PyModule``
    node (plus its body) per symbol-table entry, and one ``PY_CALLS`` edge per
    call-graph entry.

    Args:
        app: The analysis result holding ``symbol_table`` and ``call_graph``.
        app_name: Value of the application node's ``name`` key.

    Returns:
        The rows accumulated by the builder, as produced by ``finish()``.
    """
    builder = RowBuilder()
    root = builder.node(
        ["PyApplication"], "name", app_name, {"schema_version": SCHEMA_VERSION}
    )

    for file_key, module in app.symbol_table.items():
        module_ref = builder.node(
            ["PyModule"], "file_key", file_key, _module_props(module, file_key)
        )
        builder.edge("PY_HAS_MODULE", root, module_ref)
        _project_module_body(builder, file_key, module_ref, module)

    # Call-graph edges come after every module has been walked, so endpoints
    # that were never emitted can be told apart and materialized as
    # :PyExternal ghost nodes.
    for call in app.call_graph:
        caller = _call_endpoint(builder, call.source)
        callee = _call_endpoint(builder, call.target)
        edge_props = _call_edge_props(call.weight, list(call.provenance or []))
        builder.edge("PY_CALLS", caller, callee, edge_props)

    return builder.finish()
71
+
72
+
73
def _sym(signature: str) -> NodeRef:
    """Build the reference that addresses a ``:PySymbol`` node by signature."""
    return NodeRef(label="PySymbol", key_prop="signature", value=signature)
75
+
76
+
77
def _call_endpoint(b: RowBuilder, signature: str) -> NodeRef:
    """Resolve one end of a call-graph edge to a node reference.

    A signature the builder has already seen is addressed as an existing
    ``:PySymbol``; anything else is materialized on demand as a
    ``:PySymbol:PyExternal`` ghost node whose ``name`` is the text after the
    last dot of the signature (or the whole signature when it has no dot).
    """
    # NOTE(review): has_key() is called without a label, so it matches any
    # node value seen so far -- confirm a signature can never coincide with a
    # non-symbol key such as an attribute id or package name.
    if not b.has_key(signature):
        short_name = signature.rpartition(".")[2]
        return b.node(
            ["PySymbol", "PyExternal"], "signature", signature, {"name": short_name}
        )
    return _sym(signature)
84
+
85
+
86
+ # ----------------------------------------------------------------------------------------------
87
+ # Module body
88
+ # ----------------------------------------------------------------------------------------------
89
+
90
+
91
def _project_module_body(b: RowBuilder, file_key: str, mod_ref: NodeRef, mod: PyModule) -> None:
    """Emit everything declared directly in a module.

    Top-level functions and classes hang off the module via ``PY_DECLARES``,
    module variables use the file key as their owner id, and imports are
    projected last.
    """
    functions = mod.functions or {}
    for function in functions.values():
        _project_callable(b, file_key, mod_ref, "PY_DECLARES", function)

    classes = mod.classes or {}
    for klass in classes.values():
        _project_class(b, file_key, mod_ref, "PY_DECLARES", klass)

    for variable in mod.variables or []:
        _project_variable(b, file_key, mod_ref, file_key, variable)

    _project_imports(b, mod_ref, mod)
99
+
100
+
101
def _project_imports(b: RowBuilder, mod_ref: NodeRef, mod: PyModule) -> None:
    """Emit one ``PY_IMPORTS`` edge per imported module.

    All import bindings that share a target module are collapsed into a single
    edge to a shared ``:PyPackage`` node; the edge carries the sorted imported
    names and aliases (each omitted when empty).
    """
    by_module: dict = {}
    for imp in mod.imports or []:
        if not imp.module:
            # relative `from . import x` — no resolvable package
            continue
        bucket = by_module.get(imp.module)
        if bucket is None:
            bucket = {"names": set(), "aliases": set()}
            by_module[imp.module] = bucket
        if imp.name:
            bucket["names"].add(imp.name)
        if imp.alias:
            bucket["aliases"].add(imp.alias)

    for module_name, bucket in by_module.items():
        package_ref = b.node(["PyPackage"], "name", module_name, {})
        imported_names = sorted(bucket["names"])
        aliases = sorted(bucket["aliases"])
        # An empty list becomes None so prune() drops the property entirely.
        edge_props = prune(
            {
                "imported_names": imported_names or None,
                "aliases": aliases or None,
            }
        )
        b.edge("PY_IMPORTS", mod_ref, package_ref, edge_props)
126
+
127
+
128
+ # ----------------------------------------------------------------------------------------------
129
+ # Declarations
130
+ # ----------------------------------------------------------------------------------------------
131
+
132
+
133
def _project_class(
    b: RowBuilder, file_key: str, parent: NodeRef, parent_rel: str, cl: PyClass
) -> None:
    """Emit a class node, link it to its parent, and recurse into its members.

    Args:
        b: Builder collecting the rows.
        file_key: Symbol-table key of the module that owns the class.
        parent: Node the class is attached to.
        parent_rel: Relationship type used for the parent -> class edge.
        cl: The class to project.
    """
    class_ref = b.node(
        ["PySymbol", "PyClass"], "signature", cl.signature, _class_props(cl, file_key)
    )
    b.edge(parent_rel, parent, class_ref)

    # Base classes go through the deferred path: the edge survives only if the
    # base signature ends up emitted as a node.
    for base_signature in cl.base_classes or []:
        b.edge_to_symbol("PY_EXTENDS", class_ref, base_signature)

    methods = cl.methods or {}
    for method in methods.values():
        _project_callable(b, file_key, class_ref, "PY_HAS_METHOD", method)

    attributes = cl.attributes or {}
    for attribute in attributes.values():
        _project_attribute(b, file_key, class_ref, cl.signature, attribute)

    nested = cl.inner_classes or {}
    for inner in nested.values():
        _project_class(b, file_key, class_ref, "PY_DECLARES", inner)
148
+
149
+
150
def _project_callable(
    b: RowBuilder, file_key: str, owner: NodeRef, owner_rel: str, c: PyCallable
) -> None:
    """Emit a callable node with its decorators, call sites, locals and nested
    declarations.

    Args:
        b: Builder collecting the rows.
        file_key: Symbol-table key of the module that owns the callable.
        owner: Node the callable is attached to.
        owner_rel: Relationship type used for the owner -> callable edge.
        c: The callable to project.
    """
    callable_ref = b.node(
        ["PySymbol", "PyCallable"], "signature", c.signature, _callable_props(c, file_key)
    )
    b.edge(owner_rel, owner, callable_ref)

    for decorator in c.decorators or []:
        _project_decorator(b, callable_ref, decorator)

    for site in c.call_sites or []:
        # The id is built from the file key plus the source span (a call site
        # lives in its callable's file), which keeps ids portable.
        site_id = f"{file_key}#{site.start_line}:{site.start_column}-{site.end_line}:{site.end_column}"
        site_ref = b.node(["PyCallSite"], "id", site_id, _call_site_props(site, file_key))
        b.edge("PY_HAS_CALLSITE", callable_ref, site_ref)
        if not site.callee_signature:
            continue
        b.edge_to_symbol("PY_RESOLVES_TO", site_ref, site.callee_signature)

    for local in c.local_variables or []:
        _project_variable(b, file_key, callable_ref, c.signature, local)

    nested_callables = c.inner_callables or {}
    for inner in nested_callables.values():
        _project_callable(b, file_key, callable_ref, "PY_DECLARES", inner)

    nested_classes = c.inner_classes or {}
    for inner_class in nested_classes.values():
        _project_class(b, file_key, callable_ref, "PY_DECLARES", inner_class)
173
+
174
+
175
def _project_attribute(
    b: RowBuilder, file_key: str, owner: NodeRef, owner_sig: str, a: PyClassAttribute
) -> None:
    """Emit a ``PyAttribute`` node keyed by ``<owner_sig>.<name>`` and attach it
    to its owner with ``PY_HAS_ATTRIBUTE``."""
    attribute_id = f"{owner_sig}.{a.name}"
    attribute_props = _attribute_props(a, attribute_id, file_key)
    attribute_ref = b.node(["PyAttribute"], "id", attribute_id, attribute_props)
    b.edge("PY_HAS_ATTRIBUTE", owner, attribute_ref)
181
+
182
+
183
def _project_variable(
    b: RowBuilder, file_key: str, owner: NodeRef, owner_id: str, v: PyVariableDeclaration
) -> None:
    """Emit a ``PyVariable`` node keyed by ``<owner_id>#<name>@<start_line>`` and
    attach it to its owner with ``PY_DECLARES_VAR``."""
    variable_id = f"{owner_id}#{v.name}@{v.start_line}"
    variable_props = _variable_props(v, variable_id, file_key)
    variable_ref = b.node(["PyVariable"], "id", variable_id, variable_props)
    b.edge("PY_DECLARES_VAR", owner, variable_ref)
189
+
190
+
191
def _project_decorator(b: RowBuilder, on: NodeRef, decorator: str) -> None:
    """Emit (or re-touch) the ``PyDecorator`` node named ``decorator`` and link
    the decorated node to it with ``PY_DECORATED_BY``."""
    decorator_ref = b.node(["PyDecorator"], "name", decorator, {"name": decorator})
    b.edge("PY_DECORATED_BY", on, decorator_ref)
194
+
195
+
196
+ # ----------------------------------------------------------------------------------------------
197
+ # Property flattening
198
+ # ----------------------------------------------------------------------------------------------
199
+
200
+
201
def _module_props(mod: PyModule, file_key: str) -> Props:
    """Flatten a module into node properties; ``None`` values are pruned."""
    raw = {
        "module_name": mod.module_name,
        "content_hash": mod.content_hash,
        "last_modified": mod.last_modified,
        "file_size": mod.file_size,
        # provenance: the module that emitted this node
        "_module": file_key,
    }
    return prune(raw)
211
+
212
+
213
def _class_props(cl: PyClass, file_key: str) -> Props:
    """Flatten a class into node properties; ``None`` values are pruned.

    ``base_classes`` is always a list (possibly empty), which keeps the string
    form of each base on the class node itself.
    """
    raw = {
        "name": cl.name,
        "code": cl.code,
        "base_classes": list(cl.base_classes or []),
        "docstring": _docstring_of(cl.comments),
        "start_line": cl.start_line,
        "end_line": cl.end_line,
        # provenance: the module that emitted this node
        "_module": file_key,
    }
    return prune(raw)
225
+
226
+
227
def _callable_props(c: PyCallable, file_key: str) -> Props:
    """Flatten a callable into node properties; ``None`` values are pruned.

    Parameters and accessed symbols are stored as JSON strings (or omitted
    when empty) rather than as nested structures.
    """
    raw = {
        "name": c.name,
        "path": c.path,
        "return_type": c.return_type,
        "cyclomatic_complexity": c.cyclomatic_complexity,
        "code": c.code,
        "code_start_line": c.code_start_line,
        "start_line": c.start_line,
        "end_line": c.end_line,
        "docstring": _docstring_of(c.comments),
        "decorators": list(c.decorators or []),
        "parameters_json": _stringify_if(c.parameters),
        "accessed_symbols_json": _stringify_if(c.accessed_symbols),
        # provenance: the module that emitted this node
        "_module": file_key,
    }
    return prune(raw)
245
+
246
+
247
def _attribute_props(a: PyClassAttribute, attr_id: str, file_key: str) -> Props:
    """Flatten a class attribute into node properties; ``None`` values are
    pruned. ``attr_id`` is stored as the node's ``id``."""
    raw = {
        "id": attr_id,
        "name": a.name,
        "type": a.type,
        "docstring": _docstring_of(a.comments),
        "start_line": a.start_line,
        "end_line": a.end_line,
        # provenance: the module that emitted this node
        "_module": file_key,
    }
    return prune(raw)
259
+
260
+
261
def _variable_props(v: PyVariableDeclaration, var_id: str, file_key: str) -> Props:
    """Flatten a variable declaration into node properties; ``None`` values
    are pruned. ``var_id`` is stored as the node's ``id``."""
    raw = {
        "id": var_id,
        "name": v.name,
        "type": v.type,
        "initializer": v.initializer,
        "scope": v.scope,
        "start_line": v.start_line,
        "end_line": v.end_line,
        # provenance: the module that emitted this node
        "_module": file_key,
    }
    return prune(raw)
274
+
275
+
276
def _call_site_props(s: PyCallsite, file_key: str) -> Props:
    """Flatten a call site into node properties; ``None`` values are pruned.

    The ``id`` is derived from the file key and the call's source span.
    """
    span = f"{s.start_line}:{s.start_column}-{s.end_line}:{s.end_column}"
    raw = {
        "id": f"{file_key}#{span}",
        "method_name": s.method_name,
        "receiver_expr": s.receiver_expr,
        "receiver_type": s.receiver_type,
        "argument_types": list(s.argument_types or []),
        "return_type": s.return_type,
        "callee_signature": s.callee_signature,
        "is_constructor_call": s.is_constructor_call,
        "start_line": s.start_line,
        "start_column": s.start_column,
        "end_line": s.end_line,
        "end_column": s.end_column,
        # provenance: the module that emitted this node
        "_module": file_key,
    }
    return prune(raw)
295
+
296
+
297
def _call_edge_props(weight: int, provenance: List[str]) -> Props:
    """Build the property map of a ``PY_CALLS`` edge (provenance is copied)."""
    edge_props = {"weight": weight, "provenance": list(provenance)}
    return prune(edge_props)
299
+
300
+
301
+ def _docstring_of(comments: Optional[List[PyComment]]) -> Optional[str]:
302
+ docs = [c.content for c in (comments or []) if c.is_docstring]
303
+ return "\n".join(docs) if docs else None
304
+
305
+
306
+ def _stringify_if(value: Any) -> Optional[str]:
307
+ """JSON-encode a list/dict of pydantic models, or None when empty."""
308
+ if value is None:
309
+ return None
310
+ if isinstance(value, (list, dict)) and len(value) == 0:
311
+ return None
312
+ return json.dumps(value, default=_jsonable, sort_keys=True)
313
+
314
+
315
+ def _jsonable(o: Any) -> Any:
316
+ if hasattr(o, "model_dump"):
317
+ return o.model_dump()
318
+ if hasattr(o, "dict"):
319
+ return o.dict()
320
+ if isinstance(o, Path):
321
+ return str(o)
322
+ return str(o)
@@ -0,0 +1,176 @@
1
+ ################################################################################
2
+ # Copyright IBM Corporation 2025
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ ################################################################################
16
+
17
+ """The output-agnostic intermediate between :func:`project` and the two writers
18
+ (cypher snapshot / bolt incremental). Pure data — no I/O, no driver. A
19
+ :class:`GraphRows` is a deterministic, deduped bag of nodes and edges that both
20
+ writers consume identically.
21
+
22
+ Property values are restricted to Neo4j-legal shapes: primitives and homogeneous
23
+ arrays of primitives. ``None`` values are pruned (in Neo4j a null property is
24
+ simply absence).
25
+ """
26
+ from __future__ import annotations
27
+
28
+ from dataclasses import dataclass, field
29
+ from typing import Dict, List, Optional, Union
30
+
31
# A property value: a primitive, or a homogeneous list of primitives.
Scalar = Union[str, int, float, bool]  # a single primitive value
Prop = Union[Scalar, List[str], List[int], List[float], List[bool]]  # scalar or list of one primitive type
Props = Dict[str, Prop]  # property map: property name -> value
35
+
36
+
37
@dataclass(frozen=True)
class NodeRef:
    """How an edge addresses one of its endpoints: the label + key property to
    MATCH on, and the value.

    Frozen, so instances are immutable once created.
    """

    label: str  # the label carrying the uniqueness constraint (e.g. "PySymbol", "PyModule")
    key_prop: str  # name of the key property: "signature" | "file_key" | "name" | "id"
    value: str  # value of the key property that identifies the node
45
+
46
+
47
@dataclass
class NodeRow:
    """One node to be written: its labels, identifying key and properties."""

    labels: List[str]  # labels[0] is the constrained MERGE label; the rest are SET as extra labels
    key_prop: str  # name of the property the node is keyed on
    value: str  # value of that key property
    props: Props  # remaining properties of the node
53
+
54
+
55
@dataclass
class EdgeRow:
    """One relationship to be written between two addressed nodes."""

    type: str  # relationship type, e.g. "PY_CALLS"
    from_ref: NodeRef  # how to address the source node
    to_ref: NodeRef  # how to address the target node
    props: Props  # properties carried by the relationship
61
+
62
+
63
@dataclass
class GraphRows:
    """The node and edge rows handed to the writers; both default to empty lists."""

    nodes: List[NodeRow] = field(default_factory=list)  # fresh list per instance
    edges: List[EdgeRow] = field(default_factory=list)  # fresh list per instance
67
+
68
+
69
def prune(p: Dict[str, Optional[Prop]]) -> Props:
    """Return a copy of ``p`` without its ``None`` entries.

    In Neo4j a null property means "absent", so one is never stored. Empty
    lists survive (a present-but-empty array is legal).
    """
    kept: Props = {}
    for key, value in p.items():
        if value is None:
            continue
        kept[key] = value
    return kept
73
+
74
+
75
class RowBuilder:
    """Accumulates nodes/edges with ``MERGE`` semantics in memory, so the same
    node touched many times (a hot external symbol, a canonical decorator)
    collapses to one row, and cross-reference edges to a target that never
    materialized are dropped (the "edge-only-when-resolved" rule).

    Deferred edges are resolved against ``(label, value)`` pairs rather than
    bare values, so a non-symbol node whose key happens to equal a signature
    (an attribute id, a package or decorator name) cannot make an edge to a
    missing ``:PySymbol`` look resolved.
    """

    def __init__(self) -> None:
        self._nodes: Dict[str, NodeRow] = {}  # key: f"{labels[0]} {value}"
        self._edges: List[EdgeRow] = []
        self._deferred: List[EdgeRow] = []  # edges gated against node existence at finish()
        self._keys: set = set()  # every node value seen, regardless of label
        self._labelled_keys: set = set()  # every (label, value) pair seen

    def node(self, labels: List[str], key_prop: str, value: str, props: Props) -> NodeRef:
        """Upsert a node. Re-seeing the same ``(labels[0], value)`` merges props
        (last write wins) and unions labels — the in-memory analog of
        ``MERGE (n:Label {key}) SET n += props``.

        Args:
            labels: Node labels; ``labels[0]`` is the identity label.
            key_prop: Name of the key property.
            value: Value of the key property.
            props: Properties to set or merge onto the node.

        Returns:
            A reference addressing the node by ``labels[0]`` and ``value``.
        """
        node_id = f"{labels[0]} {value}"
        existing = self._nodes.get(node_id)
        if existing is not None:
            existing.props.update(props)
            for label in labels:
                if label not in existing.labels:
                    existing.labels.append(label)
        else:
            # Copy labels/props so later caller-side mutation cannot leak in.
            self._nodes[node_id] = NodeRow(list(labels), key_prop, value, dict(props))
        self._keys.add(value)
        # Register the value under every label the node carries, so lookups
        # can be label-aware.
        self._labelled_keys.update((label, value) for label in labels)
        return NodeRef(labels[0], key_prop, value)

    def edge(self, type_: str, from_ref: NodeRef, to_ref: NodeRef, props: Optional[Props] = None) -> None:
        """An edge whose endpoints are known to exist (both ends emitted this run)."""
        self._edges.append(EdgeRow(type_, from_ref, to_ref, dict(props or {})))

    def edge_to_symbol(
        self, type_: str, from_ref: NodeRef, target_signature: str, props: Optional[Props] = None
    ) -> None:
        """An edge to a ``:PySymbol`` target that may be external/library code not
        present in the graph. Deferred and kept only if the target signature was
        actually emitted as a ``:PySymbol`` node — so PY_EXTENDS / PY_RESOLVES_TO
        never dangle (the string fallback lives on the source node's props)."""
        self._deferred.append(
            EdgeRow(
                type_,
                from_ref,
                NodeRef("PySymbol", "signature", target_signature),
                dict(props or {}),
            )
        )

    def has_key(self, value: str, label: Optional[str] = None) -> bool:
        """Report whether a node with key ``value`` has been emitted.

        Args:
            value: The key value to look for.
            label: When given, only nodes carrying this label count; when
                omitted, any node with that value matches.
        """
        if label is None:
            return value in self._keys
        return (label, value) in self._labelled_keys

    def finish(self) -> GraphRows:
        """Resolve deferred edges and return the sorted rows.

        Builder state is left untouched, so calling this more than once does
        not duplicate the resolved edges.
        """
        resolved = [
            e
            for e in self._deferred
            if (e.to_ref.label, e.to_ref.value) in self._labelled_keys
        ]
        nodes = sorted(self._nodes.values(), key=lambda n: f"{n.labels[0]} {n.value}")
        edges = sorted(
            self._edges + resolved,
            key=lambda e: f"{e.type} {e.from_ref.value} {e.to_ref.value}",
        )
        return GraphRows(nodes, edges)
134
+
135
+
136
+ # ----------------------------------------------------------------------------------------------
137
+ # Cypher literal rendering (used by the snapshot writer; the bolt writer passes params instead).
138
+ # ----------------------------------------------------------------------------------------------
139
+
140
+
141
def cypher_value(v: Prop) -> str:
    """Render a property value as a Cypher literal.

    Booleans become ``true``/``false``, strings are quoted and escaped, ints
    and finite floats use their Python text form, and lists are rendered
    element by element. NaN, infinities and unsupported types become ``null``.
    """
    # bool must be tested before int: bool is a subclass of int.
    if isinstance(v, bool):
        return "true" if v else "false"
    if isinstance(v, str):
        return _cypher_string(v)
    if isinstance(v, float):
        is_nan = v != v
        is_infinite = v in (float("inf"), float("-inf"))
        if is_nan or is_infinite:
            return "null"
        return repr(v)
    if isinstance(v, int):
        return str(v)
    if isinstance(v, list):
        rendered = [cypher_value(item) for item in v]
        return "[" + ", ".join(rendered) + "]"
    return "null"
155
+
156
+
157
def cypher_map(props: Props) -> str:
    """Render a props map as a Cypher map literal: ``{key: value, ...}``.

    Keys are emitted verbatim, so they must already be valid identifiers.
    """
    entries = []
    for key, value in props.items():
        entries.append(f"{key}: {cypher_value(value)}")
    return "{" + ", ".join(entries) + "}"
161
+
162
+
163
+ def _cypher_string(s: str) -> str:
164
+ escaped = (
165
+ s.replace("\\", "\\\\")
166
+ .replace("'", "\\'")
167
+ .replace("\n", "\\n")
168
+ .replace("\r", "\\r")
169
+ .replace("\t", "\\t")
170
+ )
171
+ return f"'{escaped}'"
172
+
173
+
174
def chunk(items: list, size: int) -> list:
    """Split a list into chunks of at most ``size`` (UNWIND batch sizing).

    Args:
        items: The list to split; an empty list yields no chunks.
        size: Maximum chunk length; must be at least 1.

    Returns:
        A list of consecutive slices of ``items``, in order.

    Raises:
        ValueError: If ``size`` is less than 1.
    """
    # A negative step would make range() empty and silently drop every item,
    # so reject non-positive sizes up front (zero already raised ValueError).
    if size < 1:
        raise ValueError(f"chunk size must be a positive integer, got {size!r}")
    return [items[i : i + size] for i in range(0, len(items), size)]
@@ -0,0 +1,39 @@
1
+ ################################################################################
2
+ # Copyright IBM Corporation 2025
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ ################################################################################
16
+
17
+ """The Cypher DDL — uniqueness constraints and indexes — shared by both writers.
18
+ Run BEFORE any load so MERGE uses an index seek (not a label scan) and the
19
+ identity invariant is enforced by the database. Every statement is idempotent
20
+ (``IF NOT EXISTS``).
21
+ """
22
+ from typing import List
23
+
24
# One uniqueness constraint per node label, on that label's key property.
# Each statement carries IF NOT EXISTS.
CONSTRAINTS: List[str] = [
    "CREATE CONSTRAINT py_symbol_sig IF NOT EXISTS FOR (s:PySymbol) REQUIRE s.signature IS UNIQUE",
    "CREATE CONSTRAINT py_app_name IF NOT EXISTS FOR (a:PyApplication) REQUIRE a.name IS UNIQUE",
    "CREATE CONSTRAINT py_module_key IF NOT EXISTS FOR (m:PyModule) REQUIRE m.file_key IS UNIQUE",
    "CREATE CONSTRAINT py_package_name IF NOT EXISTS FOR (p:PyPackage) REQUIRE p.name IS UNIQUE",
    "CREATE CONSTRAINT py_decorator_name IF NOT EXISTS FOR (d:PyDecorator) REQUIRE d.name IS UNIQUE",
    "CREATE CONSTRAINT py_callsite_id IF NOT EXISTS FOR (c:PyCallSite) REQUIRE c.id IS UNIQUE",
    "CREATE CONSTRAINT py_attribute_id IF NOT EXISTS FOR (a:PyAttribute) REQUIRE a.id IS UNIQUE",
    "CREATE CONSTRAINT py_variable_id IF NOT EXISTS FOR (v:PyVariable) REQUIRE v.id IS UNIQUE",
]
34
+
35
# Secondary indexes: name lookups on callables and classes, plus a full-text
# index over callable code and docstrings. Each statement carries IF NOT EXISTS.
INDEXES: List[str] = [
    "CREATE INDEX py_callable_name IF NOT EXISTS FOR (c:PyCallable) ON (c.name)",
    "CREATE INDEX py_class_name IF NOT EXISTS FOR (c:PyClass) ON (c.name)",
    "CREATE FULLTEXT INDEX py_code_fts IF NOT EXISTS FOR (c:PyCallable) ON EACH [c.code, c.docstring]",
]
@@ -1,3 +1,3 @@
1
- from .options import AnalysisOptions
1
+ from .options import AnalysisOptions, EmitTarget, OutputFormat
2
2
 
3
- __all__ = ["AnalysisOptions"]
3
+ __all__ = ["AnalysisOptions", "EmitTarget", "OutputFormat"]
@@ -9,12 +9,31 @@ class OutputFormat(str, Enum):
9
9
  MSGPACK = "msgpack"
10
10
 
11
11
 
12
class EmitTarget(str, Enum):
    """Output target selected by ``--emit``.

    - ``json``   : the canonical ``analysis.json`` (symbol table + call graph).
    - ``neo4j``  : project the analysis into a labeled property graph — a
      ``graph.cypher`` snapshot, or a live Bolt push with ``--neo4j-uri``.
    - ``schema`` : the machine-readable, version-stamped Neo4j schema contract.
    """

    # Members also subclass ``str``, so each compares equal to its literal value.
    JSON = "json"
    NEO4J = "neo4j"
    SCHEMA = "schema"
24
+
25
+
12
26
  @dataclass
13
27
  class AnalysisOptions:
14
28
  input: Path
15
29
  output: Optional[Path] = None
16
30
  format: OutputFormat = OutputFormat.JSON
17
- analysis_level: int = 1
31
+ emit: EmitTarget = EmitTarget.JSON
32
+ app_name: Optional[str] = None
33
+ neo4j_uri: Optional[str] = None
34
+ neo4j_user: str = "neo4j"
35
+ neo4j_password: str = "neo4j"
36
+ neo4j_database: Optional[str] = None
18
37
  using_codeql: bool = False
19
38
  using_ray: bool = False
20
39
  rebuild_analysis: bool = False
@@ -339,9 +339,29 @@ class PyModule(BaseModel):
339
339
  file_size: Optional[int] = None
340
340
 
341
341
 
342
@builder
@msgpk
class PyCallEdge(BaseModel):
    """Identity-only call-graph edge with weight.

    Mirrors Java's ``CallDependency``. ``source`` and ``target`` are
    ``PyCallable.signature`` strings — nodes of the graph are the existing
    ``PyCallable`` entries in the symbol table, not a separate vertex type.
    Rich per-call metadata (receiver, arguments, location, ...) lives on
    ``PyCallsite`` inside the source ``PyCallable.call_sites``.
    """

    source: str  # caller's PyCallable.signature
    target: str  # callee's PyCallable.signature
    type: Literal["CALL_DEP"] = "CALL_DEP"  # fixed discriminator; the only allowed value
    weight: int = 1  # defaults to 1
    provenance: List[Literal["jedi", "codeql", "joern"]] = []  # restricted to these three names; empty by default
359
+
360
+
342
361
  @builder
343
362
  @msgpk
344
363
  class PyApplication(BaseModel):
345
364
  """Represents a Python application."""
346
365
 
347
366
  symbol_table: Dict[str, PyModule]
367
+ call_graph: List[PyCallEdge] = []