doxastica 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
doxastica/__init__.py ADDED
@@ -0,0 +1,37 @@
1
+ """Graph-native AGM belief-revision core (Kumiho M0)."""
2
+
3
+ from doxastica.backends.memory import InMemoryBackend
4
+ from doxastica.core import MemoryCore
5
+ from doxastica.errors import (
6
+ BackendDependencyError,
7
+ DoxasticaError,
8
+ WorldScopeContractionError,
9
+ )
10
+ from doxastica.models import (
11
+ WORLD_SCOPE_ID,
12
+ Belief,
13
+ BeliefFilter,
14
+ BeliefState,
15
+ EdgeType,
16
+ ImpactResult,
17
+ Scope,
18
+ Status,
19
+ )
20
+ from doxastica.protocol import BeliefStore
21
+
22
+ __all__ = [
23
+ "BackendDependencyError",
24
+ "Belief",
25
+ "BeliefFilter",
26
+ "BeliefState",
27
+ "BeliefStore",
28
+ "DoxasticaError",
29
+ "EdgeType",
30
+ "ImpactResult",
31
+ "InMemoryBackend",
32
+ "MemoryCore",
33
+ "Scope",
34
+ "Status",
35
+ "WORLD_SCOPE_ID",
36
+ "WorldScopeContractionError",
37
+ ]
@@ -0,0 +1,17 @@
1
+ """
2
+ Backend adapters behind the ``BackendPort`` seam (Phase 2).
3
+
4
+ This subpackage holds the concrete implementations of the internal LPG-primitive
5
+ :class:`doxastica.ports.BackendPort`. Per the driver-isolation discipline (D-02), this
6
+ ``__init__`` re-exports ONLY the zero-dependency :class:`InMemoryBackend` — it MUST NEVER
7
+ ``from .ladybug import ...``. Doing so would chain-load the optional ``ladybug`` driver on
8
+ every ``import doxastica.backends``, defeating the isolation the seam exists to provide.
9
+ The driver-backed adapter is imported directly by the caller from
10
+ ``doxastica.backends.ladybug`` (``LadybugBackend.open(...)`` / ``.from_connection(...)``)
11
+ and injected into ``MemoryCore`` — pure DI, so ``MemoryCore`` itself stays driver-blind and
12
+ never names or imports a backend.
13
+ """
14
+
15
+ from doxastica.backends.memory import InMemoryBackend
16
+
17
+ __all__ = ["InMemoryBackend"]
@@ -0,0 +1,565 @@
1
+ """
2
+ The LadybugDB ``BackendPort`` reference adapter (BACK-02 / CONN-01..03 / D-02 / D-04).
3
+
4
+ This is the SINGLE module that imports the ``ladybug`` driver (D-02). The import is guarded
5
+ so that, when the optional driver is absent, importing this module raises a friendly
6
+ :class:`doxastica.errors.BackendDependencyError` (``pip install doxastica[ladybug]``) rather
7
+ than a raw ``ModuleNotFoundError``. Every other module in the package stays driver-blind;
8
+ a consumer reaches this adapter only by importing it explicitly from
9
+ ``doxastica.backends.ladybug`` (it is NOT re-exported from the package root).
10
+
11
+ What this adapter realizes
12
+ --------------------------
13
+ - **Flexible connection ownership (CONN-01 / R19):** ``__init__`` records an ``owns_conn``
14
+ flag. A connection this backend opened itself (via :meth:`LadybugBackend.open`) is owned and
15
+ closed on :meth:`close`; an INJECTED connection (:meth:`LadybugBackend.from_connection`) is a
16
+ tenant's handle and is NEVER closed.
17
+ - **Namespaced, idempotent schema bootstrap (CONN-02 / CONN-03 / D-04):** the backend is the
18
+ sole writer of its ``{ns}_*`` closed subgraph. Bootstrap runs ``CREATE NODE/REL TABLE IF
19
+ NOT EXISTS`` on construction; re-running against a fresh OR shared injected DB is a safe
20
+ no-op.
21
+ - **Namespace-identifier safety (D-04, mitigates T-02-01):** DDL table identifiers cannot be
22
+ ``$param``-bound, so the namespace MUST be string-interpolated. It is therefore validated
23
+ against ``^[A-Za-z_][A-Za-z0-9_]*$`` BEFORE any interpolation — the one sanctioned
24
+ interpolation guard. All belief DATA flows through ``$param`` binds.
25
+ - **The five LPG primitives in Cypher (BACK-02):** ``upsert_node`` (``MERGE ... SET``),
26
+ ``add_edge`` (``MERGE`` edge), ``match_nodes`` (AND-exact ``$param`` predicates),
27
+ ``traverse`` (the SC4 resolution — see below), and ``unit_of_work``
28
+ (``BEGIN``/``COMMIT``/``ROLLBACK``). Each returns raw ``list[dict]`` below the model layer
29
+ (D-04); ``MemoryCore`` hydrates frozen pydantic models above the port.
30
+
31
+ The SC4 confirmation (port unchanged)
32
+ -------------------------------------
33
+ The live Phase-2 spike (``02-RESEARCH.md``) confirmed the Phase-1 ``BackendPort`` survives the
34
+ real ladybug API unchanged. The two adapter-internal details that make this work:
35
+
36
+ 1. Variable-length patterns cap the upper hop bound at 30 by default; ``traverse`` issues
37
+ ``CALL var_length_extend_max_depth=<bound>`` to raise that ceiling. ``max_depth=None``
38
+ ("full closure") compiles to the literal :data:`_DEPTH_CEILING`, not a truly-infinite walk.
39
+ 2. ``$param`` is rejected inside the var-length hop range (a parser error), so the bound is
40
+ interpolated as a validated ``int``. ``ACYCLIC`` (node-distinct) is the honest,
41
+ cycle-safe expression of the port's "de-duplicated reachable set". The ``(reached,
42
+ frontier)`` shape is computed in ONE query via ``min(length(p))`` + an ``EXISTS{}`` subquery.
43
+
44
+ These are documented adapter details, NOT port-signature changes — "port unchanged, SC4
45
+ confirmed".
46
+
47
+ This module holds NO AGM operation bodies (``revise`` / ``expand`` / ``contract`` are
48
+ Phases 3-6). ``HAS_REVISION`` is bootstrapped here; ``CURRENT_STATE`` is derived, never stored.
49
+ """
50
+
51
+ from __future__ import annotations
52
+
53
+ import contextlib
54
+ import re
55
+ from typing import TYPE_CHECKING, Any, Literal
56
+
57
+ from doxastica import errors
58
+
59
+ try:
60
+ import ladybug as lb
61
+ except ImportError as exc: # pragma: no cover - exercised in the base-install CI job
62
+ raise errors.BackendDependencyError(
63
+ "The ladybug backend requires the 'ladybug' package. "
64
+ "Install it with: pip install doxastica[ladybug]"
65
+ ) from exc
66
+
67
+ if TYPE_CHECKING:
68
+ from collections.abc import Generator
69
+ from uuid import UUID
70
+
71
+ from doxastica.models import EdgeType
72
+
73
+
74
# The single sanctioned identifier guard (D-04): DDL table names cannot be $param-bound, so the
# namespace is string-interpolated and therefore MUST be validated before any interpolation.
# NOTE: in Python's `re`, `$` also matches just before a trailing "\n", so a check built on
# `.match()` accepts "name\n"; `.fullmatch()` is the strict whole-string form.
_NS_RE: re.Pattern[str] = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$")

# The literal hop bound compiled in for max_depth=None ("full closure"). This is a hard TRUNCATION
# limit, NOT a true infinity: ladybug var-length patterns need a concrete upper bound, so "full
# closure" compiles to `*1.._DEPTH_CEILING`. No real belief graph approaches ten thousand hops deep,
# so in practice the walk closes before the limit (A1, RESEARCH). If a walk ever DOES reach this
# depth, `traverse` raises rather than passing off a truncated set as a complete closure (WR-03,
# DATA-04 — never silently under-report).
#
# SIZE RATIONALE (ladybug-cycle-traverse-oom): the bound is interpolated into the var-length pattern
# `*1..N` AND lifted via `var_length_extend_max_depth=N`. Ladybug's recursive-join operator
# PRE-ALLOCATES buffer-pool memory LINEARLY in N (~18 KB per hop), independent of the actual graph
# size — so the original 1_000_000 reserved ~18 GB and OOM'd the buffer pool on ANY unbounded walk,
# even a 3-node cycle ("buffer pool is full and no memory could be freed"). This stayed latent
# because the [ladybug] extra is not synced in the dev env (those tests skip locally); CI is the
# first real execution. 10_000 caps the pre-allocation at ~290 MB peak (safe on any default,
# GB-scale buffer pool) while remaining astronomically beyond any plausible belief-revision chain —
# so the DATA-04 truncation-raise below is still a never-fires-in-practice safety net, not a limit a
# real closure approaches. Measured: bound 100->~98 MB, 1k->~124 MB, 10k->~290 MB, 50k+ -> OOM.
_DEPTH_CEILING: int = 10_000

# Ladybug's default var-length upper-hop cap, used ONLY as the fallback when the live cap cannot be
# read (it always can on 0.17.1). `traverse` raises the cap only when the requested bound exceeds
# the connection's CURRENT value, and restores that saved value afterward (WR-01) — so a shallow
# walk never mutates the connection's global config and even a deep walk leaves an INJECTED tenant
# connection (R19) with the EXACT ceiling it started with, default or not (WR-05).
_DEFAULT_HOP_CAP: int = 30

# Label -> PRIMARY KEY column. The SINGLE source of truth for each node table's PK: the
# bootstrap DDL (``_bootstrap_schema``) and the ``upsert_node`` MERGE key both read from here,
# so a schema change cannot silently diverge from the upsert key (CR-01). The port keys upsert
# on ``node_id`` for ANY label, so the MERGE key is always this PK bound to ``$id`` (matching
# the in-memory oracle, which keys on ``node_id`` regardless of label — D-05 parity).
_PK_BY_LABEL: dict[str, str] = {"Scope": "scope_id", "Belief": "belief_id", "BeliefState": "state_id"}

# Edge-type string -> (FROM label, TO label). The closed map (PATTERNS flag 1) that lets
# ``add_edge`` resolve per-edge-type endpoint labels + PK columns instead of hardcoding both
# endpoints to ``BeliefState``. ``HAS_REVISION`` is the hub-form structural edge (D-07): its FROM
# endpoint is a ``Belief`` (keyed ``belief_id``), NOT a ``BeliefState``. The three consumer-facing
# ``EdgeType`` members stay ``BeliefState -> BeliefState``. Keys are RAW STRINGS — ``HAS_REVISION``
# arrives as a string, never an ``EdgeType`` enum member (D-07), and ``str(EdgeType.X) == "X"``.
# ``traverse`` also uses the key set of this map as its allow-list of walkable edge types.
_EDGE_ENDPOINTS: dict[str, tuple[str, str]] = {
    "HAS_REVISION": ("Belief", "BeliefState"),
    "SUPERSEDES": ("BeliefState", "BeliefState"),
    "DEPENDS_ON": ("BeliefState", "BeliefState"),
    "DERIVED_FROM": ("BeliefState", "BeliefState"),
}
123
+
124
+
125
def _validate_namespace(ns: str) -> None:
    """
    Reject a namespace that is not a safe bare identifier (D-04, mitigates T-02-01).

    The namespace is string-interpolated into table identifiers, so the WHOLE string must be
    ``[A-Za-z_][A-Za-z0-9_]*`` — nothing before, nothing after.

    Raises:
        ValueError: if ``ns`` is not a bare identifier (an empty string, a leading digit, any
            punctuation/whitespace, or a trailing newline).
    """
    # `fullmatch`, not `match`: the pattern's `$` also matches just BEFORE a trailing "\n", so
    # `match` would accept "dx\n" and let the newline reach the interpolated identifier.
    if _NS_RE.fullmatch(ns) is None:
        raise ValueError(f"namespace must match {_NS_RE.pattern!r}; got {ns!r}")
129
+
130
+
131
def _validate_identifier(name: str) -> None:
    """
    Reject a property/column name that is not a safe bare identifier (WR-04).

    Column identifiers cannot be ``$param``-bound, so ``upsert_node`` / ``match_nodes`` interpolate
    prop/where KEYS directly into ``n.{key}``. Every such key is validated against the SAME bare-
    identifier regex the namespace uses, so the column-identifier surface is part of the data path's
    by-construction injection-proofing — not just the values.

    Raises:
        ValueError: if ``name`` is not a bare identifier (including a trailing newline).
    """
    # `fullmatch`, not `match`: with `match`, the pattern's `$` would still accept "key\n"
    # because `$` matches just before a trailing newline.
    if _NS_RE.fullmatch(name) is None:
        raise ValueError(f"property name must match {_NS_RE.pattern!r}; got {name!r}")
142
+
143
+
144
+ class LadybugBackend:
145
+ """
146
+ LadybugDB reference ``BackendPort`` adapter — the single guarded driver boundary (D-02).
147
+
148
+ Satisfies the port structurally (implements the five LPG primitives without inheriting).
149
+ Distinguishes an owned connection from an injected one (CONN-01 / R19); bootstraps a
150
+ namespaced, idempotent schema (CONN-02 / CONN-03); validates the namespace before the one
151
+ sanctioned interpolation (D-04). Belief data always flows through ``$param`` binds.
152
+ """
153
+
154
+ def __init__(
155
+ self,
156
+ conn: lb.Connection,
157
+ *,
158
+ namespace: str,
159
+ owns_conn: bool,
160
+ ) -> None:
161
+ """
162
+ Wrap a ladybug ``Connection`` under ``namespace`` and bootstrap its schema.
163
+
164
+ ``owns_conn`` records whether this backend opened the connection (and may close it) or
165
+ is a tenant of an injected one it must never close (R19). The namespace is validated
166
+ BEFORE any DDL interpolation (D-04), then the idempotent bootstrap runs (CONN-03).
167
+ """
168
+ _validate_namespace(namespace)
169
+ self._conn = conn
170
+ self._ns = namespace
171
+ self._owns_conn = owns_conn
172
+ self._bootstrap_schema()
173
+
174
+ @classmethod
175
+ def open(cls, path: str, *, namespace: str = "dx") -> LadybugBackend:
176
+ """
177
+ Open a self-managing backend over ``path`` (or ``":memory:"`` / ``""`` for in-memory).
178
+
179
+ Constructs its own ``lb.Database`` + ``lb.Connection`` and takes ownership
180
+ (``owns_conn=True``) — :meth:`close` will close the connection. The sibling
181
+ :meth:`from_connection` wraps a tenant-supplied connection instead. A ``:memory:`` /
182
+ ``""`` path yields a fresh in-memory DB.
183
+ """
184
+ db_path = None if path in (":memory:", "") else path
185
+ db = lb.Database(db_path) if db_path is not None else lb.Database()
186
+ conn = lb.Connection(db)
187
+ return cls(conn, namespace=namespace, owns_conn=True)
188
+
189
+ @classmethod
190
+ def from_connection(cls, conn: lb.Connection, *, namespace: str = "dx") -> LadybugBackend:
191
+ """
192
+ Wrap an INJECTED tenant connection the backend must NEVER close (CONN-01 / R19).
193
+
194
+ The sibling of :meth:`open`: where ``open`` creates and OWNS its connection
195
+ (``owns_conn=True``, closed on :meth:`close`), this wraps a caller-supplied
196
+ ``lb.Connection`` with ``owns_conn=False`` — the core is a tenant and must not close
197
+ someone else's handle (R19). Schema bootstrap still runs idempotently (``CREATE ... IF
198
+ NOT EXISTS``) against the injected (possibly fresh OR shared) DB, so wiring the backend
199
+ onto a leased connection is a safe no-op when the namespaced subgraph already exists.
200
+ """
201
+ return cls(conn, namespace=namespace, owns_conn=False)
202
+
203
+ def close(self) -> None:
204
+ """Close the connection ONLY if owned (R19: never close an injected handle)."""
205
+ if self._owns_conn:
206
+ self._conn.close() # idempotent — double-close is a no-op (verified live)
207
+
208
+ def _bootstrap_schema(self) -> None:
209
+ """
210
+ Idempotently create the namespaced node/rel tables (CONN-02 / CONN-03).
211
+
212
+ ``CREATE ... IF NOT EXISTS`` is a safe no-op when re-run against a fresh OR shared
213
+ injected DB. ``{self._ns}`` is the ONLY interpolated identifier (validated in
214
+ ``__init__``); everything else is structural DDL. ``state_id`` is a STRING PK holding
215
+ the UUID7 text form (the core mints + stringifies; CONN-03 uniqueness via PRIMARY KEY).
216
+ Phase 3 adds ONLY the hub-form ``HAS_REVISION`` REL table (``FROM Belief TO BeliefState``,
217
+ D-07); no ``CURRENT_STATE`` table is created — current is DERIVED, not a stored edge (D-01).
218
+ """
219
+ ns = self._ns
220
+ # PK columns are read from `_PK_BY_LABEL` so the DDL and `upsert_node`'s MERGE key
221
+ # cannot diverge (CR-01: one source of truth for each table's primary key).
222
+ self._exec(
223
+ f"CREATE NODE TABLE IF NOT EXISTS {ns}_Scope"
224
+ f"({_PK_BY_LABEL['Scope']} STRING, is_world BOOLEAN, "
225
+ f"PRIMARY KEY({_PK_BY_LABEL['Scope']}))"
226
+ )
227
+ self._exec(
228
+ f"CREATE NODE TABLE IF NOT EXISTS {ns}_Belief"
229
+ f"({_PK_BY_LABEL['Belief']} STRING, PRIMARY KEY({_PK_BY_LABEL['Belief']}))"
230
+ )
231
+ self._exec(
232
+ f"CREATE NODE TABLE IF NOT EXISTS {ns}_BeliefState"
233
+ f"({_PK_BY_LABEL['BeliefState']} STRING, belief_id STRING, scope_id STRING, "
234
+ f"source_event_id STRING, value STRING, status STRING, "
235
+ f"PRIMARY KEY({_PK_BY_LABEL['BeliefState']}))"
236
+ )
237
+ for edge_type in ("SUPERSEDES", "DEPENDS_ON", "DERIVED_FROM"):
238
+ self._exec(
239
+ f"CREATE REL TABLE IF NOT EXISTS {ns}_{edge_type}"
240
+ f"(FROM {ns}_BeliefState TO {ns}_BeliefState)"
241
+ )
242
+ # The hub-form HAS_REVISION structural edge (D-07): FROM is a Belief (keyed belief_id),
243
+ # NOT a BeliefState — so it is its own statement, not part of the BeliefState->BeliefState
244
+ # loop above. No CURRENT_STATE table (D-01: current is derived, not a stored edge).
245
+ self._exec(
246
+ f"CREATE REL TABLE IF NOT EXISTS {ns}_HAS_REVISION"
247
+ f"(FROM {ns}_Belief TO {ns}_BeliefState)"
248
+ )
249
+
250
+ def upsert_node(
251
+ self,
252
+ label: str,
253
+ node_id: UUID | str,
254
+ props: dict[str, Any],
255
+ ) -> None:
256
+ """
257
+ Insert-or-update a node keyed by ``node_id``; idempotent (BACK-02).
258
+
259
+ Compiles to ``MERGE (n:{ns}_{label} {pk:$id}) SET ...`` — ``MERGE`` (NOT ``CREATE``)
260
+ is idempotent by construction (verified: double-MERGE = 1 node). The MERGE key column is
261
+ the label's PRIMARY KEY, derived from ``_PK_BY_LABEL`` (CR-01) so ``Scope``/``Belief``
262
+ nodes key on their real PK (``scope_id``/``belief_id``), not a hardcoded ``state_id``.
263
+ The PK column is EXCLUDED from the SET loop (WR-01): ladybug rejects re-SETting the merge
264
+ key as an ordinary property, so a caller passing the PK in ``props`` would otherwise raise
265
+ on re-upsert. The node id and every prop value flow through ``$param`` binds (T-02-02);
266
+ only the validated namespace and the label are interpolated (a Cypher label cannot be
267
+ ``$param``-bound).
268
+ """
269
+ pk = _PK_BY_LABEL[label]
270
+ labelled = f"{self._ns}_{label}"
271
+ params: dict[str, Any] = {"id": str(node_id)}
272
+ set_clauses: list[str] = []
273
+ for i, (key, value) in enumerate(props.items()):
274
+ if key == pk:
275
+ continue # never SET the PK — it is the MERGE key (ladybug rejects re-SET).
276
+ # WR-04: prop KEYS are interpolated into `n.{key}` (column identifiers cannot be
277
+ # $param-bound), so each must be a safe bare identifier — the same guard the namespace
278
+ # uses. Values are still $param-bound; this defends the column-identifier surface so the
279
+ # data path stays injection-proof even for a future key-splatting caller.
280
+ _validate_identifier(key)
281
+ pname = f"p{i}"
282
+ params[pname] = value
283
+ set_clauses.append(f"n.{key} = ${pname}")
284
+ cypher = f"MERGE (n:{labelled} {{{pk}: $id}})"
285
+ if set_clauses:
286
+ cypher += " SET " + ", ".join(set_clauses)
287
+ self._exec(cypher, params)
288
+
289
+ def add_edge(
290
+ self,
291
+ edge_type: EdgeType | str,
292
+ from_id: UUID | str,
293
+ to_id: UUID | str,
294
+ props: dict[str, Any] | None = None,
295
+ ) -> None:
296
+ """
297
+ Add a typed directed edge; idempotent — a repeated edge yields exactly one (BACK-02).
298
+
299
+ Matches both endpoints then ``MERGE (a)-[:{ns}_{edge_type}]->(b)`` (verified:
300
+ double-MERGE = 1 edge; double-CREATE = 2). The endpoint LABELS + PK columns are resolved
301
+ per edge type from ``_EDGE_ENDPOINTS`` (+ ``_PK_BY_LABEL``), NOT hardcoded to
302
+ ``BeliefState``/``state_id`` — so the hub-form ``HAS_REVISION`` matches its FROM endpoint
303
+ as a ``Belief`` (keyed ``belief_id``) while the structural family stays
304
+ ``BeliefState``->``BeliefState`` (keyed ``state_id``). ``HAS_REVISION`` arrives as a raw
305
+ string, never an ``EdgeType`` member (D-07). Endpoint ids are ``$param`` binds; only the
306
+ validated namespace + fixed endpoint labels + edge-type label are interpolated.
307
+
308
+ Edge properties are NOT yet implemented (no Phase-3 edge carries any). ``props`` stays in
309
+ the signature for port parity, but a non-empty ``props`` is REJECTED with
310
+ ``NotImplementedError`` rather than silently dropped (IN-01) — a silent no-op would mask a
311
+ future consumer-facing edge that expects its properties stored.
312
+ """
313
+ if props:
314
+ raise NotImplementedError(
315
+ "add_edge does not yet store edge properties; got non-empty props"
316
+ )
317
+ from_label, to_label = _EDGE_ENDPOINTS[str(edge_type)]
318
+ rel = f"{self._ns}_{edge_type}"
319
+ a_node = f"{self._ns}_{from_label}"
320
+ b_node = f"{self._ns}_{to_label}"
321
+ a_pk = _PK_BY_LABEL[from_label]
322
+ b_pk = _PK_BY_LABEL[to_label]
323
+ self._exec(
324
+ f"MATCH (a:{a_node} {{{a_pk}: $from}}), (b:{b_node} {{{b_pk}: $to}}) "
325
+ f"MERGE (a)-[:{rel}]->(b)",
326
+ {"from": str(from_id), "to": str(to_id)},
327
+ )
328
+
329
+ def match_nodes(
330
+ self,
331
+ label: str,
332
+ where: dict[str, Any],
333
+ ) -> list[dict[str, Any]]:
334
+ """
335
+ Return nodes of ``label`` whose props exact-match the AND-combined ``where`` (BACK-02).
336
+
337
+ Empty ``where`` returns all nodes of that label. Each predicate value is a ``$param``
338
+ bind (T-02-02); only the namespace + label are interpolated. Returns raw ``list[dict]``
339
+ below the model layer (D-04).
340
+ """
341
+ labelled = f"{self._ns}_{label}"
342
+ params: dict[str, Any] = {}
343
+ predicates: list[str] = []
344
+ for i, (key, value) in enumerate(where.items()):
345
+ # WR-04: predicate KEYS are interpolated into `n.{key}` (column identifiers cannot be
346
+ # $param-bound), so each must be a safe bare identifier. Values stay $param-bound.
347
+ _validate_identifier(key)
348
+ pname = f"p{i}"
349
+ params[pname] = value
350
+ predicates.append(f"n.{key} = ${pname}")
351
+ cypher = f"MATCH (n:{labelled})"
352
+ if predicates:
353
+ cypher += " WHERE " + " AND ".join(predicates)
354
+ cypher += " RETURN n"
355
+ # `RETURN n` yields a node-object dict per row; unwrap it and strip ladybug's internal
356
+ # `_ID` / `_LABEL` keys so the row is the plain prop map the oracle returns (D-04 parity).
357
+ return [
358
+ {k: v for k, v in row["n"].items() if not k.startswith("_")}
359
+ for row in self._rows(self._exec(cypher, params))
360
+ ]
361
+
362
+ def traverse(
363
+ self,
364
+ start: UUID | str,
365
+ edge_types: frozenset[EdgeType | str],
366
+ max_depth: int | None,
367
+ direction: Literal["in", "out"] = "out",
368
+ ) -> tuple[list[dict[str, Any]], frozenset[UUID | str]]:
369
+ """
370
+ The single graph-walk primitive — the SC4 resolution, in ONE query (BACK-02 / SC4).
371
+
372
+ ``ACYCLIC`` var-length traversal returns the de-duplicated, cycle-safe reachable set
373
+ (excluding ``start`` itself, matching the in-memory oracle). ``max_depth=None`` compiles
374
+ to the literal :data:`_DEPTH_CEILING` (a hard truncation limit, NOT a true infinity) with
375
+ ``var_length_extend_max_depth`` raised to lift the default 30-hop cap (Pitfall 1) — so the
376
+ unbounded frontier is empty in practice. A full-closure walk that actually reaches the
377
+ ceiling RAISES rather than silently reporting a truncated set as complete (WR-03, DATA-04).
378
+ The
379
+ depth bound is a validated ``int`` interpolated into ``*1..N`` (``$param`` is rejected
380
+ there — Pitfall 2); ``$start`` is a ``$param`` bind. The ``(reached, frontier)`` shape is
381
+ computed in one query via ``min(length(p))`` + an ``EXISTS{}`` subquery: a node is on the
382
+ frontier iff its min depth equals the bound AND it has an unexpanded neighbour (parity
383
+ with the oracle, asserted in plan 02-03).
384
+
385
+ ``direction`` (D-05) selects which edges to follow: ``"out"`` (default) walks edges FROM
386
+ ``start`` (the original outgoing query); ``"in"`` walks edges INTO ``start`` (the cascade
387
+ ``get_impact`` needs). It flips the relationship arrow in exactly three places — the main
388
+ var-length query, its ``EXISTS{}`` frontier subquery, and the ``bound==0`` probe — by
389
+ deriving an ``(lhs, rhs)`` arrow pair from the closed ``Literal``. ``direction`` is a
390
+ validated, closed-``Literal`` internal token (like the namespace), NEVER a ``$param``
391
+ position and NEVER caller free-text, so it stays inside the one sanctioned-interpolation
392
+ story; ``$start`` stays ``$param``-bound, ``edge_types`` stays ``_EDGE_ENDPOINTS``-checked,
393
+ ``bound`` stays the runtime-guarded interpolated int. The ``var_length_extend_max_depth``
394
+ cap-raise/restore is direction-AGNOSTIC and wraps BOTH directions identically (Pitfall 4).
395
+ """
396
+ # IN-02: make the port's MAY-raise validation surface real — an out-of-set direction (the
397
+ # Literal is only statically enforced) must not silently fall through to the outgoing walk
398
+ # below; it would also be an unvalidated value steering the arrow interpolation.
399
+ if direction not in ("in", "out"):
400
+ raise ValueError(f"direction must be 'in' or 'out'; got {direction!r}")
401
+ # D-05: derive the reverse/forward arrow pair ONCE from the closed Literal. For "in" the
402
+ # pattern becomes (a)<-[:rels]-(b); for "out" it stays (a)-[:rels]->(b). This is the ONLY
403
+ # direction-dependent interpolation; everything else below is direction-agnostic.
404
+ lhs, rhs = ("<-", "-") if direction == "in" else ("-", "->")
405
+ ns = self._ns
406
+ bound = max_depth if max_depth is not None else _DEPTH_CEILING
407
+ # Runtime guard on the typed int (WR-03): the bound is INTERPOLATED into `*1..N`, never
408
+ # $param-bound, so it is part of the injection-safety story (T-02-03). A real `raise`
409
+ # (not `assert`) keeps the check alive under `python -O`.
410
+ if bound < 0:
411
+ raise ValueError(f"max_depth must be non-negative; got {bound}")
412
+ # WR-03: `edge_types` members are INTERPOLATED into the rel pattern (`[:{rels}* ...]`),
413
+ # never $param-bound — so each must be constrained to the known edge-type set before
414
+ # interpolation, mirroring `add_edge`'s `_EDGE_ENDPOINTS` lookup. An empty `edge_types`
415
+ # would also yield `rels == ""` and a malformed `[:* ...]` pattern, so reject it too.
416
+ # This keeps the rel-pattern interpolation inside the same injection-safety story as the
417
+ # namespace (the one sanctioned interpolation) rather than a second unvalidated surface.
418
+ if not edge_types:
419
+ raise ValueError("traverse requires at least one edge type")
420
+ for et in edge_types:
421
+ if str(et) not in _EDGE_ENDPOINTS:
422
+ raise ValueError(f"unknown edge type for traverse: {et!r}")
423
+ rels = "|".join(f"{ns}_{edge_type}" for edge_type in edge_types)
424
+ node = f"{ns}_BeliefState"
425
+ # WR-02: `max_depth=0` would compile to the degenerate var-length range `*1..0`. Match the
426
+ # in-memory oracle (memory.py:122-124): layer 0 is `start`, every neighbour-edge exceeds the
427
+ # bound, so nothing is reached and `start` is on the frontier iff it has any neighbour edge
428
+ # in the walked direction. FLIP 1: the probe arrow flips with `direction` (an out-edge for
429
+ # "out", an in-edge for "in") so max_depth=0 reports the correct directional frontier.
430
+ if bound == 0:
431
+ has_neighbour_edge = bool(
432
+ self._rows(
433
+ self._exec(
434
+ f"MATCH (a:{node} {{{_PK_BY_LABEL['BeliefState']}: $start}})"
435
+ f"{lhs}[:{rels}]{rhs}() RETURN a LIMIT 1",
436
+ {"start": str(start)},
437
+ )
438
+ )
439
+ )
440
+ frontier_zero: frozenset[UUID | str] = (
441
+ frozenset({str(start)}) if has_neighbour_edge else frozenset()
442
+ )
443
+ return [], frontier_zero
444
+ # WR-01: read the BeliefState PK from `_PK_BY_LABEL` (the SINGLE source of truth) rather
445
+ # than hardcoding the literal `state_id`, matching the max_depth=0 fast-path above and the
446
+ # CR-01 discipline — so a future PK rename in `_PK_BY_LABEL`/DDL stays in lockstep with the
447
+ # main traversal query instead of silently diverging. The interpolated identifier is a
448
+ # fixed internal constant (not caller input), so this stays inside the sanctioned-
449
+ # interpolation story (no untrusted value reaches the Cypher text; `$start` stays bound).
450
+ pk = _PK_BY_LABEL["BeliefState"]
451
+ # FLIP 2 (main var-length pattern) + FLIP 3 (EXISTS frontier subquery): both arrows flip
452
+ # with `direction` via the (lhs, rhs) pair. Flipping only some would leave a direction
453
+ # inconsistency (Pitfall 3) — the frontier probe must match the walk direction.
454
+ # WR-03: keep `d` in the returned rows so a `max_depth=None` (full-closure) walk can be
455
+ # audited for the truncation ceiling below. For a true full closure NO node reaches
456
+ # `_DEPTH_CEILING`; if any does, the closure was silently truncated by the literal cap and
457
+ # we must NOT report it as complete (DATA-04 — never silently under-report).
458
+ cypher = (
459
+ f"MATCH p=(a:{node} {{{pk}: $start}}){lhs}[:{rels}* ACYCLIC 1..{bound}]{rhs}(b:{node}) "
460
+ f"WHERE b.{pk} <> $start "
461
+ f"WITH b, min(length(p)) AS d "
462
+ f"RETURN b.{pk} AS state_id, d, "
463
+ f"(d = {bound} AND EXISTS {{ MATCH (b){lhs}[:{rels}]{rhs}() }}) AS at_frontier"
464
+ )
465
+ # WR-05/WR-01: `var_length_extend_max_depth` is a connection-GLOBAL config. Only raise it
466
+ # when the requested bound exceeds the cap the connection CURRENTLY holds (a shallow walk
467
+ # never touches tenant state). The prior value is READ before lifting and restored verbatim
468
+ # in a `finally`, so an INJECTED tenant connection (R19, owns_conn=False) that deliberately
469
+ # set its own non-default cap (say 100) is left EXACTLY as it was — not reset to the literal
470
+ # default 30 (WR-01). The cap is a per-connection int; reading it is the cheap, correct way
471
+ # to make the port's side effect truly invisible behind the seam.
472
+ prior_cap = self._read_var_length_cap()
473
+ lifted = bound > prior_cap
474
+ if lifted:
475
+ self._exec(f"CALL var_length_extend_max_depth={bound}") # lift the cap for this walk
476
+ try:
477
+ rows = self._rows(self._exec(cypher, {"start": str(start)}))
478
+ finally:
479
+ if lifted:
480
+ # restore the tenant's ORIGINAL cap (not a literal) so the connection is unchanged
481
+ self._exec(f"CALL var_length_extend_max_depth={prior_cap}")
482
+ # WR-03 (DATA-04): `max_depth=None` is "full closure", compiled to the literal
483
+ # `_DEPTH_CEILING` hop cap. That cap is a hard TRUNCATION limit, not a true infinity — a
484
+ # graph deeper than it would otherwise be reported as a complete closure when it is not (the
485
+ # silent under-report DATA-04 exists to prevent). A node whose min depth equals the ceiling
486
+ # means the walk hit that limit, so refuse to pass off a truncated set as a full closure.
487
+ # In practice no real belief graph approaches the ceiling, so this never fires; when it
488
+ # would, the caller gets a loud signal instead of a silently short answer. (A FINITE
489
+ # `max_depth` surfaces truncation through the `at_frontier`/`frontier` channel instead, so
490
+ # this guard is scoped to the unbounded case only.)
491
+ if max_depth is None and any(r["d"] >= _DEPTH_CEILING for r in rows):
492
+ raise RuntimeError(
493
+ "full-closure traverse hit the internal depth ceiling "
494
+ f"({_DEPTH_CEILING}); the cascade exceeds the adapter's unbounded-walk limit and "
495
+ "cannot be reported as a complete closure (pass an explicit max_depth to bound it)"
496
+ )
497
+ reached = [{"state_id": r["state_id"]} for r in rows]
498
+ frontier: frozenset[UUID | str] = frozenset(r["state_id"] for r in rows if r["at_frontier"])
499
+ return reached, frontier
500
+
501
+ @contextlib.contextmanager
502
+ def unit_of_work(self) -> Generator[None]:
503
+ """
504
+ Atomic (all-or-nothing) write scope via ``BEGIN``/``COMMIT``/``ROLLBACK`` (BACK-02 / A2).
505
+
506
+ Issues ``BEGIN TRANSACTION`` on entry; on any exception inside the block it issues
507
+ ``ROLLBACK`` (re-raising), otherwise ``COMMIT`` (serializable WAL — verified: ROLLBACK
508
+ discards the write). Matches the in-memory adapter's logical snapshot/restore semantics.
509
+ """
510
+ self._exec("BEGIN TRANSACTION")
511
+ try:
512
+ yield
513
+ except BaseException:
514
+ self._exec("ROLLBACK")
515
+ raise
516
+ else:
517
+ self._exec("COMMIT")
518
+
519
+ def _read_var_length_cap(self) -> int:
520
+ """
521
+ Read the connection's CURRENT ``var_length_extend_max_depth`` cap (WR-01).
522
+
523
+ ``traverse`` lifts this connection-global setting only when a deep walk needs it, then must
524
+ restore whatever the connection held BEFORE — not a hardcoded default — so an injected
525
+ tenant (R19) that set its own non-default cap is left untouched behind the port. Ladybug
526
+ exposes the live value via ``CALL current_setting('var_length_extend_max_depth') RETURN *``,
527
+ which yields a single row ``{'var_length_extend_max_depth': '<n>'}`` with the value as a
528
+ STRING; we coerce to ``int``. If the setting is ever absent or unreadable (it always exists
529
+ on ladybug 0.17.1, default ``30``), fall back to :data:`_DEFAULT_HOP_CAP` so the restore
530
+ still targets a sane value rather than raising mid-traverse.
531
+ """
532
+ rows = self._rows(
533
+ self._exec("CALL current_setting('var_length_extend_max_depth') RETURN *")
534
+ )
535
+ if rows and "var_length_extend_max_depth" in rows[0]:
536
+ return int(rows[0]["var_length_extend_max_depth"])
537
+ return _DEFAULT_HOP_CAP
538
+
539
+ def _rows(self, result: lb.QueryResult) -> list[dict[str, Any]]:
540
+ """Extract a ``QueryResult`` as raw ``list[dict]`` (the canonical port return, D-04)."""
541
+ # rows_as_dict() guarantees dict rows; get_all() is typed as the wider list|dict union.
542
+ return [dict(row) for row in result.rows_as_dict().get_all()]
543
+
544
+ def _exec(
545
+ self,
546
+ cypher: str,
547
+ parameters: dict[str, Any] | None = None,
548
+ ) -> lb.QueryResult:
549
+ """
550
+ Execute a single Cypher statement and narrow the result to a single ``QueryResult``.
551
+
552
+ ``Connection.execute`` is typed ``QueryResult | list[QueryResult]`` (the list form is
553
+ only for multi-statement scripts, which this adapter never issues). The explicit
554
+ ``isinstance`` narrows the union — the one genuine typing task at the driver boundary
555
+ (Pitfall 4: ladybug ships ``py.typed``, so no missing-type-stub suppression is needed).
556
+ A real ``raise`` (not ``assert``, WR-03) keeps the narrowing alive under ``python -O``;
557
+ otherwise a stripped assert would let a ``list`` leak out and fail with a confusing
558
+ ``AttributeError`` far from here.
559
+ """
560
+ result = self._conn.execute(cypher, parameters=parameters or {})
561
+ if not isinstance(result, lb.QueryResult):
562
+ raise TypeError(
563
+ f"single-statement execute must return one QueryResult; got {type(result)!r}"
564
+ )
565
+ return result