doxastica 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- doxastica/__init__.py +37 -0
- doxastica/backends/__init__.py +17 -0
- doxastica/backends/ladybug.py +565 -0
- doxastica/backends/memory.py +226 -0
- doxastica/core.py +678 -0
- doxastica/errors.py +22 -0
- doxastica/models.py +128 -0
- doxastica/ports.py +130 -0
- doxastica/protocol.py +165 -0
- doxastica/py.typed +0 -0
- doxastica-0.1.0.dist-info/METADATA +70 -0
- doxastica-0.1.0.dist-info/RECORD +14 -0
- doxastica-0.1.0.dist-info/WHEEL +4 -0
- doxastica-0.1.0.dist-info/licenses/LICENSE +21 -0
doxastica/__init__.py
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
"""Graph-native AGM belief-revision core (Kumiho M0)."""
|
|
2
|
+
|
|
3
|
+
from doxastica.backends.memory import InMemoryBackend
|
|
4
|
+
from doxastica.core import MemoryCore
|
|
5
|
+
from doxastica.errors import (
|
|
6
|
+
BackendDependencyError,
|
|
7
|
+
DoxasticaError,
|
|
8
|
+
WorldScopeContractionError,
|
|
9
|
+
)
|
|
10
|
+
from doxastica.models import (
|
|
11
|
+
WORLD_SCOPE_ID,
|
|
12
|
+
Belief,
|
|
13
|
+
BeliefFilter,
|
|
14
|
+
BeliefState,
|
|
15
|
+
EdgeType,
|
|
16
|
+
ImpactResult,
|
|
17
|
+
Scope,
|
|
18
|
+
Status,
|
|
19
|
+
)
|
|
20
|
+
from doxastica.protocol import BeliefStore
|
|
21
|
+
|
|
22
|
+
__all__ = [
|
|
23
|
+
"BackendDependencyError",
|
|
24
|
+
"Belief",
|
|
25
|
+
"BeliefFilter",
|
|
26
|
+
"BeliefState",
|
|
27
|
+
"BeliefStore",
|
|
28
|
+
"DoxasticaError",
|
|
29
|
+
"EdgeType",
|
|
30
|
+
"ImpactResult",
|
|
31
|
+
"InMemoryBackend",
|
|
32
|
+
"MemoryCore",
|
|
33
|
+
"Scope",
|
|
34
|
+
"Status",
|
|
35
|
+
"WORLD_SCOPE_ID",
|
|
36
|
+
"WorldScopeContractionError",
|
|
37
|
+
]
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Backend adapters behind the ``BackendPort`` seam (Phase 2).
|
|
3
|
+
|
|
4
|
+
This subpackage holds the concrete implementations of the internal LPG-primitive
|
|
5
|
+
:class:`doxastica.ports.BackendPort`. Per the driver-isolation discipline (D-02), this
|
|
6
|
+
``__init__`` re-exports ONLY the zero-dependency :class:`InMemoryBackend` — it MUST NEVER
|
|
7
|
+
``from .ladybug import ...``. Doing so would chain-load the optional ``ladybug`` driver on
|
|
8
|
+
every ``import doxastica.backends``, defeating the isolation the seam exists to provide.
|
|
9
|
+
The driver-backed adapter is imported directly by the caller from
|
|
10
|
+
``doxastica.backends.ladybug`` (``LadybugBackend.open(...)`` / ``.from_connection(...)``)
|
|
11
|
+
and injected into ``MemoryCore`` — pure DI, so ``MemoryCore`` itself stays driver-blind and
|
|
12
|
+
never names or imports a backend.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from doxastica.backends.memory import InMemoryBackend
|
|
16
|
+
|
|
17
|
+
__all__ = ["InMemoryBackend"]
|
|
@@ -0,0 +1,565 @@
|
|
|
1
|
+
"""
|
|
2
|
+
The LadybugDB ``BackendPort`` reference adapter (BACK-02 / CONN-01..03 / D-02 / D-04).
|
|
3
|
+
|
|
4
|
+
This is the SINGLE module that imports the ``ladybug`` driver (D-02). The import is guarded
|
|
5
|
+
so that, when the optional driver is absent, importing this module raises a friendly
|
|
6
|
+
:class:`doxastica.errors.BackendDependencyError` (``pip install doxastica[ladybug]``) rather
|
|
7
|
+
than a raw ``ModuleNotFoundError``. Every other module in the package stays driver-blind;
|
|
8
|
+
a consumer reaches this adapter only by importing it explicitly from
|
|
9
|
+
``doxastica.backends.ladybug`` (it is NOT re-exported from the package root).
|
|
10
|
+
|
|
11
|
+
What this adapter realizes
|
|
12
|
+
--------------------------
|
|
13
|
+
- **Flexible connection ownership (CONN-01 / R19):** ``__init__`` records an ``owns_conn``
|
|
14
|
+
flag. A connection this backend opened itself (via :meth:`LadybugBackend.open`) is owned and
|
|
15
|
+
closed on :meth:`close`; an INJECTED connection (:meth:`LadybugBackend.from_connection`) is a
|
|
16
|
+
tenant's handle and is NEVER closed.
|
|
17
|
+
- **Namespaced, idempotent schema bootstrap (CONN-02 / CONN-03 / D-04):** the backend is the
|
|
18
|
+
sole writer of its ``{ns}_*`` closed subgraph. Bootstrap runs ``CREATE NODE/REL TABLE IF
|
|
19
|
+
NOT EXISTS`` on construction; re-running against a fresh OR shared injected DB is a safe
|
|
20
|
+
no-op.
|
|
21
|
+
- **Namespace-identifier safety (D-04, mitigates T-02-01):** DDL table identifiers cannot be
|
|
22
|
+
``$param``-bound, so the namespace MUST be string-interpolated. It is therefore validated
|
|
23
|
+
against ``^[A-Za-z_][A-Za-z0-9_]*$`` BEFORE any interpolation — the one sanctioned
|
|
24
|
+
interpolation guard. All belief DATA flows through ``$param`` binds.
|
|
25
|
+
- **The five LPG primitives in Cypher (BACK-02):** ``upsert_node`` (``MERGE ... SET``),
|
|
26
|
+
``add_edge`` (``MERGE`` edge), ``match_nodes`` (AND-exact ``$param`` predicates),
|
|
27
|
+
``traverse`` (the SC4 resolution — see below), and ``unit_of_work``
|
|
28
|
+
(``BEGIN``/``COMMIT``/``ROLLBACK``). Each returns raw ``list[dict]`` below the model layer
|
|
29
|
+
(D-04); ``MemoryCore`` hydrates frozen pydantic models above the port.
|
|
30
|
+
|
|
31
|
+
The SC4 confirmation (port unchanged)
|
|
32
|
+
-------------------------------------
|
|
33
|
+
The live Phase-2 spike (``02-RESEARCH.md``) confirmed the Phase-1 ``BackendPort`` survives the
|
|
34
|
+
real ladybug API unchanged. The two adapter-internal details that make this work:
|
|
35
|
+
|
|
36
|
+
1. Variable-length patterns cap the upper hop bound at 30 by default; ``traverse`` issues
|
|
37
|
+
``CALL var_length_extend_max_depth=<bound>`` to raise that ceiling. ``max_depth=None``
|
|
38
|
+
("full closure") compiles to the literal :data:`_DEPTH_CEILING`, not a truly-infinite walk.
|
|
39
|
+
2. ``$param`` is rejected inside the var-length hop range (a parser error), so the bound is
|
|
40
|
+
interpolated as a validated ``int``. ``ACYCLIC`` (node-distinct) is the honest,
|
|
41
|
+
cycle-safe expression of the port's "de-duplicated reachable set". The ``(reached,
|
|
42
|
+
frontier)`` shape is computed in ONE query via ``min(length(p))`` + an ``EXISTS{}`` subquery.
|
|
43
|
+
|
|
44
|
+
These are documented adapter details, NOT port-signature changes — "port unchanged, SC4
|
|
45
|
+
confirmed".
|
|
46
|
+
|
|
47
|
+
This module holds NO AGM operation bodies (``revise`` / ``expand`` / ``contract`` are
|
|
48
|
+
Phases 3-6). Only the ``HAS_REVISION`` edge table is created; ``CURRENT_STATE`` is derived, never stored.
|
|
49
|
+
"""
|
|
50
|
+
|
|
51
|
+
from __future__ import annotations
|
|
52
|
+
|
|
53
|
+
import contextlib
|
|
54
|
+
import re
|
|
55
|
+
from typing import TYPE_CHECKING, Any, Literal
|
|
56
|
+
|
|
57
|
+
from doxastica import errors
|
|
58
|
+
|
|
59
|
+
try:
|
|
60
|
+
import ladybug as lb
|
|
61
|
+
except ImportError as exc: # pragma: no cover - exercised in the base-install CI job
|
|
62
|
+
raise errors.BackendDependencyError(
|
|
63
|
+
"The ladybug backend requires the 'ladybug' package. "
|
|
64
|
+
"Install it with: pip install doxastica[ladybug]"
|
|
65
|
+
) from exc
|
|
66
|
+
|
|
67
|
+
if TYPE_CHECKING:
|
|
68
|
+
from collections.abc import Generator
|
|
69
|
+
from uuid import UUID
|
|
70
|
+
|
|
71
|
+
from doxastica.models import EdgeType
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
# The single sanctioned identifier guard (D-04): DDL table names cannot be $param-bound, so the
# namespace is string-interpolated and therefore MUST be validated before any interpolation.
# The same pattern is reused for property/column names that are interpolated into `n.{key}`.
# NOTE(review): when applied with `.match`, the trailing `$` also accepts ONE trailing newline
# ("dx\n" matches); `.fullmatch` does not have that gap.
_NS_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$")

# The literal hop bound compiled in for max_depth=None ("full closure"). This is a hard TRUNCATION
# limit, NOT a true infinity: ladybug var-length patterns need a concrete upper bound, so "full
# closure" compiles to `*1.._DEPTH_CEILING`. No real belief graph approaches ten thousand hops
# deep, so in practice the walk closes before the limit (A1, RESEARCH). If a walk ever DOES reach
# this depth, `traverse` raises rather than passing off a truncated set as a complete closure
# (WR-03, DATA-04 — never silently under-report).
#
# SIZE RATIONALE (ladybug-cycle-traverse-oom): the bound is interpolated into the var-length
# pattern `*1..N` AND lifted via `var_length_extend_max_depth=N`. Ladybug's recursive-join
# operator PRE-ALLOCATES buffer-pool memory LINEARLY in N (~18 KB per hop), independent of the
# actual graph size — so the original 1_000_000 reserved ~18 GB and OOM'd the buffer pool on ANY
# unbounded walk, even a 3-node cycle ("buffer pool is full and no memory could be freed"). This
# stayed latent because the [ladybug] extra is not synced in the dev env (those tests skip
# locally); CI is the first real execution. 10_000 caps the pre-allocation at ~290 MB peak (safe
# on any default, GB-scale buffer pool) while remaining far beyond any plausible belief-revision
# chain — so the DATA-04 truncation-raise is still a never-fires-in-practice safety net, not a
# limit a real closure approaches.
# Measured: bound 100 -> ~98 MB, 1k -> ~124 MB, 10k -> ~290 MB, 50k+ -> OOM.
_DEPTH_CEILING = 10_000

# Ladybug's default var-length upper-hop cap, used ONLY as the fallback when the live cap cannot
# be read (it always can on 0.17.1). `traverse` raises the cap only when the requested bound
# exceeds the connection's CURRENT value, and restores that saved value afterward (WR-01) — so a
# shallow walk never mutates the connection's global config and even a deep walk leaves an
# INJECTED tenant connection (R19) with the EXACT ceiling it started with, default or not (WR-05).
_DEFAULT_HOP_CAP = 30

# Label -> PRIMARY KEY column. The SINGLE source of truth for each node table's PK: the
# bootstrap DDL (``_bootstrap_schema``) and the ``upsert_node`` MERGE key both read from here,
# so a schema change cannot silently diverge from the upsert key (CR-01). The port keys upsert
# on ``node_id`` for ANY label, so the MERGE key is always this PK bound to ``$id`` (matching
# the in-memory oracle, which keys on ``node_id`` regardless of label — D-05 parity).
# Because ``upsert_node`` indexes this dict with its ``label``, it also acts as the closed set of
# labels that method accepts (an unknown label raises ``KeyError``).
_PK_BY_LABEL = {"Scope": "scope_id", "Belief": "belief_id", "BeliefState": "state_id"}

# Edge-type string -> (FROM label, TO label). The closed map (PATTERNS flag 1) that lets
# ``add_edge`` resolve per-edge-type endpoint labels + PK columns instead of hardcoding both
# endpoints to ``BeliefState``. ``HAS_REVISION`` is the hub-form structural edge (D-07): its FROM
# endpoint is a ``Belief`` (keyed ``belief_id``), NOT a ``BeliefState``. The three consumer-facing
# ``EdgeType`` members stay ``BeliefState -> BeliefState``. Keys are RAW STRINGS — ``HAS_REVISION``
# arrives as a string, never an ``EdgeType`` enum member (D-07), and ``str(EdgeType.X) == "X"``.
# ``traverse`` also uses membership in this map to vet edge types before interpolating them.
_EDGE_ENDPOINTS = {
    "HAS_REVISION": ("Belief", "BeliefState"),
    "SUPERSEDES": ("BeliefState", "BeliefState"),
    "DEPENDS_ON": ("BeliefState", "BeliefState"),
    "DERIVED_FROM": ("BeliefState", "BeliefState"),
}
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def _validate_namespace(ns: str) -> None:
    """
    Reject a namespace that is not a safe bare identifier (D-04, mitigates T-02-01).

    The namespace is string-interpolated into DDL/Cypher table names, so it must consist of
    nothing but ``[A-Za-z_][A-Za-z0-9_]*``.

    Raises:
        ValueError: if ``ns`` is not exactly one bare identifier.
    """
    # `fullmatch`, not `match`: with `match` the pattern's trailing `$` also matches just before
    # a final newline, so "dx\n" would slip through the guard and reach the interpolation.
    if _NS_RE.fullmatch(ns) is None:
        raise ValueError(f"namespace must match {_NS_RE.pattern!r}; got {ns!r}")
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def _validate_identifier(name: str) -> None:
    """
    Reject a property/column name that is not a safe bare identifier (WR-04).

    Column identifiers cannot be ``$param``-bound, so ``upsert_node`` / ``match_nodes``
    interpolate prop/where KEYS directly into ``n.{key}``. Every such key is checked against the
    same bare-identifier regex the namespace uses, so the column-identifier surface is covered by
    the injection guard as well as the values.

    Raises:
        ValueError: if ``name`` is not exactly one bare identifier.
    """
    # `fullmatch`, not `match`: with `match` the pattern's trailing `$` also matches just before
    # a final newline, so a key such as "value\n" would pass and be interpolated verbatim.
    if _NS_RE.fullmatch(name) is None:
        raise ValueError(f"property name must match {_NS_RE.pattern!r}; got {name!r}")
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
class LadybugBackend:
|
|
145
|
+
"""
|
|
146
|
+
LadybugDB reference ``BackendPort`` adapter — the single guarded driver boundary (D-02).
|
|
147
|
+
|
|
148
|
+
Satisfies the port structurally (implements the five LPG primitives without inheriting).
|
|
149
|
+
Distinguishes an owned connection from an injected one (CONN-01 / R19); bootstraps a
|
|
150
|
+
namespaced, idempotent schema (CONN-02 / CONN-03); validates the namespace before the one
|
|
151
|
+
sanctioned interpolation (D-04). Belief data always flows through ``$param`` binds.
|
|
152
|
+
"""
|
|
153
|
+
|
|
154
|
+
def __init__(
    self,
    conn: lb.Connection,
    *,
    namespace: str,
    owns_conn: bool,
) -> None:
    """
    Bind this backend to ``conn`` under ``namespace`` and ensure its schema exists.

    Args:
        conn: the ladybug connection every statement is issued on.
        namespace: prefix for all of this backend's tables; checked as a bare identifier
            before anything is interpolated (D-04).
        owns_conn: ``True`` when the backend opened ``conn`` itself and may close it;
            ``False`` for an injected tenant connection that must never be closed (R19).

    Raises:
        ValueError: if ``namespace`` is not a safe bare identifier.
    """
    # Guard first: nothing below may run with an unvalidated namespace.
    _validate_namespace(namespace)
    self._conn, self._ns, self._owns_conn = conn, namespace, owns_conn
    # Idempotent CREATE ... IF NOT EXISTS bootstrap (CONN-03).
    self._bootstrap_schema()
|
|
173
|
+
|
|
174
|
+
@classmethod
def open(cls, path: str, *, namespace: str = "dx") -> LadybugBackend:
    """
    Open a self-managing backend over ``path`` (or ``":memory:"`` / ``""`` for in-memory).

    Constructs its own ``lb.Database`` + ``lb.Connection`` and takes ownership
    (``owns_conn=True``) — :meth:`close` will close the connection. The sibling
    :meth:`from_connection` wraps a tenant-supplied connection instead.

    Args:
        path: database location; ``":memory:"`` or ``""`` yields a fresh in-memory DB.
        namespace: table-name prefix; must be a bare identifier.

    Raises:
        ValueError: if ``namespace`` is not a safe bare identifier. This is raised BEFORE the
            database is opened, so an invalid namespace never opens or creates a database.
    """
    # Fail fast: the constructor would reject a bad namespace anyway, but only after the
    # database and connection had already been created.
    _validate_namespace(namespace)
    in_memory = path in (":memory:", "")
    db = lb.Database() if in_memory else lb.Database(path)
    conn = lb.Connection(db)
    try:
        return cls(conn, namespace=namespace, owns_conn=True)
    except BaseException:
        # The connection is ours and was never handed to a caller; do not leak it when the
        # schema bootstrap fails.
        conn.close()
        raise
|
|
188
|
+
|
|
189
|
+
@classmethod
def from_connection(cls, conn: lb.Connection, *, namespace: str = "dx") -> LadybugBackend:
    """
    Build a backend on top of a caller-supplied connection (CONN-01 / R19).

    Counterpart of :meth:`open`: the connection belongs to the caller, so the backend is
    constructed with ``owns_conn=False`` and :meth:`close` will leave it open. The schema
    bootstrap still runs; its ``CREATE ... IF NOT EXISTS`` statements are a no-op when the
    namespaced tables already exist on the injected database.

    Args:
        conn: the tenant's ladybug connection.
        namespace: table-name prefix; must be a bare identifier.
    """
    tenant_backend = cls(conn, namespace=namespace, owns_conn=False)
    return tenant_backend
|
|
202
|
+
|
|
203
|
+
def close(self) -> None:
|
|
204
|
+
"""Close the connection ONLY if owned (R19: never close an injected handle)."""
|
|
205
|
+
if self._owns_conn:
|
|
206
|
+
self._conn.close() # idempotent — double-close is a no-op (verified live)
|
|
207
|
+
|
|
208
|
+
def _bootstrap_schema(self) -> None:
    """
    Create the namespaced node and relationship tables if they are missing (CONN-02 / CONN-03).

    Seven ``CREATE ... IF NOT EXISTS`` statements are issued in a fixed order: the ``Scope``,
    ``Belief`` and ``BeliefState`` node tables, the three ``BeliefState -> BeliefState``
    relationship tables, and finally the hub-form ``HAS_REVISION`` table
    (``Belief -> BeliefState``, D-07). No ``CURRENT_STATE`` table is created — current state is
    derived, not stored (D-01).

    The only interpolated values are the namespace (validated in ``__init__``) and the primary
    key column names, which come from ``_PK_BY_LABEL`` so the DDL and the ``upsert_node`` MERGE
    key share one source of truth (CR-01). All columns, including the primary keys, are STRING
    except ``Scope.is_world`` (BOOLEAN).
    """
    prefix = self._ns
    scope_pk = _PK_BY_LABEL["Scope"]
    belief_pk = _PK_BY_LABEL["Belief"]
    state_pk = _PK_BY_LABEL["BeliefState"]
    state_table = f"{prefix}_BeliefState"

    # Node tables first: the relationship tables below reference them.
    ddl = [
        f"CREATE NODE TABLE IF NOT EXISTS {prefix}_Scope"
        f"({scope_pk} STRING, is_world BOOLEAN, PRIMARY KEY({scope_pk}))",
        f"CREATE NODE TABLE IF NOT EXISTS {prefix}_Belief"
        f"({belief_pk} STRING, PRIMARY KEY({belief_pk}))",
        f"CREATE NODE TABLE IF NOT EXISTS {state_table}"
        f"({state_pk} STRING, belief_id STRING, scope_id STRING, "
        f"source_event_id STRING, value STRING, status STRING, "
        f"PRIMARY KEY({state_pk}))",
    ]
    # The structural family: every one of these links a BeliefState to a BeliefState.
    ddl.extend(
        f"CREATE REL TABLE IF NOT EXISTS {prefix}_{rel}(FROM {state_table} TO {state_table})"
        for rel in ("SUPERSEDES", "DEPENDS_ON", "DERIVED_FROM")
    )
    # HAS_REVISION starts at a Belief, so it cannot share the loop above.
    ddl.append(
        f"CREATE REL TABLE IF NOT EXISTS {prefix}_HAS_REVISION"
        f"(FROM {prefix}_Belief TO {state_table})"
    )
    for statement in ddl:
        self._exec(statement)
|
|
249
|
+
|
|
250
|
+
def upsert_node(
    self,
    label: str,
    node_id: UUID | str,
    props: dict[str, Any],
) -> None:
    """
    Create the node ``node_id`` of ``label`` or update its properties (BACK-02).

    Issues ``MERGE (n:{ns}_{label} {pk: $id})`` followed by an optional ``SET`` list, so
    repeating the call does not create a second node. The merge key column is the label's
    primary key from ``_PK_BY_LABEL`` (CR-01); an entry in ``props`` under that same column is
    skipped, because the merge key must not be re-SET as an ordinary property (WR-01).

    Args:
        label: node label; must be a key of ``_PK_BY_LABEL``.
        node_id: primary-key value, bound as ``$id`` after ``str()`` conversion.
        props: column -> value; each value is bound as ``$p<i>`` where ``<i>`` is the entry's
            position in ``props``.

    Raises:
        KeyError: if ``label`` is not a known label.
        ValueError: if a property name is not a safe bare identifier (WR-04).
    """
    key_column = _PK_BY_LABEL[label]
    table = f"{self._ns}_{label}"
    bindings: dict[str, Any] = {"id": str(node_id)}
    assignments: list[str] = []
    for index, (column, val) in enumerate(props.items()):
        if column != key_column:
            # Column names are interpolated (they cannot be $param-bound), so vet each one;
            # the value itself always travels as a bind parameter.
            _validate_identifier(column)
            placeholder = f"p{index}"
            bindings[placeholder] = val
            assignments.append(f"n.{column} = ${placeholder}")
    statement = f"MERGE (n:{table} {{{key_column}: $id}})"
    if assignments:
        statement = f"{statement} SET {', '.join(assignments)}"
    self._exec(statement, bindings)
|
|
288
|
+
|
|
289
|
+
def add_edge(
|
|
290
|
+
self,
|
|
291
|
+
edge_type: EdgeType | str,
|
|
292
|
+
from_id: UUID | str,
|
|
293
|
+
to_id: UUID | str,
|
|
294
|
+
props: dict[str, Any] | None = None,
|
|
295
|
+
) -> None:
|
|
296
|
+
"""
|
|
297
|
+
Add a typed directed edge; idempotent — a repeated edge yields exactly one (BACK-02).
|
|
298
|
+
|
|
299
|
+
Matches both endpoints then ``MERGE (a)-[:{ns}_{edge_type}]->(b)`` (verified:
|
|
300
|
+
double-MERGE = 1 edge; double-CREATE = 2). The endpoint LABELS + PK columns are resolved
|
|
301
|
+
per edge type from ``_EDGE_ENDPOINTS`` (+ ``_PK_BY_LABEL``), NOT hardcoded to
|
|
302
|
+
``BeliefState``/``state_id`` — so the hub-form ``HAS_REVISION`` matches its FROM endpoint
|
|
303
|
+
as a ``Belief`` (keyed ``belief_id``) while the structural family stays
|
|
304
|
+
``BeliefState``->``BeliefState`` (keyed ``state_id``). ``HAS_REVISION`` arrives as a raw
|
|
305
|
+
string, never an ``EdgeType`` member (D-07). Endpoint ids are ``$param`` binds; only the
|
|
306
|
+
validated namespace + fixed endpoint labels + edge-type label are interpolated.
|
|
307
|
+
|
|
308
|
+
Edge properties are NOT yet implemented (no Phase-3 edge carries any). ``props`` stays in
|
|
309
|
+
the signature for port parity, but a non-empty ``props`` is REJECTED with
|
|
310
|
+
``NotImplementedError`` rather than silently dropped (IN-01) — a silent no-op would mask a
|
|
311
|
+
future consumer-facing edge that expects its properties stored.
|
|
312
|
+
"""
|
|
313
|
+
if props:
|
|
314
|
+
raise NotImplementedError(
|
|
315
|
+
"add_edge does not yet store edge properties; got non-empty props"
|
|
316
|
+
)
|
|
317
|
+
from_label, to_label = _EDGE_ENDPOINTS[str(edge_type)]
|
|
318
|
+
rel = f"{self._ns}_{edge_type}"
|
|
319
|
+
a_node = f"{self._ns}_{from_label}"
|
|
320
|
+
b_node = f"{self._ns}_{to_label}"
|
|
321
|
+
a_pk = _PK_BY_LABEL[from_label]
|
|
322
|
+
b_pk = _PK_BY_LABEL[to_label]
|
|
323
|
+
self._exec(
|
|
324
|
+
f"MATCH (a:{a_node} {{{a_pk}: $from}}), (b:{b_node} {{{b_pk}: $to}}) "
|
|
325
|
+
f"MERGE (a)-[:{rel}]->(b)",
|
|
326
|
+
{"from": str(from_id), "to": str(to_id)},
|
|
327
|
+
)
|
|
328
|
+
|
|
329
|
+
def match_nodes(
    self,
    label: str,
    where: dict[str, Any],
) -> list[dict[str, Any]]:
    """
    Return nodes of ``label`` whose props exact-match the AND-combined ``where`` (BACK-02).

    An empty ``where`` returns every node of that label. Each predicate value is a ``$param``
    bind (T-02-02). The namespace, the label and the predicate KEYS are interpolated, so each
    is restricted to a bare identifier before the query text is built.

    Args:
        label: node label; interpolated as ``{ns}_{label}``.
        where: column -> required value; all predicates must hold.

    Returns:
        One plain property dict per matching node, with the driver's underscore-prefixed
        bookkeeping keys (``_ID`` / ``_LABEL``) removed (D-04 parity with the oracle).

    Raises:
        ValueError: if ``label`` or any key of ``where`` is not a safe bare identifier.
    """
    # The label lands in the query text just like the namespace does. Unlike `upsert_node`
    # (which indexes `_PK_BY_LABEL` with it) nothing else constrains it here, so guard it
    # explicitly rather than leave one unvalidated interpolation surface.
    if _NS_RE.fullmatch(label) is None:
        raise ValueError(f"label must match {_NS_RE.pattern!r}; got {label!r}")
    labelled = f"{self._ns}_{label}"
    params: dict[str, Any] = {}
    predicates: list[str] = []
    for i, (key, value) in enumerate(where.items()):
        # WR-04: predicate KEYS are interpolated into `n.{key}` (column identifiers cannot be
        # $param-bound), so each must be a safe bare identifier. Values stay $param-bound.
        _validate_identifier(key)
        pname = f"p{i}"
        params[pname] = value
        predicates.append(f"n.{key} = ${pname}")
    cypher = f"MATCH (n:{labelled})"
    if predicates:
        cypher += " WHERE " + " AND ".join(predicates)
    cypher += " RETURN n"
    # `RETURN n` yields a node-object dict per row; unwrap it and strip the internal
    # underscore-prefixed keys so each row is the plain prop map.
    return [
        {k: v for k, v in row["n"].items() if not k.startswith("_")}
        for row in self._rows(self._exec(cypher, params))
    ]
|
|
361
|
+
|
|
362
|
+
def traverse(
    self,
    start: UUID | str,
    edge_types: frozenset[EdgeType | str],
    max_depth: int | None,
    direction: Literal["in", "out"] = "out",
) -> tuple[list[dict[str, Any]], frozenset[UUID | str]]:
    """
    The single graph-walk primitive — the SC4 resolution, in ONE query (BACK-02 / SC4).

    Runs an ``ACYCLIC`` variable-length match from the ``BeliefState`` node ``start`` over the
    given edge types and returns the de-duplicated reachable set, excluding ``start`` itself.

    Depth handling:

    - ``max_depth=None`` ("full closure") compiles to the literal :data:`_DEPTH_CEILING`, a
      hard truncation limit rather than a true infinity. If any reached node sits at that
      depth the method RAISES instead of reporting a possibly truncated set as complete
      (WR-03, DATA-04).
    - ``max_depth=0`` reaches nothing; ``start`` is on the frontier iff it has at least one
      edge of the requested types in the walked direction.
    - Otherwise the bound is interpolated into ``*1..N`` (``$param`` is rejected there —
      Pitfall 2), while ``$start`` stays a bind parameter.

    A reached node is on the frontier iff its minimum depth equals the bound AND it has at
    least one further edge of the requested types in the walked direction; both facts are
    computed in the same query via ``min(length(p))`` and an ``EXISTS{}`` subquery.

    ``direction`` (D-05) selects which edges to follow: ``"out"`` (default) walks edges FROM
    ``start``; ``"in"`` walks edges INTO ``start``. It flips the relationship arrow in exactly
    three places — the main var-length pattern, its ``EXISTS{}`` frontier subquery, and the
    ``bound == 0`` probe.

    The connection-global ``var_length_extend_max_depth`` setting is raised only when the
    bound exceeds the value currently set, and is restored to that prior value afterwards,
    whichever direction is walked (Pitfall 4).

    Args:
        start: primary key of the starting ``BeliefState``; bound as ``$start`` via ``str()``.
        edge_types: non-empty set of edge types, each a key of ``_EDGE_ENDPOINTS`` once
            converted with ``str()``.
        max_depth: maximum hop count, or ``None`` for full closure.
        direction: ``"out"`` or ``"in"``.

    Returns:
        ``(reached, frontier)``: ``reached`` is a list of ``{"state_id": <id>}`` dicts, one
        per reached node; ``frontier`` is the set of ids (as returned by the query, or
        ``str(start)`` for ``max_depth=0``) at which the walk was cut off by the bound.

    Raises:
        ValueError: for an unknown ``direction``, a negative ``max_depth``, an empty
            ``edge_types``, or an edge type not in ``_EDGE_ENDPOINTS``.
        RuntimeError: when a full-closure walk reaches :data:`_DEPTH_CEILING`.
    """
    # IN-02: the Literal is only enforced statically, so check at runtime too — an
    # out-of-set value must not silently fall through to the outgoing walk below.
    if direction not in ("in", "out"):
        raise ValueError(f"direction must be 'in' or 'out'; got {direction!r}")
    # D-05: derive the arrow pair ONCE. "in" gives (a)<-[:rels]-(b); "out" gives
    # (a)-[:rels]->(b). This is the only direction-dependent interpolation.
    lhs, rhs = ("<-", "-") if direction == "in" else ("-", "->")
    ns = self._ns
    bound = max_depth if max_depth is not None else _DEPTH_CEILING
    # WR-03: the bound is interpolated into `*1..N`, never $param-bound. A real `raise`
    # (not `assert`) keeps the check alive under `python -O`.
    # NOTE(review): only the sign is checked here; a non-int `max_depth` is not rejected
    # explicitly.
    if bound < 0:
        raise ValueError(f"max_depth must be non-negative; got {bound}")
    # WR-03: edge types are interpolated into the rel pattern, so each must belong to the
    # known set first. An empty set would produce `rels == ""` and a malformed `[:* ...]`.
    if not edge_types:
        raise ValueError("traverse requires at least one edge type")
    for et in edge_types:
        if str(et) not in _EDGE_ENDPOINTS:
            raise ValueError(f"unknown edge type for traverse: {et!r}")
    rels = "|".join(f"{ns}_{edge_type}" for edge_type in edge_types)
    node = f"{ns}_BeliefState"
    # WR-02: `max_depth=0` would compile to the degenerate range `*1..0`, so answer it with a
    # one-hop existence probe instead. FLIP 1: the probe arrow follows `direction`.
    if bound == 0:
        has_neighbour_edge = bool(
            self._rows(
                self._exec(
                    f"MATCH (a:{node} {{{_PK_BY_LABEL['BeliefState']}: $start}})"
                    f"{lhs}[:{rels}]{rhs}() RETURN a LIMIT 1",
                    {"start": str(start)},
                )
            )
        )
        frontier_zero: frozenset[UUID | str] = (
            frozenset({str(start)}) if has_neighbour_edge else frozenset()
        )
        return [], frontier_zero
    # WR-01: take the BeliefState PK column from `_PK_BY_LABEL` rather than hardcoding it, so
    # the query stays in lockstep with the DDL (CR-01). It is a fixed internal constant.
    pk = _PK_BY_LABEL["BeliefState"]
    # FLIP 2 (main var-length pattern) + FLIP 3 (EXISTS frontier subquery): both arrows use
    # the same (lhs, rhs) pair so the frontier probe matches the walk direction (Pitfall 3).
    # `d` is kept in the result rows so the full-closure ceiling check below can inspect it.
    cypher = (
        f"MATCH p=(a:{node} {{{pk}: $start}}){lhs}[:{rels}* ACYCLIC 1..{bound}]{rhs}(b:{node}) "
        f"WHERE b.{pk} <> $start "
        f"WITH b, min(length(p)) AS d "
        f"RETURN b.{pk} AS state_id, d, "
        f"(d = {bound} AND EXISTS {{ MATCH (b){lhs}[:{rels}]{rhs}() }}) AS at_frontier"
    )
    # WR-05/WR-01: `var_length_extend_max_depth` is a connection-GLOBAL setting. Raise it only
    # when the bound exceeds the current value, and restore that exact prior value in a
    # `finally`, so an injected tenant connection (R19) keeps whatever cap it had.
    prior_cap = self._read_var_length_cap()
    lifted = bound > prior_cap
    if lifted:
        self._exec(f"CALL var_length_extend_max_depth={bound}")  # lift the cap for this walk
    try:
        rows = self._rows(self._exec(cypher, {"start": str(start)}))
    finally:
        if lifted:
            # restore the tenant's ORIGINAL cap (not a literal) so the connection is unchanged
            self._exec(f"CALL var_length_extend_max_depth={prior_cap}")
    # WR-03 (DATA-04): for a full-closure walk, a node whose min depth equals the ceiling
    # means the walk hit the literal limit, so the result may be truncated — refuse to report
    # it as complete. A FINITE `max_depth` signals truncation via `frontier` instead, so this
    # guard applies to the unbounded case only.
    if max_depth is None and any(r["d"] >= _DEPTH_CEILING for r in rows):
        raise RuntimeError(
            "full-closure traverse hit the internal depth ceiling "
            f"({_DEPTH_CEILING}); the cascade exceeds the adapter's unbounded-walk limit and "
            "cannot be reported as a complete closure (pass an explicit max_depth to bound it)"
        )
    reached = [{"state_id": r["state_id"]} for r in rows]
    frontier: frozenset[UUID | str] = frozenset(r["state_id"] for r in rows if r["at_frontier"])
    return reached, frontier
|
|
500
|
+
|
|
501
|
+
@contextlib.contextmanager
def unit_of_work(self) -> Generator[None]:
    """
    Run the enclosed block as one all-or-nothing write transaction (BACK-02 / A2).

    ``BEGIN TRANSACTION`` is sent before control is handed to the caller. If the ``with``
    body raises anything at all (``BaseException``, so ``KeyboardInterrupt`` and generator
    close are covered too), ``ROLLBACK`` is sent and the same exception propagates
    unchanged. If the body finishes normally, ``COMMIT`` is sent.
    """
    self._exec("BEGIN TRANSACTION")
    try:
        yield
    except BaseException:
        # Undo everything written inside the block, then let the original error surface.
        self._exec("ROLLBACK")
        raise
    # Only reachable on a clean exit: the except arm above always re-raises.
    self._exec("COMMIT")
|
|
518
|
+
|
|
519
|
+
def _read_var_length_cap(self) -> int:
    """
    Read the connection's CURRENT ``var_length_extend_max_depth`` cap (WR-01).

    The value is fetched with ``CALL current_setting('var_length_extend_max_depth')
    RETURN *`` and the first returned row's ``var_length_extend_max_depth`` entry is
    coerced with ``int`` (the driver is expected to hand it back as a string).

    Returns:
        The cap as an ``int``. :data:`_DEFAULT_HOP_CAP` is returned instead when the query
        yields no rows, the row lacks the expected key, or the value cannot be coerced to
        an integer (``None``, empty or non-numeric text). This way a caller that later
        restores the cap always has a usable number rather than an exception from the
        coercion.

    Errors raised by the query itself are not intercepted here and propagate unchanged.
    """
    rows = self._rows(
        self._exec("CALL current_setting('var_length_extend_max_depth') RETURN *")
    )
    if not rows:
        return _DEFAULT_HOP_CAP
    try:
        return int(rows[0]["var_length_extend_max_depth"])
    except (KeyError, TypeError, ValueError):
        # Missing key or an unparseable value: honour the documented fallback instead of
        # letting the coercion error escape.
        return _DEFAULT_HOP_CAP
|
|
538
|
+
|
|
539
|
+
def _rows(self, result: lb.QueryResult) -> list[dict[str, Any]]:
    """Materialise a ``QueryResult`` as a plain ``list[dict]`` (the canonical port return, D-04)."""
    # Switch the result to dict-shaped rows, then pull them all at once; get_all() is typed
    # as a wider list|dict union, so each row is rebuilt as a fresh ``dict``.
    fetched = result.rows_as_dict().get_all()
    return list(map(dict, fetched))
|
|
543
|
+
|
|
544
|
+
def _exec(
    self,
    cypher: str,
    parameters: dict[str, Any] | None = None,
) -> lb.QueryResult:
    """
    Run one Cypher statement on the connection and hand back exactly one ``QueryResult``.

    ``Connection.execute`` is typed ``QueryResult | list[QueryResult]``; the list form
    belongs to multi-statement scripts, which this adapter does not send. The
    ``isinstance`` check narrows that union at the driver boundary, and the failure path
    is a real ``raise`` rather than an ``assert`` (WR-03) so the check is not stripped
    under ``python -O``.

    Args:
        cypher: The statement text to execute.
        parameters: Named query parameters; ``None`` (or an empty mapping) is sent to the
            driver as ``{}``.

    Returns:
        The single ``QueryResult`` produced by the statement.

    Raises:
        TypeError: If the driver returns anything other than one ``QueryResult``.
    """
    bound_params = parameters or {}
    outcome = self._conn.execute(cypher, parameters=bound_params)
    if isinstance(outcome, lb.QueryResult):
        return outcome
    raise TypeError(
        f"single-statement execute must return one QueryResult; got {type(outcome)!r}"
    )
|