knowledge-graph-rdbms 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kgrdbms/__init__.py +50 -0
- kgrdbms/backends/__init__.py +69 -0
- kgrdbms/backends/base.py +104 -0
- kgrdbms/backends/neo4j.py +45 -0
- kgrdbms/backends/postgres.py +525 -0
- kgrdbms/backends/sqlite.py +23 -0
- kgrdbms/cli.py +538 -0
- kgrdbms/events.py +319 -0
- kgrdbms/graph.py +749 -0
- kgrdbms/invariants.py +45 -0
- kgrdbms/mcp_server.py +417 -0
- kgrdbms/policy.py +111 -0
- kgrdbms/resolver.py +269 -0
- kgrdbms/service.py +221 -0
- knowledge_graph_rdbms-0.1.0.dist-info/METADATA +746 -0
- knowledge_graph_rdbms-0.1.0.dist-info/RECORD +19 -0
- knowledge_graph_rdbms-0.1.0.dist-info/WHEEL +4 -0
- knowledge_graph_rdbms-0.1.0.dist-info/entry_points.txt +3 -0
- knowledge_graph_rdbms-0.1.0.dist-info/licenses/LICENSE +21 -0
kgrdbms/__init__.py
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
"""kgrdbms — a label property graph on an RDBMS (SQLite).
|
|
2
|
+
|
|
3
|
+
A small, dependency-free knowledge-graph core:
|
|
4
|
+
|
|
5
|
+
* a label property graph (nodes, typed directed edges, labels, JSON props)
|
|
6
|
+
backed by SQLite — no external graph database required
|
|
7
|
+
* an append-only, replayable event log with compensation (undo-as-event)
|
|
8
|
+
* a two-layer mutation gate: compiled-in invariants + configurable policy
|
|
9
|
+
* an optional MCP server exposing the graph to any MCP-aware client
|
|
10
|
+
(install the `mcp` extra)
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
__version__ = "0.1.0"
|
|
16
|
+
|
|
17
|
+
from kgrdbms.graph import Edge, Graph, Node, default_graph_path, slug
|
|
18
|
+
from kgrdbms.events import (
|
|
19
|
+
EventLog,
|
|
20
|
+
GraphEvent,
|
|
21
|
+
apply_event,
|
|
22
|
+
edge_spec,
|
|
23
|
+
node_spec,
|
|
24
|
+
replay,
|
|
25
|
+
)
|
|
26
|
+
from kgrdbms.policy import Decision, MutationContext, mutation_check
|
|
27
|
+
from kgrdbms.invariants import InvariantViolation, enforce
|
|
28
|
+
|
|
29
|
+
# Public API of the package, grouped by the submodule each name is imported from.
__all__ = [
    "__version__",
    # kgrdbms.graph
    "Graph", "Node", "Edge", "slug", "default_graph_path",
    # kgrdbms.events
    "EventLog", "GraphEvent", "apply_event", "replay", "node_spec", "edge_spec",
    # kgrdbms.policy / kgrdbms.invariants
    "MutationContext", "Decision", "mutation_check",
    "InvariantViolation", "enforce",
]
|
kgrdbms/backends/__init__.py
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
"""Backend registry — the pluggable data plane.
|
|
2
|
+
|
|
3
|
+
An engine is a factory `(*, location, **options) -> GraphBackend` registered
|
|
4
|
+
under a name. The resolver looks the name up here and calls it; it never knows
|
|
5
|
+
which engines exist. Adding one is: write a module, decorate its factory with
|
|
6
|
+
`@backend("name")`, import it below. No switch to edit, nothing else to touch.
|
|
7
|
+
|
|
8
|
+
from kgrdbms.backends import backend
|
|
9
|
+
from kgrdbms.backends.base import GraphBackend
|
|
10
|
+
|
|
11
|
+
@backend("myengine")
|
|
12
|
+
def open_myengine(*, location: str, **options) -> GraphBackend:
|
|
13
|
+
return MyEngineGraph(location, **options)
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
from typing import Any, Callable
|
|
19
|
+
|
|
20
|
+
from kgrdbms.backends.base import GraphBackend, _StubBackend
|
|
21
|
+
|
|
22
|
+
# Type of an engine factory. Per the module docstring a factory has the shape
# `(*, location, **options) -> GraphBackend`; the parameters are left open
# (`...`) because each engine accepts its own options.
BackendFactory = Callable[..., GraphBackend]

# Engine name -> factory. Filled in by the @backend(...) decorator when the
# engine modules are imported; read by get_backend() / available_backends().
_REGISTRY: dict[str, BackendFactory] = {}
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def backend(name: str) -> Callable[[BackendFactory], BackendFactory]:
    """Build a decorator that stores a factory in the registry under *name*.

    The decorated factory is handed back unchanged, so it remains directly
    callable. A later registration under the same name replaces the earlier
    one.
    """

    def _remember(factory: BackendFactory) -> BackendFactory:
        # Record the factory, then return it untouched.
        _REGISTRY[name] = factory
        return factory

    return _remember
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def get_backend(name: str) -> BackendFactory:
    """Look up the factory registered for engine *name*.

    Raises:
        ValueError: if no engine is registered under *name*. The message lists
            the registered engine names (or "(none)") so the caller can see
            what is available.
    """
    try:
        factory = _REGISTRY[name]
    except KeyError:
        known = sorted(_REGISTRY)
        listing = ", ".join(known) or "(none)"
        # `from None` hides the internal KeyError; the ValueError says it all.
        raise ValueError(f"unknown backend {name!r}; registered engines: {listing}") from None
    return factory
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def available_backends() -> list[str]:
    """Return the names of all registered engines in sorted order."""
    names = list(_REGISTRY)
    names.sort()
    return names
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def open_backend(name: str, *, location: str, **options: Any) -> GraphBackend:
    """Resolve engine *name* and construct its backend in a single call.

    `location` and any extra keyword options are forwarded to the factory
    as-is. An unknown name raises whatever `get_backend` raises.
    """
    factory = get_backend(name)
    return factory(location=location, **options)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
# Import the engine modules so their @backend(...) registrations run. New
|
|
56
|
+
# engines get one line here.
|
|
57
|
+
from kgrdbms.backends import sqlite as _sqlite # noqa: E402,F401
|
|
58
|
+
from kgrdbms.backends import postgres as _postgres # noqa: E402,F401
|
|
59
|
+
from kgrdbms.backends import neo4j as _neo4j # noqa: E402,F401
|
|
60
|
+
|
|
61
|
+
# Names exported by this package.
__all__ = [
    # backend contract, re-exported from kgrdbms.backends.base
    "GraphBackend", "_StubBackend",
    # registry API defined in this module
    "backend", "get_backend", "available_backends", "open_backend",
    "BackendFactory",
]
|
kgrdbms/backends/base.py
ADDED
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
"""The backend contract and a raising skeleton for new engines.
|
|
2
|
+
|
|
3
|
+
`GraphBackend` is the finite method surface the rest of kgrdbms depends on
|
|
4
|
+
(service.py for writes, the read paths for queries). Any engine that implements
|
|
5
|
+
it can sit behind an ontology name — SQLite today, Postgres or Neo4j tomorrow.
|
|
6
|
+
|
|
7
|
+
`_StubBackend` is a courtesy: it implements every method by raising a clear
|
|
8
|
+
"not implemented yet" so a new engine can be registered and *routed to*
|
|
9
|
+
immediately, failing loudly per-call with exactly what's missing. Subclass it,
|
|
10
|
+
set `engine`, and replace methods one at a time as you build the real thing.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
from contextlib import contextmanager
|
|
16
|
+
from typing import Any, Iterator, Protocol, runtime_checkable
|
|
17
|
+
|
|
18
|
+
from kgrdbms.graph import Edge, Node
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@runtime_checkable
class GraphBackend(Protocol):
    """Engine-agnostic method surface every graph engine has to provide.

    The signatures are uniform across engines, but the *cost* of a call is
    not: a deep traversal is ~7µs of index lookups on SQLite and a
    pointer-chase under a Bolt round-trip on Neo4j. That asymmetry is exactly
    why the control plane routes by workload rather than pretending engines
    are fungible.
    """

    # ---- writes: the gated path in service.py calls these ----

    def add_node(self, *args: Any, **kwargs: Any) -> Node:
        ...

    def add_label(self, node_id: str, *labels: str) -> None:
        ...

    def set_property(self, node_id: str, key: str, value: Any) -> None:
        ...

    def delete_node(self, node_id: str) -> bool:
        ...

    def add_edge(self, *args: Any, **kwargs: Any) -> Edge:
        ...

    def delete_edge(self, from_node: str, to_node: str, type: str) -> int:
        ...

    def incident_edges(self, node_id: str) -> list[Edge]:
        ...

    # ---- reads ----

    def node(self, id: str) -> Node | None:
        ...

    def nodes_by_kind(self, kind: str) -> list[Node]:
        ...

    def nodes_by_label(self, label: str) -> list[Node]:
        ...

    def out(
        self, node_id: str, edge_type: str | None = ...
    ) -> list[tuple[Edge, Node]]:
        ...

    def in_(
        self, node_id: str, edge_type: str | None = ...
    ) -> list[tuple[Edge, Node]]:
        ...

    def neighborhood(self, node_id: str, depth: int = ...) -> dict[str, Node]:
        ...

    def shortest_path(
        self, from_id: str, to_id: str, max_depth: int = ...
    ) -> list[Node] | None:
        ...

    def descendants(
        self, node_id: str, edge_type: str, max_depth: int = ...
    ) -> list[Node]:
        ...

    def count_nodes_by_kind(self) -> dict[str, int]:
        ...

    def count_edges_by_type(self) -> dict[str, int]:
        ...

    def total_nodes(self) -> int:
        ...

    def total_edges(self) -> int:
        ...

    # ---- bulk: a context manager that defers commits to one transaction ----

    def batch(self) -> Any:
        ...

    def close(self) -> None:
        ...
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class _StubBackend:
    """Skeleton for an engine that is registered but not built yet.

    Every `GraphBackend` method is present, so the class matches the protocol
    structurally and the resolver will route to it; each call then fails
    loudly with a message naming the missing method instead of failing
    silently. To build a real engine: subclass, set `engine`, and override
    methods as you implement them.
    """

    # Engine name used in the "not implemented" message; subclasses override.
    engine = "stub"

    def __init__(self, location: str, **options: Any) -> None:
        # Only remember the arguments; nothing is opened or connected here.
        self.location = location
        self.options = options

    def _todo(self, method: str) -> Any:
        """Raise NotImplementedError naming *method*; never returns."""
        message = (
            f"{self.engine} backend: .{method}() not implemented yet "
            f"(location={self.location!r}). Override it on {type(self).__name__}."
        )
        raise NotImplementedError(message)

    # ---- writes ----

    def add_node(self, *args: Any, **kwargs: Any) -> Node:
        return self._todo("add_node")

    def add_label(self, *args: Any, **kwargs: Any) -> None:
        return self._todo("add_label")

    def set_property(self, *args: Any, **kwargs: Any) -> None:
        return self._todo("set_property")

    def delete_node(self, *args: Any, **kwargs: Any) -> bool:
        return self._todo("delete_node")

    def add_edge(self, *args: Any, **kwargs: Any) -> Edge:
        return self._todo("add_edge")

    def delete_edge(self, *args: Any, **kwargs: Any) -> int:
        return self._todo("delete_edge")

    def incident_edges(self, *args: Any, **kwargs: Any) -> list[Edge]:
        return self._todo("incident_edges")

    # ---- reads ----

    def node(self, *args: Any, **kwargs: Any) -> Node | None:
        return self._todo("node")

    def nodes_by_kind(self, *args: Any, **kwargs: Any) -> list[Node]:
        return self._todo("nodes_by_kind")

    def nodes_by_label(self, *args: Any, **kwargs: Any) -> list[Node]:
        return self._todo("nodes_by_label")

    def out(self, *args: Any, **kwargs: Any) -> list[tuple[Edge, Node]]:
        return self._todo("out")

    def in_(self, *args: Any, **kwargs: Any) -> list[tuple[Edge, Node]]:
        return self._todo("in_")

    def neighborhood(self, *args: Any, **kwargs: Any) -> dict[str, Node]:
        return self._todo("neighborhood")

    def shortest_path(self, *args: Any, **kwargs: Any) -> list[Node] | None:
        return self._todo("shortest_path")

    def descendants(self, *args: Any, **kwargs: Any) -> list[Node]:
        return self._todo("descendants")

    def count_nodes_by_kind(self, *args: Any, **kwargs: Any) -> dict[str, int]:
        return self._todo("count_nodes_by_kind")

    def count_edges_by_type(self, *args: Any, **kwargs: Any) -> dict[str, int]:
        return self._todo("count_edges_by_type")

    def total_nodes(self, *args: Any, **kwargs: Any) -> int:
        return self._todo("total_nodes")

    def total_edges(self, *args: Any, **kwargs: Any) -> int:
        return self._todo("total_edges")

    # ---- bulk / lifecycle ----

    @contextmanager
    def batch(self) -> Iterator["_StubBackend"]:
        """Yield the stub itself; writes attempted inside still raise via _todo."""
        yield self

    def close(self) -> None:
        """Do nothing: the stub never opened anything, so resolver cleanup is safe."""
|
kgrdbms/backends/neo4j.py
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
"""Neo4j engine — STUB.
|
|
2
|
+
|
|
3
|
+
Strategic role: the *escalation target* for a specific ontology whose workload
|
|
4
|
+
turns deep. kgrdbms's own benchmark measured the crossover — shallow point reads
|
|
5
|
+
favour embedded SQLite by ~30-60×, but a 1,000-deep traversal favours Neo4j's
|
|
6
|
+
index-free adjacency by ~76×. So this engine isn't a replacement for the default;
|
|
7
|
+
it's where the control plane *routes a heavy ontology* while everything else
|
|
8
|
+
stays embedded. Different engine per ontology, one uniform interface.
|
|
9
|
+
|
|
10
|
+
Implementation sketch (when we build it):
|
|
11
|
+
* `location` is a Bolt URI (+ auth in `options`); methods compile to Cypher.
|
|
12
|
+
`shortest_path` -> `shortestPath((a)-[*]-(b))`, `descendants` -> variable-
|
|
13
|
+
length `MATCH`, point lookups -> `MATCH (n {id:$id})`.
|
|
14
|
+
* Neo4j is its OWN source of truth, so the event log MUST live in the control
|
|
15
|
+
plane (SQLite): apply each gated event to Neo4j as a projection, keep the
|
|
16
|
+
log — that's how audit / replay / undo survive a non-relational backend.
|
|
17
|
+
That seam is now built: the resolver already pairs any non-sqlite backend
|
|
18
|
+
with a `_ControlPlaneLogStore` (an events.db sidecar) via
|
|
19
|
+
`EventLog(store, projection=backend)`, exactly as the live postgres engine
|
|
20
|
+
uses it. A Neo4j engine just implements the `GraphBackend` surface; the log
|
|
21
|
+
plumbing is done.
|
|
22
|
+
* Mind the Bolt round-trip (~0.4ms) — it's the fixed cost the workload router
|
|
23
|
+
is weighing against SQLite's in-process ~7µs.
|
|
24
|
+
|
|
25
|
+
Stubbed: `location` stored, no driver opened; every method raises via
|
|
26
|
+
`_StubBackend` with what's missing.
|
|
27
|
+
"""
|
|
28
|
+
|
|
29
|
+
from __future__ import annotations
|
|
30
|
+
|
|
31
|
+
from typing import Any
|
|
32
|
+
|
|
33
|
+
from kgrdbms.backends import backend
|
|
34
|
+
from kgrdbms.backends.base import GraphBackend, _StubBackend
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class Neo4jGraph(_StubBackend):
    """Placeholder Neo4j engine.

    Only the engine name is set here; every graph method is inherited from
    `_StubBackend` and therefore raises NotImplementedError when called.
    """

    # TODO: open a Bolt driver to `self.location`, and override _StubBackend's
    # methods with Cypher. Pair with a control-plane event log (see module docs).
    engine = "neo4j"
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
@backend("neo4j")
def open_neo4j(*, location: str, **options: Any) -> GraphBackend:
    """Factory registered under the engine name "neo4j".

    Builds a `Neo4jGraph` from `location` and forwards any extra keyword
    options to it unchanged.
    """
    graph = Neo4jGraph(location, **options)
    return graph
|