knowledge-graph-rdbms 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kgrdbms/__init__.py +50 -0
- kgrdbms/backends/__init__.py +69 -0
- kgrdbms/backends/base.py +104 -0
- kgrdbms/backends/neo4j.py +45 -0
- kgrdbms/backends/postgres.py +525 -0
- kgrdbms/backends/sqlite.py +23 -0
- kgrdbms/cli.py +538 -0
- kgrdbms/events.py +319 -0
- kgrdbms/graph.py +749 -0
- kgrdbms/invariants.py +45 -0
- kgrdbms/mcp_server.py +417 -0
- kgrdbms/policy.py +111 -0
- kgrdbms/resolver.py +269 -0
- kgrdbms/service.py +221 -0
- knowledge_graph_rdbms-0.1.0.dist-info/METADATA +746 -0
- knowledge_graph_rdbms-0.1.0.dist-info/RECORD +19 -0
- knowledge_graph_rdbms-0.1.0.dist-info/WHEEL +4 -0
- knowledge_graph_rdbms-0.1.0.dist-info/entry_points.txt +3 -0
- knowledge_graph_rdbms-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,525 @@
|
|
|
1
|
+
"""Postgres engine — live.
|
|
2
|
+
|
|
3
|
+
The scale-up of the SQLite default without changing engines conceptually. Same
|
|
4
|
+
five-table relational model and the same query shapes (point lookups, a
|
|
5
|
+
`WITH RECURSIVE` traversal, BFS over `out`/`in_`), ported to Postgres: `%s`
|
|
6
|
+
placeholders, `ON CONFLICT` upserts, `jsonb` properties, `= ANY(%s)` array
|
|
7
|
+
membership instead of SQLite's chunked `IN (...)`. The win over SQLite is real
|
|
8
|
+
concurrent writers and a server you can scale, while keeping SQL you can read.
|
|
9
|
+
|
|
10
|
+
`location` is a libpq connection string / DSN (e.g.
|
|
11
|
+
`postgresql://user:pass@host:5432/db`), not a file path. The event log does NOT
|
|
12
|
+
live here — for a non-sqlite backend it lives in a control-plane SQLite store
|
|
13
|
+
(see `resolver`), so this class is purely the graph *projection*.
|
|
14
|
+
|
|
15
|
+
`psycopg` (v3) is imported lazily so `import kgrdbms.backends` works without it;
|
|
16
|
+
a missing driver raises `NotImplementedError` (routed to "unavailable" by the
|
|
17
|
+
front doors) telling you to install the `postgres` extra.
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from __future__ import annotations
|
|
21
|
+
|
|
22
|
+
import uuid
|
|
23
|
+
from contextlib import contextmanager
|
|
24
|
+
from typing import Any, Iterable, Iterator
|
|
25
|
+
|
|
26
|
+
from kgrdbms.backends import backend
|
|
27
|
+
from kgrdbms.backends.base import GraphBackend
|
|
28
|
+
from kgrdbms.graph import Edge, Node, _normalize_edge, _normalize_node
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
# DDL for the five-table relational model (nodes, node_labels, node_properties,
# edges, edge_properties) plus its indexes. Every statement is guarded by
# IF NOT EXISTS; PostgresGraph.__init__ executes the list in order and commits.
_SCHEMA_STATEMENTS = [
    # One row per node, keyed by the caller-supplied id.
    """CREATE TABLE IF NOT EXISTS nodes (
    id TEXT PRIMARY KEY,
    kind TEXT NOT NULL,
    name TEXT NOT NULL,
    created_at TIMESTAMPTZ DEFAULT now()
    )""",
    # Labels attached to a node; rows are removed with their node (ON DELETE CASCADE).
    """CREATE TABLE IF NOT EXISTS node_labels (
    node_id TEXT NOT NULL REFERENCES nodes(id) ON DELETE CASCADE,
    label TEXT NOT NULL,
    PRIMARY KEY (node_id, label)
    )""",
    # Node properties, one JSONB value per (node, key).
    """CREATE TABLE IF NOT EXISTS node_properties (
    node_id TEXT NOT NULL REFERENCES nodes(id) ON DELETE CASCADE,
    key TEXT NOT NULL,
    value_json JSONB NOT NULL,
    PRIMARY KEY (node_id, key)
    )""",
    # Directed, typed edges; both endpoints must exist and cascade on node delete.
    """CREATE TABLE IF NOT EXISTS edges (
    id TEXT PRIMARY KEY,
    from_node TEXT NOT NULL REFERENCES nodes(id) ON DELETE CASCADE,
    to_node TEXT NOT NULL REFERENCES nodes(id) ON DELETE CASCADE,
    type TEXT NOT NULL,
    created_at TIMESTAMPTZ DEFAULT now()
    )""",
    # Edge properties, one JSONB value per (edge, key).
    """CREATE TABLE IF NOT EXISTS edge_properties (
    edge_id TEXT NOT NULL REFERENCES edges(id) ON DELETE CASCADE,
    key TEXT NOT NULL,
    value_json JSONB NOT NULL,
    PRIMARY KEY (edge_id, key)
    )""",
    # At most one edge per (from_node, type, to_node) triple; the bulk edge
    # insert names these columns as its ON CONFLICT target.
    "CREATE UNIQUE INDEX IF NOT EXISTS uq_edges_triple ON edges(from_node, type, to_node)",
    # Secondary indexes for the lookups by kind, label, and edge endpoint/type.
    "CREATE INDEX IF NOT EXISTS idx_nodes_kind ON nodes(kind)",
    "CREATE INDEX IF NOT EXISTS idx_node_labels_label ON node_labels(label)",
    "CREATE INDEX IF NOT EXISTS idx_edges_from_type ON edges(from_node, type)",
    "CREATE INDEX IF NOT EXISTS idx_edges_to_type ON edges(to_node, type)",
    "CREATE INDEX IF NOT EXISTS idx_edges_type ON edges(type)",
]
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
class PostgresGraph:
|
|
72
|
+
"""A label property graph over Postgres. Satisfies the `GraphBackend` surface."""
|
|
73
|
+
|
|
74
|
+
def __init__(self, location: str, **options: Any) -> None:
    """Connect to Postgres and make sure the graph schema exists.

    Args:
        location: libpq connection string / DSN handed to ``psycopg.connect``.
        **options: accepted for signature compatibility; not used here.

    Raises:
        NotImplementedError: if the ``psycopg`` driver is not installed.
    """
    # Lazy import so the package can be imported without the postgres extra.
    try:
        import psycopg
        from psycopg.rows import dict_row
        from psycopg.types.json import Jsonb
    except ImportError as e:  # pragma: no cover - exercised only without the extra
        raise NotImplementedError(
            "postgres backend needs psycopg: pip install 'knowledge-graph-rdbms[postgres]'"
        ) from e
    self._Jsonb = Jsonb
    self.location = location
    self.path = location  # callers may read .path; for pg it's the DSN
    self._batch_depth = 0
    self.conn = psycopg.connect(location, row_factory=dict_row)
    try:
        for stmt in _SCHEMA_STATEMENTS:
            self.conn.execute(stmt)
        self.conn.commit()
    except BaseException:
        # The caller never receives the instance when bootstrap fails, so
        # nobody else can close this connection: release it before re-raising.
        self.conn.close()
        raise
|
|
91
|
+
|
|
92
|
+
# ---- lifecycle / transactions (mirror Graph's batch semantics) -----
|
|
93
|
+
|
|
94
|
+
def close(self) -> None:
    """Close the underlying connection, ignoring any error raised while doing so."""
    try:
        connection = self.conn
        connection.close()
    except Exception:
        # Best-effort shutdown: a failing close is deliberately not reported.
        return None
|
|
99
|
+
|
|
100
|
+
def _maybe_commit(self) -> None:
|
|
101
|
+
if self._batch_depth == 0:
|
|
102
|
+
self.conn.commit()
|
|
103
|
+
|
|
104
|
+
@contextmanager
def tx(self) -> Iterator[Any]:
    """Yield the connection inside a unit of work.

    Inside an open ``batch()`` this is a pass-through: the batch owns the
    commit/rollback. Otherwise the work is committed on success and rolled
    back on any failure, including ``KeyboardInterrupt``/``SystemExit``, so a
    half-written unit can never be committed by a later operation.
    """
    if self._batch_depth:
        yield self.conn
        return
    try:
        yield self.conn
        self.conn.commit()
    except BaseException:
        # BaseException (not just Exception): an interrupt must also discard
        # the partial writes instead of leaving the transaction open.
        self.conn.rollback()
        raise
|
|
115
|
+
|
|
116
|
+
@contextmanager
def batch(self) -> Iterator["PostgresGraph"]:
    """Group several writes into one transaction; batches may nest.

    Only the outermost batch commits (on success) or rolls back (on failure).
    The depth counter is restored on *any* exit, including
    ``KeyboardInterrupt``/``SystemExit``, so an interrupted batch cannot leave
    the graph permanently in "inside a batch" mode where nothing commits.
    """
    self._batch_depth += 1
    try:
        yield self
    except BaseException:
        self._batch_depth -= 1
        if self._batch_depth == 0:
            self.conn.rollback()
        raise
    else:
        self._batch_depth -= 1
        if self._batch_depth == 0:
            self.conn.commit()
|
|
130
|
+
|
|
131
|
+
def clear(self) -> None:
    """Empty all five graph tables with a single TRUNCATE inside ``tx()``."""
    wipe_sql = "TRUNCATE nodes, node_labels, node_properties, edges, edge_properties"
    with self.tx():
        self.conn.execute(wipe_sql)
|
|
134
|
+
|
|
135
|
+
# ---- writes --------------------------------------------------------
|
|
136
|
+
|
|
137
|
+
def add_node(
    self,
    id: str,
    kind: str,
    name: str | None = None,
    labels: Iterable[str] = (),
    properties: dict[str, Any] | None = None,
) -> Node:
    """Upsert one node together with the given labels and properties.

    Args:
        id: node primary key; an existing row has its kind/name overwritten.
        kind: node kind.
        name: display name; falls back to ``id`` when empty or None.
        labels: labels to attach (already-present labels are left alone).
        properties: key/value pairs stored as JSONB, overwriting per key.

    Returns:
        A ``Node`` built from the arguments (not re-read from the database).
    """
    name = name or id
    # Materialize once: `labels` may be a one-shot iterator, and it is needed
    # both for the inserts and for the returned Node.
    label_set = set(labels)
    properties = dict(properties or {})
    with self.tx():
        self.conn.execute(
            "INSERT INTO nodes(id, kind, name) VALUES (%s, %s, %s) "
            "ON CONFLICT(id) DO UPDATE SET kind=excluded.kind, name=excluded.name",
            (id, kind, name),
        )
        for label in label_set:
            self.conn.execute(
                "INSERT INTO node_labels(node_id, label) VALUES (%s, %s) ON CONFLICT DO NOTHING",
                (id, label),
            )
        for k, v in properties.items():
            self.conn.execute(
                "INSERT INTO node_properties(node_id, key, value_json) VALUES (%s, %s, %s) "
                "ON CONFLICT(node_id, key) DO UPDATE SET value_json=excluded.value_json",
                (id, k, self._Jsonb(v)),
            )
    return Node(id=id, kind=kind, name=name, labels=label_set, properties=properties)
|
|
165
|
+
|
|
166
|
+
def add_nodes(self, specs: Iterable["Node | dict"]) -> int:
    """Bulk upsert nodes, their labels and properties in a single ``tx()``.

    Each spec is normalized with ``_normalize_node``; rows are sent with
    ``executemany``. Returns the number of specs processed (0 for no specs,
    in which case no transaction is opened).
    """
    node_rows: list[tuple] = []
    label_rows: list[tuple] = []
    prop_rows: list[tuple] = []
    for spec in specs:
        nid, kind, name, labels, props = _normalize_node(spec)
        node_rows.append((nid, kind, name))
        label_rows.extend((nid, label) for label in set(labels))
        prop_rows.extend((nid, key, self._Jsonb(value)) for key, value in props.items())
    if not node_rows:
        return 0
    # Nodes go first so the label/property foreign keys can resolve.
    statements = (
        (
            "INSERT INTO nodes(id, kind, name) VALUES (%s, %s, %s) "
            "ON CONFLICT(id) DO UPDATE SET kind=excluded.kind, name=excluded.name",
            node_rows,
        ),
        (
            "INSERT INTO node_labels(node_id, label) VALUES (%s, %s) ON CONFLICT DO NOTHING",
            label_rows,
        ),
        (
            "INSERT INTO node_properties(node_id, key, value_json) VALUES (%s, %s, %s) "
            "ON CONFLICT(node_id, key) DO UPDATE SET value_json=excluded.value_json",
            prop_rows,
        ),
    )
    with self.tx():
        for sql, rows in statements:
            if rows:
                self.conn.cursor().executemany(sql, rows)
    return len(node_rows)
|
|
200
|
+
|
|
201
|
+
def add_label(self, node_id: str, *labels: str) -> None:
    """Attach each of ``labels`` to ``node_id``; labels already present are skipped."""
    insert_sql = "INSERT INTO node_labels(node_id, label) VALUES (%s, %s) ON CONFLICT DO NOTHING"
    with self.tx():
        for label in labels:
            self.conn.execute(insert_sql, (node_id, label))
|
|
208
|
+
|
|
209
|
+
def remove_label(self, node_id: str, label: str) -> None:
    """Detach ``label`` from ``node_id``; a missing pair deletes nothing."""
    delete_sql = "DELETE FROM node_labels WHERE node_id=%s AND label=%s"
    with self.tx():
        self.conn.execute(delete_sql, (node_id, label))
|
|
214
|
+
|
|
215
|
+
def set_property(self, node_id: str, key: str, value: Any) -> None:
    """Store ``value`` as JSONB under ``key`` for ``node_id``, replacing any previous value."""
    upsert_sql = (
        "INSERT INTO node_properties(node_id, key, value_json) VALUES (%s, %s, %s) "
        "ON CONFLICT(node_id, key) DO UPDATE SET value_json=excluded.value_json"
    )
    with self.tx():
        wrapped = self._Jsonb(value)
        self.conn.execute(upsert_sql, (node_id, key, wrapped))
|
|
222
|
+
|
|
223
|
+
def del_property(self, node_id: str, key: str) -> None:
    """Remove property ``key`` from ``node_id``; a missing pair deletes nothing."""
    delete_sql = "DELETE FROM node_properties WHERE node_id=%s AND key=%s"
    with self.tx():
        self.conn.execute(delete_sql, (node_id, key))
|
|
228
|
+
|
|
229
|
+
def delete_node(self, node_id: str) -> bool:
    """Delete a node and report whether a row was actually removed.

    Labels, properties and incident edges go with it through the schema's
    ON DELETE CASCADE foreign keys. The result comes from the DELETE's own
    row count rather than a separate pre-read, so it cannot be invalidated by
    a concurrent writer and costs no extra queries.
    """
    with self.tx():
        cur = self.conn.execute("DELETE FROM nodes WHERE id=%s", (node_id,))
    return cur.rowcount > 0
|
|
234
|
+
|
|
235
|
+
def add_edge(
    self,
    from_node: str,
    to_node: str,
    type: str,
    properties: dict[str, Any] | None = None,
) -> Edge:
    """Add (or reuse) the edge ``from_node -[type]-> to_node`` and upsert its properties.

    Args:
        from_node: id of the source node.
        to_node: id of the target node.
        type: edge type.
        properties: key/value pairs stored as JSONB, overwriting per key.

    Returns:
        An ``Edge`` carrying the id of the surviving row and the properties
        passed in (not a re-read of everything stored on the edge).
    """
    props = dict(properties or {})
    with self.tx():
        # Single atomic upsert instead of SELECT-then-INSERT: two concurrent
        # writers adding the same triple can no longer both miss the SELECT
        # and collide on uq_edges_triple. The no-op DO UPDATE makes RETURNING
        # yield the existing row's id when the triple is already present.
        row = self.conn.execute(
            "INSERT INTO edges(id, from_node, to_node, type) VALUES (%s, %s, %s, %s) "
            "ON CONFLICT (from_node, type, to_node) DO UPDATE SET type=excluded.type "
            "RETURNING id",
            (uuid.uuid4().hex, from_node, to_node, type),
        ).fetchone()
        edge_id = row["id"]
        for k, v in props.items():
            self.conn.execute(
                "INSERT INTO edge_properties(edge_id, key, value_json) VALUES (%s, %s, %s) "
                "ON CONFLICT(edge_id, key) DO UPDATE SET value_json=excluded.value_json",
                (edge_id, k, self._Jsonb(v)),
            )
    return Edge(id=edge_id, from_node=from_node, to_node=to_node, type=type, properties=props)
|
|
263
|
+
|
|
264
|
+
def add_edges(self, specs: Iterable["Edge | dict | tuple | list"]) -> int:
    """Bulk add edges and their properties in a single ``tx()``.

    Each spec is normalized with ``_normalize_edge``. An edge whose
    (from_node, type, to_node) triple already exists is skipped by
    ON CONFLICT DO NOTHING; properties are attached to whichever row holds the
    triple, looked up by subquery. Returns the number of specs processed
    (0 for no specs, in which case no transaction is opened).
    """
    edge_rows: list[tuple] = []
    prop_rows: list[tuple] = []
    for spec in specs:
        src, dst, edge_type, props = _normalize_edge(spec)
        edge_rows.append((uuid.uuid4().hex, src, dst, edge_type))
        prop_rows.extend(
            (src, edge_type, dst, key, self._Jsonb(value)) for key, value in props.items()
        )
    if not edge_rows:
        return 0
    insert_edges = (
        "INSERT INTO edges(id, from_node, to_node, type) VALUES (%s, %s, %s, %s) "
        "ON CONFLICT (from_node, type, to_node) DO NOTHING"
    )
    upsert_props = (
        "INSERT INTO edge_properties(edge_id, key, value_json) VALUES "
        "((SELECT id FROM edges WHERE from_node=%s AND type=%s AND to_node=%s), %s, %s) "
        "ON CONFLICT(edge_id, key) DO UPDATE SET value_json=excluded.value_json"
    )
    with self.tx():
        self.conn.cursor().executemany(insert_edges, edge_rows)
        if prop_rows:
            self.conn.cursor().executemany(upsert_props, prop_rows)
    return len(edge_rows)
|
|
292
|
+
|
|
293
|
+
def delete_edge(self, from_node: str, to_node: str, type: str) -> int:
    """Delete the edge matching the triple and return the DELETE's row count."""
    delete_sql = "DELETE FROM edges WHERE from_node=%s AND type=%s AND to_node=%s"
    # Parameter order follows the SQL text (from, type, to), not the signature.
    triple = (from_node, type, to_node)
    with self.tx():
        cursor = self.conn.execute(delete_sql, triple)
    return cursor.rowcount
|
|
300
|
+
|
|
301
|
+
def incident_edges(self, node_id: str) -> list[Edge]:
    """Return every edge that starts or ends at ``node_id``, with its properties.

    Properties for all matching edges are loaded in one ``= ANY(%s)`` query
    (the same batching the node and edge/node-pair hydrators use) instead of
    one query per edge.
    """
    rows = self.conn.execute(
        "SELECT * FROM edges WHERE from_node=%s OR to_node=%s", (node_id, node_id)
    ).fetchall()
    if not rows:
        return []
    edge_ids = [r["id"] for r in rows]
    props_by: dict[str, dict[str, Any]] = {eid: {} for eid in edge_ids}
    for pr in self.conn.execute(
        "SELECT edge_id, key, value_json FROM edge_properties WHERE edge_id = ANY(%s)",
        (edge_ids,),
    ).fetchall():
        props_by[pr["edge_id"]][pr["key"]] = pr["value_json"]
    return [
        Edge(id=r["id"], from_node=r["from_node"], to_node=r["to_node"],
             type=r["type"], properties=props_by[r["id"]])
        for r in rows
    ]
|
|
306
|
+
|
|
307
|
+
# ---- reads ---------------------------------------------------------
|
|
308
|
+
|
|
309
|
+
def node(self, id: str) -> Node | None:
    """Fetch one node by id, hydrated with labels and properties; None when no row matches."""
    found = self.conn.execute("SELECT * FROM nodes WHERE id=%s", (id,)).fetchone()
    if not found:
        return None
    return self._hydrate_node(found)
|
|
312
|
+
|
|
313
|
+
def nodes_by_kind(self, kind: str) -> list[Node]:
    """Return all nodes of the given kind, ordered by id and fully hydrated."""
    query = "SELECT * FROM nodes WHERE kind=%s ORDER BY id"
    matching = self.conn.execute(query, (kind,)).fetchall()
    return self._hydrate_nodes(matching)
|
|
318
|
+
|
|
319
|
+
def nodes_by_label(self, label: str) -> list[Node]:
    """Return all nodes carrying ``label``, ordered by id and fully hydrated."""
    query = (
        "SELECT n.* FROM nodes n JOIN node_labels l ON l.node_id = n.id "
        "WHERE l.label = %s ORDER BY n.id"
    )
    matching = self.conn.execute(query, (label,)).fetchall()
    return self._hydrate_nodes(matching)
|
|
326
|
+
|
|
327
|
+
def out(self, node_id: str, edge_type: str | None = None) -> list[tuple[Edge, Node]]:
    """Return (edge, target node) pairs for edges leaving ``node_id``.

    A truthy ``edge_type`` restricts the result to that type. Rows are
    ordered by edge type, then by target node id.
    """
    sql = (
        "SELECT e.*, n.id AS n_id, n.kind AS n_kind, n.name AS n_name "
        "FROM edges e JOIN nodes n ON n.id = e.to_node WHERE e.from_node = %s"
    )
    params: tuple = (node_id,)
    if edge_type:
        sql += " AND e.type = %s"
        params += (edge_type,)
    sql += " ORDER BY e.type, n.id"
    rows = self.conn.execute(sql, params).fetchall()
    return self._rows_to_edge_node_pairs(rows)
|
|
338
|
+
|
|
339
|
+
def in_(self, node_id: str, edge_type: str | None = None) -> list[tuple[Edge, Node]]:
    """Return (edge, source node) pairs for edges arriving at ``node_id``.

    A truthy ``edge_type`` restricts the result to that type. Rows are
    ordered by edge type, then by source node id.
    """
    sql = (
        "SELECT e.*, n.id AS n_id, n.kind AS n_kind, n.name AS n_name "
        "FROM edges e JOIN nodes n ON n.id = e.from_node WHERE e.to_node = %s"
    )
    params: tuple = (node_id,)
    if edge_type:
        sql += " AND e.type = %s"
        params += (edge_type,)
    sql += " ORDER BY e.type, n.id"
    rows = self.conn.execute(sql, params).fetchall()
    return self._rows_to_edge_node_pairs(rows)
|
|
350
|
+
|
|
351
|
+
def descendants(self, node_id: str, edge_type: str, max_depth: int = 6) -> list[Node]:
    """Return the nodes reachable from ``node_id`` over ``edge_type`` edges.

    Follows outgoing edges of the given type for at most ``max_depth`` hops.
    The start node is excluded unless it is reached again through a cycle.
    Results are distinct, hydrated, and ordered by id.
    """
    # UNION (not UNION ALL) de-duplicates (depth, node_id) rows while the
    # recursion runs, so diamonds and cycles cost at most one row per node per
    # depth instead of one row per distinct walk.
    rows = self.conn.execute(
        """
        WITH RECURSIVE walk(depth, node_id) AS (
            SELECT 0, %s
            UNION
            SELECT walk.depth + 1, e.to_node
            FROM walk JOIN edges e ON e.from_node = walk.node_id
            WHERE e.type = %s AND walk.depth < %s
        )
        SELECT DISTINCT n.* FROM walk JOIN nodes n ON n.id = walk.node_id
        WHERE walk.depth > 0
        ORDER BY n.id
        """,
        (node_id, edge_type, max_depth),
    ).fetchall()
    return self._hydrate_nodes(rows)
|
|
367
|
+
|
|
368
|
+
# neighborhood + shortest_path are BFS over out/in_/node, the same algorithm as
# the SQLite engine. shortest_path touches no SQL directly; neighborhood only adds
# an existence check and one final `= ANY(%s)` fetch of the visited nodes.
|
|
370
|
+
|
|
371
|
+
def neighborhood(self, node_id: str, depth: int = 1) -> dict[str, Node]:
    """Return the nodes within ``depth`` hops of ``node_id``, keyed by id.

    Edges are followed in both directions. The start node is included; a
    negative ``depth`` is treated as 0. An unknown ``node_id`` gives ``{}``.
    """
    hops = max(0, depth)
    start_row = self.conn.execute("SELECT 1 FROM nodes WHERE id=%s", (node_id,)).fetchone()
    if start_row is None:
        return {}
    reached: set[str] = {node_id}
    wave: set[str] = {node_id}
    for _ in range(hops):
        fresh: set[str] = set()
        for current in wave:
            # Outgoing neighbours first, then incoming, one lookup at a time.
            for expand in (self.out, self.in_):
                for _edge, neighbor in expand(current):
                    if neighbor.id not in reached:
                        reached.add(neighbor.id)
                        fresh.add(neighbor.id)
        if not fresh:
            break
        wave = fresh
    rows = self.conn.execute(
        "SELECT * FROM nodes WHERE id = ANY(%s)", (list(reached),)
    ).fetchall()
    result: dict[str, Node] = {}
    for hydrated in self._hydrate_nodes(rows):
        result[hydrated.id] = hydrated
    return result
|
|
395
|
+
|
|
396
|
+
def shortest_path(self, from_id: str, to_id: str, max_depth: int = 8) -> list[Node] | None:
    """Breadth-first search for a shortest path, ignoring edge direction.

    Returns the nodes from ``from_id`` to ``to_id`` inclusive, or None when
    no path of at most ``max_depth`` hops is found. When both ids are equal
    the result is the single node, or None if it does not exist.
    """
    if from_id == to_id:
        only = self.node(from_id)
        return [only] if only else None
    # Maps each discovered node id to the id it was reached from.
    parent_of: dict[str, str | None] = {from_id: None}
    level = [from_id]
    for _ in range(max_depth):
        upcoming: list[str] = []
        for current in level:
            # Outgoing neighbours are explored before incoming ones; the
            # second lookup only happens if the first did not finish the search.
            for expand in (self.out, self.in_):
                for _edge, neighbor in expand(current):
                    if neighbor.id in parent_of:
                        continue
                    parent_of[neighbor.id] = current
                    if neighbor.id == to_id:
                        return self._reconstruct_path(parent_of, to_id)
                    upcoming.append(neighbor.id)
        level = upcoming
        if not level:
            break
    return None
|
|
423
|
+
|
|
424
|
+
def _reconstruct_path(self, parents: dict[str, str | None], target: str) -> list[Node]:
|
|
425
|
+
seq: list[str] = []
|
|
426
|
+
cur: str | None = target
|
|
427
|
+
while cur is not None:
|
|
428
|
+
seq.append(cur)
|
|
429
|
+
cur = parents.get(cur)
|
|
430
|
+
seq.reverse()
|
|
431
|
+
return [n for n in (self.node(nid) for nid in seq) if n is not None]
|
|
432
|
+
|
|
433
|
+
# ---- stats ---------------------------------------------------------
|
|
434
|
+
|
|
435
|
+
def count_nodes_by_kind(self) -> dict[str, int]:
    """Return ``{kind: node count}``, with the most frequent kinds first."""
    query = "SELECT kind, COUNT(*) AS c FROM nodes GROUP BY kind ORDER BY c DESC"
    tally: dict[str, int] = {}
    for row in self.conn.execute(query).fetchall():
        tally[row["kind"]] = row["c"]
    return tally
|
|
440
|
+
|
|
441
|
+
def count_edges_by_type(self) -> dict[str, int]:
    """Return ``{edge type: edge count}``, with the most frequent types first."""
    query = "SELECT type, COUNT(*) AS c FROM edges GROUP BY type ORDER BY c DESC"
    tally: dict[str, int] = {}
    for row in self.conn.execute(query).fetchall():
        tally[row["type"]] = row["c"]
    return tally
|
|
446
|
+
|
|
447
|
+
def total_nodes(self) -> int:
    """Return the number of rows in the ``nodes`` table."""
    count_row = self.conn.execute("SELECT COUNT(*) AS c FROM nodes").fetchone()
    return count_row["c"]
|
|
449
|
+
|
|
450
|
+
def total_edges(self) -> int:
    """Return the number of rows in the ``edges`` table."""
    count_row = self.conn.execute("SELECT COUNT(*) AS c FROM edges").fetchone()
    return count_row["c"]
|
|
452
|
+
|
|
453
|
+
# ---- hydration (jsonb returns parsed values — no json.loads) --------
|
|
454
|
+
|
|
455
|
+
def _hydrate_node(self, row: dict) -> Node:
    """Build a ``Node`` from a ``nodes`` row, loading its labels and properties.

    Issues one query for labels and one for properties; property values are
    used exactly as the driver returns them from the JSONB column.
    """
    node_id = row["id"]
    label_rows = self.conn.execute(
        "SELECT label FROM node_labels WHERE node_id=%s", (node_id,)
    ).fetchall()
    labels = set()
    for label_row in label_rows:
        labels.add(label_row["label"])
    prop_rows = self.conn.execute(
        "SELECT key, value_json FROM node_properties WHERE node_id=%s", (node_id,)
    ).fetchall()
    props = {}
    for prop_row in prop_rows:
        props[prop_row["key"]] = prop_row["value_json"]
    return Node(id=node_id, kind=row["kind"], name=row["name"], labels=labels, properties=props)
|
|
470
|
+
|
|
471
|
+
def _hydrate_nodes(self, rows: Iterable[dict]) -> list[Node]:
    """Build ``Node`` objects for many ``nodes`` rows with two batched queries.

    Labels and properties for all ids are fetched with ``= ANY(%s)`` rather
    than per node. The output keeps the order of ``rows``; no rows means no
    queries and an empty list.
    """
    materialized = list(rows)
    if not materialized:
        return []
    node_ids = [row["id"] for row in materialized]
    label_sets: dict[str, set[str]] = {}
    prop_maps: dict[str, dict[str, Any]] = {}
    for nid in node_ids:
        # Pre-seed so nodes without labels/properties still get empty containers.
        label_sets[nid] = set()
        prop_maps[nid] = {}
    label_rows = self.conn.execute(
        "SELECT node_id, label FROM node_labels WHERE node_id = ANY(%s)", (node_ids,)
    ).fetchall()
    for label_row in label_rows:
        label_sets[label_row["node_id"]].add(label_row["label"])
    prop_rows = self.conn.execute(
        "SELECT node_id, key, value_json FROM node_properties WHERE node_id = ANY(%s)", (node_ids,)
    ).fetchall()
    for prop_row in prop_rows:
        prop_maps[prop_row["node_id"]][prop_row["key"]] = prop_row["value_json"]
    hydrated: list[Node] = []
    for row in materialized:
        nid = row["id"]
        hydrated.append(
            Node(id=nid, kind=row["kind"], name=row["name"],
                 labels=label_sets[nid], properties=prop_maps[nid])
        )
    return hydrated
|
|
491
|
+
|
|
492
|
+
def _rows_to_edge_node_pairs(self, rows: Iterable[dict]) -> list[tuple[Edge, Node]]:
    """Turn joined edge+node rows into ``(Edge, Node)`` pairs.

    Rows are expected to carry the edge columns plus ``n_id``/``n_kind``/
    ``n_name`` for the neighbour. Edge properties are loaded in one
    ``= ANY(%s)`` query; the neighbour ``Node`` is built from id, kind and
    name only.
    """
    materialized = list(rows)
    if not materialized:
        return []
    edge_ids = [row["id"] for row in materialized]
    edge_props: dict[str, dict[str, Any]] = {}
    for eid in edge_ids:
        edge_props[eid] = {}
    prop_rows = self.conn.execute(
        "SELECT edge_id, key, value_json FROM edge_properties WHERE edge_id = ANY(%s)", (edge_ids,)
    ).fetchall()
    for prop_row in prop_rows:
        edge_props[prop_row["edge_id"]][prop_row["key"]] = prop_row["value_json"]
    pairs: list[tuple[Edge, Node]] = []
    for row in materialized:
        edge = Edge(id=row["id"], from_node=row["from_node"], to_node=row["to_node"],
                    type=row["type"], properties=edge_props[row["id"]])
        neighbor = Node(id=row["n_id"], kind=row["n_kind"], name=row["n_name"])
        pairs.append((edge, neighbor))
    return pairs
|
|
510
|
+
|
|
511
|
+
def _hydrate_edge(self, row: dict) -> Edge:
    """Build an ``Edge`` from an ``edges`` row, loading its properties with one query."""
    edge_id = row["id"]
    prop_rows = self.conn.execute(
        "SELECT key, value_json FROM edge_properties WHERE edge_id=%s", (edge_id,)
    ).fetchall()
    props = {}
    for prop_row in prop_rows:
        props[prop_row["key"]] = prop_row["value_json"]
    return Edge(
        id=edge_id,
        from_node=row["from_node"],
        to_node=row["to_node"],
        type=row["type"],
        properties=props,
    )
|
|
521
|
+
|
|
522
|
+
|
|
523
|
+
@backend("postgres")
def open_postgres(*, location: str, **options: Any) -> GraphBackend:
    """Factory registered under ``"postgres"``: open a ``PostgresGraph`` for the DSN ``location``."""
    graph = PostgresGraph(location, **options)
    return graph
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
"""SQLite engine — the live default, and the identity of the project.
|
|
2
|
+
|
|
3
|
+
Zero dependencies, in-process, one file. The existing `Graph` already *is* a
|
|
4
|
+
`GraphBackend` (structurally), so this factory is a thin adapter: ensure the
|
|
5
|
+
parent dir exists, open the file. Its event log co-locates in the same file
|
|
6
|
+
(see resolver), which is what makes the default ontology fully embeddable.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import Any
|
|
13
|
+
|
|
14
|
+
from kgrdbms.backends import backend
|
|
15
|
+
from kgrdbms.backends.base import GraphBackend
|
|
16
|
+
from kgrdbms.graph import Graph
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@backend("sqlite")
def open_sqlite(*, location: str, **options: Any) -> GraphBackend:
    """Factory registered under ``"sqlite"``: open a ``Graph`` stored at ``location``.

    For anything other than ``":memory:"`` or the empty string, the parent
    directory is created first. ``options`` is accepted but not used.
    """
    needs_directory = location not in (":memory:", "")
    if needs_directory:
        parent_dir = Path(location).parent
        parent_dir.mkdir(parents=True, exist_ok=True)
    return Graph(path=location)
|