agentforge-graph 0.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agentforge_graph/__init__.py +6 -0
- agentforge_graph/chunking/__init__.py +12 -0
- agentforge_graph/chunking/cast.py +159 -0
- agentforge_graph/chunking/chunk.py +19 -0
- agentforge_graph/chunking/tokens.py +15 -0
- agentforge_graph/cli.py +607 -0
- agentforge_graph/config.py +259 -0
- agentforge_graph/core/__init__.py +54 -0
- agentforge_graph/core/conformance.py +270 -0
- agentforge_graph/core/contracts.py +163 -0
- agentforge_graph/core/kinds.py +68 -0
- agentforge_graph/core/models.py +134 -0
- agentforge_graph/core/provenance.py +62 -0
- agentforge_graph/core/symbols.py +116 -0
- agentforge_graph/embed/__init__.py +28 -0
- agentforge_graph/embed/base.py +22 -0
- agentforge_graph/embed/bedrock.py +85 -0
- agentforge_graph/embed/fake.py +34 -0
- agentforge_graph/embed/openai.py +67 -0
- agentforge_graph/embed/pipeline.py +184 -0
- agentforge_graph/embed/registry.py +66 -0
- agentforge_graph/embed/report.py +15 -0
- agentforge_graph/enrich/__init__.py +70 -0
- agentforge_graph/enrich/anthropic.py +38 -0
- agentforge_graph/enrich/anthropic_client.py +109 -0
- agentforge_graph/enrich/bedrock.py +24 -0
- agentforge_graph/enrich/bedrock_client.py +115 -0
- agentforge_graph/enrich/bedrock_summarizer.py +23 -0
- agentforge_graph/enrich/claude.py +172 -0
- agentforge_graph/enrich/enricher.py +108 -0
- agentforge_graph/enrich/governs.py +173 -0
- agentforge_graph/enrich/governs_enricher.py +152 -0
- agentforge_graph/enrich/heuristics.py +224 -0
- agentforge_graph/enrich/judge.py +63 -0
- agentforge_graph/enrich/registry.py +133 -0
- agentforge_graph/enrich/report.py +60 -0
- agentforge_graph/enrich/summarizer.py +62 -0
- agentforge_graph/enrich/summary_enricher.py +211 -0
- agentforge_graph/enrich/taxonomy.py +38 -0
- agentforge_graph/frameworks/__init__.py +29 -0
- agentforge_graph/frameworks/base.py +75 -0
- agentforge_graph/frameworks/detect.py +124 -0
- agentforge_graph/frameworks/extractor.py +63 -0
- agentforge_graph/frameworks/orm.py +93 -0
- agentforge_graph/frameworks/packs/_js_ast.py +56 -0
- agentforge_graph/frameworks/packs/_python_ast.py +157 -0
- agentforge_graph/frameworks/packs/django/__init__.py +240 -0
- agentforge_graph/frameworks/packs/django/models.scm +7 -0
- agentforge_graph/frameworks/packs/express/__init__.py +133 -0
- agentforge_graph/frameworks/packs/express/routes.scm +8 -0
- agentforge_graph/frameworks/packs/fastapi/__init__.py +210 -0
- agentforge_graph/frameworks/packs/fastapi/depends.scm +6 -0
- agentforge_graph/frameworks/packs/fastapi/routes.scm +10 -0
- agentforge_graph/frameworks/packs/flask/__init__.py +143 -0
- agentforge_graph/frameworks/packs/flask/routes.scm +11 -0
- agentforge_graph/frameworks/packs/nestjs/__init__.py +205 -0
- agentforge_graph/frameworks/packs/nestjs/routes.scm +6 -0
- agentforge_graph/frameworks/packs/spring/__init__.py +267 -0
- agentforge_graph/frameworks/packs/spring/routes.scm +6 -0
- agentforge_graph/frameworks/packs/sqlalchemy/__init__.py +250 -0
- agentforge_graph/frameworks/packs/sqlalchemy/models.scm +7 -0
- agentforge_graph/frameworks/registry.py +44 -0
- agentforge_graph/ingest/__init__.py +30 -0
- agentforge_graph/ingest/codegraph.py +847 -0
- agentforge_graph/ingest/extractor.py +353 -0
- agentforge_graph/ingest/incremental/__init__.py +25 -0
- agentforge_graph/ingest/incremental/detect.py +118 -0
- agentforge_graph/ingest/incremental/dirty.py +61 -0
- agentforge_graph/ingest/incremental/indexer.py +218 -0
- agentforge_graph/ingest/incremental/meta.py +72 -0
- agentforge_graph/ingest/incremental/ports.py +39 -0
- agentforge_graph/ingest/pack.py +160 -0
- agentforge_graph/ingest/packs/__init__.py +34 -0
- agentforge_graph/ingest/packs/cpp/__init__.py +35 -0
- agentforge_graph/ingest/packs/cpp/references.scm +15 -0
- agentforge_graph/ingest/packs/cpp/structure.scm +49 -0
- agentforge_graph/ingest/packs/csharp/__init__.py +35 -0
- agentforge_graph/ingest/packs/csharp/references.scm +12 -0
- agentforge_graph/ingest/packs/csharp/structure.scm +45 -0
- agentforge_graph/ingest/packs/go/__init__.py +38 -0
- agentforge_graph/ingest/packs/go/references.scm +12 -0
- agentforge_graph/ingest/packs/go/structure.scm +64 -0
- agentforge_graph/ingest/packs/java/__init__.py +35 -0
- agentforge_graph/ingest/packs/java/references.scm +12 -0
- agentforge_graph/ingest/packs/java/structure.scm +38 -0
- agentforge_graph/ingest/packs/javascript/__init__.py +34 -0
- agentforge_graph/ingest/packs/javascript/references.scm +11 -0
- agentforge_graph/ingest/packs/javascript/structure.scm +166 -0
- agentforge_graph/ingest/packs/php/__init__.py +35 -0
- agentforge_graph/ingest/packs/php/references.scm +15 -0
- agentforge_graph/ingest/packs/php/structure.scm +44 -0
- agentforge_graph/ingest/packs/python/__init__.py +25 -0
- agentforge_graph/ingest/packs/python/references.scm +14 -0
- agentforge_graph/ingest/packs/python/structure.scm +57 -0
- agentforge_graph/ingest/packs/ruby/__init__.py +37 -0
- agentforge_graph/ingest/packs/ruby/references.scm +12 -0
- agentforge_graph/ingest/packs/ruby/structure.scm +37 -0
- agentforge_graph/ingest/packs/rust/__init__.py +39 -0
- agentforge_graph/ingest/packs/rust/references.scm +12 -0
- agentforge_graph/ingest/packs/rust/structure.scm +46 -0
- agentforge_graph/ingest/packs/typescript/__init__.py +31 -0
- agentforge_graph/ingest/packs/typescript/references.scm +11 -0
- agentforge_graph/ingest/packs/typescript/structure.scm +99 -0
- agentforge_graph/ingest/pipeline.py +134 -0
- agentforge_graph/ingest/report.py +84 -0
- agentforge_graph/ingest/resolver.py +467 -0
- agentforge_graph/ingest/source.py +79 -0
- agentforge_graph/knowledge/__init__.py +28 -0
- agentforge_graph/knowledge/adr.py +136 -0
- agentforge_graph/knowledge/commits.py +152 -0
- agentforge_graph/knowledge/ingest.py +312 -0
- agentforge_graph/knowledge/mentions.py +71 -0
- agentforge_graph/knowledge/report.py +32 -0
- agentforge_graph/main.py +21 -0
- agentforge_graph/providers.py +36 -0
- agentforge_graph/repomap/__init__.py +14 -0
- agentforge_graph/repomap/rank.py +161 -0
- agentforge_graph/repomap/render.py +55 -0
- agentforge_graph/repomap/repomap.py +66 -0
- agentforge_graph/retrieve/__init__.py +21 -0
- agentforge_graph/retrieve/pack.py +76 -0
- agentforge_graph/retrieve/rerank.py +251 -0
- agentforge_graph/retrieve/retriever.py +286 -0
- agentforge_graph/retrieve/scoring.py +36 -0
- agentforge_graph/serve/__init__.py +19 -0
- agentforge_graph/serve/engine.py +204 -0
- agentforge_graph/serve/http_runner.py +133 -0
- agentforge_graph/serve/server.py +110 -0
- agentforge_graph/serve/tools.py +307 -0
- agentforge_graph/store/__init__.py +32 -0
- agentforge_graph/store/_rowmap.py +102 -0
- agentforge_graph/store/errors.py +22 -0
- agentforge_graph/store/facade.py +89 -0
- agentforge_graph/store/kuzu_store.py +380 -0
- agentforge_graph/store/lance_store.py +146 -0
- agentforge_graph/store/neo4j_store.py +294 -0
- agentforge_graph/store/pgvector_store.py +170 -0
- agentforge_graph/store/registry.py +45 -0
- agentforge_graph/temporal/__init__.py +36 -0
- agentforge_graph/temporal/backfill.py +338 -0
- agentforge_graph/temporal/events.py +82 -0
- agentforge_graph/temporal/index.py +190 -0
- agentforge_graph/temporal/mining.py +190 -0
- agentforge_graph/temporal/recorder.py +114 -0
- agentforge_graph/temporal/store.py +282 -0
- agentforge_graph-0.3.2.dist-info/METADATA +291 -0
- agentforge_graph-0.3.2.dist-info/RECORD +151 -0
- agentforge_graph-0.3.2.dist-info/WHEEL +4 -0
- agentforge_graph-0.3.2.dist-info/entry_points.txt +3 -0
- agentforge_graph-0.3.2.dist-info/licenses/LICENSE +202 -0
- agentforge_graph-0.3.2.dist-info/licenses/NOTICE +14 -0
|
@@ -0,0 +1,380 @@
|
|
|
1
|
+
"""Kuzu-backed ``GraphStore`` — the default embedded graph adapter and the
|
|
2
|
+
conformance baseline for every other adapter (ADR-0006).
|
|
3
|
+
|
|
4
|
+
Design (see docs/design/design-003): an *open* schema (arbitrary kinds,
|
|
5
|
+
free-form ``attrs``) is mapped onto Kuzu's typed property graph via **one
|
|
6
|
+
generic node table + one generic edge table**, with ``kind`` as a string
|
|
7
|
+
column and ``attrs`` as a JSON string — so an unrecognized kind round-trips
|
|
8
|
+
without any DDL change (ADR-0005).
|
|
9
|
+
|
|
10
|
+
Kuzu is synchronous and a connection is not concurrency-safe, so every DB
|
|
11
|
+
interaction runs on a worker thread (``asyncio.to_thread``) under a single
|
|
12
|
+
``asyncio.Lock``; each public method's DB work is one sync function, which
|
|
13
|
+
keeps multi-statement writes (``upsert``) atomic on one thread.
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
import asyncio
|
|
19
|
+
from pathlib import Path
|
|
20
|
+
from typing import Any
|
|
21
|
+
|
|
22
|
+
import kuzu
|
|
23
|
+
|
|
24
|
+
from agentforge_graph.core import (
|
|
25
|
+
Direction,
|
|
26
|
+
Edge,
|
|
27
|
+
EdgeKind,
|
|
28
|
+
FileSubgraph,
|
|
29
|
+
GraphQuery,
|
|
30
|
+
GraphStore,
|
|
31
|
+
Node,
|
|
32
|
+
NodeKind,
|
|
33
|
+
QueryResult,
|
|
34
|
+
Source,
|
|
35
|
+
)
|
|
36
|
+
|
|
37
|
+
from ._rowmap import (
|
|
38
|
+
acceptable_sources as _acceptable_sources,
|
|
39
|
+
)
|
|
40
|
+
from ._rowmap import (
|
|
41
|
+
dump_attrs as _dump_attrs,
|
|
42
|
+
)
|
|
43
|
+
from ._rowmap import (
|
|
44
|
+
edge_from_row as _edge_from_rel,
|
|
45
|
+
)
|
|
46
|
+
from ._rowmap import (
|
|
47
|
+
edge_params as _edge_params,
|
|
48
|
+
)
|
|
49
|
+
from ._rowmap import (
|
|
50
|
+
load_attrs as _load_attrs,
|
|
51
|
+
)
|
|
52
|
+
from ._rowmap import (
|
|
53
|
+
node_from_row as _node_from_row,
|
|
54
|
+
)
|
|
55
|
+
from ._rowmap import (
|
|
56
|
+
node_params as _node_params,
|
|
57
|
+
)
|
|
58
|
+
|
|
59
|
+
# Version tag for the on-disk layout described by _DDL below.
# NOTE(review): not referenced anywhere in the visible part of this module —
# confirm where (or whether) it is persisted/checked.
SCHEMA_VERSION = 1

# Statements run, in order, every time a database is opened: one generic node
# table (CkgNode) and one generic edge table (CkgEdge). ``kind`` and ``attrs``
# are plain STRING columns on both tables.
_DDL = [
    # Node table: keyed by ``id``.
    """CREATE NODE TABLE CkgNode(
id STRING, kind STRING, name STRING,
span_start INT64, span_end INT64,
attrs STRING, sym_path STRING,
prov_source STRING, prov_extractor STRING,
prov_commit STRING, prov_confidence DOUBLE,
origin_path STRING,
PRIMARY KEY(id))""",
    # Edge table: CkgNode -> CkgNode, carrying its own provenance columns.
    """CREATE REL TABLE CkgEdge(
FROM CkgNode TO CkgNode,
kind STRING, attrs STRING,
prov_source STRING, prov_extractor STRING,
prov_commit STRING, prov_confidence DOUBLE,
origin_path STRING, resolved_from STRING)""",
]
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def _rows(result: Any) -> list[Any]:
    """Drain ``result`` into a list, one entry per ``get_next()`` call.

    ``result`` is expected to be a single query result exposing
    ``has_next()`` / ``get_next()``; this module only issues single
    statements, so the multi-statement (list of results) form is not handled.
    """
    drained: list[Any] = []
    while True:
        if not result.has_next():
            return drained
        drained.append(result.get_next())
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
class KuzuGraphStore(GraphStore):
    """``GraphStore`` adapter that keeps the graph in an embedded Kuzu database.

    Nodes are rows of ``CkgNode`` and edges are rows of ``CkgEdge``. Each
    public coroutine acquires ``self._lock`` and runs its ``*_sync``
    counterpart through ``asyncio.to_thread``, so the shared connection
    serves one call at a time.
    """

    def __init__(self, db: kuzu.Database, conn: kuzu.Connection, path: Path) -> None:
        self._path = path
        self._db = db
        self._conn = conn
        self._closed = False
        self._lock = asyncio.Lock()

    @classmethod
    async def open(cls, path: str | Path, config: dict[str, Any] | None = None) -> KuzuGraphStore:
        """Open the Kuzu database at ``path`` and make sure its tables exist.

        The parent directory of ``path`` is created when missing. ``config``
        (the ``store.graph.config`` block) is accepted for interface parity
        and is not read by this embedded adapter.
        """
        location = Path(path)
        location.parent.mkdir(parents=True, exist_ok=True)
        database, connection = await asyncio.to_thread(cls._connect, location)
        return cls(database, connection, location)

    @staticmethod
    def _connect(p: Path) -> tuple[kuzu.Database, kuzu.Connection]:
        """Create the database handle and connection, then apply ``_DDL``."""
        database = kuzu.Database(str(p))
        connection = kuzu.Connection(database)
        for statement in _DDL:
            try:
                connection.execute(statement)
            except RuntimeError as exc:
                # On reopen the tables are already there; anything else is fatal.
                if "already exists" in str(exc):
                    continue
                raise
        return database, connection

    def _in_transaction(self, work: Any) -> None:
        """Run the zero-argument callable ``work`` between BEGIN and COMMIT.

        Any ``Exception`` raised by ``work`` (or by the COMMIT) triggers a
        ROLLBACK and is then re-raised to the caller.
        """
        self._conn.execute("BEGIN TRANSACTION")
        try:
            work()
            self._conn.execute("COMMIT")
        except Exception:
            self._conn.execute("ROLLBACK")
            raise

    # --- writes -----------------------------------------------------------

    async def upsert(self, subgraph: FileSubgraph) -> None:
        """Replace everything owned by ``subgraph.path`` with ``subgraph``."""
        async with self._lock:
            await asyncio.to_thread(self._upsert_sync, subgraph)

    def _upsert_sync(self, sg: FileSubgraph) -> None:
        """Transactionally merge the nodes and rewrite the edges of one file."""
        origin = sg.path
        keep_ids = [node.id for node in sg.nodes]

        def apply() -> None:
            for node in sg.nodes:
                self._merge_node(node, origin_path=origin)
            # Nodes this file used to own but no longer declares go away,
            # together with whatever edges touch them.
            self._conn.execute(
                "MATCH (n:CkgNode) WHERE n.origin_path = $p AND NOT n.id IN $keep DETACH DELETE n",
                {"p": origin, "keep": keep_ids},
            )
            # The file's edges are rebuilt from scratch.
            self._conn.execute(
                "MATCH ()-[e:CkgEdge]->() WHERE e.origin_path = $p DELETE e", {"p": origin}
            )
            for edge in sg.edges:
                self._insert_edge(edge, origin_path=origin)

        self._in_transaction(apply)

    async def add(self, items: list[Node | Edge]) -> None:
        """Write loose nodes/edges that are not owned by any file."""
        async with self._lock:
            await asyncio.to_thread(self._add_sync, items)

    def _add_sync(self, items: list[Node | Edge]) -> None:
        """Merge nodes / insert edges with an empty ``origin_path``, in one transaction."""

        def apply() -> None:
            for item in items:
                writer = self._merge_node if isinstance(item, Node) else self._insert_edge
                writer(item, origin_path="")

        self._in_transaction(apply)

    def _merge_node(self, node: Node, origin_path: str) -> None:
        """MERGE on ``id`` and overwrite every other column of the node row."""
        bindings = _node_params(node, origin_path)
        self._conn.execute(
            "MERGE (n:CkgNode {id: $id}) SET "
            "n.kind = $kind, n.name = $name, "
            "n.span_start = $span_start, n.span_end = $span_end, "
            "n.attrs = $attrs, n.sym_path = $sym_path, "
            "n.prov_source = $prov_source, n.prov_extractor = $prov_extractor, "
            "n.prov_commit = $prov_commit, n.prov_confidence = $prov_confidence, "
            "n.origin_path = $origin_path",
            bindings,
        )

    def _insert_edge(self, edge: Edge, origin_path: str) -> None:
        """Create one edge between two existing nodes.

        The statement MATCHes both endpoints first, so an edge whose source
        or target is not stored yet creates nothing and raises nothing — a
        resolved cross-file edge can precede its target and is expected to be
        re-added once the target file has been indexed.
        """
        bindings = _edge_params(edge, origin_path)
        self._conn.execute(
            "MATCH (a:CkgNode {id: $src}), (b:CkgNode {id: $dst}) "
            "CREATE (a)-[e:CkgEdge {kind: $kind}]->(b) SET "
            "e.attrs = $attrs, e.prov_source = $prov_source, "
            "e.prov_extractor = $prov_extractor, e.prov_commit = $prov_commit, "
            "e.prov_confidence = $prov_confidence, e.origin_path = $origin_path, "
            "e.resolved_from = $resolved_from",
            bindings,
        )

    async def delete_file(self, path: str) -> None:
        """Remove every node and edge whose ``origin_path`` equals ``path``."""
        async with self._lock:
            await asyncio.to_thread(self._delete_file_sync, path)

    def _delete_file_sync(self, path: str) -> None:
        """Delete the file's edges, then its nodes, inside one transaction."""

        def apply() -> None:
            binding = {"p": path}
            self._conn.execute(
                "MATCH ()-[e:CkgEdge]->() WHERE e.origin_path = $p DELETE e", binding
            )
            self._conn.execute(
                "MATCH (n:CkgNode) WHERE n.origin_path = $p DETACH DELETE n", binding
            )

        self._in_transaction(apply)

    async def clear_resolved(self, paths: list[str]) -> None:
        """Drop the resolved edges originating from ``paths``."""
        async with self._lock:
            await asyncio.to_thread(self._clear_resolved_sync, paths)

    def _clear_resolved_sync(self, paths: list[str]) -> None:
        """Delete resolved edges for ``paths`` and prune package nodes left without inbound edges."""
        if not paths:
            return

        def apply() -> None:
            self._conn.execute(
                "MATCH ()-[e:CkgEdge]->() "
                "WHERE e.origin_path IN $paths AND e.prov_source = $resolved DELETE e",
                {"paths": paths, "resolved": Source.RESOLVED.value},
            )
            # Package nodes that nothing points at any more are removed too, so
            # an incremental run leaves the same graph as a full re-index.
            self._conn.execute(
                "MATCH (p:CkgNode) WHERE p.kind = $pkg "
                "OPTIONAL MATCH ()-[e:CkgEdge]->(p) "
                "WITH p, count(e) AS c WHERE c = 0 DETACH DELETE p",
                {"pkg": NodeKind.PACKAGE.value},
            )

        self._in_transaction(apply)

    async def clear_outgoing(self, src_ids: list[str], kind: EdgeKind) -> None:
        """Delete the outgoing edges of kind ``kind`` from each node in ``src_ids``."""
        async with self._lock:
            await asyncio.to_thread(self._clear_outgoing_sync, src_ids, kind)

    def _clear_outgoing_sync(self, src_ids: list[str], kind: EdgeKind) -> None:
        """Single-statement delete; an empty ``src_ids`` issues no query."""
        if src_ids:
            self._conn.execute(
                "MATCH (a:CkgNode)-[e:CkgEdge]->() WHERE a.id IN $ids AND e.kind = $kind DELETE e",
                {"ids": src_ids, "kind": kind.value},
            )

    # --- reads ------------------------------------------------------------

    async def query(self, q: GraphQuery) -> QueryResult:
        """Return the nodes matching ``q``, capped at ``q.limit``."""
        async with self._lock:
            return await asyncio.to_thread(self._query_sync, q)

    def _query_sync(self, q: GraphQuery) -> QueryResult:
        """Translate the set fields of ``q`` into AND-ed predicates and run them."""
        predicates: list[str] = []
        bindings: dict[str, Any] = {}
        if q.kinds is not None:
            predicates.append("n.kind IN $kinds")
            bindings["kinds"] = [kind.value for kind in q.kinds]
        if q.name is not None:
            predicates.append("n.name = $name")
            bindings["name"] = q.name
        if q.path_prefix is not None:
            predicates.append("n.sym_path STARTS WITH $prefix")
            bindings["prefix"] = q.path_prefix
        if q.min_source is not None:
            predicates.append("n.prov_source IN $sources")
            bindings["sources"] = _acceptable_sources(q.min_source)
        where = ""
        if predicates:
            where = " WHERE " + " AND ".join(predicates)
        # One row beyond the limit tells us whether the result was cut short.
        bindings["lim"] = q.limit + 1
        result = self._conn.execute(f"MATCH (n:CkgNode){where} RETURN n LIMIT $lim", bindings)
        fetched = [_node_from_row(row[0]) for row in _rows(result)]
        return QueryResult(nodes=fetched[: q.limit], truncated=len(fetched) > q.limit)

    async def neighbors(
        self,
        node_id: str,
        kinds: list[EdgeKind] | None = None,
        depth: int = 1,
    ) -> list[Node]:
        """Return the nodes reachable from ``node_id`` within ``depth`` hops."""
        async with self._lock:
            return await asyncio.to_thread(self._neighbors_sync, node_id, kinds, depth)

    def _neighbors_sync(self, node_id: str, kinds: list[EdgeKind] | None, depth: int) -> list[Node]:
        """Breadth-first expansion, one query per hop.

        Edges are followed in both directions and, when ``kinds`` is given,
        only if their kind is listed. The start node itself is never returned;
        results are ordered by discovery.
        """
        edge_kinds = None if kinds is None else [kind.value for kind in kinds]
        seen = {node_id}
        wave = [node_id]
        discovered: list[str] = []
        for _ in range(depth):
            if not wave:
                break
            bindings: dict[str, Any] = {"frontier": wave}
            kind_filter = ""
            if edge_kinds is not None:
                kind_filter = " AND e.kind IN $kinds"
                bindings["kinds"] = edge_kinds
            result = self._conn.execute(
                "MATCH (a:CkgNode)-[e:CkgEdge]-(b:CkgNode) "
                f"WHERE a.id IN $frontier{kind_filter} RETURN DISTINCT b.id",
                bindings,
            )
            fresh: list[str] = []
            for row in _rows(result):
                candidate = row[0]
                if candidate in seen:
                    continue
                seen.add(candidate)
                fresh.append(candidate)
            discovered.extend(fresh)
            wave = fresh
        found: list[Node] = []
        for nid in discovered:
            node = self._get_sync(nid)
            if node is not None:
                found.append(node)
        return found

    async def get(self, node_id: str) -> Node | None:
        """Fetch one node by id, or ``None`` when it is not stored."""
        async with self._lock:
            return await asyncio.to_thread(self._get_sync, node_id)

    def _get_sync(self, node_id: str) -> Node | None:
        """Look the node up by id on the calling thread."""
        matches = _rows(
            self._conn.execute("MATCH (n:CkgNode {id: $id}) RETURN n", {"id": node_id})
        )
        if not matches:
            return None
        return _node_from_row(matches[0][0])

    async def set_attrs(self, node_id: str, attrs: dict[str, Any]) -> None:
        """Merge ``attrs`` into the stored attrs of ``node_id``."""
        async with self._lock:
            await asyncio.to_thread(self._set_attrs_sync, node_id, attrs)

    def _set_attrs_sync(self, node_id: str, attrs: dict[str, Any]) -> None:
        """Read the current attrs, overlay ``attrs`` on top, write the result back."""
        current = _rows(
            self._conn.execute("MATCH (n:CkgNode {id: $id}) RETURN n.attrs", {"id": node_id})
        )
        if not current:
            # Unknown node: nothing to update.
            return
        stored = _load_attrs(current[0][0])
        combined = {**stored, **attrs}
        # Only the attrs column is written; every other column stays as it was.
        self._conn.execute(
            "MATCH (n:CkgNode {id: $id}) SET n.attrs = $attrs",
            {"id": node_id, "attrs": _dump_attrs(combined)},
        )

    async def adjacent(
        self,
        node_id: str,
        kinds: list[EdgeKind] | None = None,
        direction: Direction = "both",
    ) -> list[Edge]:
        """Return the edges touching ``node_id`` in the requested direction."""
        async with self._lock:
            return await asyncio.to_thread(self._adjacent_sync, node_id, kinds, direction)

    def _adjacent_sync(
        self, node_id: str, kinds: list[EdgeKind] | None, direction: Direction
    ) -> list[Edge]:
        """Collect outgoing edges first, then incoming ones, optionally filtered by kind."""
        bindings: dict[str, Any] = {"id": node_id}
        where = ""
        if kinds is not None:
            where = " WHERE e.kind IN $kinds"
            bindings["kinds"] = [kind.value for kind in kinds]
        wants_out = direction in ("out", "both")
        wants_in = direction in ("in", "both")
        found: list[Edge] = []
        if wants_out:
            outgoing = self._conn.execute(
                f"MATCH (a:CkgNode {{id: $id}})-[e:CkgEdge]->(b:CkgNode){where} RETURN e, b.id",
                bindings,
            )
            for row in _rows(outgoing):
                found.append(_edge_from_rel(row[0], node_id, row[1]))
        if wants_in:
            incoming = self._conn.execute(
                f"MATCH (a:CkgNode {{id: $id}})<-[e:CkgEdge]-(b:CkgNode){where} RETURN e, b.id",
                bindings,
            )
            for row in _rows(incoming):
                found.append(_edge_from_rel(row[0], row[1], node_id))
        return found

    async def close(self) -> None:
        """Close the connection; calling it again after the first time does nothing."""
        async with self._lock:
            if not self._closed:
                self._closed = True
                await asyncio.to_thread(self._conn.close)
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
"""LanceDB-backed ``VectorStore`` — the default embedded vector adapter
|
|
2
|
+
(ADR-0006). feat-005 produces the ``Embedded`` items; feat-006 searches
|
|
3
|
+
them and joins the hits back into the graph via ``Store.expand``.
|
|
4
|
+
|
|
5
|
+
LanceDB ships a native async client, so this adapter is async all the way
|
|
6
|
+
down (no thread-wrapping, unlike the sync Kuzu adapter). One ``vectors``
|
|
7
|
+
table is created lazily on first ``upsert`` with the vector dimension fixed
|
|
8
|
+
from the first batch. The ``filter`` contract targets first-class columns
|
|
9
|
+
(``ref``, ``kind``, ``path``) — the portable subset every vector backend can
|
|
10
|
+
honour; ``path`` is derived from the ref's SymbolID, mirroring the graph
|
|
11
|
+
adapter's ``sym_path``.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import json
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
from typing import Any
|
|
19
|
+
|
|
20
|
+
import lancedb
|
|
21
|
+
import pyarrow as pa
|
|
22
|
+
|
|
23
|
+
from agentforge_graph.core import Embedded, ScoredRef, VectorStore
|
|
24
|
+
from agentforge_graph.core.symbols import SymbolID
|
|
25
|
+
|
|
26
|
+
# Name of the single LanceDB table this adapter reads and writes.
_TABLE = "vectors"
# Columns a caller may filter on; _where() rejects any other key.
_FILTERABLE = ("ref", "kind", "path")
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _sym_path(ref: str) -> str:
    """Return the ``path`` of the SymbolID encoded in ``ref``.

    A ``ValueError`` while parsing ``ref`` or reading the path yields ``""``.
    """
    try:
        path = SymbolID.parse(ref).path
    except ValueError:
        path = ""
    return path
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _sql_str(value: object) -> str:
    """Render ``str(value)`` as a single-quoted SQL literal.

    Embedded single quotes are doubled so they cannot terminate the literal.
    """
    escaped = str(value).replace("'", "''")
    return f"'{escaped}'"
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _where(filter: dict[str, Any]) -> str:
    """Build an AND-joined equality predicate from ``filter``.

    Each ``column: value`` pair becomes ``column = '<value>'`` with the value
    quoted by ``_sql_str``. An empty ``filter`` produces an empty string.

    Raises:
        ValueError: if ``filter`` has a key that is not in ``_FILTERABLE``.
    """
    rejected = sorted(set(filter).difference(_FILTERABLE))
    if rejected:
        raise ValueError(f"unfilterable column(s) {rejected}; allowed: {_FILTERABLE}")
    terms: list[str] = []
    for col, val in filter.items():
        terms.append(f"{col} = {_sql_str(val)}")
    return " AND ".join(terms)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def _row(item: Embedded) -> dict[str, Any]:
    """Flatten ``item`` into the column dict stored in the vectors table.

    ``path`` is derived from the ref via ``_sym_path`` and ``attrs`` is stored
    as a key-sorted JSON string under ``attrs_json``.
    """
    ref = item.ref
    components = [float(component) for component in item.vector]
    kind = item.kind.value
    path = _sym_path(item.ref)
    encoded_attrs = json.dumps(item.attrs, sort_keys=True)
    return {
        "ref": ref,
        "vector": components,
        "kind": kind,
        "path": path,
        "attrs_json": encoded_attrs,
    }
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def _schema(dim: int) -> pa.Schema:
    """Return the Arrow schema of the vectors table.

    ``vector`` is a fixed-size list of ``dim`` float32 values; every other
    column is a string.
    """
    text = pa.string()
    embedding = pa.list_(pa.float32(), dim)
    layout = (
        ("ref", text),
        ("vector", embedding),
        ("kind", text),
        ("path", text),
        ("attrs_json", text),
    )
    return pa.schema([pa.field(name, dtype) for name, dtype in layout])
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
class LanceVectorStore(VectorStore):
    """Embedded vector store backed by a LanceDB database directory.

    All rows live in one ``vectors`` table, created by the first non-empty
    ``upsert``. Until that table exists, ``search`` returns no hits and
    ``delete_where`` does nothing.
    """

    def __init__(self, db: Any, path: Path) -> None:
        self._db = db
        self._path = path
        # Table handle, opened or created on demand (see ``_table`` / ``upsert``).
        self._tbl: Any = None
        self._closed = False

    @classmethod
    async def open(cls, path: str | Path, config: dict[str, Any] | None = None) -> LanceVectorStore:
        """Connect to the LanceDB database at ``path``.

        The parent directory of ``path`` is created when missing. ``config``
        (the ``store.vectors.config`` block) is unused by the embedded
        driver — server adapters use it for connection details.
        """
        p = Path(path)
        p.parent.mkdir(parents=True, exist_ok=True)
        db = await lancedb.connect_async(str(p))
        return cls(db, p)

    async def _table(self) -> Any:
        """Return the cached table handle, opening it if it already exists on disk.

        Returns ``None`` when the table has not been created yet.
        """
        if self._tbl is None and _TABLE in await self._table_names():
            self._tbl = await self._db.open_table(_TABLE)
        return self._tbl

    async def _table_names(self) -> list[str]:
        """List the table names known to the connection."""
        # LanceDB's async list_tables() returns a paginated result object
        # (.tables + .page_token), not a plain list of names; fall back to the
        # result itself when there is no ``tables`` attribute.
        result = await self._db.list_tables()
        names = getattr(result, "tables", result)
        return list(names)

    async def upsert(self, items: list[Embedded]) -> None:
        """Insert ``items``, replacing any stored row that has the same ``ref``.

        An empty ``items`` is a no-op. When the table does not exist yet it is
        created with the vector dimension of the first item. If a ``ref``
        occurs more than once in ``items``, only its last occurrence is stored.
        """
        if not items:
            return
        # Collapse repeated refs (last one wins). The delete below only clears
        # rows already in the table, so without this every duplicate in the
        # batch would be added and the ref would end up with several rows.
        latest = {item.ref: item for item in items}
        tbl = await self._table()
        if tbl is None:
            tbl = await self._db.create_table(_TABLE, schema=_schema(len(items[0].vector)))
            self._tbl = tbl
        refs = ", ".join(_sql_str(ref) for ref in latest)
        await tbl.delete(f"ref IN ({refs})")  # delete-then-add = upsert by ref
        await tbl.add([_row(item) for item in latest.values()])

    async def search(
        self,
        vector: list[float],
        k: int,
        filter: dict[str, Any] | None = None,
    ) -> list[ScoredRef]:
        """Return up to ``k`` nearest rows to ``vector`` by cosine distance.

        ``filter`` (optional) restricts the search to rows whose columns equal
        the given values; a key outside ``_FILTERABLE`` raises ``ValueError``.
        Returns an empty list when nothing has been stored yet.
        """
        tbl = await self._table()
        if tbl is None:
            return []
        query = tbl.vector_search(vector).distance_type("cosine").limit(k)
        if filter:
            query = query.where(_where(filter))
        rows = await query.to_list()
        # Cosine distance in [0, 2] (smaller = closer); expose a cosine
        # similarity in [0, 1] (higher = closer) so scores are interpretable
        # and survive the retrieval decay (BUG-002).
        return [
            ScoredRef(
                ref=r["ref"],
                score=max(0.0, 1.0 - float(r["_distance"])),
                attrs=json.loads(r["attrs_json"]) if r.get("attrs_json") else {},
            )
            for r in rows
        ]

    async def delete_where(self, filter: dict[str, Any]) -> None:
        """Delete the rows matching ``filter``; a no-op when the table does not exist.

        A key outside ``_FILTERABLE`` raises ``ValueError``.
        """
        tbl = await self._table()
        if tbl is None:
            return
        await tbl.delete(_where(filter))

    async def close(self) -> None:
        """Close the database connection; repeated calls do nothing."""
        if self._closed:
            return
        self._closed = True
        self._db.close()  # LanceDB's async connection close() is synchronous
|