agentforge-graph 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (151) hide show
  1. agentforge_graph/__init__.py +6 -0
  2. agentforge_graph/chunking/__init__.py +12 -0
  3. agentforge_graph/chunking/cast.py +159 -0
  4. agentforge_graph/chunking/chunk.py +19 -0
  5. agentforge_graph/chunking/tokens.py +15 -0
  6. agentforge_graph/cli.py +607 -0
  7. agentforge_graph/config.py +259 -0
  8. agentforge_graph/core/__init__.py +54 -0
  9. agentforge_graph/core/conformance.py +270 -0
  10. agentforge_graph/core/contracts.py +163 -0
  11. agentforge_graph/core/kinds.py +68 -0
  12. agentforge_graph/core/models.py +134 -0
  13. agentforge_graph/core/provenance.py +62 -0
  14. agentforge_graph/core/symbols.py +116 -0
  15. agentforge_graph/embed/__init__.py +28 -0
  16. agentforge_graph/embed/base.py +22 -0
  17. agentforge_graph/embed/bedrock.py +85 -0
  18. agentforge_graph/embed/fake.py +34 -0
  19. agentforge_graph/embed/openai.py +67 -0
  20. agentforge_graph/embed/pipeline.py +184 -0
  21. agentforge_graph/embed/registry.py +66 -0
  22. agentforge_graph/embed/report.py +15 -0
  23. agentforge_graph/enrich/__init__.py +70 -0
  24. agentforge_graph/enrich/anthropic.py +38 -0
  25. agentforge_graph/enrich/anthropic_client.py +109 -0
  26. agentforge_graph/enrich/bedrock.py +24 -0
  27. agentforge_graph/enrich/bedrock_client.py +115 -0
  28. agentforge_graph/enrich/bedrock_summarizer.py +23 -0
  29. agentforge_graph/enrich/claude.py +172 -0
  30. agentforge_graph/enrich/enricher.py +108 -0
  31. agentforge_graph/enrich/governs.py +173 -0
  32. agentforge_graph/enrich/governs_enricher.py +152 -0
  33. agentforge_graph/enrich/heuristics.py +224 -0
  34. agentforge_graph/enrich/judge.py +63 -0
  35. agentforge_graph/enrich/registry.py +133 -0
  36. agentforge_graph/enrich/report.py +60 -0
  37. agentforge_graph/enrich/summarizer.py +62 -0
  38. agentforge_graph/enrich/summary_enricher.py +211 -0
  39. agentforge_graph/enrich/taxonomy.py +38 -0
  40. agentforge_graph/frameworks/__init__.py +29 -0
  41. agentforge_graph/frameworks/base.py +75 -0
  42. agentforge_graph/frameworks/detect.py +124 -0
  43. agentforge_graph/frameworks/extractor.py +63 -0
  44. agentforge_graph/frameworks/orm.py +93 -0
  45. agentforge_graph/frameworks/packs/_js_ast.py +56 -0
  46. agentforge_graph/frameworks/packs/_python_ast.py +157 -0
  47. agentforge_graph/frameworks/packs/django/__init__.py +240 -0
  48. agentforge_graph/frameworks/packs/django/models.scm +7 -0
  49. agentforge_graph/frameworks/packs/express/__init__.py +133 -0
  50. agentforge_graph/frameworks/packs/express/routes.scm +8 -0
  51. agentforge_graph/frameworks/packs/fastapi/__init__.py +210 -0
  52. agentforge_graph/frameworks/packs/fastapi/depends.scm +6 -0
  53. agentforge_graph/frameworks/packs/fastapi/routes.scm +10 -0
  54. agentforge_graph/frameworks/packs/flask/__init__.py +143 -0
  55. agentforge_graph/frameworks/packs/flask/routes.scm +11 -0
  56. agentforge_graph/frameworks/packs/nestjs/__init__.py +205 -0
  57. agentforge_graph/frameworks/packs/nestjs/routes.scm +6 -0
  58. agentforge_graph/frameworks/packs/spring/__init__.py +267 -0
  59. agentforge_graph/frameworks/packs/spring/routes.scm +6 -0
  60. agentforge_graph/frameworks/packs/sqlalchemy/__init__.py +250 -0
  61. agentforge_graph/frameworks/packs/sqlalchemy/models.scm +7 -0
  62. agentforge_graph/frameworks/registry.py +44 -0
  63. agentforge_graph/ingest/__init__.py +30 -0
  64. agentforge_graph/ingest/codegraph.py +847 -0
  65. agentforge_graph/ingest/extractor.py +353 -0
  66. agentforge_graph/ingest/incremental/__init__.py +25 -0
  67. agentforge_graph/ingest/incremental/detect.py +118 -0
  68. agentforge_graph/ingest/incremental/dirty.py +61 -0
  69. agentforge_graph/ingest/incremental/indexer.py +218 -0
  70. agentforge_graph/ingest/incremental/meta.py +72 -0
  71. agentforge_graph/ingest/incremental/ports.py +39 -0
  72. agentforge_graph/ingest/pack.py +160 -0
  73. agentforge_graph/ingest/packs/__init__.py +34 -0
  74. agentforge_graph/ingest/packs/cpp/__init__.py +35 -0
  75. agentforge_graph/ingest/packs/cpp/references.scm +15 -0
  76. agentforge_graph/ingest/packs/cpp/structure.scm +49 -0
  77. agentforge_graph/ingest/packs/csharp/__init__.py +35 -0
  78. agentforge_graph/ingest/packs/csharp/references.scm +12 -0
  79. agentforge_graph/ingest/packs/csharp/structure.scm +45 -0
  80. agentforge_graph/ingest/packs/go/__init__.py +38 -0
  81. agentforge_graph/ingest/packs/go/references.scm +12 -0
  82. agentforge_graph/ingest/packs/go/structure.scm +64 -0
  83. agentforge_graph/ingest/packs/java/__init__.py +35 -0
  84. agentforge_graph/ingest/packs/java/references.scm +12 -0
  85. agentforge_graph/ingest/packs/java/structure.scm +38 -0
  86. agentforge_graph/ingest/packs/javascript/__init__.py +34 -0
  87. agentforge_graph/ingest/packs/javascript/references.scm +11 -0
  88. agentforge_graph/ingest/packs/javascript/structure.scm +166 -0
  89. agentforge_graph/ingest/packs/php/__init__.py +35 -0
  90. agentforge_graph/ingest/packs/php/references.scm +15 -0
  91. agentforge_graph/ingest/packs/php/structure.scm +44 -0
  92. agentforge_graph/ingest/packs/python/__init__.py +25 -0
  93. agentforge_graph/ingest/packs/python/references.scm +14 -0
  94. agentforge_graph/ingest/packs/python/structure.scm +57 -0
  95. agentforge_graph/ingest/packs/ruby/__init__.py +37 -0
  96. agentforge_graph/ingest/packs/ruby/references.scm +12 -0
  97. agentforge_graph/ingest/packs/ruby/structure.scm +37 -0
  98. agentforge_graph/ingest/packs/rust/__init__.py +39 -0
  99. agentforge_graph/ingest/packs/rust/references.scm +12 -0
  100. agentforge_graph/ingest/packs/rust/structure.scm +46 -0
  101. agentforge_graph/ingest/packs/typescript/__init__.py +31 -0
  102. agentforge_graph/ingest/packs/typescript/references.scm +11 -0
  103. agentforge_graph/ingest/packs/typescript/structure.scm +99 -0
  104. agentforge_graph/ingest/pipeline.py +134 -0
  105. agentforge_graph/ingest/report.py +84 -0
  106. agentforge_graph/ingest/resolver.py +467 -0
  107. agentforge_graph/ingest/source.py +79 -0
  108. agentforge_graph/knowledge/__init__.py +28 -0
  109. agentforge_graph/knowledge/adr.py +136 -0
  110. agentforge_graph/knowledge/commits.py +152 -0
  111. agentforge_graph/knowledge/ingest.py +312 -0
  112. agentforge_graph/knowledge/mentions.py +71 -0
  113. agentforge_graph/knowledge/report.py +32 -0
  114. agentforge_graph/main.py +21 -0
  115. agentforge_graph/providers.py +36 -0
  116. agentforge_graph/repomap/__init__.py +14 -0
  117. agentforge_graph/repomap/rank.py +161 -0
  118. agentforge_graph/repomap/render.py +55 -0
  119. agentforge_graph/repomap/repomap.py +66 -0
  120. agentforge_graph/retrieve/__init__.py +21 -0
  121. agentforge_graph/retrieve/pack.py +76 -0
  122. agentforge_graph/retrieve/rerank.py +251 -0
  123. agentforge_graph/retrieve/retriever.py +286 -0
  124. agentforge_graph/retrieve/scoring.py +36 -0
  125. agentforge_graph/serve/__init__.py +19 -0
  126. agentforge_graph/serve/engine.py +204 -0
  127. agentforge_graph/serve/http_runner.py +133 -0
  128. agentforge_graph/serve/server.py +110 -0
  129. agentforge_graph/serve/tools.py +307 -0
  130. agentforge_graph/store/__init__.py +32 -0
  131. agentforge_graph/store/_rowmap.py +102 -0
  132. agentforge_graph/store/errors.py +22 -0
  133. agentforge_graph/store/facade.py +89 -0
  134. agentforge_graph/store/kuzu_store.py +380 -0
  135. agentforge_graph/store/lance_store.py +146 -0
  136. agentforge_graph/store/neo4j_store.py +294 -0
  137. agentforge_graph/store/pgvector_store.py +170 -0
  138. agentforge_graph/store/registry.py +45 -0
  139. agentforge_graph/temporal/__init__.py +36 -0
  140. agentforge_graph/temporal/backfill.py +338 -0
  141. agentforge_graph/temporal/events.py +82 -0
  142. agentforge_graph/temporal/index.py +190 -0
  143. agentforge_graph/temporal/mining.py +190 -0
  144. agentforge_graph/temporal/recorder.py +114 -0
  145. agentforge_graph/temporal/store.py +282 -0
  146. agentforge_graph-0.3.2.dist-info/METADATA +291 -0
  147. agentforge_graph-0.3.2.dist-info/RECORD +151 -0
  148. agentforge_graph-0.3.2.dist-info/WHEEL +4 -0
  149. agentforge_graph-0.3.2.dist-info/entry_points.txt +3 -0
  150. agentforge_graph-0.3.2.dist-info/licenses/LICENSE +202 -0
  151. agentforge_graph-0.3.2.dist-info/licenses/NOTICE +14 -0
@@ -0,0 +1,380 @@
1
+ """Kuzu-backed ``GraphStore`` — the default embedded graph adapter and the
2
+ conformance baseline for every other adapter (ADR-0006).
3
+
4
+ Design (see docs/design/design-003): an *open* schema (arbitrary kinds,
5
+ free-form ``attrs``) is mapped onto Kuzu's typed property graph via **one
6
+ generic node table + one generic edge table**, with ``kind`` as a string
7
+ column and ``attrs`` as a JSON string — so an unrecognized kind round-trips
8
+ without any DDL change (ADR-0005).
9
+
10
+ Kuzu is synchronous and a connection is not concurrency-safe, so every DB
11
+ interaction runs on a worker thread (``asyncio.to_thread``) under a single
12
+ ``asyncio.Lock``; each public method's DB work is one sync function, which
13
+ keeps multi-statement writes (``upsert``) atomic on one thread.
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ import asyncio
19
+ from pathlib import Path
20
+ from typing import Any
21
+
22
+ import kuzu
23
+
24
+ from agentforge_graph.core import (
25
+ Direction,
26
+ Edge,
27
+ EdgeKind,
28
+ FileSubgraph,
29
+ GraphQuery,
30
+ GraphStore,
31
+ Node,
32
+ NodeKind,
33
+ QueryResult,
34
+ Source,
35
+ )
36
+
37
+ from ._rowmap import (
38
+ acceptable_sources as _acceptable_sources,
39
+ )
40
+ from ._rowmap import (
41
+ dump_attrs as _dump_attrs,
42
+ )
43
+ from ._rowmap import (
44
+ edge_from_row as _edge_from_rel,
45
+ )
46
+ from ._rowmap import (
47
+ edge_params as _edge_params,
48
+ )
49
+ from ._rowmap import (
50
+ load_attrs as _load_attrs,
51
+ )
52
+ from ._rowmap import (
53
+ node_from_row as _node_from_row,
54
+ )
55
+ from ._rowmap import (
56
+ node_params as _node_params,
57
+ )
58
+
59
# Version tag for the on-disk schema below. It is not referenced anywhere else
# in this module — presumably consumed by callers/migrations; TODO confirm.
SCHEMA_VERSION = 1

# Schema DDL, executed statement-by-statement when a database is opened.
# One generic node table and one generic edge table: ``kind`` is a plain
# string column and ``attrs`` is stored as a string, so new kinds need no
# DDL change.
_DDL = [
    # Node table: identity, span, serialized attrs, provenance columns and
    # the path of the file that owns the node.
    """CREATE NODE TABLE CkgNode(
id STRING, kind STRING, name STRING,
span_start INT64, span_end INT64,
attrs STRING, sym_path STRING,
prov_source STRING, prov_extractor STRING,
prov_commit STRING, prov_confidence DOUBLE,
origin_path STRING,
PRIMARY KEY(id))""",
    # Edge table between two CkgNode rows, with the same provenance columns
    # plus ``resolved_from``.
    """CREATE REL TABLE CkgEdge(
FROM CkgNode TO CkgNode,
kind STRING, attrs STRING,
prov_source STRING, prov_extractor STRING,
prov_commit STRING, prov_confidence DOUBLE,
origin_path STRING, resolved_from STRING)""",
]
77
+
78
+
79
def _rows(result: Any) -> list[Any]:
    """Drain a Kuzu query result into a list of rows.

    ``result`` must expose ``has_next()`` / ``get_next()``. Kuzu's
    ``execute()`` can also return a list of results for multi-statement
    queries, but this module only ever issues single statements, so only the
    single-result shape is handled here.
    """
    has_more = result.has_next
    fetch = result.get_next
    collected: list[Any] = []
    while has_more():
        collected.append(fetch())
    return collected
86
+
87
+
88
class KuzuGraphStore(GraphStore):
    """Embedded graph store backed by a Kuzu database directory.

    Every public coroutine acquires ``self._lock`` and performs its database
    work as a single synchronous function on a worker thread
    (``asyncio.to_thread``), so the one Kuzu connection is never used by two
    callers at once and multi-statement writes stay on one thread.
    """

    def __init__(self, db: kuzu.Database, conn: kuzu.Connection, path: Path) -> None:
        """Wrap an already-open database/connection pair; prefer :meth:`open`."""
        self._db = db
        self._conn = conn
        self._path = path
        self._lock = asyncio.Lock()
        self._closed = False

    @classmethod
    async def open(cls, path: str | Path, config: dict[str, Any] | None = None) -> KuzuGraphStore:
        """Open (creating if needed) a Kuzu database at ``path`` and ensure
        the schema exists. ``path`` is the graph DB directory/file. ``config``
        is the ``store.graph.config`` block — unused by the embedded driver
        (server adapters use it for connection details)."""
        p = Path(path)
        p.parent.mkdir(parents=True, exist_ok=True)
        db, conn = await asyncio.to_thread(cls._connect, p)
        return cls(db, conn, p)

    @staticmethod
    def _connect(p: Path) -> tuple[kuzu.Database, kuzu.Connection]:
        """Open the database at ``p`` and run the schema DDL.

        A ``RuntimeError`` whose message contains "already exists" is treated
        as "table already created on a previous open" and ignored; any other
        error is re-raised.
        """
        db = kuzu.Database(str(p))
        conn = kuzu.Connection(db)
        for ddl in _DDL:
            try:
                conn.execute(ddl)
            except RuntimeError as exc:  # table already exists on reopen
                if "already exists" not in str(exc):
                    raise
        return db, conn

    def _rollback_quietly(self) -> None:
        """Roll back the current transaction without masking the real error.

        Called only from ``except`` blocks. If the ROLLBACK statement itself
        fails with ``RuntimeError`` (the error type this driver raises, e.g.
        when no transaction is active any more), that secondary failure is
        dropped so the exception that triggered the rollback is the one the
        caller sees.
        """
        try:
            self._conn.execute("ROLLBACK")
        except RuntimeError:
            # Deliberate: the original exception is re-raised by the caller.
            pass

    # --- writes -----------------------------------------------------------

    async def upsert(self, subgraph: FileSubgraph) -> None:
        """Replace everything owned by ``subgraph.path`` with ``subgraph``."""
        async with self._lock:
            await asyncio.to_thread(self._upsert_sync, subgraph)

    def _upsert_sync(self, sg: FileSubgraph) -> None:
        """Transactionally merge the file's nodes, prune vanished ones and
        replace the file's edges. Rolls back and re-raises on any error."""
        path = sg.path
        new_ids = [n.id for n in sg.nodes]
        self._conn.execute("BEGIN TRANSACTION")
        try:
            for node in sg.nodes:
                self._merge_node(node, origin_path=path)
            # drop file-owned nodes that vanished from the new subgraph
            self._conn.execute(
                "MATCH (n:CkgNode) WHERE n.origin_path = $p AND NOT n.id IN $keep DETACH DELETE n",
                {"p": path, "keep": new_ids},
            )
            # replace this file's edges
            self._conn.execute(
                "MATCH ()-[e:CkgEdge]->() WHERE e.origin_path = $p DELETE e", {"p": path}
            )
            for edge in sg.edges:
                self._insert_edge(edge, origin_path=path)
            self._conn.execute("COMMIT")
        except Exception:
            self._rollback_quietly()
            raise

    async def add(self, items: list[Node | Edge]) -> None:
        """Add nodes and edges that are not owned by any file."""
        async with self._lock:
            await asyncio.to_thread(self._add_sync, items)

    def _add_sync(self, items: list[Node | Edge]) -> None:
        """Write ``items`` in one transaction with an empty ``origin_path``."""
        self._conn.execute("BEGIN TRANSACTION")
        try:
            for item in items:
                if isinstance(item, Node):
                    self._merge_node(item, origin_path="")
                else:
                    self._insert_edge(item, origin_path="")
            self._conn.execute("COMMIT")
        except Exception:
            self._rollback_quietly()
            raise

    def _merge_node(self, node: Node, origin_path: str) -> None:
        """MERGE ``node`` by id and overwrite every column, including
        ``origin_path``."""
        self._conn.execute(
            "MERGE (n:CkgNode {id: $id}) SET "
            "n.kind = $kind, n.name = $name, "
            "n.span_start = $span_start, n.span_end = $span_end, "
            "n.attrs = $attrs, n.sym_path = $sym_path, "
            "n.prov_source = $prov_source, n.prov_extractor = $prov_extractor, "
            "n.prov_commit = $prov_commit, n.prov_confidence = $prov_confidence, "
            "n.origin_path = $origin_path",
            _node_params(node, origin_path),
        )

    def _insert_edge(self, edge: Edge, origin_path: str) -> None:
        """CREATE ``edge`` between two existing nodes."""
        # Endpoints must exist; an edge to an absent node is dropped silently
        # by the MATCH (resolved cross-file edges may outrun their target —
        # they reconnect when the target file is indexed).
        self._conn.execute(
            "MATCH (a:CkgNode {id: $src}), (b:CkgNode {id: $dst}) "
            "CREATE (a)-[e:CkgEdge {kind: $kind}]->(b) SET "
            "e.attrs = $attrs, e.prov_source = $prov_source, "
            "e.prov_extractor = $prov_extractor, e.prov_commit = $prov_commit, "
            "e.prov_confidence = $prov_confidence, e.origin_path = $origin_path, "
            "e.resolved_from = $resolved_from",
            _edge_params(edge, origin_path),
        )

    async def delete_file(self, path: str) -> None:
        """Delete every node and edge whose ``origin_path`` equals ``path``."""
        async with self._lock:
            await asyncio.to_thread(self._delete_file_sync, path)

    def _delete_file_sync(self, path: str) -> None:
        """Transactionally delete the file's edges, then its nodes."""
        self._conn.execute("BEGIN TRANSACTION")
        try:
            self._conn.execute(
                "MATCH ()-[e:CkgEdge]->() WHERE e.origin_path = $p DELETE e", {"p": path}
            )
            self._conn.execute(
                "MATCH (n:CkgNode) WHERE n.origin_path = $p DETACH DELETE n", {"p": path}
            )
            self._conn.execute("COMMIT")
        except Exception:
            self._rollback_quietly()
            raise

    async def clear_resolved(self, paths: list[str]) -> None:
        """Delete resolver-produced edges that originate from ``paths``."""
        async with self._lock:
            await asyncio.to_thread(self._clear_resolved_sync, paths)

    def _clear_resolved_sync(self, paths: list[str]) -> None:
        """Delete ``Source.RESOLVED`` edges owned by ``paths`` and then any
        package node left without an incoming edge. No-op for empty
        ``paths``."""
        if not paths:
            return
        self._conn.execute("BEGIN TRANSACTION")
        try:
            self._conn.execute(
                "MATCH ()-[e:CkgEdge]->() "
                "WHERE e.origin_path IN $paths AND e.prov_source = $resolved DELETE e",
                {"paths": paths, "resolved": Source.RESOLVED.value},
            )
            # GC external package stubs orphaned by the edge deletion, so the
            # incremental graph matches a full re-index (no dangling sinks).
            self._conn.execute(
                "MATCH (p:CkgNode) WHERE p.kind = $pkg "
                "OPTIONAL MATCH ()-[e:CkgEdge]->(p) "
                "WITH p, count(e) AS c WHERE c = 0 DETACH DELETE p",
                {"pkg": NodeKind.PACKAGE.value},
            )
            self._conn.execute("COMMIT")
        except Exception:
            self._rollback_quietly()
            raise

    async def clear_outgoing(self, src_ids: list[str], kind: EdgeKind) -> None:
        """Delete edges of ``kind`` leaving any node in ``src_ids``."""
        async with self._lock:
            await asyncio.to_thread(self._clear_outgoing_sync, src_ids, kind)

    def _clear_outgoing_sync(self, src_ids: list[str], kind: EdgeKind) -> None:
        """Single-statement delete; no-op for empty ``src_ids``."""
        if not src_ids:
            return
        self._conn.execute(
            "MATCH (a:CkgNode)-[e:CkgEdge]->() WHERE a.id IN $ids AND e.kind = $kind DELETE e",
            {"ids": src_ids, "kind": kind.value},
        )

    # --- reads ------------------------------------------------------------

    async def query(self, q: GraphQuery) -> QueryResult:
        """Return nodes matching ``q``, capped at ``q.limit``."""
        async with self._lock:
            return await asyncio.to_thread(self._query_sync, q)

    def _query_sync(self, q: GraphQuery) -> QueryResult:
        """Build a parameterized MATCH from the filters set on ``q``.

        Only filters that are not ``None`` contribute a WHERE clause. One row
        more than ``q.limit`` is requested so ``truncated`` can be reported
        without a second count query.
        """
        clauses: list[str] = []
        params: dict[str, Any] = {}
        if q.kinds is not None:
            clauses.append("n.kind IN $kinds")
            params["kinds"] = [k.value for k in q.kinds]
        if q.name is not None:
            clauses.append("n.name = $name")
            params["name"] = q.name
        if q.path_prefix is not None:
            clauses.append("n.sym_path STARTS WITH $prefix")
            params["prefix"] = q.path_prefix
        if q.min_source is not None:
            clauses.append("n.prov_source IN $sources")
            params["sources"] = _acceptable_sources(q.min_source)
        where = (" WHERE " + " AND ".join(clauses)) if clauses else ""
        params["lim"] = q.limit + 1  # fetch one extra to detect truncation
        result = self._conn.execute(f"MATCH (n:CkgNode){where} RETURN n LIMIT $lim", params)
        nodes = [_node_from_row(row[0]) for row in _rows(result)]
        truncated = len(nodes) > q.limit
        return QueryResult(nodes=nodes[: q.limit], truncated=truncated)

    async def neighbors(
        self,
        node_id: str,
        kinds: list[EdgeKind] | None = None,
        depth: int = 1,
    ) -> list[Node]:
        """Return nodes reachable from ``node_id`` within ``depth`` hops,
        following edges in either direction and optionally only those whose
        kind is in ``kinds``. The start node itself is not included."""
        async with self._lock:
            return await asyncio.to_thread(self._neighbors_sync, node_id, kinds, depth)

    def _neighbors_sync(self, node_id: str, kinds: list[EdgeKind] | None, depth: int) -> list[Node]:
        """Breadth-first expansion, one query per hop."""
        # Iterative 1-hop BFS (undirected, kind-filtered), mirroring the
        # InMemory reference; depth is small (<= serve.max_depth).
        kind_values = [k.value for k in kinds] if kinds is not None else None
        visited = {node_id}
        frontier = [node_id]
        collected: list[str] = []
        for _ in range(depth):
            if not frontier:
                break
            params: dict[str, Any] = {"frontier": frontier}
            kind_clause = ""
            if kind_values is not None:
                kind_clause = " AND e.kind IN $kinds"
                params["kinds"] = kind_values
            result = self._conn.execute(
                "MATCH (a:CkgNode)-[e:CkgEdge]-(b:CkgNode) "
                f"WHERE a.id IN $frontier{kind_clause} RETURN DISTINCT b.id",
                params,
            )
            nxt: list[str] = []
            for row in _rows(result):
                nid = row[0]
                if nid not in visited:
                    visited.add(nid)
                    nxt.append(nid)
                    collected.append(nid)
            frontier = nxt
        # Hydrate ids into nodes in discovery order, skipping any that no
        # longer resolve.
        return [n for n in (self._get_sync(i) for i in collected) if n is not None]

    async def get(self, node_id: str) -> Node | None:
        """Return the node with ``node_id``, or ``None`` if absent."""
        async with self._lock:
            return await asyncio.to_thread(self._get_sync, node_id)

    def _get_sync(self, node_id: str) -> Node | None:
        """Look up one node by primary key."""
        result = self._conn.execute("MATCH (n:CkgNode {id: $id}) RETURN n", {"id": node_id})
        rows = _rows(result)
        return _node_from_row(rows[0][0]) if rows else None

    async def set_attrs(self, node_id: str, attrs: dict[str, Any]) -> None:
        """Merge ``attrs`` into the node's stored attrs (new keys win)."""
        async with self._lock:
            await asyncio.to_thread(self._set_attrs_sync, node_id, attrs)

    def _set_attrs_sync(self, node_id: str, attrs: dict[str, Any]) -> None:
        """Read-modify-write of the ``attrs`` column only."""
        rows = _rows(
            self._conn.execute("MATCH (n:CkgNode {id: $id}) RETURN n.attrs", {"id": node_id})
        )
        if not rows:
            return  # absent node: no-op (contract)
        merged = {**_load_attrs(rows[0][0]), **attrs}
        # SET only attrs — origin_path and every other column are left intact.
        self._conn.execute(
            "MATCH (n:CkgNode {id: $id}) SET n.attrs = $attrs",
            {"id": node_id, "attrs": _dump_attrs(merged)},
        )

    async def adjacent(
        self,
        node_id: str,
        kinds: list[EdgeKind] | None = None,
        direction: Direction = "both",
    ) -> list[Edge]:
        """Return edges touching ``node_id`` in the requested ``direction``
        (``"out"``, ``"in"`` or ``"both"``), optionally filtered by kind."""
        async with self._lock:
            return await asyncio.to_thread(self._adjacent_sync, node_id, kinds, direction)

    def _adjacent_sync(
        self, node_id: str, kinds: list[EdgeKind] | None, direction: Direction
    ) -> list[Edge]:
        """Run one query per requested direction; outgoing edges come first."""
        params: dict[str, Any] = {"id": node_id}
        where = ""
        if kinds is not None:
            where = " WHERE e.kind IN $kinds"
            params["kinds"] = [k.value for k in kinds]
        edges: list[Edge] = []
        if direction in ("out", "both"):
            res = self._conn.execute(
                f"MATCH (a:CkgNode {{id: $id}})-[e:CkgEdge]->(b:CkgNode){where} RETURN e, b.id",
                params,
            )
            # node_id is the source, the matched neighbour the destination
            edges += [_edge_from_rel(row[0], node_id, row[1]) for row in _rows(res)]
        if direction in ("in", "both"):
            res = self._conn.execute(
                f"MATCH (a:CkgNode {{id: $id}})<-[e:CkgEdge]-(b:CkgNode){where} RETURN e, b.id",
                params,
            )
            # the matched neighbour is the source, node_id the destination
            edges += [_edge_from_rel(row[0], row[1], node_id) for row in _rows(res)]
        return edges

    async def close(self) -> None:
        """Close the connection and the database handle; idempotent."""
        async with self._lock:
            if self._closed:
                return
            self._closed = True
            await asyncio.to_thread(self._close_sync)

    def _close_sync(self) -> None:
        """Release the connection first, then the database it belongs to."""
        self._conn.close()
        # Closing only the connection would keep the Database handle open
        # until it is garbage-collected.
        self._db.close()
@@ -0,0 +1,146 @@
1
+ """LanceDB-backed ``VectorStore`` — the default embedded vector adapter
2
+ (ADR-0006). feat-005 produces the ``Embedded`` items; feat-006 searches
3
+ them and joins the hits back into the graph via ``Store.expand``.
4
+
5
+ LanceDB ships a native async client, so this adapter is async all the way
6
+ down (no thread-wrapping, unlike the sync Kuzu adapter). One ``vectors``
7
+ table is created lazily on first ``upsert`` with the vector dimension fixed
8
+ from the first batch. The ``filter`` contract targets first-class columns
9
+ (``ref``, ``kind``, ``path``) — the portable subset every vector backend can
10
+ honour; ``path`` is derived from the ref's SymbolID, mirroring the graph
11
+ adapter's ``sym_path``.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import json
17
+ from pathlib import Path
18
+ from typing import Any
19
+
20
+ import lancedb
21
+ import pyarrow as pa
22
+
23
+ from agentforge_graph.core import Embedded, ScoredRef, VectorStore
24
+ from agentforge_graph.core.symbols import SymbolID
25
+
26
# Name of the single LanceDB table this adapter reads and writes.
_TABLE = "vectors"
# Columns a caller may filter on; any other filter key is rejected by _where().
_FILTERABLE = ("ref", "kind", "path")
28
+
29
+
30
def _sym_path(ref: str) -> str:
    """Return the ``path`` component of ``ref`` parsed as a ``SymbolID``.

    Falls back to the empty string when parsing raises ``ValueError``.
    """
    try:
        path = SymbolID.parse(ref).path
    except ValueError:
        path = ""
    return path
35
+
36
+
37
def _sql_str(value: object) -> str:
    """Render ``value`` as a single-quoted SQL string literal.

    The value is stringified with ``str()`` and embedded single quotes are
    doubled.
    """
    escaped = str(value).replace("'", "''")
    return f"'{escaped}'"
39
+
40
+
41
def _where(filter: dict[str, Any]) -> str:
    """Translate an equality ``filter`` into a SQL predicate string.

    Each ``column: value`` pair becomes ``column = '<value>'`` and the pairs
    are joined with ``AND``; an empty ``filter`` yields an empty string.

    Raises:
        ValueError: if any key is not one of the filterable columns.
    """
    unknown = sorted(col for col in filter if col not in _FILTERABLE)
    if unknown:
        raise ValueError(f"unfilterable column(s) {unknown}; allowed: {_FILTERABLE}")
    predicates = [f"{col} = {_sql_str(val)}" for col, val in filter.items()]
    return " AND ".join(predicates)
46
+
47
+
48
def _row(item: Embedded) -> dict[str, Any]:
    """Flatten an ``Embedded`` item into the column dict stored in the table.

    The vector is coerced element-wise to ``float``, ``path`` is derived from
    the ref, and ``attrs`` is serialized to JSON with sorted keys.
    """
    ref = item.ref
    vector = list(map(float, item.vector))
    kind = item.kind.value
    path = _sym_path(ref)
    attrs_json = json.dumps(item.attrs, sort_keys=True)
    return {
        "ref": ref,
        "vector": vector,
        "kind": kind,
        "path": path,
        "attrs_json": attrs_json,
    }
56
+
57
+
58
def _schema(dim: int) -> pa.Schema:
    """Build the Arrow schema of the vectors table.

    ``vector`` is a fixed-size list of ``dim`` float32 values; every other
    column is a string.
    """
    columns = (
        ("ref", pa.string()),
        ("vector", pa.list_(pa.float32(), dim)),
        ("kind", pa.string()),
        ("path", pa.string()),
        ("attrs_json", pa.string()),
    )
    return pa.schema([pa.field(name, dtype) for name, dtype in columns])
68
+
69
+
70
class LanceVectorStore(VectorStore):
    """Embedded vector store backed by a LanceDB database directory.

    The single ``vectors`` table is opened lazily and created on the first
    non-empty :meth:`upsert`, with the vector dimension taken from that
    batch's first item.
    """

    def __init__(self, db: Any, path: Path) -> None:
        """Wrap an already-open async LanceDB connection; prefer :meth:`open`."""
        self._db = db
        self._path = path
        self._tbl: Any = None  # cached table handle, filled in by _table()/upsert()
        self._closed = False

    @classmethod
    async def open(cls, path: str | Path, config: dict[str, Any] | None = None) -> LanceVectorStore:
        """``config`` (the ``store.vectors.config`` block) is unused by the
        embedded driver — server adapters use it for connection details."""
        p = Path(path)
        p.parent.mkdir(parents=True, exist_ok=True)
        db = await lancedb.connect_async(str(p))
        return cls(db, p)

    async def _table(self) -> Any:
        """Return the cached table handle, opening it if it exists on disk.

        Returns ``None`` while the table has not been created yet.
        """
        if self._tbl is None and _TABLE in await self._table_names():
            self._tbl = await self._db.open_table(_TABLE)
        return self._tbl

    async def _table_names(self) -> list[str]:
        """List the table names known to the connection."""
        # LanceDB's async list_tables() returns a paginated result object
        # (.tables + .page_token), not a plain list of names.
        result = await self._db.list_tables()
        names = getattr(result, "tables", result)
        return list(names)

    async def upsert(self, items: list[Embedded]) -> None:
        """Insert ``items``, replacing any stored row with the same ``ref``.

        When the same ``ref`` occurs more than once in ``items`` only its
        last occurrence is stored. Delete-then-add removes only rows that
        already exist, so without this de-duplication such a batch would
        leave several rows for one ref. An empty batch is a no-op.
        """
        if not items:
            return
        # Keyed by ref: a later item overwrites an earlier one with the same ref.
        rows_by_ref = {item.ref: _row(item) for item in items}
        tbl = await self._table()
        if tbl is None:
            tbl = await self._db.create_table(_TABLE, schema=_schema(len(items[0].vector)))
            self._tbl = tbl
        refs = ", ".join(_sql_str(ref) for ref in rows_by_ref)
        await tbl.delete(f"ref IN ({refs})")  # delete-then-add = upsert by ref
        await tbl.add(list(rows_by_ref.values()))

    async def search(
        self,
        vector: list[float],
        k: int,
        filter: dict[str, Any] | None = None,
    ) -> list[ScoredRef]:
        """Return up to ``k`` nearest rows to ``vector`` by cosine distance.

        ``filter`` (if non-empty) restricts candidates by equality on the
        filterable columns; an unknown column raises ``ValueError``. Returns
        an empty list when the table does not exist yet.
        """
        tbl = await self._table()
        if tbl is None:
            return []
        query = tbl.vector_search(vector).distance_type("cosine").limit(k)
        if filter:
            query = query.where(_where(filter))
        rows = await query.to_list()
        # Cosine distance in [0, 2] (smaller = closer); expose a cosine
        # similarity in [0, 1] (higher = closer) so scores are interpretable
        # and survive the retrieval decay (BUG-002).
        return [
            ScoredRef(
                ref=r["ref"],
                score=max(0.0, 1.0 - float(r["_distance"])),
                attrs=json.loads(r["attrs_json"]) if r.get("attrs_json") else {},
            )
            for r in rows
        ]

    async def delete_where(self, filter: dict[str, Any]) -> None:
        """Delete rows matching ``filter``; no-op when the table is absent.

        Raises:
            ValueError: if ``filter`` names a non-filterable column.
        """
        tbl = await self._table()
        if tbl is None:
            return
        await tbl.delete(_where(filter))

    async def close(self) -> None:
        """Close the underlying connection; idempotent."""
        if self._closed:
            return
        self._closed = True
        self._db.close()  # LanceDB's async connection close() is synchronous