agentforge-graph 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (151) hide show
  1. agentforge_graph/__init__.py +6 -0
  2. agentforge_graph/chunking/__init__.py +12 -0
  3. agentforge_graph/chunking/cast.py +159 -0
  4. agentforge_graph/chunking/chunk.py +19 -0
  5. agentforge_graph/chunking/tokens.py +15 -0
  6. agentforge_graph/cli.py +607 -0
  7. agentforge_graph/config.py +259 -0
  8. agentforge_graph/core/__init__.py +54 -0
  9. agentforge_graph/core/conformance.py +270 -0
  10. agentforge_graph/core/contracts.py +163 -0
  11. agentforge_graph/core/kinds.py +68 -0
  12. agentforge_graph/core/models.py +134 -0
  13. agentforge_graph/core/provenance.py +62 -0
  14. agentforge_graph/core/symbols.py +116 -0
  15. agentforge_graph/embed/__init__.py +28 -0
  16. agentforge_graph/embed/base.py +22 -0
  17. agentforge_graph/embed/bedrock.py +85 -0
  18. agentforge_graph/embed/fake.py +34 -0
  19. agentforge_graph/embed/openai.py +67 -0
  20. agentforge_graph/embed/pipeline.py +184 -0
  21. agentforge_graph/embed/registry.py +66 -0
  22. agentforge_graph/embed/report.py +15 -0
  23. agentforge_graph/enrich/__init__.py +70 -0
  24. agentforge_graph/enrich/anthropic.py +38 -0
  25. agentforge_graph/enrich/anthropic_client.py +109 -0
  26. agentforge_graph/enrich/bedrock.py +24 -0
  27. agentforge_graph/enrich/bedrock_client.py +115 -0
  28. agentforge_graph/enrich/bedrock_summarizer.py +23 -0
  29. agentforge_graph/enrich/claude.py +172 -0
  30. agentforge_graph/enrich/enricher.py +108 -0
  31. agentforge_graph/enrich/governs.py +173 -0
  32. agentforge_graph/enrich/governs_enricher.py +152 -0
  33. agentforge_graph/enrich/heuristics.py +224 -0
  34. agentforge_graph/enrich/judge.py +63 -0
  35. agentforge_graph/enrich/registry.py +133 -0
  36. agentforge_graph/enrich/report.py +60 -0
  37. agentforge_graph/enrich/summarizer.py +62 -0
  38. agentforge_graph/enrich/summary_enricher.py +211 -0
  39. agentforge_graph/enrich/taxonomy.py +38 -0
  40. agentforge_graph/frameworks/__init__.py +29 -0
  41. agentforge_graph/frameworks/base.py +75 -0
  42. agentforge_graph/frameworks/detect.py +124 -0
  43. agentforge_graph/frameworks/extractor.py +63 -0
  44. agentforge_graph/frameworks/orm.py +93 -0
  45. agentforge_graph/frameworks/packs/_js_ast.py +56 -0
  46. agentforge_graph/frameworks/packs/_python_ast.py +157 -0
  47. agentforge_graph/frameworks/packs/django/__init__.py +240 -0
  48. agentforge_graph/frameworks/packs/django/models.scm +7 -0
  49. agentforge_graph/frameworks/packs/express/__init__.py +133 -0
  50. agentforge_graph/frameworks/packs/express/routes.scm +8 -0
  51. agentforge_graph/frameworks/packs/fastapi/__init__.py +210 -0
  52. agentforge_graph/frameworks/packs/fastapi/depends.scm +6 -0
  53. agentforge_graph/frameworks/packs/fastapi/routes.scm +10 -0
  54. agentforge_graph/frameworks/packs/flask/__init__.py +143 -0
  55. agentforge_graph/frameworks/packs/flask/routes.scm +11 -0
  56. agentforge_graph/frameworks/packs/nestjs/__init__.py +205 -0
  57. agentforge_graph/frameworks/packs/nestjs/routes.scm +6 -0
  58. agentforge_graph/frameworks/packs/spring/__init__.py +267 -0
  59. agentforge_graph/frameworks/packs/spring/routes.scm +6 -0
  60. agentforge_graph/frameworks/packs/sqlalchemy/__init__.py +250 -0
  61. agentforge_graph/frameworks/packs/sqlalchemy/models.scm +7 -0
  62. agentforge_graph/frameworks/registry.py +44 -0
  63. agentforge_graph/ingest/__init__.py +30 -0
  64. agentforge_graph/ingest/codegraph.py +847 -0
  65. agentforge_graph/ingest/extractor.py +353 -0
  66. agentforge_graph/ingest/incremental/__init__.py +25 -0
  67. agentforge_graph/ingest/incremental/detect.py +118 -0
  68. agentforge_graph/ingest/incremental/dirty.py +61 -0
  69. agentforge_graph/ingest/incremental/indexer.py +218 -0
  70. agentforge_graph/ingest/incremental/meta.py +72 -0
  71. agentforge_graph/ingest/incremental/ports.py +39 -0
  72. agentforge_graph/ingest/pack.py +160 -0
  73. agentforge_graph/ingest/packs/__init__.py +34 -0
  74. agentforge_graph/ingest/packs/cpp/__init__.py +35 -0
  75. agentforge_graph/ingest/packs/cpp/references.scm +15 -0
  76. agentforge_graph/ingest/packs/cpp/structure.scm +49 -0
  77. agentforge_graph/ingest/packs/csharp/__init__.py +35 -0
  78. agentforge_graph/ingest/packs/csharp/references.scm +12 -0
  79. agentforge_graph/ingest/packs/csharp/structure.scm +45 -0
  80. agentforge_graph/ingest/packs/go/__init__.py +38 -0
  81. agentforge_graph/ingest/packs/go/references.scm +12 -0
  82. agentforge_graph/ingest/packs/go/structure.scm +64 -0
  83. agentforge_graph/ingest/packs/java/__init__.py +35 -0
  84. agentforge_graph/ingest/packs/java/references.scm +12 -0
  85. agentforge_graph/ingest/packs/java/structure.scm +38 -0
  86. agentforge_graph/ingest/packs/javascript/__init__.py +34 -0
  87. agentforge_graph/ingest/packs/javascript/references.scm +11 -0
  88. agentforge_graph/ingest/packs/javascript/structure.scm +166 -0
  89. agentforge_graph/ingest/packs/php/__init__.py +35 -0
  90. agentforge_graph/ingest/packs/php/references.scm +15 -0
  91. agentforge_graph/ingest/packs/php/structure.scm +44 -0
  92. agentforge_graph/ingest/packs/python/__init__.py +25 -0
  93. agentforge_graph/ingest/packs/python/references.scm +14 -0
  94. agentforge_graph/ingest/packs/python/structure.scm +57 -0
  95. agentforge_graph/ingest/packs/ruby/__init__.py +37 -0
  96. agentforge_graph/ingest/packs/ruby/references.scm +12 -0
  97. agentforge_graph/ingest/packs/ruby/structure.scm +37 -0
  98. agentforge_graph/ingest/packs/rust/__init__.py +39 -0
  99. agentforge_graph/ingest/packs/rust/references.scm +12 -0
  100. agentforge_graph/ingest/packs/rust/structure.scm +46 -0
  101. agentforge_graph/ingest/packs/typescript/__init__.py +31 -0
  102. agentforge_graph/ingest/packs/typescript/references.scm +11 -0
  103. agentforge_graph/ingest/packs/typescript/structure.scm +99 -0
  104. agentforge_graph/ingest/pipeline.py +134 -0
  105. agentforge_graph/ingest/report.py +84 -0
  106. agentforge_graph/ingest/resolver.py +467 -0
  107. agentforge_graph/ingest/source.py +79 -0
  108. agentforge_graph/knowledge/__init__.py +28 -0
  109. agentforge_graph/knowledge/adr.py +136 -0
  110. agentforge_graph/knowledge/commits.py +152 -0
  111. agentforge_graph/knowledge/ingest.py +312 -0
  112. agentforge_graph/knowledge/mentions.py +71 -0
  113. agentforge_graph/knowledge/report.py +32 -0
  114. agentforge_graph/main.py +21 -0
  115. agentforge_graph/providers.py +36 -0
  116. agentforge_graph/repomap/__init__.py +14 -0
  117. agentforge_graph/repomap/rank.py +161 -0
  118. agentforge_graph/repomap/render.py +55 -0
  119. agentforge_graph/repomap/repomap.py +66 -0
  120. agentforge_graph/retrieve/__init__.py +21 -0
  121. agentforge_graph/retrieve/pack.py +76 -0
  122. agentforge_graph/retrieve/rerank.py +251 -0
  123. agentforge_graph/retrieve/retriever.py +286 -0
  124. agentforge_graph/retrieve/scoring.py +36 -0
  125. agentforge_graph/serve/__init__.py +19 -0
  126. agentforge_graph/serve/engine.py +204 -0
  127. agentforge_graph/serve/http_runner.py +133 -0
  128. agentforge_graph/serve/server.py +110 -0
  129. agentforge_graph/serve/tools.py +307 -0
  130. agentforge_graph/store/__init__.py +32 -0
  131. agentforge_graph/store/_rowmap.py +102 -0
  132. agentforge_graph/store/errors.py +22 -0
  133. agentforge_graph/store/facade.py +89 -0
  134. agentforge_graph/store/kuzu_store.py +380 -0
  135. agentforge_graph/store/lance_store.py +146 -0
  136. agentforge_graph/store/neo4j_store.py +294 -0
  137. agentforge_graph/store/pgvector_store.py +170 -0
  138. agentforge_graph/store/registry.py +45 -0
  139. agentforge_graph/temporal/__init__.py +36 -0
  140. agentforge_graph/temporal/backfill.py +338 -0
  141. agentforge_graph/temporal/events.py +82 -0
  142. agentforge_graph/temporal/index.py +190 -0
  143. agentforge_graph/temporal/mining.py +190 -0
  144. agentforge_graph/temporal/recorder.py +114 -0
  145. agentforge_graph/temporal/store.py +282 -0
  146. agentforge_graph-0.3.2.dist-info/METADATA +291 -0
  147. agentforge_graph-0.3.2.dist-info/RECORD +151 -0
  148. agentforge_graph-0.3.2.dist-info/WHEEL +4 -0
  149. agentforge_graph-0.3.2.dist-info/entry_points.txt +3 -0
  150. agentforge_graph-0.3.2.dist-info/licenses/LICENSE +202 -0
  151. agentforge_graph-0.3.2.dist-info/licenses/NOTICE +14 -0
@@ -0,0 +1,294 @@
1
+ """Neo4j-backed ``GraphStore`` — an opt-in *server* graph adapter (ENH-004).
2
+
3
+ For teams that want a shared, server-backed graph (multiple devs/CI hit one
4
+ store) or to reuse existing Neo4j infra. Neo4j speaks Cypher, like the embedded
5
+ Kuzu default, so this is a close port: the same open schema (one ``:CkgNode``
6
+ label + one ``:CkgEdge`` relationship type, ``kind`` a property, ``attrs`` a JSON
7
+ string) mapped via the shared ``_rowmap`` helpers, and it passes the same
8
+ ``GraphStoreConformance`` suite Kuzu does.
9
+
10
+ Install: ``pip install agentforge-graph[neo4j]``; select in ckg.yaml:
11
+
12
+ store:
13
+ graph: { driver: neo4j, config: { uri: bolt://host:7687, user: neo4j } }
14
+
15
+ The ``neo4j`` driver is imported lazily in :meth:`open`, so the module imports
16
+ fine without the extra installed (the registry can reference it unconditionally).
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ import os
22
+ from pathlib import Path
23
+ from typing import TYPE_CHECKING, Any
24
+
25
+ from agentforge_graph.core import (
26
+ Direction,
27
+ Edge,
28
+ EdgeKind,
29
+ FileSubgraph,
30
+ GraphQuery,
31
+ GraphStore,
32
+ Node,
33
+ NodeKind,
34
+ QueryResult,
35
+ )
36
+
37
+ from ._rowmap import (
38
+ acceptable_sources,
39
+ dump_attrs,
40
+ edge_from_row,
41
+ edge_params,
42
+ load_attrs,
43
+ node_from_row,
44
+ node_params,
45
+ )
46
+
47
+ if TYPE_CHECKING:
48
+ from neo4j import AsyncDriver, AsyncManagedTransaction
49
+
50
+ # id uniqueness makes MERGE-by-id idempotent and fast (the conformance baseline).
51
+ _CONSTRAINT = "CREATE CONSTRAINT ckg_node_id IF NOT EXISTS FOR (n:CkgNode) REQUIRE n.id IS UNIQUE"
52
+ _MERGE_NODE = (
53
+ "MERGE (n:CkgNode {id: $id}) SET "
54
+ "n.kind = $kind, n.name = $name, "
55
+ "n.span_start = $span_start, n.span_end = $span_end, "
56
+ "n.attrs = $attrs, n.sym_path = $sym_path, "
57
+ "n.prov_source = $prov_source, n.prov_extractor = $prov_extractor, "
58
+ "n.prov_commit = $prov_commit, n.prov_confidence = $prov_confidence, "
59
+ "n.origin_path = $origin_path"
60
+ )
61
+ _INSERT_EDGE = (
62
+ "MATCH (a:CkgNode {id: $src}), (b:CkgNode {id: $dst}) "
63
+ "CREATE (a)-[e:CkgEdge {kind: $kind}]->(b) SET "
64
+ "e.attrs = $attrs, e.prov_source = $prov_source, "
65
+ "e.prov_extractor = $prov_extractor, e.prov_commit = $prov_commit, "
66
+ "e.prov_confidence = $prov_confidence, e.origin_path = $origin_path, "
67
+ "e.resolved_from = $resolved_from"
68
+ )
69
+
70
+
71
class Neo4jGraphStore(GraphStore):
    """Server graph store backed by Neo4j (Bolt).

    All nodes live under the single ``:CkgNode`` label and all edges under the
    single ``:CkgEdge`` relationship type; property mapping is delegated to the
    shared ``_rowmap`` helpers. Every public method opens its own short-lived
    session against the configured database.
    """

    def __init__(self, driver: AsyncDriver, database: str) -> None:
        self._driver = driver
        self._database = database
        self._closed = False

    @classmethod
    async def open(cls, path: str | Path, config: dict[str, Any] | None = None) -> Neo4jGraphStore:
        """Connect to Neo4j from the ``store.graph.config`` block.

        ``path`` (the embedded ``.ckg/`` location) is ignored. Recognised config
        keys: ``uri`` (falls back to ``$CKG_NEO4J_URI``, then
        ``bolt://localhost:7687``), ``user`` (default ``neo4j``), ``password``
        (falls back to ``$CKG_NEO4J_PASSWORD``), ``database`` (default ``neo4j``).

        Raises at open (not mid-index) if the server is unreachable or the id
        constraint cannot be created; the driver is closed before re-raising so
        a failed open does not leak connections.
        """
        from neo4j import AsyncGraphDatabase

        cfg = config or {}
        uri = str(cfg.get("uri") or os.environ.get("CKG_NEO4J_URI") or "bolt://localhost:7687")
        # ``or`` (not a ``get`` default) so an explicit null in the config falls
        # back to the default instead of becoming the literal string "None".
        user = str(cfg.get("user") or "neo4j")
        password = str(cfg.get("password") or os.environ.get("CKG_NEO4J_PASSWORD") or "")
        database = str(cfg.get("database") or "neo4j")
        driver: AsyncDriver = AsyncGraphDatabase.driver(uri, auth=(user, password))
        try:
            await driver.verify_connectivity()
            async with driver.session(database=database) as session:
                result = await session.run(_CONSTRAINT)
                # Consume explicitly so a failure of the schema command surfaces
                # here rather than being dropped when the session closes.
                await result.consume()
        except BaseException:
            # BaseException so task cancellation also releases the driver.
            await driver.close()
            raise
        return cls(driver, database)

    # --- writes -----------------------------------------------------------

    async def upsert(self, subgraph: FileSubgraph) -> None:
        """Replace everything owned by ``subgraph.path`` in one write transaction."""
        async with self._driver.session(database=self._database) as session:
            await session.execute_write(self._upsert_tx, subgraph)

    @staticmethod
    async def _upsert_tx(tx: AsyncManagedTransaction, sg: FileSubgraph) -> None:
        """Merge the file's nodes, drop its stale nodes and old edges, re-add edges."""
        for node in sg.nodes:
            await tx.run(_MERGE_NODE, node_params(node, sg.path))
        # Nodes previously attributed to this file but absent from the new subgraph.
        await tx.run(
            "MATCH (n:CkgNode) WHERE n.origin_path = $p AND NOT n.id IN $keep DETACH DELETE n",
            p=sg.path,
            keep=[n.id for n in sg.nodes],
        )
        # Edges are plain CREATEs (not MERGEd), so clear the file's edges first.
        await tx.run("MATCH ()-[e:CkgEdge]->() WHERE e.origin_path = $p DELETE e", p=sg.path)
        for edge in sg.edges:
            await tx.run(_INSERT_EDGE, edge_params(edge, sg.path))

    async def add(self, items: list[Node | Edge]) -> None:
        """Write loose nodes/edges (stored with an empty ``origin_path``)."""
        async with self._driver.session(database=self._database) as session:
            await session.execute_write(self._add_tx, items)

    @staticmethod
    async def _add_tx(tx: AsyncManagedTransaction, items: list[Node | Edge]) -> None:
        """Merge each ``Node`` and create each ``Edge`` in input order."""
        for item in items:
            if isinstance(item, Node):
                await tx.run(_MERGE_NODE, node_params(item, ""))
            else:
                await tx.run(_INSERT_EDGE, edge_params(item, ""))

    async def delete_file(self, path: str) -> None:
        """Remove every node and edge whose ``origin_path`` equals ``path``."""
        async with self._driver.session(database=self._database) as session:
            await session.execute_write(self._delete_file_tx, path)

    @staticmethod
    async def _delete_file_tx(tx: AsyncManagedTransaction, path: str) -> None:
        """Delete the file's edges, then detach-delete its nodes."""
        await tx.run("MATCH ()-[e:CkgEdge]->() WHERE e.origin_path = $p DELETE e", p=path)
        await tx.run("MATCH (n:CkgNode) WHERE n.origin_path = $p DETACH DELETE n", p=path)

    async def clear_resolved(self, paths: list[str]) -> None:
        """Delete resolver-produced edges originating from ``paths`` (no-op if empty)."""
        if not paths:
            return
        async with self._driver.session(database=self._database) as session:
            await session.execute_write(self._clear_resolved_tx, paths)

    @staticmethod
    async def _clear_resolved_tx(tx: AsyncManagedTransaction, paths: list[str]) -> None:
        """Drop ``RESOLVED`` edges for ``paths`` and GC package nodes left without inbound edges."""
        from agentforge_graph.core import Source

        await tx.run(
            "MATCH ()-[e:CkgEdge]->() "
            "WHERE e.origin_path IN $paths AND e.prov_source = $resolved DELETE e",
            paths=paths,
            resolved=Source.RESOLVED.value,
        )
        # GC external package stubs orphaned by the edge deletion, so the
        # incremental graph matches a full re-index (no dangling sinks).
        await tx.run(
            "MATCH (p:CkgNode) WHERE p.kind = $pkg AND NOT ()-[:CkgEdge]->(p) DETACH DELETE p",
            pkg=NodeKind.PACKAGE.value,
        )

    async def clear_outgoing(self, src_ids: list[str], kind: EdgeKind) -> None:
        """Delete all outgoing edges of ``kind`` from the nodes in ``src_ids`` (no-op if empty)."""
        if not src_ids:
            return
        async with self._driver.session(database=self._database) as session:
            # Managed write transaction, like every other write in this class, so
            # the delete is committed (and errors raised) before the session closes.
            await session.execute_write(self._clear_outgoing_tx, src_ids, kind.value)

    @staticmethod
    async def _clear_outgoing_tx(
        tx: AsyncManagedTransaction, src_ids: list[str], kind_value: str
    ) -> None:
        """Transaction body for :meth:`clear_outgoing`."""
        await tx.run(
            "MATCH (a:CkgNode)-[e:CkgEdge]->() WHERE a.id IN $ids AND e.kind = $kind DELETE e",
            ids=src_ids,
            kind=kind_value,
        )

    # --- reads ------------------------------------------------------------

    async def query(self, q: GraphQuery) -> QueryResult:
        """Return nodes matching every filter set on ``q``, capped at ``q.limit``.

        ``truncated`` is true when more than ``q.limit`` nodes matched.
        """
        clauses: list[str] = []
        params: dict[str, Any] = {}
        if q.kinds is not None:
            clauses.append("n.kind IN $kinds")
            params["kinds"] = [k.value for k in q.kinds]
        if q.name is not None:
            clauses.append("n.name = $name")
            params["name"] = q.name
        if q.path_prefix is not None:
            clauses.append("n.sym_path STARTS WITH $prefix")
            params["prefix"] = q.path_prefix
        if q.min_source is not None:
            clauses.append("n.prov_source IN $sources")
            params["sources"] = acceptable_sources(q.min_source)
        where = (" WHERE " + " AND ".join(clauses)) if clauses else ""
        params["lim"] = q.limit + 1  # one extra to detect truncation
        rows = await self._read(f"MATCH (n:CkgNode){where} RETURN n LIMIT $lim", params)
        nodes = [node_from_row(dict(r["n"])) for r in rows]
        return QueryResult(nodes=nodes[: q.limit], truncated=len(nodes) > q.limit)

    async def neighbors(
        self, node_id: str, kinds: list[EdgeKind] | None = None, depth: int = 1
    ) -> list[Node]:
        """Return nodes reachable from ``node_id`` within ``depth`` undirected hops.

        The walk is breadth-first, one query per hop, optionally restricted to
        edges whose kind is in ``kinds``. The start node is never returned and
        each node appears once, in discovery order.
        """
        kind_values = [k.value for k in kinds] if kinds is not None else None
        visited = {node_id}
        frontier = [node_id]
        collected: list[str] = []
        for _ in range(depth):
            if not frontier:
                break
            params: dict[str, Any] = {"frontier": frontier}
            kind_clause = ""
            if kind_values is not None:
                kind_clause = " AND e.kind IN $kinds"
                params["kinds"] = kind_values
            rows = await self._read(
                "MATCH (a:CkgNode)-[e:CkgEdge]-(b:CkgNode) "
                f"WHERE a.id IN $frontier{kind_clause} RETURN DISTINCT b.id AS id",
                params,
            )
            nxt: list[str] = []
            for r in rows:
                nid = r["id"]
                if nid not in visited:
                    visited.add(nid)
                    nxt.append(nid)
                    collected.append(nid)
            frontier = nxt
        if not collected:
            return []
        # Hydrate all discovered ids in one query instead of one session and
        # query per node, then restore discovery order.
        rows = await self._read(
            "MATCH (n:CkgNode) WHERE n.id IN $ids RETURN n", {"ids": collected}
        )
        by_id: dict[str, Node] = {}
        for r in rows:
            props = dict(r["n"])
            found_id = props["id"]
            by_id[found_id] = node_from_row(props)
        # Ids that vanished between the walk and the fetch are skipped.
        return [by_id[i] for i in collected if i in by_id]

    async def get(self, node_id: str) -> Node | None:
        """Return the node with id ``node_id``, or ``None`` when it does not exist."""
        rows = await self._read("MATCH (n:CkgNode {id: $id}) RETURN n", {"id": node_id})
        return node_from_row(dict(rows[0]["n"])) if rows else None

    async def set_attrs(self, node_id: str, attrs: dict[str, Any]) -> None:
        """Merge ``attrs`` into the node's stored attrs; a missing node is a no-op."""
        async with self._driver.session(database=self._database) as session:
            await session.execute_write(self._set_attrs_tx, node_id, attrs)

    @staticmethod
    async def _set_attrs_tx(
        tx: AsyncManagedTransaction, node_id: str, attrs: dict[str, Any]
    ) -> None:
        """Read-modify-write of the JSON ``attrs`` property inside one transaction."""
        rows = [
            r
            async for r in await tx.run(
                "MATCH (n:CkgNode {id: $id}) RETURN n.attrs AS a", id=node_id
            )
        ]
        if not rows:
            return  # absent node: no-op (contract)
        # Incoming keys win over stored ones.
        merged = {**load_attrs(rows[0]["a"]), **attrs}
        # SET only attrs — origin_path and every other property are left intact.
        await tx.run(
            "MATCH (n:CkgNode {id: $id}) SET n.attrs = $attrs",
            id=node_id,
            attrs=dump_attrs(merged),
        )

    async def adjacent(
        self,
        node_id: str,
        kinds: list[EdgeKind] | None = None,
        direction: Direction = "both",
    ) -> list[Edge]:
        """Return edges touching ``node_id``: outgoing first, then incoming.

        ``direction`` selects ``"out"``, ``"in"`` or ``"both"``; ``kinds``
        optionally restricts the edge kinds.
        """
        params: dict[str, Any] = {"id": node_id}
        where = ""
        if kinds is not None:
            where = " WHERE e.kind IN $kinds"
            params["kinds"] = [k.value for k in kinds]
        edges: list[Edge] = []
        if direction in ("out", "both"):
            rows = await self._read(
                f"MATCH (a:CkgNode {{id: $id}})-[e:CkgEdge]->(b:CkgNode){where} "
                "RETURN e, b.id AS oid",
                params,
            )
            # Outgoing: this node is the source, the other end is the target.
            edges += [edge_from_row(dict(r["e"]), node_id, r["oid"]) for r in rows]
        if direction in ("in", "both"):
            rows = await self._read(
                f"MATCH (a:CkgNode {{id: $id}})<-[e:CkgEdge]-(b:CkgNode){where} "
                "RETURN e, b.id AS oid",
                params,
            )
            # Incoming: the other end is the source, this node is the target.
            edges += [edge_from_row(dict(r["e"]), r["oid"], node_id) for r in rows]
        return edges

    async def _read(self, cypher: str, params: dict[str, Any]) -> list[Any]:
        """Run ``cypher`` in a fresh session and return all records as a list."""
        async with self._driver.session(database=self._database) as session:
            result = await session.run(cypher, params)
            # Drain inside the session: records cannot be read after it closes.
            return [r async for r in result]

    async def close(self) -> None:
        """Close the driver; calling this more than once is a no-op."""
        if self._closed:
            return
        self._closed = True
        await self._driver.close()
@@ -0,0 +1,170 @@
1
+ """Postgres + pgvector ``VectorStore`` — an opt-in *server* vector adapter
2
+ (ENH-004), so teams reuse an existing Postgres instead of the embedded LanceDB.
3
+
4
+ Mirrors the LanceDB adapter's shape: one ``vectors`` table created lazily on the
5
+ first ``upsert`` (dimension fixed from the first batch), the same first-class
6
+ filter columns (``ref``, ``kind``, ``path``), and a cosine similarity in [0, 1]
7
+ (higher = closer, BUG-002). Passes the same ``VectorStoreConformance`` suite.
8
+
9
+ Install: ``pip install agentforge-graph[pgvector]``; select in ckg.yaml:
10
+
11
+ store:
12
+ vectors: { driver: pgvector, config: { dsn: postgresql://user@host/db } }
13
+
14
+ ``asyncpg``/``pgvector`` are imported lazily in :meth:`open`, so the module
15
+ imports fine without the extra (the registry can reference it unconditionally).
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import json
21
+ import os
22
+ from pathlib import Path
23
+ from typing import TYPE_CHECKING, Any
24
+
25
+ from agentforge_graph.core import Embedded, ScoredRef, VectorStore
26
+ from agentforge_graph.core.symbols import SymbolID
27
+
28
+ if TYPE_CHECKING:
29
+ from asyncpg import Pool
30
+
31
# Name of the single Postgres table that holds every embedding row.
_TABLE = "ckg_vectors"
# Columns allowed as keys of a ``filter`` mapping; ``_check_filter`` rejects any other.
_FILTERABLE = ("ref", "kind", "path")
33
+
34
+
35
def _sym_path(ref: str) -> str:
    """Return the ``path`` of symbol id *ref*, or ``""`` when *ref* does not parse."""
    try:
        path = SymbolID.parse(ref).path
    except ValueError:
        # Not a parseable symbol id: store an empty path rather than failing.
        path = ""
    return path
40
+
41
+
42
def _check_filter(filter: dict[str, Any]) -> None:
    """Raise ``ValueError`` if *filter* names a column outside ``_FILTERABLE``.

    Filter keys are interpolated into SQL as column names by the callers, so
    only the known column names are accepted.
    """
    unknown = [col for col in filter if col not in _FILTERABLE]
    if not unknown:
        return
    raise ValueError(f"unfilterable column(s) {sorted(unknown)}; allowed: {_FILTERABLE}")
46
+
47
+
48
class PgVectorStore(VectorStore):
    """Server vector store backed by Postgres + the pgvector extension.

    Rows live in one table (``_TABLE``) that is created lazily on the first
    ``upsert``; the embedding dimension is fixed by that first batch.
    """

    def __init__(self, pool: Pool, dim: int | None) -> None:
        self._pool = pool
        self._dim = dim  # None until the table exists (created on first upsert)
        self._closed = False

    @classmethod
    async def open(cls, path: str | Path, config: dict[str, Any] | None = None) -> PgVectorStore:
        """Connect to Postgres from the ``store.vectors.config`` block.

        ``path`` (the embedded ``.ckg/`` location) is ignored. Recognised config
        keys: ``dsn`` (falls back to ``$CKG_PGVECTOR_DSN``). Ensures the
        ``vector`` extension exists and registers the type on every pooled
        connection. If the table already exists its dimension is read back so a
        reopened store is immediately searchable.

        The pool is closed before re-raising if dimension discovery fails, so a
        failed open does not leak connections.
        """
        import asyncpg
        from pgvector.asyncpg import register_vector

        cfg = config or {}
        dsn = str(cfg.get("dsn") or os.environ.get("CKG_PGVECTOR_DSN") or "")

        async def _init(conn: Any) -> None:
            await conn.execute("CREATE EXTENSION IF NOT EXISTS vector")
            await register_vector(conn)

        pool = await asyncpg.create_pool(dsn, init=_init, min_size=1, max_size=4)
        # discover the dimension if the table already exists (reopen).
        dim: int | None = None
        try:
            async with pool.acquire() as conn:
                row = await conn.fetchrow(
                    "SELECT a.atttypmod AS dim FROM pg_attribute a "
                    "JOIN pg_class c ON a.attrelid = c.oid "
                    "WHERE c.relname = $1 AND a.attname = 'embedding'",
                    _TABLE,
                )
        except BaseException:
            # BaseException so task cancellation also releases the pool.
            await pool.close()
            raise
        if row is not None and row["dim"] is not None and row["dim"] > 0:
            dim = int(row["dim"])
        return cls(pool, dim)

    async def _ensure_table(self, dim: int) -> None:
        """Create the table with a ``vector(dim)`` column unless it is already known."""
        if self._dim is not None:
            return
        async with self._pool.acquire() as conn:
            await conn.execute(
                f"CREATE TABLE IF NOT EXISTS {_TABLE} ("
                "ref TEXT PRIMARY KEY, "
                f"embedding vector({dim}), "
                "kind TEXT, path TEXT, attrs_json TEXT)"
            )
        self._dim = dim

    @staticmethod
    def _where_clause(filter: dict[str, Any], params: list[Any]) -> str:
        """Validate *filter* and build an equality ``WHERE`` clause for it.

        Each value is appended to *params* (mutated in place) and referenced by
        its positional placeholder. Returns ``""`` for an empty filter.
        """
        _check_filter(filter)
        conds = []
        for col, val in filter.items():
            params.append(val)
            # Column names are safe to interpolate: _check_filter allow-listed them.
            conds.append(f"{col} = ${len(params)}")
        return (" WHERE " + " AND ".join(conds)) if conds else ""

    @staticmethod
    def _similarity(raw: Any) -> float:
        """Map a raw ``1 - cosine_distance`` value onto a score in [0, 1].

        A missing or NaN value (e.g. a zero vector has no direction) scores
        0.0. A plain ``max(0.0, min(1.0, nan))`` would yield 1.0, because every
        comparison with NaN is false, and report a perfect match.
        """
        if raw is None:
            return 0.0
        sim = float(raw)
        if sim != sim:  # NaN is the only float unequal to itself
            return 0.0
        return max(0.0, min(1.0, sim))

    async def upsert(self, items: list[Embedded]) -> None:
        """Insert *items*, overwriting rows that share a ``ref``; empty input is a no-op."""
        if not items:
            return
        # The first vector's length fixes the column dimension on first use.
        await self._ensure_table(len(items[0].vector))
        rows = [
            (
                i.ref,
                [float(x) for x in i.vector],
                i.kind.value,
                _sym_path(i.ref),
                json.dumps(i.attrs, sort_keys=True),
            )
            for i in items
        ]
        async with self._pool.acquire() as conn:
            await conn.executemany(
                f"INSERT INTO {_TABLE} (ref, embedding, kind, path, attrs_json) "
                "VALUES ($1, $2, $3, $4, $5) "
                "ON CONFLICT (ref) DO UPDATE SET "
                "embedding = EXCLUDED.embedding, kind = EXCLUDED.kind, "
                "path = EXCLUDED.path, attrs_json = EXCLUDED.attrs_json",
                rows,
            )

    async def search(
        self, vector: list[float], k: int, filter: dict[str, Any] | None = None
    ) -> list[ScoredRef]:
        """Return up to *k* rows nearest to *vector* by cosine distance, closest first.

        *filter* optionally restricts rows by equality on the filterable
        columns; an unknown column raises ``ValueError``. Returns ``[]`` when
        the table does not exist yet or *k* is not positive.
        """
        if self._dim is None:
            return []
        params: list[Any] = [[float(x) for x in vector]]
        where = self._where_clause(filter or {}, params)
        if k <= 0:
            # Postgres rejects a negative LIMIT; nothing was asked for anyway.
            return []
        params.append(k)
        # `<=>` is cosine distance in [0, 2]; expose a similarity in [0, 1].
        sql = (
            f"SELECT ref, attrs_json, 1 - (embedding <=> $1) AS sim "
            f"FROM {_TABLE}{where} ORDER BY embedding <=> $1 LIMIT ${len(params)}"
        )
        async with self._pool.acquire() as conn:
            rows = await conn.fetch(sql, *params)
        return [
            ScoredRef(
                ref=r["ref"],
                score=self._similarity(r["sim"]),
                attrs=json.loads(r["attrs_json"]) if r["attrs_json"] else {},
            )
            for r in rows
        ]

    async def delete_where(self, filter: dict[str, Any]) -> None:
        """Delete rows matching every key/value in *filter*.

        NOTE: an empty *filter* produces no ``WHERE`` clause and therefore
        deletes every row. A no-op when the table does not exist yet.
        """
        if self._dim is None:
            return
        params: list[Any] = []
        where = self._where_clause(filter, params)
        async with self._pool.acquire() as conn:
            await conn.execute(f"DELETE FROM {_TABLE}{where}", *params)

    async def close(self) -> None:
        """Close the connection pool; calling this more than once is a no-op."""
        if self._closed:
            return
        self._closed = True
        await self._pool.close()
@@ -0,0 +1,45 @@
1
+ """Driver registry: config driver-name → adapter class.
2
+
3
+ Embedded drivers (Kuzu, LanceDB) ship by default; first-party **server** drivers
4
+ (Neo4j graph, pgvector — ENH-004) are registered too but their DB SDK is imported
5
+ lazily inside the adapter's ``open``, so they cost nothing until selected and
6
+ need only their extra installed (``pip install agentforge-graph[neo4j|pgvector]``).
7
+ Third-party adapters still register out-of-tree via the entry-point groups.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from importlib.metadata import entry_points
13
+ from typing import Any
14
+
15
+ from .errors import DriverNotFound
16
+ from .kuzu_store import KuzuGraphStore
17
+ from .lance_store import LanceVectorStore
18
+ from .neo4j_store import Neo4jGraphStore
19
+ from .pgvector_store import PgVectorStore
20
+
21
# Entry-point groups searched for out-of-tree adapters when a name is not built in.
GRAPH_GROUP = "agentforge_graph.graph_drivers"
VECTOR_GROUP = "agentforge_graph.vector_drivers"

# Built-in adapters keyed by config driver name; checked before any entry point.
_GRAPH_BUILTINS: dict[str, Any] = {"kuzu": KuzuGraphStore, "neo4j": Neo4jGraphStore}
_VECTOR_BUILTINS: dict[str, Any] = {"lancedb": LanceVectorStore, "pgvector": PgVectorStore}
26
+
27
+
28
+ def _resolve(name: str, builtins: dict[str, Any], group: str) -> Any:
29
+ if name in builtins:
30
+ return builtins[name]
31
+ for ep in entry_points(group=group):
32
+ if ep.name == name:
33
+ return ep.load()
34
+ known = sorted(builtins)
35
+ raise DriverNotFound(f"unknown driver {name!r}; built-in drivers: {known}")
36
+
37
+
38
def graph_driver(name: str) -> Any:
    """Look up the graph-store class for a config ``driver`` name."""
    adapter = _resolve(name, _GRAPH_BUILTINS, GRAPH_GROUP)
    return adapter
41
+
42
+
43
def vector_driver(name: str) -> Any:
    """Look up the vector-store class for a config ``driver`` name."""
    adapter = _resolve(name, _VECTOR_BUILTINS, VECTOR_GROUP)
    return adapter
@@ -0,0 +1,36 @@
1
+ """feat-009 temporal / git-evolution layer.
2
+
3
+ An append-only evolution log (``.ckg/temporal.db``) populated by the feat-004
4
+ refresh, recording when each symbol was introduced/removed at which commit —
5
+ the basis for ``history`` / ``changed_since`` / ``as_of`` and for churn/age
6
+ ranking signals. Higher layer: imports ``core``/``store``/``ingest``; the
7
+ deterministic engine core never imports this. Default off (opt-in). See
8
+ ``docs/design/design-009-temporal-evolution-layer.md``.
9
+ """
10
+
11
+ from .backfill import BackfillReport, parse_history, run_backfill
12
+ from .events import Author, Change, Entity, Event, EventKind, SymbolHistory
13
+ from .index import TemporalError, TemporalIndex
14
+ from .mining import ChurnMiner, SymbolAggregate
15
+ from .recorder import SqliteTemporalRecorder, build_recorder, seed_symbols
16
+ from .store import TemporalStore
17
+
18
# Public API of the temporal package: every name imported above is re-exported.
__all__ = [
    "Author",
    "BackfillReport",
    "Change",
    "ChurnMiner",
    "Entity",
    "Event",
    "EventKind",
    "SqliteTemporalRecorder",
    "SymbolAggregate",
    "SymbolHistory",
    "TemporalError",
    "TemporalIndex",
    "TemporalStore",
    "build_recorder",
    "parse_history",
    "run_backfill",
    "seed_symbols",
]