agentforge-graph 0.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agentforge_graph/__init__.py +6 -0
- agentforge_graph/chunking/__init__.py +12 -0
- agentforge_graph/chunking/cast.py +159 -0
- agentforge_graph/chunking/chunk.py +19 -0
- agentforge_graph/chunking/tokens.py +15 -0
- agentforge_graph/cli.py +607 -0
- agentforge_graph/config.py +259 -0
- agentforge_graph/core/__init__.py +54 -0
- agentforge_graph/core/conformance.py +270 -0
- agentforge_graph/core/contracts.py +163 -0
- agentforge_graph/core/kinds.py +68 -0
- agentforge_graph/core/models.py +134 -0
- agentforge_graph/core/provenance.py +62 -0
- agentforge_graph/core/symbols.py +116 -0
- agentforge_graph/embed/__init__.py +28 -0
- agentforge_graph/embed/base.py +22 -0
- agentforge_graph/embed/bedrock.py +85 -0
- agentforge_graph/embed/fake.py +34 -0
- agentforge_graph/embed/openai.py +67 -0
- agentforge_graph/embed/pipeline.py +184 -0
- agentforge_graph/embed/registry.py +66 -0
- agentforge_graph/embed/report.py +15 -0
- agentforge_graph/enrich/__init__.py +70 -0
- agentforge_graph/enrich/anthropic.py +38 -0
- agentforge_graph/enrich/anthropic_client.py +109 -0
- agentforge_graph/enrich/bedrock.py +24 -0
- agentforge_graph/enrich/bedrock_client.py +115 -0
- agentforge_graph/enrich/bedrock_summarizer.py +23 -0
- agentforge_graph/enrich/claude.py +172 -0
- agentforge_graph/enrich/enricher.py +108 -0
- agentforge_graph/enrich/governs.py +173 -0
- agentforge_graph/enrich/governs_enricher.py +152 -0
- agentforge_graph/enrich/heuristics.py +224 -0
- agentforge_graph/enrich/judge.py +63 -0
- agentforge_graph/enrich/registry.py +133 -0
- agentforge_graph/enrich/report.py +60 -0
- agentforge_graph/enrich/summarizer.py +62 -0
- agentforge_graph/enrich/summary_enricher.py +211 -0
- agentforge_graph/enrich/taxonomy.py +38 -0
- agentforge_graph/frameworks/__init__.py +29 -0
- agentforge_graph/frameworks/base.py +75 -0
- agentforge_graph/frameworks/detect.py +124 -0
- agentforge_graph/frameworks/extractor.py +63 -0
- agentforge_graph/frameworks/orm.py +93 -0
- agentforge_graph/frameworks/packs/_js_ast.py +56 -0
- agentforge_graph/frameworks/packs/_python_ast.py +157 -0
- agentforge_graph/frameworks/packs/django/__init__.py +240 -0
- agentforge_graph/frameworks/packs/django/models.scm +7 -0
- agentforge_graph/frameworks/packs/express/__init__.py +133 -0
- agentforge_graph/frameworks/packs/express/routes.scm +8 -0
- agentforge_graph/frameworks/packs/fastapi/__init__.py +210 -0
- agentforge_graph/frameworks/packs/fastapi/depends.scm +6 -0
- agentforge_graph/frameworks/packs/fastapi/routes.scm +10 -0
- agentforge_graph/frameworks/packs/flask/__init__.py +143 -0
- agentforge_graph/frameworks/packs/flask/routes.scm +11 -0
- agentforge_graph/frameworks/packs/nestjs/__init__.py +205 -0
- agentforge_graph/frameworks/packs/nestjs/routes.scm +6 -0
- agentforge_graph/frameworks/packs/spring/__init__.py +267 -0
- agentforge_graph/frameworks/packs/spring/routes.scm +6 -0
- agentforge_graph/frameworks/packs/sqlalchemy/__init__.py +250 -0
- agentforge_graph/frameworks/packs/sqlalchemy/models.scm +7 -0
- agentforge_graph/frameworks/registry.py +44 -0
- agentforge_graph/ingest/__init__.py +30 -0
- agentforge_graph/ingest/codegraph.py +847 -0
- agentforge_graph/ingest/extractor.py +353 -0
- agentforge_graph/ingest/incremental/__init__.py +25 -0
- agentforge_graph/ingest/incremental/detect.py +118 -0
- agentforge_graph/ingest/incremental/dirty.py +61 -0
- agentforge_graph/ingest/incremental/indexer.py +218 -0
- agentforge_graph/ingest/incremental/meta.py +72 -0
- agentforge_graph/ingest/incremental/ports.py +39 -0
- agentforge_graph/ingest/pack.py +160 -0
- agentforge_graph/ingest/packs/__init__.py +34 -0
- agentforge_graph/ingest/packs/cpp/__init__.py +35 -0
- agentforge_graph/ingest/packs/cpp/references.scm +15 -0
- agentforge_graph/ingest/packs/cpp/structure.scm +49 -0
- agentforge_graph/ingest/packs/csharp/__init__.py +35 -0
- agentforge_graph/ingest/packs/csharp/references.scm +12 -0
- agentforge_graph/ingest/packs/csharp/structure.scm +45 -0
- agentforge_graph/ingest/packs/go/__init__.py +38 -0
- agentforge_graph/ingest/packs/go/references.scm +12 -0
- agentforge_graph/ingest/packs/go/structure.scm +64 -0
- agentforge_graph/ingest/packs/java/__init__.py +35 -0
- agentforge_graph/ingest/packs/java/references.scm +12 -0
- agentforge_graph/ingest/packs/java/structure.scm +38 -0
- agentforge_graph/ingest/packs/javascript/__init__.py +34 -0
- agentforge_graph/ingest/packs/javascript/references.scm +11 -0
- agentforge_graph/ingest/packs/javascript/structure.scm +166 -0
- agentforge_graph/ingest/packs/php/__init__.py +35 -0
- agentforge_graph/ingest/packs/php/references.scm +15 -0
- agentforge_graph/ingest/packs/php/structure.scm +44 -0
- agentforge_graph/ingest/packs/python/__init__.py +25 -0
- agentforge_graph/ingest/packs/python/references.scm +14 -0
- agentforge_graph/ingest/packs/python/structure.scm +57 -0
- agentforge_graph/ingest/packs/ruby/__init__.py +37 -0
- agentforge_graph/ingest/packs/ruby/references.scm +12 -0
- agentforge_graph/ingest/packs/ruby/structure.scm +37 -0
- agentforge_graph/ingest/packs/rust/__init__.py +39 -0
- agentforge_graph/ingest/packs/rust/references.scm +12 -0
- agentforge_graph/ingest/packs/rust/structure.scm +46 -0
- agentforge_graph/ingest/packs/typescript/__init__.py +31 -0
- agentforge_graph/ingest/packs/typescript/references.scm +11 -0
- agentforge_graph/ingest/packs/typescript/structure.scm +99 -0
- agentforge_graph/ingest/pipeline.py +134 -0
- agentforge_graph/ingest/report.py +84 -0
- agentforge_graph/ingest/resolver.py +467 -0
- agentforge_graph/ingest/source.py +79 -0
- agentforge_graph/knowledge/__init__.py +28 -0
- agentforge_graph/knowledge/adr.py +136 -0
- agentforge_graph/knowledge/commits.py +152 -0
- agentforge_graph/knowledge/ingest.py +312 -0
- agentforge_graph/knowledge/mentions.py +71 -0
- agentforge_graph/knowledge/report.py +32 -0
- agentforge_graph/main.py +21 -0
- agentforge_graph/providers.py +36 -0
- agentforge_graph/repomap/__init__.py +14 -0
- agentforge_graph/repomap/rank.py +161 -0
- agentforge_graph/repomap/render.py +55 -0
- agentforge_graph/repomap/repomap.py +66 -0
- agentforge_graph/retrieve/__init__.py +21 -0
- agentforge_graph/retrieve/pack.py +76 -0
- agentforge_graph/retrieve/rerank.py +251 -0
- agentforge_graph/retrieve/retriever.py +286 -0
- agentforge_graph/retrieve/scoring.py +36 -0
- agentforge_graph/serve/__init__.py +19 -0
- agentforge_graph/serve/engine.py +204 -0
- agentforge_graph/serve/http_runner.py +133 -0
- agentforge_graph/serve/server.py +110 -0
- agentforge_graph/serve/tools.py +307 -0
- agentforge_graph/store/__init__.py +32 -0
- agentforge_graph/store/_rowmap.py +102 -0
- agentforge_graph/store/errors.py +22 -0
- agentforge_graph/store/facade.py +89 -0
- agentforge_graph/store/kuzu_store.py +380 -0
- agentforge_graph/store/lance_store.py +146 -0
- agentforge_graph/store/neo4j_store.py +294 -0
- agentforge_graph/store/pgvector_store.py +170 -0
- agentforge_graph/store/registry.py +45 -0
- agentforge_graph/temporal/__init__.py +36 -0
- agentforge_graph/temporal/backfill.py +338 -0
- agentforge_graph/temporal/events.py +82 -0
- agentforge_graph/temporal/index.py +190 -0
- agentforge_graph/temporal/mining.py +190 -0
- agentforge_graph/temporal/recorder.py +114 -0
- agentforge_graph/temporal/store.py +282 -0
- agentforge_graph-0.3.2.dist-info/METADATA +291 -0
- agentforge_graph-0.3.2.dist-info/RECORD +151 -0
- agentforge_graph-0.3.2.dist-info/WHEEL +4 -0
- agentforge_graph-0.3.2.dist-info/entry_points.txt +3 -0
- agentforge_graph-0.3.2.dist-info/licenses/LICENSE +202 -0
- agentforge_graph-0.3.2.dist-info/licenses/NOTICE +14 -0
|
@@ -0,0 +1,294 @@
|
|
|
1
|
+
"""Neo4j-backed ``GraphStore`` — an opt-in *server* graph adapter (ENH-004).
|
|
2
|
+
|
|
3
|
+
For teams that want a shared, server-backed graph (multiple devs/CI hit one
|
|
4
|
+
store) or to reuse existing Neo4j infra. Neo4j speaks Cypher, like the embedded
|
|
5
|
+
Kuzu default, so this is a close port: the same open schema (one ``:CkgNode``
|
|
6
|
+
label + one ``:CkgEdge`` relationship type, ``kind`` a property, ``attrs`` a JSON
|
|
7
|
+
string) mapped via the shared ``_rowmap`` helpers, and it passes the same
|
|
8
|
+
``GraphStoreConformance`` suite Kuzu does.
|
|
9
|
+
|
|
10
|
+
Install: ``pip install agentforge-graph[neo4j]``; select in ckg.yaml:
|
|
11
|
+
|
|
12
|
+
store:
|
|
13
|
+
graph: { driver: neo4j, config: { uri: bolt://host:7687, user: neo4j } }
|
|
14
|
+
|
|
15
|
+
The ``neo4j`` driver is imported lazily in :meth:`open`, so the module imports
|
|
16
|
+
fine without the extra installed (the registry can reference it unconditionally).
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
import os
|
|
22
|
+
from pathlib import Path
|
|
23
|
+
from typing import TYPE_CHECKING, Any
|
|
24
|
+
|
|
25
|
+
from agentforge_graph.core import (
|
|
26
|
+
Direction,
|
|
27
|
+
Edge,
|
|
28
|
+
EdgeKind,
|
|
29
|
+
FileSubgraph,
|
|
30
|
+
GraphQuery,
|
|
31
|
+
GraphStore,
|
|
32
|
+
Node,
|
|
33
|
+
NodeKind,
|
|
34
|
+
QueryResult,
|
|
35
|
+
)
|
|
36
|
+
|
|
37
|
+
from ._rowmap import (
|
|
38
|
+
acceptable_sources,
|
|
39
|
+
dump_attrs,
|
|
40
|
+
edge_from_row,
|
|
41
|
+
edge_params,
|
|
42
|
+
load_attrs,
|
|
43
|
+
node_from_row,
|
|
44
|
+
node_params,
|
|
45
|
+
)
|
|
46
|
+
|
|
47
|
+
if TYPE_CHECKING:
|
|
48
|
+
from neo4j import AsyncDriver, AsyncManagedTransaction
|
|
49
|
+
|
|
50
|
+
# id uniqueness makes MERGE-by-id idempotent and fast (the conformance baseline).
|
|
51
|
+
_CONSTRAINT = "CREATE CONSTRAINT ckg_node_id IF NOT EXISTS FOR (n:CkgNode) REQUIRE n.id IS UNIQUE"
|
|
52
|
+
_MERGE_NODE = (
|
|
53
|
+
"MERGE (n:CkgNode {id: $id}) SET "
|
|
54
|
+
"n.kind = $kind, n.name = $name, "
|
|
55
|
+
"n.span_start = $span_start, n.span_end = $span_end, "
|
|
56
|
+
"n.attrs = $attrs, n.sym_path = $sym_path, "
|
|
57
|
+
"n.prov_source = $prov_source, n.prov_extractor = $prov_extractor, "
|
|
58
|
+
"n.prov_commit = $prov_commit, n.prov_confidence = $prov_confidence, "
|
|
59
|
+
"n.origin_path = $origin_path"
|
|
60
|
+
)
|
|
61
|
+
_INSERT_EDGE = (
|
|
62
|
+
"MATCH (a:CkgNode {id: $src}), (b:CkgNode {id: $dst}) "
|
|
63
|
+
"CREATE (a)-[e:CkgEdge {kind: $kind}]->(b) SET "
|
|
64
|
+
"e.attrs = $attrs, e.prov_source = $prov_source, "
|
|
65
|
+
"e.prov_extractor = $prov_extractor, e.prov_commit = $prov_commit, "
|
|
66
|
+
"e.prov_confidence = $prov_confidence, e.origin_path = $origin_path, "
|
|
67
|
+
"e.resolved_from = $resolved_from"
|
|
68
|
+
)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
class Neo4jGraphStore(GraphStore):
    """Server graph store backed by Neo4j (Bolt).

    Every node carries the ``:CkgNode`` label and every relationship the
    ``:CkgEdge`` type; rows are converted to and from the core ``Node`` /
    ``Edge`` models by the ``_rowmap`` helpers. Each public call opens its own
    short-lived session against the configured database.
    """

    def __init__(self, driver: AsyncDriver, database: str) -> None:
        """Wrap an already-connected *driver*; prefer :meth:`open` to build one."""
        self._driver = driver
        self._database = database
        self._closed = False

    @classmethod
    async def open(cls, path: str | Path, config: dict[str, Any] | None = None) -> Neo4jGraphStore:
        """Connect to Neo4j from the ``store.graph.config`` block.

        ``path`` (the embedded ``.ckg/`` location) is ignored. Recognised config
        keys: ``uri`` (falls back to ``$CKG_NEO4J_URI``, then
        ``bolt://localhost:7687``), ``user`` (default ``neo4j``), ``password``
        (falls back to ``$CKG_NEO4J_PASSWORD``), ``database`` (default ``neo4j``).

        Raises at open (not mid-index) if the server is unreachable or the
        uniqueness constraint cannot be created; in that case the driver is
        closed before the error propagates.
        """
        # Imported lazily so this module loads without the ``neo4j`` extra.
        from neo4j import AsyncGraphDatabase

        cfg = config or {}
        uri = str(cfg.get("uri") or os.environ.get("CKG_NEO4J_URI") or "bolt://localhost:7687")
        user = str(cfg.get("user", "neo4j"))
        password = str(cfg.get("password") or os.environ.get("CKG_NEO4J_PASSWORD") or "")
        database = str(cfg.get("database", "neo4j"))
        driver: AsyncDriver = AsyncGraphDatabase.driver(uri, auth=(user, password))
        try:
            await driver.verify_connectivity()
            async with driver.session(database=database) as session:
                result = await session.run(_CONSTRAINT)
                # Consume explicitly so the statement is known to have completed
                # (and any server-side error is raised) before the store is used.
                await result.consume()
        except BaseException:
            # Nobody else holds the driver yet: release it rather than leak it.
            await driver.close()
            raise
        return cls(driver, database)

    # --- writes -----------------------------------------------------------

    async def upsert(self, subgraph: FileSubgraph) -> None:
        """Replace the stored content for ``subgraph.path`` in one write transaction."""
        async with self._driver.session(database=self._database) as session:
            await session.execute_write(self._upsert_tx, subgraph)

    @staticmethod
    async def _upsert_tx(tx: AsyncManagedTransaction, sg: FileSubgraph) -> None:
        """Merge the file's nodes, prune its stale nodes, then rebuild its edges."""
        for node in sg.nodes:
            await tx.run(_MERGE_NODE, node_params(node, sg.path))
        # Nodes previously recorded for this file that are no longer in the
        # subgraph go away together with every edge attached to them.
        await tx.run(
            "MATCH (n:CkgNode) WHERE n.origin_path = $p AND NOT n.id IN $keep DETACH DELETE n",
            p=sg.path,
            keep=[n.id for n in sg.nodes],
        )
        # Edges are always CREATEd, so drop the file's old ones first to keep
        # a repeated upsert from duplicating them.
        await tx.run("MATCH ()-[e:CkgEdge]->() WHERE e.origin_path = $p DELETE e", p=sg.path)
        for edge in sg.edges:
            # _INSERT_EDGE MATCHes both endpoints, so an edge whose source or
            # destination node is absent creates nothing.
            await tx.run(_INSERT_EDGE, edge_params(edge, sg.path))

    async def add(self, items: list[Node | Edge]) -> None:
        """Write loose nodes and edges that are not tied to a file subgraph."""
        async with self._driver.session(database=self._database) as session:
            await session.execute_write(self._add_tx, items)

    @staticmethod
    async def _add_tx(tx: AsyncManagedTransaction, items: list[Node | Edge]) -> None:
        """Merge each node / create each edge, in the order given."""
        for item in items:
            # The empty string is passed where ``upsert`` passes the file path.
            if isinstance(item, Node):
                await tx.run(_MERGE_NODE, node_params(item, ""))
            else:
                await tx.run(_INSERT_EDGE, edge_params(item, ""))

    async def delete_file(self, path: str) -> None:
        """Remove every node and edge whose ``origin_path`` equals *path*."""
        async with self._driver.session(database=self._database) as session:
            await session.execute_write(self._delete_file_tx, path)

    @staticmethod
    async def _delete_file_tx(tx: AsyncManagedTransaction, path: str) -> None:
        """Delete the file's edges, then detach-delete its nodes."""
        await tx.run("MATCH ()-[e:CkgEdge]->() WHERE e.origin_path = $p DELETE e", p=path)
        await tx.run("MATCH (n:CkgNode) WHERE n.origin_path = $p DETACH DELETE n", p=path)

    async def clear_resolved(self, paths: list[str]) -> None:
        """Delete resolver-produced edges originating from *paths* (no-op when empty)."""
        if not paths:
            return
        async with self._driver.session(database=self._database) as session:
            await session.execute_write(self._clear_resolved_tx, paths)

    @staticmethod
    async def _clear_resolved_tx(tx: AsyncManagedTransaction, paths: list[str]) -> None:
        """Drop ``Source.RESOLVED`` edges for *paths*, then GC orphaned package nodes."""
        from agentforge_graph.core import Source

        await tx.run(
            "MATCH ()-[e:CkgEdge]->() "
            "WHERE e.origin_path IN $paths AND e.prov_source = $resolved DELETE e",
            paths=paths,
            resolved=Source.RESOLVED.value,
        )
        # GC external package stubs orphaned by the edge deletion, so the
        # incremental graph matches a full re-index (no dangling sinks).
        await tx.run(
            "MATCH (p:CkgNode) WHERE p.kind = $pkg AND NOT ()-[:CkgEdge]->(p) DETACH DELETE p",
            pkg=NodeKind.PACKAGE.value,
        )

    async def clear_outgoing(self, src_ids: list[str], kind: EdgeKind) -> None:
        """Delete all outgoing edges of *kind* from the given source node ids.

        A no-op for an empty id list. Runs as a managed write transaction like
        the other write methods.
        """
        if not src_ids:
            return
        async with self._driver.session(database=self._database) as session:
            await session.execute_write(self._clear_outgoing_tx, src_ids, kind.value)

    @staticmethod
    async def _clear_outgoing_tx(
        tx: AsyncManagedTransaction, src_ids: list[str], kind: str
    ) -> None:
        """Delete edges of the given kind value that start at any of *src_ids*."""
        await tx.run(
            "MATCH (a:CkgNode)-[e:CkgEdge]->() WHERE a.id IN $ids AND e.kind = $kind DELETE e",
            ids=src_ids,
            kind=kind,
        )

    # --- reads ------------------------------------------------------------

    async def query(self, q: GraphQuery) -> QueryResult:
        """Return nodes matching every filter set on *q*, capped at ``q.limit``.

        Filters left as ``None`` are not applied. ``truncated`` is true when
        more than ``q.limit`` nodes matched.
        """
        clauses: list[str] = []
        params: dict[str, Any] = {}
        if q.kinds is not None:
            clauses.append("n.kind IN $kinds")
            params["kinds"] = [k.value for k in q.kinds]
        if q.name is not None:
            clauses.append("n.name = $name")
            params["name"] = q.name
        if q.path_prefix is not None:
            clauses.append("n.sym_path STARTS WITH $prefix")
            params["prefix"] = q.path_prefix
        if q.min_source is not None:
            clauses.append("n.prov_source IN $sources")
            params["sources"] = acceptable_sources(q.min_source)
        where = (" WHERE " + " AND ".join(clauses)) if clauses else ""
        params["lim"] = q.limit + 1  # one extra to detect truncation
        rows = await self._read(f"MATCH (n:CkgNode){where} RETURN n LIMIT $lim", params)
        nodes = [node_from_row(dict(r["n"])) for r in rows]
        return QueryResult(nodes=nodes[: q.limit], truncated=len(nodes) > q.limit)

    async def neighbors(
        self, node_id: str, kinds: list[EdgeKind] | None = None, depth: int = 1
    ) -> list[Node]:
        """Return the nodes reachable from *node_id* within *depth* hops.

        Edges are followed in either direction and, when *kinds* is given, only
        if their kind is listed. The start node is never included; results are
        in breadth-first discovery order. A non-positive *depth* yields ``[]``.
        """
        kind_values = [k.value for k in kinds] if kinds is not None else None
        visited = {node_id}
        frontier = [node_id]
        collected: list[str] = []
        for _ in range(depth):
            if not frontier:
                break
            params: dict[str, Any] = {"frontier": frontier}
            kind_clause = ""
            if kind_values is not None:
                kind_clause = " AND e.kind IN $kinds"
                params["kinds"] = kind_values
            rows = await self._read(
                "MATCH (a:CkgNode)-[e:CkgEdge]-(b:CkgNode) "
                f"WHERE a.id IN $frontier{kind_clause} RETURN DISTINCT b.id AS id",
                params,
            )
            nxt: list[str] = []
            for r in rows:
                nid = r["id"]
                if nid not in visited:
                    visited.add(nid)
                    nxt.append(nid)
                    collected.append(nid)
            frontier = nxt
        # One batched lookup instead of a server round trip per neighbour.
        return await self._nodes_by_ids(collected)

    async def _nodes_by_ids(self, ids: list[str]) -> list[Node]:
        """Load the nodes for *ids* in one query, keeping the order of *ids*.

        Ids with no stored node are skipped.
        """
        if not ids:
            return []
        rows = await self._read("MATCH (n:CkgNode) WHERE n.id IN $ids RETURN n", {"ids": ids})
        found: dict[str, Node] = {}
        for r in rows:
            props = dict(r["n"])
            found.setdefault(props["id"], node_from_row(props))
        return [found[i] for i in ids if i in found]

    async def get(self, node_id: str) -> Node | None:
        """Return the node with id *node_id*, or ``None`` if there is none."""
        rows = await self._read("MATCH (n:CkgNode {id: $id}) RETURN n", {"id": node_id})
        return node_from_row(dict(rows[0]["n"])) if rows else None

    async def set_attrs(self, node_id: str, attrs: dict[str, Any]) -> None:
        """Merge *attrs* into the node's stored attrs (new keys win); absent node: no-op."""
        async with self._driver.session(database=self._database) as session:
            await session.execute_write(self._set_attrs_tx, node_id, attrs)

    @staticmethod
    async def _set_attrs_tx(
        tx: AsyncManagedTransaction, node_id: str, attrs: dict[str, Any]
    ) -> None:
        """Read-modify-write the node's JSON ``attrs`` inside one transaction."""
        rows = [
            r
            async for r in await tx.run(
                "MATCH (n:CkgNode {id: $id}) RETURN n.attrs AS a", id=node_id
            )
        ]
        if not rows:
            return  # absent node: no-op (contract)
        merged = {**load_attrs(rows[0]["a"]), **attrs}
        # SET only attrs — origin_path and every other property are left intact.
        await tx.run(
            "MATCH (n:CkgNode {id: $id}) SET n.attrs = $attrs",
            id=node_id,
            attrs=dump_attrs(merged),
        )

    async def adjacent(
        self,
        node_id: str,
        kinds: list[EdgeKind] | None = None,
        direction: Direction = "both",
    ) -> list[Edge]:
        """Return the edges touching *node_id*.

        ``direction`` selects outgoing (``"out"``), incoming (``"in"``) or both;
        with ``"both"`` the outgoing edges come first. *kinds*, when given,
        restricts the result to those edge kinds.
        """
        params: dict[str, Any] = {"id": node_id}
        where = ""
        if kinds is not None:
            where = " WHERE e.kind IN $kinds"
            params["kinds"] = [k.value for k in kinds]
        edges: list[Edge] = []
        if direction in ("out", "both"):
            rows = await self._read(
                f"MATCH (a:CkgNode {{id: $id}})-[e:CkgEdge]->(b:CkgNode){where} "
                "RETURN e, b.id AS oid",
                params,
            )
            # Outgoing: this node is the source, the other end the destination.
            edges += [edge_from_row(dict(r["e"]), node_id, r["oid"]) for r in rows]
        if direction in ("in", "both"):
            rows = await self._read(
                f"MATCH (a:CkgNode {{id: $id}})<-[e:CkgEdge]-(b:CkgNode){where} "
                "RETURN e, b.id AS oid",
                params,
            )
            # Incoming: the other end is the source, this node the destination.
            edges += [edge_from_row(dict(r["e"]), r["oid"], node_id) for r in rows]
        return edges

    async def _read(self, cypher: str, params: dict[str, Any]) -> list[Any]:
        """Run *cypher* in a fresh session and return all records as a list."""
        async with self._driver.session(database=self._database) as session:
            result = await session.run(cypher, params)
            # Materialise inside the session; the result is not read afterwards.
            return [r async for r in result]

    async def close(self) -> None:
        """Close the driver; safe to call more than once."""
        if self._closed:
            return
        self._closed = True
        await self._driver.close()
|
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
"""Postgres + pgvector ``VectorStore`` — an opt-in *server* vector adapter
|
|
2
|
+
(ENH-004), so teams reuse an existing Postgres instead of the embedded LanceDB.
|
|
3
|
+
|
|
4
|
+
Mirrors the LanceDB adapter's shape: one ``vectors`` table created lazily on the
|
|
5
|
+
first ``upsert`` (dimension fixed from the first batch), the same first-class
|
|
6
|
+
filter columns (``ref``, ``kind``, ``path``), and a cosine similarity in [0, 1]
|
|
7
|
+
(higher = closer, BUG-002). Passes the same ``VectorStoreConformance`` suite.
|
|
8
|
+
|
|
9
|
+
Install: ``pip install agentforge-graph[pgvector]``; select in ckg.yaml:
|
|
10
|
+
|
|
11
|
+
store:
|
|
12
|
+
vectors: { driver: pgvector, config: { dsn: postgresql://user@host/db } }
|
|
13
|
+
|
|
14
|
+
``asyncpg``/``pgvector`` are imported lazily in :meth:`open`, so the module
|
|
15
|
+
imports fine without the extra (the registry can reference it unconditionally).
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
import json
|
|
21
|
+
import os
|
|
22
|
+
from pathlib import Path
|
|
23
|
+
from typing import TYPE_CHECKING, Any
|
|
24
|
+
|
|
25
|
+
from agentforge_graph.core import Embedded, ScoredRef, VectorStore
|
|
26
|
+
from agentforge_graph.core.symbols import SymbolID
|
|
27
|
+
|
|
28
|
+
if TYPE_CHECKING:
|
|
29
|
+
from asyncpg import Pool
|
|
30
|
+
|
|
31
|
+
_TABLE = "ckg_vectors"
|
|
32
|
+
_FILTERABLE = ("ref", "kind", "path")
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _sym_path(ref: str) -> str:
    """Return the ``path`` of the symbol id *ref*.

    Yields the empty string when parsing *ref* raises ``ValueError``.
    """
    try:
        path = SymbolID.parse(ref).path
    except ValueError:
        path = ""
    return path
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _check_filter(filter: dict[str, Any]) -> None:
    """Reject a filter that names any column outside ``_FILTERABLE``.

    Raises:
        ValueError: listing the offending keys (sorted) and the allowed columns.
    """
    unknown = {column for column in filter if column not in _FILTERABLE}
    if not unknown:
        return
    raise ValueError(f"unfilterable column(s) {sorted(unknown)}; allowed: {_FILTERABLE}")
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class PgVectorStore(VectorStore):
    """Server vector store backed by Postgres + the pgvector extension.

    All rows live in the single table named by ``_TABLE``. The table is created
    on the first non-empty :meth:`upsert`, with the embedding dimension taken
    from that batch; until then reads and deletes are no-ops.
    """

    def __init__(self, pool: Pool, dim: int | None) -> None:
        """Wrap an existing asyncpg *pool*; prefer :meth:`open` to build one."""
        self._pool = pool
        self._dim = dim  # None until the table exists (created on first upsert)
        self._closed = False

    @classmethod
    async def open(cls, path: str | Path, config: dict[str, Any] | None = None) -> PgVectorStore:
        """Connect to Postgres from the ``store.vectors.config`` block.

        ``path`` (the embedded ``.ckg/`` location) is ignored. Recognised config
        keys: ``dsn`` (falls back to ``$CKG_PGVECTOR_DSN``). Ensures the
        ``vector`` extension exists and registers the type on every pooled
        connection. If the vectors table already exists (a reopen) its embedding
        dimension is read back from the catalog.

        If that lookup fails the pool is closed before the error propagates.
        """
        # Imported lazily so this module loads without the ``pgvector`` extra.
        import asyncpg
        from pgvector.asyncpg import register_vector

        cfg = config or {}
        dsn = str(cfg.get("dsn") or os.environ.get("CKG_PGVECTOR_DSN") or "")

        async def _init(conn: Any) -> None:
            await conn.execute("CREATE EXTENSION IF NOT EXISTS vector")
            await register_vector(conn)

        pool = await asyncpg.create_pool(dsn, init=_init, min_size=1, max_size=4)
        # discover the dimension if the table already exists (reopen).
        dim: int | None = None
        try:
            async with pool.acquire() as conn:
                row = await conn.fetchrow(
                    "SELECT a.atttypmod AS dim FROM pg_attribute a "
                    "JOIN pg_class c ON a.attrelid = c.oid "
                    "WHERE c.relname = $1 AND a.attname = 'embedding' "
                    # Only the table an unqualified name resolves to, not a
                    # same-named table in some other schema.
                    "AND pg_catalog.pg_table_is_visible(c.oid)",
                    _TABLE,
                )
        except BaseException:
            # Nobody else holds the pool yet: release it rather than leak it.
            await pool.close()
            raise
        if row is not None and row["dim"] is not None and row["dim"] > 0:
            dim = int(row["dim"])
        return cls(pool, dim)

    async def _ensure_table(self, dim: int) -> None:
        """Create the vectors table with a ``vector(dim)`` column if not yet known.

        Does nothing once a dimension has been recorded on this instance.
        """
        if self._dim is not None:
            return
        async with self._pool.acquire() as conn:
            await conn.execute(
                f"CREATE TABLE IF NOT EXISTS {_TABLE} ("
                "ref TEXT PRIMARY KEY, "
                f"embedding vector({dim}), "
                "kind TEXT, path TEXT, attrs_json TEXT)"
            )
        self._dim = dim

    @staticmethod
    def _where(filter: dict[str, Any], params: list[Any]) -> str:
        """Build an equality ``WHERE`` clause for *filter*.

        Each value is appended to *params* and referenced by its positional
        placeholder. Column names are interpolated into the SQL text, so they
        are validated against the allow-list first. Returns ``""`` for an empty
        filter.

        Raises:
            ValueError: if *filter* names a column that is not filterable.
        """
        _check_filter(filter)
        conds = []
        for col, val in filter.items():
            params.append(val)
            conds.append(f"{col} = ${len(params)}")
        return (" WHERE " + " AND ".join(conds)) if conds else ""

    @staticmethod
    def _score(raw: Any) -> float:
        """Clamp a raw ``1 - cosine_distance`` value into ``[0, 1]``.

        A missing or NaN value (cosine distance is undefined for a zero-norm
        vector) scores 0.0. A bare ``min``/``max`` clamp would turn NaN into
        1.0, because comparisons with NaN are always false.
        """
        import math

        if raw is None:
            return 0.0
        sim = float(raw)
        if math.isnan(sim):
            return 0.0
        return max(0.0, min(1.0, sim))

    async def upsert(self, items: list[Embedded]) -> None:
        """Insert *items*, overwriting any existing row with the same ``ref``.

        A no-op for an empty list. The first item's vector length fixes the
        table dimension when the table does not exist yet.
        """
        if not items:
            return
        await self._ensure_table(len(items[0].vector))
        rows = [
            (
                i.ref,
                [float(x) for x in i.vector],
                i.kind.value,
                _sym_path(i.ref),
                json.dumps(i.attrs, sort_keys=True),
            )
            for i in items
        ]
        async with self._pool.acquire() as conn:
            await conn.executemany(
                f"INSERT INTO {_TABLE} (ref, embedding, kind, path, attrs_json) "
                "VALUES ($1, $2, $3, $4, $5) "
                "ON CONFLICT (ref) DO UPDATE SET "
                "embedding = EXCLUDED.embedding, kind = EXCLUDED.kind, "
                "path = EXCLUDED.path, attrs_json = EXCLUDED.attrs_json",
                rows,
            )

    async def search(
        self, vector: list[float], k: int, filter: dict[str, Any] | None = None
    ) -> list[ScoredRef]:
        """Return up to *k* stored refs closest to *vector* by cosine distance.

        *filter* restricts candidates by equality on the filterable columns.
        Scores are similarities in ``[0, 1]`` (higher = closer). Returns ``[]``
        when the table does not exist yet or *k* is not positive.

        Raises:
            ValueError: if *filter* names a column that is not filterable.
        """
        if self._dim is None:
            return []
        params: list[Any] = [[float(x) for x in vector]]
        where = self._where(filter, params) if filter else ""
        if k <= 0:
            # Nothing to return; a negative LIMIT would be rejected by Postgres.
            return []
        params.append(k)
        # `<=>` is cosine distance in [0, 2]; expose a similarity in [0, 1].
        sql = (
            f"SELECT ref, attrs_json, 1 - (embedding <=> $1) AS sim "
            f"FROM {_TABLE}{where} ORDER BY embedding <=> $1 LIMIT ${len(params)}"
        )
        async with self._pool.acquire() as conn:
            rows = await conn.fetch(sql, *params)
        return [
            ScoredRef(
                ref=r["ref"],
                score=self._score(r["sim"]),
                attrs=json.loads(r["attrs_json"]) if r["attrs_json"] else {},
            )
            for r in rows
        ]

    async def delete_where(self, filter: dict[str, Any]) -> None:
        """Delete every row matching *filter* (equality on filterable columns).

        A no-op when the table does not exist yet. NOTE: an empty *filter*
        produces no ``WHERE`` clause and therefore deletes every row.

        Raises:
            ValueError: if *filter* names a column that is not filterable.
        """
        if self._dim is None:
            return
        params: list[Any] = []
        where = self._where(filter, params)
        async with self._pool.acquire() as conn:
            await conn.execute(f"DELETE FROM {_TABLE}{where}", *params)

    async def close(self) -> None:
        """Close the connection pool; safe to call more than once."""
        if self._closed:
            return
        self._closed = True
        await self._pool.close()
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
"""Driver registry: config driver-name → adapter class.
|
|
2
|
+
|
|
3
|
+
Embedded drivers (Kuzu, LanceDB) ship by default; first-party **server** drivers
|
|
4
|
+
(Neo4j graph, pgvector — ENH-004) are registered too but their DB SDK is imported
|
|
5
|
+
lazily inside the adapter's ``open``, so they cost nothing until selected and
|
|
6
|
+
need only their extra installed (``pip install agentforge-graph[neo4j|pgvector]``).
|
|
7
|
+
Third-party adapters still register out-of-tree via the entry-point groups.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
from importlib.metadata import entry_points
|
|
13
|
+
from typing import Any
|
|
14
|
+
|
|
15
|
+
from .errors import DriverNotFound
|
|
16
|
+
from .kuzu_store import KuzuGraphStore
|
|
17
|
+
from .lance_store import LanceVectorStore
|
|
18
|
+
from .neo4j_store import Neo4jGraphStore
|
|
19
|
+
from .pgvector_store import PgVectorStore
|
|
20
|
+
|
|
21
|
+
GRAPH_GROUP = "agentforge_graph.graph_drivers"
|
|
22
|
+
VECTOR_GROUP = "agentforge_graph.vector_drivers"
|
|
23
|
+
|
|
24
|
+
_GRAPH_BUILTINS: dict[str, Any] = {"kuzu": KuzuGraphStore, "neo4j": Neo4jGraphStore}
|
|
25
|
+
_VECTOR_BUILTINS: dict[str, Any] = {"lancedb": LanceVectorStore, "pgvector": PgVectorStore}
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _resolve(name: str, builtins: dict[str, Any], group: str) -> Any:
|
|
29
|
+
if name in builtins:
|
|
30
|
+
return builtins[name]
|
|
31
|
+
for ep in entry_points(group=group):
|
|
32
|
+
if ep.name == name:
|
|
33
|
+
return ep.load()
|
|
34
|
+
known = sorted(builtins)
|
|
35
|
+
raise DriverNotFound(f"unknown driver {name!r}; built-in drivers: {known}")
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def graph_driver(name: str) -> Any:
    """Return the graph-store class registered for the config ``driver`` *name*."""
    store_cls = _resolve(name, builtins=_GRAPH_BUILTINS, group=GRAPH_GROUP)
    return store_cls
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def vector_driver(name: str) -> Any:
    """Return the vector-store class registered for the config ``driver`` *name*."""
    store_cls = _resolve(name, builtins=_VECTOR_BUILTINS, group=VECTOR_GROUP)
    return store_cls
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
"""feat-009 temporal / git-evolution layer.
|
|
2
|
+
|
|
3
|
+
An append-only evolution log (``.ckg/temporal.db``) populated by the feat-004
|
|
4
|
+
refresh, recording when each symbol was introduced/removed at which commit —
|
|
5
|
+
the basis for ``history`` / ``changed_since`` / ``as_of`` and for churn/age
|
|
6
|
+
ranking signals. Higher layer: imports ``core``/``store``/``ingest``; the
|
|
7
|
+
deterministic engine core never imports this. Default off (opt-in). See
|
|
8
|
+
``docs/design/design-009-temporal-evolution-layer.md``.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from .backfill import BackfillReport, parse_history, run_backfill
|
|
12
|
+
from .events import Author, Change, Entity, Event, EventKind, SymbolHistory
|
|
13
|
+
from .index import TemporalError, TemporalIndex
|
|
14
|
+
from .mining import ChurnMiner, SymbolAggregate
|
|
15
|
+
from .recorder import SqliteTemporalRecorder, build_recorder, seed_symbols
|
|
16
|
+
from .store import TemporalStore
|
|
17
|
+
|
|
18
|
+
__all__ = [
|
|
19
|
+
"Author",
|
|
20
|
+
"BackfillReport",
|
|
21
|
+
"Change",
|
|
22
|
+
"ChurnMiner",
|
|
23
|
+
"Entity",
|
|
24
|
+
"Event",
|
|
25
|
+
"EventKind",
|
|
26
|
+
"SqliteTemporalRecorder",
|
|
27
|
+
"SymbolAggregate",
|
|
28
|
+
"SymbolHistory",
|
|
29
|
+
"TemporalError",
|
|
30
|
+
"TemporalIndex",
|
|
31
|
+
"TemporalStore",
|
|
32
|
+
"build_recorder",
|
|
33
|
+
"parse_history",
|
|
34
|
+
"run_backfill",
|
|
35
|
+
"seed_symbols",
|
|
36
|
+
]
|