know-do-graph 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. agents/__init__.py +0 -0
  2. agents/extraction_agent/__init__.py +0 -0
  3. agents/extraction_agent/agent.py +170 -0
  4. agents/graph_agent/__init__.py +5 -0
  5. agents/graph_agent/agent.py +373 -0
  6. agents/graph_agent/tools.py +2106 -0
  7. agents/maintenance_agent/__init__.py +0 -0
  8. agents/maintenance_agent/agent.py +283 -0
  9. agents/orchestrator/__init__.py +0 -0
  10. agents/orchestrator/agent.py +217 -0
  11. agents/review_agent/__init__.py +0 -0
  12. agents/review_agent/agent.py +188 -0
  13. agents/review_agent/tools.py +472 -0
  14. api/__init__.py +0 -0
  15. api/main.py +136 -0
  16. api/routes/__init__.py +0 -0
  17. api/routes/agent.py +81 -0
  18. api/routes/entries.py +411 -0
  19. api/routes/graph.py +132 -0
  20. api/routes/mem.py +179 -0
  21. api/routes/remote.py +815 -0
  22. api/routes/remote_sync.py +230 -0
  23. api/routes/retrieve.py +88 -0
  24. core/__init__.py +0 -0
  25. core/app_state.py +9 -0
  26. core/events.py +84 -0
  27. core/extraction/__init__.py +0 -0
  28. core/extraction/wikilink_parser.py +48 -0
  29. core/graph/__init__.py +0 -0
  30. core/graph/graph.py +204 -0
  31. core/memory/__init__.py +0 -0
  32. core/memory/memgraph.py +458 -0
  33. core/resources/starter.db +0 -0
  34. core/retrieval/__init__.py +0 -0
  35. core/retrieval/embedder.py +122 -0
  36. core/retrieval/fusion.py +52 -0
  37. core/retrieval/progressive.py +399 -0
  38. core/retrieval/retrieval.py +346 -0
  39. core/retrieval/vector_store.py +91 -0
  40. core/schemas/__init__.py +0 -0
  41. core/schemas/edge.py +46 -0
  42. core/schemas/entry.py +388 -0
  43. core/storage/__init__.py +0 -0
  44. core/storage/database.py +104 -0
  45. core/storage/models.py +66 -0
  46. core/storage/repository.py +243 -0
  47. core/sync/__init__.py +20 -0
  48. core/sync/autolink.py +301 -0
  49. core/sync/db_merge.py +297 -0
  50. core/sync/db_watcher.py +84 -0
  51. core/sync/remote_sync.py +345 -0
  52. examples/__init__.py +0 -0
  53. examples/example_entries.py +206 -0
  54. examples/pymatgen_interface_examples.py +811 -0
  55. frontend/dist/assets/index-BLfo7ZZu.css +1 -0
  56. frontend/dist/assets/index-G-mYbZ9R.js +83 -0
  57. frontend/dist/assets/index-G-mYbZ9R.js.map +1 -0
  58. frontend/dist/index.html +92 -0
  59. know_do_graph-0.1.0.dist-info/METADATA +765 -0
  60. know_do_graph-0.1.0.dist-info/RECORD +63 -0
  61. know_do_graph-0.1.0.dist-info/WHEEL +4 -0
  62. know_do_graph-0.1.0.dist-info/entry_points.txt +2 -0
  63. main.py +944 -0
@@ -0,0 +1,230 @@
1
+ """Remote-source sync routes.
2
+
3
+ These endpoints expose the :mod:`core.sync.remote_sync` machinery so that
4
+ operators (and the UI) can:
5
+
6
+ * list all entries that mirror an upstream file,
7
+ * trigger a one-shot resync (one entry or all due),
8
+ * attach / detach a remote source on an existing entry.
9
+
10
+ Mounted at ``/remote-sync`` from :mod:`api.main`.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ from datetime import datetime
16
+ from typing import Optional
17
+
18
+ from fastapi import APIRouter, Depends, HTTPException
19
+ from pydantic import BaseModel
20
+ from sqlalchemy.orm import Session
21
+
22
+ from core import events as _events
23
+ from core.app_state import graph as _graph
24
+ from core.schemas.entry import RemoteSource
25
+ from core.storage.database import get_db
26
+ from core.storage.repository import EntryRepository
27
+ from core.sync.remote_sync import (
28
+ SyncResult,
29
+ parse_github_url,
30
+ sync_all_due,
31
+ sync_entry,
32
+ )
33
+
34
+ router = APIRouter()
35
+
36
+
37
+ # ── Helpers ───────────────────────────────────────────────────────────────────
38
+
39
+
40
def _result_to_dict(r: SyncResult) -> dict:
    """Flatten a sync result into a plain dict suitable for a JSON response.

    All fields are copied through unchanged except ``fetched_at``, which is
    rendered with ``isoformat()`` when truthy and as ``None`` otherwise.
    """
    copied_fields = ("entry_id", "title", "status", "detail", "bytes_fetched", "new_hash")
    payload = {field: getattr(r, field) for field in copied_fields}
    if r.fetched_at:
        payload["fetched_at"] = r.fetched_at.isoformat()
    else:
        payload["fetched_at"] = None
    return payload
50
+
51
+
52
def _source_to_dict(src: RemoteSource) -> dict:
    """Dump a remote source in JSON mode.

    If the dumped ``fetched_at`` value is still a ``datetime`` it is replaced
    by its ISO-8601 string; any other value is left as dumped.
    """
    dumped = src.model_dump(mode="json")
    stamp = dumped.get("fetched_at")
    if isinstance(stamp, datetime):
        dumped["fetched_at"] = stamp.isoformat()
    return dumped
57
+
58
+
59
def _resolve_entry(db: Session, id_or_slug: str):
    """Find the first entry whose id, slug, or one of its aliases equals *id_or_slug*.

    The lookup is a linear scan over ``EntryRepository.get_all()``; an explicit
    ``get_by_slug`` on the repository would be the upgrade path if this
    becomes hot.

    Returns:
        ``(repo, entry)`` — the repository built on *db* and the matching entry.

    Raises:
        HTTPException: 404 when nothing matches.
    """
    repo = EntryRepository(db)
    match = next(
        (
            candidate
            for candidate in repo.get_all()
            if candidate.id == id_or_slug
            or candidate.slug == id_or_slug
            or id_or_slug in candidate.aliases
        ),
        None,
    )
    if match is None:
        raise HTTPException(status_code=404, detail=f"entry not found: {id_or_slug}")
    return repo, match
70
+
71
+
72
+ # ── Request models ────────────────────────────────────────────────────────────
73
+
74
+
75
class AttachSourceRequest(BaseModel):
    """Request body for attaching (or replacing) a remote source on an entry."""

    # Upstream location; the attach route first tries to parse it as a GitHub URL.
    url: str
    # Optional explicit overrides; when omitted, the attach route fills them
    # from the parsed URL where possible.
    ref: Optional[str] = None
    path: Optional[str] = None
    owner: Optional[str] = None
    repo: Optional[str] = None
    # Forwarded unchanged to the stored RemoteSource.
    auto_sync: bool = True
    sync_interval_seconds: int = 3600
    # When true, the attach route performs a forced sync right after saving.
    sync_now: bool = True
84
+
85
+
86
+ # ── Routes ────────────────────────────────────────────────────────────────────
87
+
88
+
89
@router.get("/")
def list_linked_entries(db: Session = Depends(get_db)) -> list[dict]:
    """Return a summary dict for each entry that has a remote source attached.

    Entries whose ``metadata.remote_source`` is ``None`` are left out.
    """
    return [
        {
            "entry_id": item.id,
            "slug": item.slug,
            "title": item.title,
            "remote_source": _source_to_dict(linked),
        }
        for item in EntryRepository(db).get_all()
        if (linked := item.metadata.remote_source) is not None
    ]
104
+
105
+
106
@router.post("/all")
async def sync_all_endpoint(force: bool = False) -> dict:
    """Run ``sync_all_due`` and report per-status counts plus every individual result.

    ``force`` is forwarded unchanged to ``sync_all_due``.
    """
    outcomes = await sync_all_due(force=force)

    # Map each result status to the summary key it is counted under.
    summary_key = {"updated": "updated", "unchanged": "unchanged", "error": "errors"}
    counts = dict.fromkeys(summary_key.values(), 0)
    for outcome in outcomes:
        key = summary_key.get(outcome.status)
        if key is not None:
            counts[key] += 1

    return {
        "checked": len(outcomes),
        **counts,
        "results": [_result_to_dict(outcome) for outcome in outcomes],
    }
118
+
119
+
120
@router.post("/{id_or_slug}")
async def sync_one_endpoint(
    id_or_slug: str,
    force: bool = True,
    db: Session = Depends(get_db),
) -> dict:
    """Sync a single entry from its remote source now.

    The entry is resolved by id, slug, or alias, synced, and written back
    through the repository. When the sync reports ``"updated"`` the in-memory
    graph node is refreshed, derived edges are re-linked, and a
    ``node_updated`` event is emitted. Graph refresh and auto-linking are
    best-effort: a failure there is logged but does not fail the request.

    Returns:
        A dict with ``result`` (the sync result), ``remote_source`` (the
        entry's current source), and ``autolink`` (edge counts, or ``None``
        when auto-linking did not run or failed).

    Raises:
        HTTPException: 404 if the entry cannot be resolved, 400 if it has no
            remote source.
    """
    import logging

    log = logging.getLogger(__name__)

    repo, entry = _resolve_entry(db, id_or_slug)
    if entry.metadata.remote_source is None:
        raise HTTPException(status_code=400, detail="entry has no remote_source")

    result = await sync_entry(entry, force=force)
    # Persist regardless of outcome (presumably sync_entry records fetch
    # bookkeeping on the entry even when content is unchanged — confirm).
    updated = repo.update(entry)

    autolink_summary: dict | None = None
    if result.status == "updated" and updated is not None:
        try:
            _graph.add_entry(updated)
        except Exception:
            log.exception("remote sync: could not refresh graph node for entry %s", entry.id)

        # Refresh derived edges from the new content.
        try:
            from core.storage.repository import EdgeRepository
            from core.sync.autolink import auto_link_entry

            al = auto_link_entry(updated, repo.get_all(), EdgeRepository(db))
            autolink_summary = {
                "frontmatter_edges": al.frontmatter_edges,
                "mention_edges": al.mention_edges,
            }
        except Exception:  # pragma: no cover
            log.exception("remote sync: auto-link failed for entry %s", entry.id)

        _events.emit(
            "node_updated",
            {"id": entry.id, "slug": entry.slug, "title": entry.title, "source": "remote_sync"},
        )

    return {
        "result": _result_to_dict(result),
        "remote_source": _source_to_dict(entry.metadata.remote_source),
        "autolink": autolink_summary,
    }
158
+
159
+
160
@router.put("/{id_or_slug}/source")
async def attach_source(
    id_or_slug: str,
    body: AttachSourceRequest,
    db: Session = Depends(get_db),
) -> dict:
    """Attach or replace the remote source on an entry.

    If ``body.url`` parses as a GitHub URL the source kind is ``"github"`` and
    any of owner/repo/path/ref not supplied explicitly are taken from the
    parsed URL (``ref`` defaults to ``"main"``). Otherwise the kind is
    ``"http"``. The new source is saved immediately; when ``body.sync_now`` is
    set a forced sync follows, and on an ``"updated"`` result the graph node
    is refreshed (best-effort, failures are logged) and a ``node_updated``
    event is emitted.

    Returns:
        A dict with ``remote_source`` (the stored source) and ``result`` (the
        sync result, or ``None`` when no sync was requested).

    Raises:
        HTTPException: 404 if the entry cannot be resolved, 400 if a GitHub
            source ends up without owner, repo, or path.
    """
    import logging

    log = logging.getLogger(__name__)

    repo, entry = _resolve_entry(db, id_or_slug)

    owner = body.owner
    repo_name = body.repo
    path = body.path
    ref = body.ref
    kind = "github"

    parsed = parse_github_url(body.url)
    if parsed:
        # Explicit request fields win over whatever the URL encodes.
        owner = owner or parsed.get("owner")
        repo_name = repo_name or parsed.get("repo")
        path = path or parsed.get("path")
        ref = ref or parsed.get("ref") or "main"
    else:
        kind = "http"

    if kind == "github" and not (owner and repo_name and path):
        raise HTTPException(
            status_code=400,
            detail="GitHub source needs owner/repo/path (parsed from URL or supplied explicitly)",
        )

    src = RemoteSource(
        kind=kind,
        url=body.url,
        owner=owner,
        repo=repo_name,
        ref=ref or "main",
        path=path,
        auto_sync=body.auto_sync,
        sync_interval_seconds=body.sync_interval_seconds,
    )
    entry.metadata.remote_source = src
    # Save the link first so it survives even if the sync below fails.
    repo.update(entry)

    result_dict = None
    if body.sync_now:
        result = await sync_entry(entry, force=True)
        updated = repo.update(entry)
        if result.status == "updated" and updated is not None:
            try:
                _graph.add_entry(updated)
            except Exception:
                log.exception("remote sync: could not refresh graph node for entry %s", entry.id)
            _events.emit(
                "node_updated",
                {"id": entry.id, "slug": entry.slug, "title": entry.title, "source": "remote_sync"},
            )
        result_dict = _result_to_dict(result)

    return {
        "remote_source": _source_to_dict(entry.metadata.remote_source),
        "result": result_dict,
    }
222
+
223
+
224
@router.delete("/{id_or_slug}/source")
def detach_source(id_or_slug: str, db: Session = Depends(get_db)) -> dict:
    """Clear the entry's ``remote_source`` and persist the entry.

    Only the source link is reset here; no other field of the entry is
    touched by this handler.
    """
    repository, target = _resolve_entry(db, id_or_slug)
    target.metadata.remote_source = None
    repository.update(target)
    response = {"detached": True, "entry_id": target.id}
    return response
api/routes/retrieve.py ADDED
@@ -0,0 +1,88 @@
1
+ """Progressive (staged) retrieval API.
2
+
3
+ Endpoints surface the hierarchical-memory layers so external agents can pull
4
+ only the level of detail needed for the current execution stage.
5
+
6
+ See :mod:`core.retrieval.progressive` for the underlying logic.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from typing import Optional
12
+
13
+ from fastapi import APIRouter, Depends, HTTPException, Query
14
+ from sqlalchemy.orm import Session
15
+
16
+ from core.app_state import graph as _graph
17
+ from core.retrieval.progressive import ProgressiveRetriever
18
+ from core.schemas.entry import Entry, implied_level
19
+ from core.storage.database import get_db
20
+
21
+ router = APIRouter()
22
+
23
+
24
def _retriever(db: Session = Depends(get_db)) -> ProgressiveRetriever:
    """FastAPI dependency: build a retriever over the request session and the shared graph."""
    retriever = ProgressiveRetriever(db, _graph)
    return retriever
26
+
27
+
28
def _annotate(entry: Entry) -> dict:
    """Dump *entry* in JSON mode and add its effective level under ``"_level"``.

    The level comes from ``implied_level(entry_type, skill_level)``; its
    ``.value`` is stored, or ``None`` when no level is implied.
    """
    payload = entry.model_dump(mode="json")
    effective = implied_level(entry.entry_type, entry.metadata.skill_level)
    if effective:
        payload["_level"] = effective.value
    else:
        payload["_level"] = None
    return payload
33
+
34
+
35
@router.get("/plan", response_model=list[dict])
def plan(
    goal: str = Query(..., description="Free-text description of what you want to do"),
    k: int = Query(5, ge=1, le=50),
    mode: str = Query("hybrid", pattern="^(hybrid|semantic|keyword)$"),
    include_l2: bool = Query(True, description="Include L2 procedures alongside L1 capabilities"),
    retriever: ProgressiveRetriever = Depends(_retriever),
):
    """Return planner-level candidates (L1 capabilities, optionally L2 procedures).

    Heuristics (L3) and constraints (L4) are not part of this response; they
    are served on demand by ``/retrieve/heuristics`` and
    ``/retrieve/constraints``.
    """
    candidates = retriever.plan(goal=goal, k=k, mode=mode, include_l2=include_l2)
    annotated = []
    for candidate in candidates:
        annotated.append(_annotate(candidate))
    return annotated
49
+
50
+
51
@router.get("/heuristics", response_model=list[dict])
def heuristics(
    skill: str = Query(..., description="Entry id, slug, or alias of the L1/L2 skill"),
    k: int = Query(5, ge=1, le=50),
    fallback: bool = Query(True, description="Include semantic-search L3 fallback if no edges exist"),
    retriever: ProgressiveRetriever = Depends(_retriever),
):
    """Return the L3 heuristics attached to *skill*, each annotated with its level."""
    matches = retriever.heuristics_for(skill, k=k, include_semantic_fallback=fallback)
    annotated = []
    for match in matches:
        annotated.append(_annotate(match))
    return annotated
60
+
61
+
62
@router.get("/constraints", response_model=list[dict])
def constraints(
    skill: str = Query(..., description="Entry id, slug, or alias of the L1/L2 skill"),
    k: int = Query(5, ge=1, le=50),
    fallback: bool = Query(True, description="Include semantic-search L4 fallback if no edges exist"),
    retriever: ProgressiveRetriever = Depends(_retriever),
):
    """Return the L4 constraints / failure modes attached to *skill*, each annotated with its level."""
    matches = retriever.constraints_for(skill, k=k, include_semantic_fallback=fallback)
    annotated = []
    for match in matches:
        annotated.append(_annotate(match))
    return annotated
71
+
72
+
73
@router.get("/expand/{skill}", response_model=dict)
def expand(
    skill: str,
    stages: Optional[str] = Query(
        None,
        description="Comma-separated subset of heuristics,constraints,decomposition (default: heuristics,constraints)",
    ),
    k: int = Query(5, ge=1, le=50),
    retriever: ProgressiveRetriever = Depends(_retriever),
):
    """Bundle additional context for an already-selected skill (verifier loop).

    ``stages`` is split on commas and each token is stripped. Empty tokens
    (from ``"a,,b"``, a trailing comma, or a whitespace-only value) are
    dropped, and if nothing remains the retriever is called with
    ``stages=None`` so its default applies.

    Raises:
        HTTPException: 404 when the retriever's result contains an ``"error"`` key.
    """
    stage_list = None
    if stages:
        tokens = [token.strip() for token in stages.split(",")]
        # Keep only real stage names; fall back to the default when none are left.
        stage_list = [token for token in tokens if token] or None

    result = retriever.expand(skill=skill, stages=stage_list, k=k)
    if "error" in result:
        raise HTTPException(status_code=404, detail=result["error"])
    return result
core/__init__.py ADDED
File without changes
core/app_state.py ADDED
@@ -0,0 +1,9 @@
1
+ """Application-level shared state.
2
+
3
+ Import `graph` from here wherever a single in-process graph instance is needed.
4
+ The API startup handler calls `graph.rebuild_from_db(...)` after init_db().
5
+ """
6
+
7
+ from core.graph.graph import KnowDoGraph
8
+
9
+ graph = KnowDoGraph()
core/events.py ADDED
@@ -0,0 +1,84 @@
1
+ """Lightweight SSE event bus for broadcasting graph mutations to connected clients.
2
+
3
+ Usage (from sync FastAPI route handlers):
4
+ from core import events
5
+ events.emit("node_added", {"id": ..., "title": ...})
6
+
7
+ Usage (SSE endpoint):
8
+ q = events.subscribe()
9
+ try:
10
+ msg = await asyncio.wait_for(q.get(), timeout=25)
11
+ if msg is events.SHUTDOWN_SENTINEL:
12
+ return # server shutting down
13
+ finally:
14
+ events.unsubscribe(q)
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ import asyncio
20
+ import json
21
+ from typing import Any
22
+
23
# Sentinel put into every subscriber queue on server shutdown
SHUTDOWN_SENTINEL: object = object()

# Active subscriber queues (one per SSE connection)
_subscribers: set[asyncio.Queue] = set()

# The asyncio loop FastAPI runs on. Captured during app startup so that
# `emit()` works when invoked from worker threads (sync route handlers and
# background threads have no `get_running_loop()`).
_loop: asyncio.AbstractEventLoop | None = None


def set_loop(loop: asyncio.AbstractEventLoop) -> None:
    """Remember the main event loop so `emit()` can dispatch from any thread."""
    global _loop
    _loop = loop


def _put_evicting_oldest(q: asyncio.Queue, item: Any) -> None:
    """Enqueue *item*; if the queue is full, discard its oldest element to make room."""
    try:
        q.put_nowait(item)
        return
    except asyncio.QueueFull:
        pass
    try:
        q.get_nowait()  # drop the oldest pending event
        q.put_nowait(item)
    except (asyncio.QueueEmpty, asyncio.QueueFull):
        pass  # queue changed underneath us; nothing more we can do without blocking


def signal_shutdown() -> None:
    """Push the shutdown sentinel into every subscriber queue so generators exit cleanly.

    A subscriber whose queue is already full still receives the sentinel: its
    oldest pending event is dropped to make room, because missing the
    shutdown signal is worse than missing one stale event.
    """
    for q in list(_subscribers):
        _put_evicting_oldest(q, SHUTDOWN_SENTINEL)


def subscribe() -> asyncio.Queue:
    """Register a new SSE subscriber and return its queue (bounded at 200 items)."""
    q: asyncio.Queue = asyncio.Queue(maxsize=200)
    _subscribers.add(q)
    return q


def unsubscribe(q: asyncio.Queue) -> None:
    """Remove a subscriber queue (called when the SSE connection closes)."""
    _subscribers.discard(q)


def emit(event_type: str, data: Any = None) -> None:
    """Broadcast a graph-change event to all connected SSE clients.

    The event is serialised once as ``{"type": event_type, "data": data or {}}``
    and handed to every subscriber queue via ``call_soon_threadsafe`` on the
    loop captured by :func:`set_loop`, falling back to the currently running
    loop. If no loop is available, or the loop has already been closed, the
    event is silently dropped.
    """
    msg = json.dumps({"type": event_type, "data": data or {}})
    loop = _loop
    if loop is None:
        try:
            loop = asyncio.get_running_loop()
        except RuntimeError:
            return  # No event loop available — nothing to broadcast
    for q in list(_subscribers):
        try:
            loop.call_soon_threadsafe(_try_put, q, msg)
        except RuntimeError:
            return  # Loop already closed (late emit during shutdown) — nothing to deliver to


def _try_put(q: asyncio.Queue, msg: str) -> None:
    """Enqueue *msg* without blocking, dropping it when the queue is full."""
    try:
        q.put_nowait(msg)
    except asyncio.QueueFull:
        pass  # Slow consumer — drop the event rather than blocking
File without changes
@@ -0,0 +1,48 @@
1
+ from __future__ import annotations
2
+
3
+ import re
4
+
5
# [[target]] or [[target|label]] — group 1 is the target.
_WIKILINK_RE = re.compile(r"\[\[([^\]|]+?)(?:\|[^\]]+)?\]\]")
# [text](http(s)://url) — group 1 is the URL.
_MDLINK_RE = re.compile(r"\[(?:[^\]]+)\]\((https?://[^\)]+)\)")


def parse_wikilinks(content: str) -> list[str]:
    """Collect the target of every ``[[wikilink]]`` in *content*, in order of appearance.

    For the piped form ``[[target|label]]`` only ``target`` is returned.
    """
    return [found.group(1) for found in _WIKILINK_RE.finditer(content)]


def extract_external_refs(content: str) -> list[str]:
    """Collect the http(s) URL of every markdown ``[text](url)`` link in *content*."""
    return [found.group(1) for found in _MDLINK_RE.finditer(content)]
17
+
18
+
19
# Symbols spelled out as words before slugging; each replacement is padded
# with spaces so it becomes its own hyphen-separated token.
_CHAR_SUBS: dict[str, str] = {
    "Å": "angstrom",
    "å": "angstrom",
    "µ": "micro",
    "μ": "micro",
    "°": "deg",
    "±": "plus-minus",
    "×": "x",
    "·": "-",
}


def slug_from_title(title: str) -> str:
    """Turn *title* into a lowercase, hyphen-separated ASCII slug.

    Steps: spell out the symbols in ``_CHAR_SUBS``; NFKD-normalise; keep ASCII
    characters, drop combining marks, and replace any other character by the
    last word of its Unicode name (nothing if it has no name); lowercase and
    trim; strip characters that are not word characters, whitespace or ``-``;
    turn runs of whitespace/underscores into ``-``; collapse repeated ``-``
    and trim them from both ends.
    """
    import unicodedata

    def _to_ascii(ch: str) -> str:
        if ch.isascii():
            return ch
        if unicodedata.combining(ch):
            return ""
        uni_name = unicodedata.name(ch, "").lower()
        if not uni_name:
            return ""
        return uni_name.split()[-1]

    for symbol, word in _CHAR_SUBS.items():
        title = title.replace(symbol, f" {word} ")

    decomposed = unicodedata.normalize("NFKD", title)
    slug = "".join(map(_to_ascii, decomposed)).lower().strip()

    cleanup_passes = (
        (r"[^\w\s-]", ""),
        (r"[\s_]+", "-"),
        (r"-+", "-"),
    )
    for pattern, replacement in cleanup_passes:
        slug = re.sub(pattern, replacement, slug)
    return slug.strip("-")
core/graph/__init__.py ADDED
File without changes
core/graph/graph.py ADDED
@@ -0,0 +1,204 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ from typing import Optional
5
+
6
+ import networkx as nx
7
+
8
+ from core.schemas.edge import Edge, EdgeRelation
9
+ from core.schemas.entry import Entry
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+
14
class KnowDoGraph:
    """In-memory directed graph backed by networkx.

    Entries are nodes; edges represent semantic relations between them.
    This is rebuilt from the database on startup and kept in sync during
    the process lifetime.
    """

    def __init__(self) -> None:
        self._g: nx.DiGraph = nx.DiGraph()

    # ------------------------------------------------------------------
    # Nodes
    # ------------------------------------------------------------------

    def add_entry(self, entry: Entry) -> None:
        """Add the node for *entry*, or refresh its attributes if it already exists."""
        md = entry.metadata
        stamp = getattr(md, "timestamp", None)
        timestamp = stamp.isoformat() if stamp else None
        # verification_status may be an enum or already a plain value.
        verification = getattr(md.verification_status, "value", md.verification_status)
        # Effective hierarchical-memory level (explicit override > entry_type default).
        from core.schemas.entry import implied_level

        level_obj = implied_level(entry.entry_type, md.skill_level)
        level_value = level_obj.value if level_obj else None
        self._g.add_node(
            entry.id,
            title=entry.title,
            slug=entry.slug,
            entry_type=entry.entry_type.value,
            tags=entry.tags,
            timestamp=timestamp,
            usage_count=md.usage_count,
            trust_score=md.trust_score,
            verification_status=verification,
            skill_level=level_value,
        )

    def remove_entry(self, entry_id: str) -> None:
        """Remove the node (and its incident edges) if it is present; otherwise do nothing."""
        if self._g.has_node(entry_id):
            self._g.remove_node(entry_id)

    # ------------------------------------------------------------------
    # Edges
    # ------------------------------------------------------------------

    def add_edge(self, edge: Edge) -> bool:
        """Add an edge to the in-memory graph.

        Returns ``False`` (and logs a warning) if either endpoint is unknown,
        instead of silently letting networkx auto-create a typeless ghost node.
        """
        relation = edge.relation.value if hasattr(edge.relation, "value") else edge.relation
        if not self._g.has_node(edge.source_id) or not self._g.has_node(edge.target_id):
            logger.warning(
                "skipping edge %s → %s (%s): endpoint missing from graph",
                edge.source_id,
                edge.target_id,
                relation,
            )
            return False
        self._g.add_edge(
            edge.source_id,
            edge.target_id,
            id=edge.id,
            relation=relation,
            weight=edge.weight,
        )
        return True

    def remove_edge(self, source_id: str, target_id: str) -> None:
        """Remove the edge ``source_id -> target_id`` if it is present; otherwise do nothing."""
        if self._g.has_edge(source_id, target_id):
            self._g.remove_edge(source_id, target_id)

    # ------------------------------------------------------------------
    # Queries
    # ------------------------------------------------------------------

    def get_neighbors(
        self,
        entry_id: str,
        relation: Optional[EdgeRelation] = None,
        direction: str = "both",
    ) -> list[dict]:
        """Return neighboring node IDs with edge metadata.

        direction: "out" (successors), "in" (predecessors), "both"

        Each item has ``"id"`` (the neighbour *node* id), ``"direction"``,
        ``"edge_id"`` (the edge's own id) and the remaining edge attributes
        (``relation``, ``weight``). Edge data stores its id under ``"id"`` too,
        so it is renamed here; otherwise it would overwrite the neighbour id.
        """
        if entry_id not in self._g:
            return []

        rel_val = None
        if relation is not None:
            rel_val = relation.value if hasattr(relation, "value") else relation

        def _matches(data: dict) -> bool:
            return relation is None or data.get("relation") == rel_val

        def _describe(nbr: str, way: str, data: dict) -> dict:
            extra = {k: v for k, v in data.items() if k not in ("id", "direction")}
            return {"id": nbr, "direction": way, "edge_id": data.get("id"), **extra}

        neighbors: list[dict] = []

        if direction in ("out", "both"):
            for nbr in self._g.successors(entry_id):
                data = dict(self._g.edges[entry_id, nbr])
                if _matches(data):
                    neighbors.append(_describe(nbr, "out", data))

        if direction in ("in", "both"):
            for nbr in self._g.predecessors(entry_id):
                data = dict(self._g.edges[nbr, entry_id])
                if _matches(data):
                    neighbors.append(_describe(nbr, "in", data))

        return neighbors

    def get_related_ids(
        self,
        entry_id: str,
        depth: int = 1,
        relation: Optional[EdgeRelation] = None,
    ) -> list[str]:
        """BFS from *entry_id* up to *depth* hops, optionally filtered by relation type.

        Edges are followed in both directions. Returns IDs of all reachable
        nodes (excluding the start node), in no particular order.
        """
        visited: set[str] = {entry_id}
        frontier: set[str] = {entry_id}
        for _ in range(depth):
            next_frontier: set[str] = set()
            for node in frontier:
                for nbr_info in self.get_neighbors(node, relation=relation):
                    nbr_id = nbr_info["id"]
                    if nbr_id not in visited:
                        next_frontier.add(nbr_id)
            if not next_frontier:
                break  # nothing new to expand; remaining hops would be no-ops
            frontier = next_frontier
            visited.update(frontier)
        visited.discard(entry_id)
        return list(visited)

    def has_node(self, entry_id: str) -> bool:
        """Return whether *entry_id* is a node of the graph."""
        return self._g.has_node(entry_id)

    def get_subgraph(self, entry_id: str, depth: int = 2) -> nx.DiGraph:
        """Return an ego-subgraph centred on entry_id up to *depth* hops.

        Hops ignore edge direction. The result is an independent copy; an
        unknown *entry_id* yields an empty graph.
        """
        if entry_id not in self._g:
            return nx.DiGraph()
        nodes = {entry_id}
        frontier = {entry_id}
        for _ in range(depth):
            next_frontier: set[str] = set()
            for node in frontier:
                next_frontier.update(self._g.successors(node))
                next_frontier.update(self._g.predecessors(node))
            frontier = next_frontier - nodes
            nodes.update(frontier)
        return self._g.subgraph(nodes).copy()

    def find_paths(
        self, source_id: str, target_id: str, cutoff: int = 6
    ) -> list[list[str]]:
        """Return all simple directed paths from source to target of at most *cutoff* edges.

        An unknown endpoint (or a no-path error from networkx) yields ``[]``.
        """
        try:
            return list(
                nx.all_simple_paths(self._g, source_id, target_id, cutoff=cutoff)
            )
        except (nx.NodeNotFound, nx.NetworkXNoPath):
            return []

    def stats(self) -> dict:
        """Return node count, edge count, and whether the graph is acyclic."""
        return {
            "nodes": self._g.number_of_nodes(),
            "edges": self._g.number_of_edges(),
            "is_dag": nx.is_directed_acyclic_graph(self._g),
        }

    def rebuild_from_db(self, entries: list[Entry], edges: list[Edge]) -> None:
        """Clear and rebuild the graph from persisted entries and edges.

        Edges whose endpoints are not present in *entries* are skipped (with a
        warning). They survive in the database — the maintenance agent's
        ``remove_dangling_edges`` is responsible for pruning them — but they
        are never allowed to materialise ghost nodes in the in-memory graph.
        """
        self._g.clear()
        for entry in entries:
            self.add_entry(entry)
        # add_edge reports False for every edge it refuses.
        skipped = sum(1 for edge in edges if not self.add_edge(edge))
        if skipped:
            logger.warning("rebuild_from_db: skipped %d dangling edge(s)", skipped)
File without changes