know-do-graph 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agents/__init__.py +0 -0
- agents/extraction_agent/__init__.py +0 -0
- agents/extraction_agent/agent.py +170 -0
- agents/graph_agent/__init__.py +5 -0
- agents/graph_agent/agent.py +373 -0
- agents/graph_agent/tools.py +2106 -0
- agents/maintenance_agent/__init__.py +0 -0
- agents/maintenance_agent/agent.py +283 -0
- agents/orchestrator/__init__.py +0 -0
- agents/orchestrator/agent.py +217 -0
- agents/review_agent/__init__.py +0 -0
- agents/review_agent/agent.py +188 -0
- agents/review_agent/tools.py +472 -0
- api/__init__.py +0 -0
- api/main.py +136 -0
- api/routes/__init__.py +0 -0
- api/routes/agent.py +81 -0
- api/routes/entries.py +411 -0
- api/routes/graph.py +132 -0
- api/routes/mem.py +179 -0
- api/routes/remote.py +815 -0
- api/routes/remote_sync.py +230 -0
- api/routes/retrieve.py +88 -0
- core/__init__.py +0 -0
- core/app_state.py +9 -0
- core/events.py +84 -0
- core/extraction/__init__.py +0 -0
- core/extraction/wikilink_parser.py +48 -0
- core/graph/__init__.py +0 -0
- core/graph/graph.py +204 -0
- core/memory/__init__.py +0 -0
- core/memory/memgraph.py +458 -0
- core/resources/starter.db +0 -0
- core/retrieval/__init__.py +0 -0
- core/retrieval/embedder.py +122 -0
- core/retrieval/fusion.py +52 -0
- core/retrieval/progressive.py +399 -0
- core/retrieval/retrieval.py +346 -0
- core/retrieval/vector_store.py +91 -0
- core/schemas/__init__.py +0 -0
- core/schemas/edge.py +46 -0
- core/schemas/entry.py +388 -0
- core/storage/__init__.py +0 -0
- core/storage/database.py +104 -0
- core/storage/models.py +66 -0
- core/storage/repository.py +243 -0
- core/sync/__init__.py +20 -0
- core/sync/autolink.py +301 -0
- core/sync/db_merge.py +297 -0
- core/sync/db_watcher.py +84 -0
- core/sync/remote_sync.py +345 -0
- examples/__init__.py +0 -0
- examples/example_entries.py +206 -0
- examples/pymatgen_interface_examples.py +811 -0
- frontend/dist/assets/index-BLfo7ZZu.css +1 -0
- frontend/dist/assets/index-G-mYbZ9R.js +83 -0
- frontend/dist/assets/index-G-mYbZ9R.js.map +1 -0
- frontend/dist/index.html +92 -0
- know_do_graph-0.1.0.dist-info/METADATA +765 -0
- know_do_graph-0.1.0.dist-info/RECORD +63 -0
- know_do_graph-0.1.0.dist-info/WHEEL +4 -0
- know_do_graph-0.1.0.dist-info/entry_points.txt +2 -0
- main.py +944 -0
|
@@ -0,0 +1,230 @@
|
|
|
1
|
+
"""Remote-source sync routes.
|
|
2
|
+
|
|
3
|
+
These endpoints expose the :mod:`core.sync.remote_sync` machinery so that
|
|
4
|
+
operators (and the UI) can:
|
|
5
|
+
|
|
6
|
+
* list all entries that mirror an upstream file,
|
|
7
|
+
* trigger a one-shot resync (one entry or all due),
|
|
8
|
+
* attach / detach a remote source on an existing entry.
|
|
9
|
+
|
|
10
|
+
Mounted at ``/remote-sync`` from :mod:`api.main`.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
from datetime import datetime
|
|
16
|
+
from typing import Optional
|
|
17
|
+
|
|
18
|
+
from fastapi import APIRouter, Depends, HTTPException
|
|
19
|
+
from pydantic import BaseModel
|
|
20
|
+
from sqlalchemy.orm import Session
|
|
21
|
+
|
|
22
|
+
from core import events as _events
|
|
23
|
+
from core.app_state import graph as _graph
|
|
24
|
+
from core.schemas.entry import RemoteSource
|
|
25
|
+
from core.storage.database import get_db
|
|
26
|
+
from core.storage.repository import EntryRepository
|
|
27
|
+
from core.sync.remote_sync import (
|
|
28
|
+
SyncResult,
|
|
29
|
+
parse_github_url,
|
|
30
|
+
sync_all_due,
|
|
31
|
+
sync_entry,
|
|
32
|
+
)
|
|
33
|
+
|
|
34
|
+
# Router for the remote-sync endpoints defined below; per the module docstring
# it is mounted at ``/remote-sync`` from api.main.
router = APIRouter()
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
# ── Helpers ───────────────────────────────────────────────────────────────────
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _result_to_dict(r: SyncResult) -> dict:
|
|
41
|
+
return {
|
|
42
|
+
"entry_id": r.entry_id,
|
|
43
|
+
"title": r.title,
|
|
44
|
+
"status": r.status,
|
|
45
|
+
"detail": r.detail,
|
|
46
|
+
"bytes_fetched": r.bytes_fetched,
|
|
47
|
+
"new_hash": r.new_hash,
|
|
48
|
+
"fetched_at": r.fetched_at.isoformat() if r.fetched_at else None,
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _source_to_dict(src: RemoteSource) -> dict:
|
|
53
|
+
d = src.model_dump(mode="json")
|
|
54
|
+
if isinstance(d.get("fetched_at"), datetime):
|
|
55
|
+
d["fetched_at"] = d["fetched_at"].isoformat()
|
|
56
|
+
return d
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def _resolve_entry(db: Session, id_or_slug: str):
    """Find an entry by id, slug, or alias and return ``(repo, entry)``.

    The lookup is a linear scan over ``EntryRepository.get_all()``, which is
    acceptable while the node set is small; if this becomes hot, add an
    explicit get_by_slug method on the repo.

    Raises ``HTTPException`` (404) when no entry matches *id_or_slug*.
    """
    repo = EntryRepository(db)
    match = next(
        (
            candidate
            for candidate in repo.get_all()
            if candidate.id == id_or_slug
            or candidate.slug == id_or_slug
            or id_or_slug in candidate.aliases
        ),
        None,
    )
    if match is None:
        raise HTTPException(status_code=404, detail=f"entry not found: {id_or_slug}")
    return repo, match
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
# ── Request models ────────────────────────────────────────────────────────────
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
class AttachSourceRequest(BaseModel):
    # Request body for ``PUT /{id_or_slug}/source`` (see attach_source).

    # Upstream location. attach_source passes it to parse_github_url; when that
    # yields nothing the source is stored with kind "http".
    url: str
    # Optional explicit overrides; when given they take precedence over the
    # values parsed from ``url``. ``ref`` falls back to "main" when unset.
    ref: Optional[str] = None
    path: Optional[str] = None
    owner: Optional[str] = None
    repo: Optional[str] = None
    # Forwarded unchanged to RemoteSource.
    auto_sync: bool = True
    sync_interval_seconds: int = 3600
    # When true, attach_source runs a forced sync right after saving the link.
    sync_now: bool = True
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
# ── Routes ────────────────────────────────────────────────────────────────────
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
@router.get("/")
def list_linked_entries(db: Session = Depends(get_db)) -> list[dict]:
    """List every entry that mirrors an upstream source."""
    # Entries whose metadata carries no remote_source are left out entirely.
    return [
        {
            "entry_id": entry.id,
            "slug": entry.slug,
            "title": entry.title,
            "remote_source": _source_to_dict(entry.metadata.remote_source),
        }
        for entry in EntryRepository(db).get_all()
        if entry.metadata.remote_source is not None
    ]
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
@router.post("/all")
async def sync_all_endpoint(force: bool = False) -> dict:
    """Sync every entry whose remote source is due (or all when ``force=true``)."""
    outcomes = await sync_all_due(force=force)

    def _tally(status: str) -> int:
        # Number of results that ended with the given status string.
        return sum(1 for outcome in outcomes if outcome.status == status)

    return {
        "checked": len(outcomes),
        "updated": _tally("updated"),
        "unchanged": _tally("unchanged"),
        "errors": _tally("error"),
        "results": [_result_to_dict(outcome) for outcome in outcomes],
    }
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
@router.post("/{id_or_slug}")
async def sync_one_endpoint(
    id_or_slug: str,
    force: bool = True,
    db: Session = Depends(get_db),
) -> dict:
    """Sync a single entry now."""
    # Function-scope import keeps this block self-contained; the module does
    # not import logging at file level.
    import logging

    log = logging.getLogger(__name__)

    repo, entry = _resolve_entry(db, id_or_slug)
    if entry.metadata.remote_source is None:
        raise HTTPException(status_code=400, detail="entry has no remote_source")

    result = await sync_entry(entry, force=force)
    # Persist the entry whatever the sync status turned out to be.
    updated = repo.update(entry)

    autolink_summary: dict | None = None
    if result.status == "updated" and updated is not None:
        # Both follow-up steps are best-effort: a failure must not fail the
        # request, but it is logged instead of being silently discarded.
        try:
            _graph.add_entry(updated)
        except Exception:
            log.exception("remote sync: failed to refresh graph node for entry %s", entry.id)
        # Refresh derived edges from the new content.
        try:
            from core.storage.repository import EdgeRepository
            from core.sync.autolink import auto_link_entry

            al = auto_link_entry(updated, repo.get_all(), EdgeRepository(db))
            autolink_summary = {
                "frontmatter_edges": al.frontmatter_edges,
                "mention_edges": al.mention_edges,
            }
        except Exception:  # pragma: no cover
            log.exception("remote sync: auto-link failed for entry %s", entry.id)
        _events.emit(
            "node_updated",
            {"id": entry.id, "slug": entry.slug, "title": entry.title, "source": "remote_sync"},
        )

    return {
        "result": _result_to_dict(result),
        "remote_source": _source_to_dict(entry.metadata.remote_source),
        "autolink": autolink_summary,
    }
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
@router.put("/{id_or_slug}/source")
async def attach_source(
    id_or_slug: str,
    body: AttachSourceRequest,
    db: Session = Depends(get_db),
) -> dict:
    """Attach or replace the remote source on an entry."""
    # Function-scope import keeps this block self-contained; the module does
    # not import logging at file level.
    import logging

    log = logging.getLogger(__name__)

    repo, entry = _resolve_entry(db, id_or_slug)

    # Explicit request fields win; anything left unset is filled from the URL.
    owner = body.owner
    repo_name = body.repo
    path = body.path
    ref = body.ref
    kind = "github"

    parsed = parse_github_url(body.url)
    if parsed:
        owner = owner or parsed.get("owner")
        repo_name = repo_name or parsed.get("repo")
        path = path or parsed.get("path")
        ref = ref or parsed.get("ref") or "main"
    else:
        # Not recognised as a GitHub URL: store it as a plain HTTP source.
        kind = "http"

    if kind == "github" and not (owner and repo_name and path):
        raise HTTPException(
            status_code=400,
            detail="GitHub source needs owner/repo/path (parsed from URL or supplied explicitly)",
        )

    src = RemoteSource(
        kind=kind,
        url=body.url,
        owner=owner,
        repo=repo_name,
        ref=ref or "main",
        path=path,
        auto_sync=body.auto_sync,
        sync_interval_seconds=body.sync_interval_seconds,
    )
    entry.metadata.remote_source = src
    # Save the link before syncing so it is stored even if the sync fails.
    repo.update(entry)

    result_dict = None
    if body.sync_now:
        result = await sync_entry(entry, force=True)
        updated = repo.update(entry)
        if result.status == "updated" and updated is not None:
            # Best-effort graph refresh: log a failure rather than hiding it.
            try:
                _graph.add_entry(updated)
            except Exception:
                log.exception("remote sync: failed to refresh graph node for entry %s", entry.id)
            _events.emit(
                "node_updated",
                {"id": entry.id, "slug": entry.slug, "title": entry.title, "source": "remote_sync"},
            )
        result_dict = _result_to_dict(result)

    return {
        "remote_source": _source_to_dict(entry.metadata.remote_source),
        "result": result_dict,
    }
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
@router.delete("/{id_or_slug}/source")
def detach_source(id_or_slug: str, db: Session = Depends(get_db)) -> dict:
    """Remove the remote source link from an entry (content is preserved)."""
    repo, entry = _resolve_entry(db, id_or_slug)
    # Only the link metadata is cleared before the entry is saved again.
    entry.metadata.remote_source = None
    repo.update(entry)
    return dict(detached=True, entry_id=entry.id)
|
api/routes/retrieve.py
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
"""Progressive (staged) retrieval API.
|
|
2
|
+
|
|
3
|
+
Endpoints surface the hierarchical-memory layers so external agents can pull
|
|
4
|
+
only the level of detail needed for the current execution stage.
|
|
5
|
+
|
|
6
|
+
See :mod:`core.retrieval.progressive` for the underlying logic.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from typing import Optional
|
|
12
|
+
|
|
13
|
+
from fastapi import APIRouter, Depends, HTTPException, Query
|
|
14
|
+
from sqlalchemy.orm import Session
|
|
15
|
+
|
|
16
|
+
from core.app_state import graph as _graph
|
|
17
|
+
from core.retrieval.progressive import ProgressiveRetriever
|
|
18
|
+
from core.schemas.entry import Entry, implied_level
|
|
19
|
+
from core.storage.database import get_db
|
|
20
|
+
|
|
21
|
+
# Router collecting the progressive-retrieval endpoints defined below.
router = APIRouter()
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _retriever(db: Session = Depends(get_db)) -> ProgressiveRetriever:
    """Dependency that builds a retriever from the request's DB session and the shared graph."""
    retriever = ProgressiveRetriever(db, _graph)
    return retriever
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _annotate(entry: Entry) -> dict:
    """Dump *entry* to a JSON-mode dict and add its effective level as ``_level``.

    ``_level`` holds ``implied_level(...).value``, or ``None`` when no level
    is implied for the entry.
    """
    dumped = entry.model_dump(mode="json")
    level = implied_level(entry.entry_type, entry.metadata.skill_level)
    return {**dumped, "_level": level.value if level else None}
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@router.get("/plan", response_model=list[dict])
def plan(
    goal: str = Query(..., description="Free-text description of what you want to do"),
    k: int = Query(5, ge=1, le=50),
    mode: str = Query("hybrid", pattern="^(hybrid|semantic|keyword)$"),
    include_l2: bool = Query(True, description="Include L2 procedures alongside L1 capabilities"),
    retriever: ProgressiveRetriever = Depends(_retriever),
):
    """Return planner-level candidates (L1 capabilities, optionally L2 procedures).

    Heuristics (L3) and constraints (L4) are intentionally excluded — fetch
    them on demand via ``/retrieve/heuristics`` and ``/retrieve/constraints``.
    """
    # Delegate the search, then tag each hit with its effective level.
    candidates = retriever.plan(goal=goal, k=k, mode=mode, include_l2=include_l2)
    return list(map(_annotate, candidates))
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
@router.get("/heuristics", response_model=list[dict])
def heuristics(
    skill: str = Query(..., description="Entry id, slug, or alias of the L1/L2 skill"),
    k: int = Query(5, ge=1, le=50),
    fallback: bool = Query(True, description="Include semantic-search L3 fallback if no edges exist"),
    retriever: ProgressiveRetriever = Depends(_retriever),
):
    """Return L3 heuristics attached to a skill."""
    # ``fallback`` is forwarded as the retriever's include_semantic_fallback flag.
    hits = retriever.heuristics_for(skill, k=k, include_semantic_fallback=fallback)
    return list(map(_annotate, hits))
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
@router.get("/constraints", response_model=list[dict])
def constraints(
    skill: str = Query(..., description="Entry id, slug, or alias of the L1/L2 skill"),
    k: int = Query(5, ge=1, le=50),
    fallback: bool = Query(True, description="Include semantic-search L4 fallback if no edges exist"),
    retriever: ProgressiveRetriever = Depends(_retriever),
):
    """Return L4 constraints / failure modes attached to a skill."""
    # ``fallback`` is forwarded as the retriever's include_semantic_fallback flag.
    hits = retriever.constraints_for(skill, k=k, include_semantic_fallback=fallback)
    return list(map(_annotate, hits))
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
@router.get("/expand/{skill}", response_model=dict)
def expand(
    skill: str,
    stages: Optional[str] = Query(
        None,
        description="Comma-separated subset of heuristics,constraints,decomposition (default: heuristics,constraints)",
    ),
    k: int = Query(5, ge=1, le=50),
    retriever: ProgressiveRetriever = Depends(_retriever),
):
    """Bundle additional context for an already-selected skill (verifier loop)."""
    # A missing or empty ``stages`` value is passed on as None.
    stage_list = None
    if stages:
        stage_list = [part.strip() for part in stages.split(",")]

    result = retriever.expand(skill=skill, stages=stage_list, k=k)
    # The retriever reports failure through an "error" key in its result.
    if "error" not in result:
        return result
    raise HTTPException(status_code=404, detail=result["error"])
|
core/__init__.py
ADDED
|
File without changes
|
core/app_state.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
"""Application-level shared state.
|
|
2
|
+
|
|
3
|
+
Import `graph` from here wherever a single in-process graph instance is needed.
|
|
4
|
+
The API startup handler calls `graph.rebuild_from_db(...)` after init_db().
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from core.graph.graph import KnowDoGraph
|
|
8
|
+
|
|
9
|
+
# Shared in-process graph instance. Per the module docstring, the API startup
# handler populates it via ``graph.rebuild_from_db(...)`` after ``init_db()``.
graph = KnowDoGraph()
|
core/events.py
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
"""Lightweight SSE event bus for broadcasting graph mutations to connected clients.
|
|
2
|
+
|
|
3
|
+
Usage (from sync FastAPI route handlers):
|
|
4
|
+
from core import events
|
|
5
|
+
events.emit("node_added", {"id": ..., "title": ...})
|
|
6
|
+
|
|
7
|
+
Usage (SSE endpoint):
|
|
8
|
+
q = events.subscribe()
|
|
9
|
+
try:
|
|
10
|
+
msg = await asyncio.wait_for(q.get(), timeout=25)
|
|
11
|
+
if msg is events.SHUTDOWN_SENTINEL:
|
|
12
|
+
return # server shutting down
|
|
13
|
+
finally:
|
|
14
|
+
events.unsubscribe(q)
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
import asyncio
|
|
20
|
+
import json
|
|
21
|
+
from typing import Any
|
|
22
|
+
|
|
23
|
+
# Sentinel put into every subscriber queue on server shutdown
SHUTDOWN_SENTINEL: object = object()

# Active subscriber queues (one per SSE connection)
_subscribers: set[asyncio.Queue] = set()

# The asyncio loop FastAPI runs on. Captured during app startup so that
# `emit()` works when invoked from worker threads (sync route handlers and
# background threads have no `get_running_loop()`).
_loop: asyncio.AbstractEventLoop | None = None


def set_loop(loop: asyncio.AbstractEventLoop) -> None:
    """Remember the main event loop so `emit()` can dispatch from any thread."""
    global _loop
    _loop = loop


def signal_shutdown() -> None:
    """Push the shutdown sentinel into every subscriber queue so generators exit cleanly.

    If a subscriber's queue is already full, its oldest pending message is
    discarded to make room, so the sentinel is still delivered.
    """
    for q in list(_subscribers):
        try:
            q.put_nowait(SHUTDOWN_SENTINEL)
        except asyncio.QueueFull:
            # A slow consumer must still learn about shutdown: trade one
            # stale event for the sentinel rather than dropping the sentinel.
            try:
                q.get_nowait()
                q.put_nowait(SHUTDOWN_SENTINEL)
            except (asyncio.QueueEmpty, asyncio.QueueFull):
                pass


def subscribe() -> asyncio.Queue:
    """Register a new SSE subscriber and return its queue (bounded to 200 messages)."""
    q: asyncio.Queue = asyncio.Queue(maxsize=200)
    _subscribers.add(q)
    return q


def unsubscribe(q: asyncio.Queue) -> None:
    """Remove a subscriber queue (called when the SSE connection closes).

    Unknown queues are ignored.
    """
    _subscribers.discard(q)


def emit(event_type: str, data: Any = None) -> None:
    """Broadcast a graph-change event to all connected SSE clients.

    The message is the JSON text ``{"type": event_type, "data": data or {}}``.

    Safe to call from sync FastAPI route handlers (which run in a thread pool)
    and from background threads — delivery is scheduled on the main asyncio
    event loop captured at startup via :func:`set_loop`. When no loop was
    captured and none is running, or the loop has already been closed, the
    event is dropped silently.
    """
    msg = json.dumps({"type": event_type, "data": data or {}})
    loop = _loop
    if loop is None:
        try:
            loop = asyncio.get_running_loop()
        except RuntimeError:
            return  # No event loop available — nothing to broadcast
    for q in list(_subscribers):
        try:
            loop.call_soon_threadsafe(_try_put, q, msg)
        except RuntimeError:
            # call_soon_threadsafe raises once the loop is closed (e.g. a
            # background thread emitting during shutdown); nobody can receive
            # the event any more, so stop instead of propagating.
            return


def _try_put(q: asyncio.Queue, msg: str) -> None:
    """Enqueue *msg* on *q* without blocking; a full queue drops the message."""
    try:
        q.put_nowait(msg)
    except asyncio.QueueFull:
        pass  # Slow consumer — drop the event rather than blocking
|
|
File without changes
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
|
|
5
|
+
# [[target]] or [[target|label]]; group 1 is the target text before any "|".
_WIKILINK_RE = re.compile(r"\[\[([^\]|]+?)(?:\|[^\]]+)?\]\]")
# [text](http://... or https://...); group 1 is the URL inside the parentheses.
_MDLINK_RE = re.compile(r"\[(?:[^\]]+)\]\((https?://[^\)]+)\)")


def parse_wikilinks(content: str) -> list[str]:
    """Collect the target of every [[wikilink]] in *content*, in order of appearance."""
    return [hit.group(1) for hit in _WIKILINK_RE.finditer(content)]


def extract_external_refs(content: str) -> list[str]:
    """Collect the http(s) URL of every markdown [text](url) link in *content*."""
    return [hit.group(1) for hit in _MDLINK_RE.finditer(content)]
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
# Symbols that are spelled out as words before the title is slugified.
_CHAR_SUBS: dict[str, str] = {
    "Å": "angstrom",
    "å": "angstrom",
    "µ": "micro",
    "μ": "micro",
    "°": "deg",
    "±": "plus-minus",
    "×": "x",
    "·": "-",
}


def slug_from_title(title: str) -> str:
    """Derive a lowercase, hyphen-separated ASCII slug from *title*.

    Symbols listed in ``_CHAR_SUBS`` are replaced by their word, padded with
    spaces. The text is then NFKD-normalised: ASCII characters are kept,
    combining marks are dropped, and any other character becomes the last
    word of its Unicode name (or nothing if it has no name). Finally,
    punctuation is removed and runs of whitespace, underscores and hyphens
    collapse to a single hyphen with none left at either end.
    """
    import unicodedata

    def _ascii_piece(ch: str) -> str:
        # ASCII-only replacement text for one normalised character.
        if ch.isascii():
            return ch
        if unicodedata.combining(ch):
            return ""
        words = unicodedata.name(ch, "").lower().split()
        return words[-1] if words else ""

    spaced = title
    for symbol, word in _CHAR_SUBS.items():
        spaced = spaced.replace(symbol, f" {word} ")

    decomposed = unicodedata.normalize("NFKD", spaced)
    slug = "".join(map(_ascii_piece, decomposed)).lower().strip()

    # Order matters: strip punctuation, turn separators into hyphens, collapse.
    for pattern, replacement in ((r"[^\w\s-]", ""), (r"[\s_]+", "-"), (r"-+", "-")):
        slug = re.sub(pattern, replacement, slug)
    return slug.strip("-")
|
core/graph/__init__.py
ADDED
|
File without changes
|
core/graph/graph.py
ADDED
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
from typing import Optional
|
|
5
|
+
|
|
6
|
+
import networkx as nx
|
|
7
|
+
|
|
8
|
+
from core.schemas.edge import Edge, EdgeRelation
|
|
9
|
+
from core.schemas.entry import Entry
|
|
10
|
+
|
|
11
|
+
# Module-level logger; used below to warn about edges that are skipped.
logger = logging.getLogger(__name__)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class KnowDoGraph:
    """In-memory directed graph backed by networkx.

    Entries are nodes; edges represent semantic relations between them.
    This is rebuilt from the database on startup and kept in sync during
    the process lifetime.
    """

    def __init__(self) -> None:
        self._g: nx.DiGraph = nx.DiGraph()

    # ------------------------------------------------------------------
    # Nodes
    # ------------------------------------------------------------------

    def add_entry(self, entry: Entry) -> None:
        """Add *entry* as a node keyed by ``entry.id`` with its summary attributes.

        Stored attributes: title, slug, entry_type, tags, timestamp (ISO string
        or ``None``), usage_count, trust_score, verification_status and the
        effective skill_level.
        """
        md = entry.metadata
        timestamp = md.timestamp.isoformat() if getattr(md, "timestamp", None) else None
        # Accept either an enum member or an already-plain value.
        verification = (
            md.verification_status.value
            if hasattr(md.verification_status, "value")
            else md.verification_status
        )
        # Effective hierarchical-memory level (explicit override > entry_type default).
        from core.schemas.entry import implied_level

        level_obj = implied_level(entry.entry_type, md.skill_level)
        level_value = level_obj.value if level_obj else None
        self._g.add_node(
            entry.id,
            title=entry.title,
            slug=entry.slug,
            entry_type=entry.entry_type.value,
            tags=entry.tags,
            timestamp=timestamp,
            usage_count=md.usage_count,
            trust_score=md.trust_score,
            verification_status=verification,
            skill_level=level_value,
        )

    def remove_entry(self, entry_id: str) -> None:
        """Remove the node *entry_id* if it exists; unknown ids are ignored."""
        if self._g.has_node(entry_id):
            self._g.remove_node(entry_id)

    # ------------------------------------------------------------------
    # Edges
    # ------------------------------------------------------------------

    def add_edge(self, edge: Edge) -> bool:
        """Add an edge to the in-memory graph.

        Returns ``False`` (and logs a warning) if either endpoint is unknown,
        instead of silently letting networkx auto-create a typeless ghost node.
        Returns ``True`` once the edge has been added.
        """
        relation = edge.relation.value if hasattr(edge.relation, "value") else edge.relation
        if not self._g.has_node(edge.source_id) or not self._g.has_node(edge.target_id):
            logger.warning(
                "skipping edge %s → %s (%s): endpoint missing from graph",
                edge.source_id,
                edge.target_id,
                relation,
            )
            return False
        self._g.add_edge(
            edge.source_id,
            edge.target_id,
            id=edge.id,
            relation=relation,
            weight=edge.weight,
        )
        return True

    def remove_edge(self, source_id: str, target_id: str) -> None:
        """Remove the edge ``source_id -> target_id`` if present; otherwise do nothing."""
        if self._g.has_edge(source_id, target_id):
            self._g.remove_edge(source_id, target_id)

    # ------------------------------------------------------------------
    # Queries
    # ------------------------------------------------------------------

    def get_neighbors(
        self,
        entry_id: str,
        relation: Optional[EdgeRelation] = None,
        direction: str = "both",
    ) -> list[dict]:
        """Return neighboring node IDs with edge metadata.

        direction: "out" (successors), "in" (predecessors), "both"

        Each result dict carries the edge attributes plus ``id`` (the
        neighbouring node's id), ``direction`` ("out" or "in") and ``edge_id``
        (the edge's own id, or ``None`` if the edge has none). When *relation*
        is given, only edges with that relation value are returned. An unknown
        *entry_id* yields an empty list.
        """
        if entry_id not in self._g:
            return []

        neighbors: list[dict] = []

        def _matches(data: dict) -> bool:
            if relation is None:
                return True
            rel_val = relation.value if hasattr(relation, "value") else relation
            return data.get("relation") == rel_val

        def _record(nbr: str, way: str, data: dict) -> dict:
            # Edge attributes contain the edge's own "id" (set by add_edge).
            # Spread them first so the neighbour's node id is what ends up
            # under "id"; the edge id stays reachable under "edge_id".
            return {**data, "id": nbr, "direction": way, "edge_id": data.get("id")}

        if direction in ("out", "both"):
            for nbr in self._g.successors(entry_id):
                data = dict(self._g.edges[entry_id, nbr])
                if _matches(data):
                    neighbors.append(_record(nbr, "out", data))

        if direction in ("in", "both"):
            for nbr in self._g.predecessors(entry_id):
                data = dict(self._g.edges[nbr, entry_id])
                if _matches(data):
                    neighbors.append(_record(nbr, "in", data))

        return neighbors

    def get_related_ids(
        self,
        entry_id: str,
        depth: int = 1,
        relation: Optional[EdgeRelation] = None,
    ) -> list[str]:
        """BFS from *entry_id* up to *depth* hops, optionally filtered by relation type.

        Edges are followed in both directions. Returns IDs of all reachable
        nodes (excluding the start node), in no particular order.
        """
        visited: set[str] = {entry_id}
        frontier: set[str] = {entry_id}
        for _ in range(depth):
            next_frontier: set[str] = set()
            for node in frontier:
                for nbr_info in self.get_neighbors(node, relation=relation):
                    nbr_id = nbr_info["id"]
                    if nbr_id not in visited:
                        next_frontier.add(nbr_id)
            frontier = next_frontier
            visited.update(frontier)
        visited.discard(entry_id)
        return list(visited)

    def has_node(self, entry_id: str) -> bool:
        """Return whether *entry_id* is a node of the graph."""
        return self._g.has_node(entry_id)

    def get_subgraph(self, entry_id: str, depth: int = 2) -> nx.DiGraph:
        """Return an ego-subgraph centred on entry_id up to *depth* hops.

        Hops follow edges in both directions. The result is an independent
        copy; an unknown *entry_id* yields an empty graph.
        """
        if entry_id not in self._g:
            return nx.DiGraph()
        nodes = {entry_id}
        frontier = {entry_id}
        for _ in range(depth):
            next_frontier: set[str] = set()
            for node in frontier:
                next_frontier.update(self._g.successors(node))
                next_frontier.update(self._g.predecessors(node))
            # Only expand nodes that have not been collected yet.
            frontier = next_frontier - nodes
            nodes.update(frontier)
        return self._g.subgraph(nodes).copy()

    def find_paths(
        self, source_id: str, target_id: str, cutoff: int = 6
    ) -> list[list[str]]:
        """Return the simple directed paths from *source_id* to *target_id*.

        Path search is bounded by *cutoff*. Returns an empty list when
        networkx reports a missing node or no path.
        """
        try:
            return list(
                nx.all_simple_paths(self._g, source_id, target_id, cutoff=cutoff)
            )
        except (nx.NodeNotFound, nx.NetworkXNoPath):
            return []

    def stats(self) -> dict:
        """Return node count, edge count and whether the graph is a DAG."""
        return {
            "nodes": self._g.number_of_nodes(),
            "edges": self._g.number_of_edges(),
            "is_dag": nx.is_directed_acyclic_graph(self._g),
        }

    def rebuild_from_db(self, entries: list[Entry], edges: list[Edge]) -> None:
        """Clear and rebuild the graph from persisted entries and edges.

        Edges whose endpoints are not present in *entries* are skipped (with a
        warning). They survive in the database — the maintenance agent's
        ``remove_dangling_edges`` is responsible for pruning them — but they
        are never allowed to materialise ghost nodes in the in-memory graph.
        """
        self._g.clear()
        for entry in entries:
            self.add_entry(entry)
        skipped = 0
        for edge in edges:
            if not self.add_edge(edge):
                skipped += 1
        if skipped:
            logger.warning("rebuild_from_db: skipped %d dangling edge(s)", skipped)
|
core/memory/__init__.py
ADDED
|
File without changes
|