know-do-graph 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agents/__init__.py +0 -0
- agents/extraction_agent/__init__.py +0 -0
- agents/extraction_agent/agent.py +170 -0
- agents/graph_agent/__init__.py +5 -0
- agents/graph_agent/agent.py +373 -0
- agents/graph_agent/tools.py +2106 -0
- agents/maintenance_agent/__init__.py +0 -0
- agents/maintenance_agent/agent.py +283 -0
- agents/orchestrator/__init__.py +0 -0
- agents/orchestrator/agent.py +217 -0
- agents/review_agent/__init__.py +0 -0
- agents/review_agent/agent.py +188 -0
- agents/review_agent/tools.py +472 -0
- api/__init__.py +0 -0
- api/main.py +136 -0
- api/routes/__init__.py +0 -0
- api/routes/agent.py +81 -0
- api/routes/entries.py +411 -0
- api/routes/graph.py +132 -0
- api/routes/mem.py +179 -0
- api/routes/remote.py +815 -0
- api/routes/remote_sync.py +230 -0
- api/routes/retrieve.py +88 -0
- core/__init__.py +0 -0
- core/app_state.py +9 -0
- core/events.py +84 -0
- core/extraction/__init__.py +0 -0
- core/extraction/wikilink_parser.py +48 -0
- core/graph/__init__.py +0 -0
- core/graph/graph.py +204 -0
- core/memory/__init__.py +0 -0
- core/memory/memgraph.py +458 -0
- core/resources/starter.db +0 -0
- core/retrieval/__init__.py +0 -0
- core/retrieval/embedder.py +122 -0
- core/retrieval/fusion.py +52 -0
- core/retrieval/progressive.py +399 -0
- core/retrieval/retrieval.py +346 -0
- core/retrieval/vector_store.py +91 -0
- core/schemas/__init__.py +0 -0
- core/schemas/edge.py +46 -0
- core/schemas/entry.py +388 -0
- core/storage/__init__.py +0 -0
- core/storage/database.py +104 -0
- core/storage/models.py +66 -0
- core/storage/repository.py +243 -0
- core/sync/__init__.py +20 -0
- core/sync/autolink.py +301 -0
- core/sync/db_merge.py +297 -0
- core/sync/db_watcher.py +84 -0
- core/sync/remote_sync.py +345 -0
- examples/__init__.py +0 -0
- examples/example_entries.py +206 -0
- examples/pymatgen_interface_examples.py +811 -0
- frontend/dist/assets/index-BLfo7ZZu.css +1 -0
- frontend/dist/assets/index-G-mYbZ9R.js +83 -0
- frontend/dist/assets/index-G-mYbZ9R.js.map +1 -0
- frontend/dist/index.html +92 -0
- know_do_graph-0.1.0.dist-info/METADATA +765 -0
- know_do_graph-0.1.0.dist-info/RECORD +63 -0
- know_do_graph-0.1.0.dist-info/WHEEL +4 -0
- know_do_graph-0.1.0.dist-info/entry_points.txt +2 -0
- main.py +944 -0
|
File without changes
|
|
@@ -0,0 +1,283 @@
|
|
|
1
|
+
"""Maintenance agent.
|
|
2
|
+
|
|
3
|
+
Performs routine graph health and consistency tasks:
|
|
4
|
+
- Remove dangling edges pointing to deleted entries
|
|
5
|
+
- Rebuild the in-memory graph from the database
|
|
6
|
+
- Export entries to YAML node files
|
|
7
|
+
- Promote Mem-Graph traces into full Know-Do Graph entries
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
from typing import Optional
|
|
14
|
+
|
|
15
|
+
from core.graph.graph import KnowDoGraph
|
|
16
|
+
from core.schemas.entry import Entry, EntryMetadata, EntryType, RefinementStatus
|
|
17
|
+
from core.storage.database import SessionLocal
|
|
18
|
+
from core.storage.repository import EdgeRepository, EntryRepository
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class MaintenanceAgent:
    """Routine health and consistency tasks for the Know-Do Graph.

    Every public method opens its own ``SessionLocal`` session. Methods that
    add or remove entries/edges in the database apply the same change to the
    in-memory ``KnowDoGraph`` supplied at construction time.
    """

    # Case-insensitive substrings of a section heading that mark the section
    # as an L3 heuristic candidate.
    _L3_KEYWORDS = (
        "heuristic", "rule of thumb", "tips", "best practice", "tip:", "guideline",
    )
    # Same, for L4 constraint candidates. Checked before the L3 list, so a
    # heading matching both is classified as a constraint.
    _L4_KEYWORDS = (
        "limitation", "failure", "caveat", "warning", "pitfall",
        "do not use", "not suitable", "unsuitable", "instability", "known issue",
    )

    def __init__(self, graph: KnowDoGraph) -> None:
        """Store the in-memory graph that is updated alongside the database."""
        self._graph = graph

    def remove_dangling_edges(self) -> int:
        """Delete edges whose source or target entry no longer exists.

        Returns:
            The number of edges removed.
        """
        removed = 0
        with SessionLocal() as db:
            entry_repo = EntryRepository(db)
            edge_repo = EdgeRepository(db)
            # A set gives O(1) membership tests for every edge endpoint.
            known_ids = {e.id for e in entry_repo.get_all()}
            for edge in edge_repo.get_all():
                if edge.source_id in known_ids and edge.target_id in known_ids:
                    continue
                edge_repo.delete(edge.id)
                self._graph.remove_edge(edge.source_id, edge.target_id)
                removed += 1
        return removed

    def rebuild_graph(self) -> None:
        """Rebuild the in-memory graph from the current database state."""
        with SessionLocal() as db:
            entries = EntryRepository(db).get_all()
            edges = EdgeRepository(db).get_all()
            self._graph.rebuild_from_db(entries, edges)

    def export_to_yaml(self, output_dir: Path) -> int:
        """Write each entry as ``<slug>.yaml`` under *output_dir*.

        The directory (and any missing parents) is created first.

        Returns:
            The number of files written.
        """
        import yaml  # pyyaml

        output_dir = Path(output_dir)
        output_dir.mkdir(parents=True, exist_ok=True)
        with SessionLocal() as db:
            entries = EntryRepository(db).get_all()
            for entry in entries:
                # mode="json" is requested so the dump holds plain values.
                payload = entry.model_dump(mode="json")
                target = output_dir / f"{entry.slug}.yaml"
                target.write_text(
                    yaml.dump(payload, allow_unicode=True, sort_keys=False),
                    encoding="utf-8",
                )
            return len(entries)

    def promote_mem_entry(
        self,
        mem_id: str,
        session_id: str = "default",
        entry_type: EntryType = EntryType.memory,
        tags: Optional[list[str]] = None,
    ) -> Optional[Entry]:
        """Promote a Mem-Graph trace into a full Know-Do Graph entry.

        Args:
            mem_id: Identifier of the trace inside the session's Mem-Graph.
            session_id: Mem-Graph session to read the trace from.
            entry_type: Type assigned to the new entry.
            tags: Extra tags placed before the trace's own tags.

        Returns:
            The saved entry, or ``None`` when the trace lookup returns nothing.
        """
        from core.memory.memgraph import MemGraph

        mg = MemGraph(session_id)
        mem_entry = mg.get(mem_id)
        if not mem_entry:
            return None

        # De-duplicate while keeping first-seen order, as the child-node
        # extraction in this class does for its tags.
        merged_tags = list(dict.fromkeys([*(tags or []), *(mem_entry.tags or [])]))
        entry = Entry(
            title=f"Memory: {mem_entry.content[:60]}",
            entry_type=entry_type,
            content=mem_entry.content,
            tags=merged_tags,
            metadata=EntryMetadata(
                source_provenance=f"mem-graph:{session_id}:{mem_id}",
                extraction_method="mem_promotion",
                refinement_status=RefinementStatus.raw,
            ),
        )

        with SessionLocal() as db:
            saved = EntryRepository(db).create(entry)
            self._graph.add_entry(saved)
            mg.mark_promoted(mem_id, saved.id)
            return saved

    # ------------------------------------------------------------------
    # Query helpers — surface entries that need attention
    # ------------------------------------------------------------------

    @staticmethod
    def _select_entries(predicate, limit: int) -> list[Entry]:
        """Return at most *limit* entries for which ``predicate(entry)`` is truthy."""
        with SessionLocal() as db:
            entries = EntryRepository(db).get_all()
            return [e for e in entries if predicate(e)][:limit]

    def list_unverified(self, limit: int = 100) -> list[Entry]:
        """Return entries whose verification_status is 'unverified'."""
        from core.schemas.entry import VerificationStatus

        return self._select_entries(
            lambda e: e.metadata.verification_status == VerificationStatus.unverified,
            limit,
        )

    def list_bugged(self, limit: int = 100) -> list[Entry]:
        """Return entries whose verification_status is 'bugged'."""
        from core.schemas.entry import VerificationStatus

        return self._select_entries(
            lambda e: e.metadata.verification_status == VerificationStatus.bugged,
            limit,
        )

    def list_needs_generalization(self, limit: int = 100) -> list[Entry]:
        """Return entries whose ``metadata.needs_generalization`` flag is set."""
        return self._select_entries(
            lambda e: e.metadata.needs_generalization,
            limit,
        )

    # ------------------------------------------------------------------
    # Hierarchical-memory maintenance
    # ------------------------------------------------------------------

    def extract_heuristics_from_node(
        self,
        entry_id: str,
        dry_run: bool = True,
    ) -> dict:
        """Best-effort split of a flat skill blob into L3 / L4 child nodes.

        Scans the body of *entry_id* for headed sections whose titles contain
        one of these case-insensitive substrings:

        - L4 constraint: "limitation", "failure", "caveat", "warning",
          "pitfall", "do not use", "not suitable", "unsuitable",
          "instability", "known issue"
        - L3 heuristic: "heuristic", "rule of thumb", "tips",
          "best practice", "tip:", "guideline"

        Constraint keywords are tested first, so a heading that matches both
        lists becomes a constraint.

        The source node's content is never changed, and nothing is written
        when ``dry_run`` is true. With ``dry_run=False`` each candidate becomes
        a child entry linked to the source by a ``heuristic_for`` or
        ``constraint_on`` edge, and the slugs of created constraints are
        appended to the source's ``metadata.failure_modes``.

        Args:
            entry_id: Identifier passed to ``RetrievalEngine.resolve_identifier``.
            dry_run: When true (the default), only report candidates.

        Returns:
            ``{"error": ...}`` if the entry cannot be resolved; otherwise a
            summary with the keys ``entry``, ``dry_run``,
            ``candidate_heuristics``, ``candidate_constraints``,
            ``created_heuristics`` and ``created_constraints``.
        """
        from core.retrieval.retrieval import RetrievalEngine
        from core.schemas.edge import EdgeRelation
        from core.schemas.entry import SkillLevel

        with SessionLocal() as db:
            entry = RetrievalEngine(db, self._graph).resolve_identifier(entry_id)
            if entry is None:
                return {"error": f"Entry '{entry_id}' not found."}

            heuristics: list[dict] = []
            constraints: list[dict] = []
            for heading, body in _split_markdown_sections(entry.content):
                lowered = heading.lower()
                candidate = {"title": heading.strip(), "content": body.strip()}
                if any(k in lowered for k in self._L4_KEYWORDS):
                    constraints.append(candidate)
                elif any(k in lowered for k in self._L3_KEYWORDS):
                    heuristics.append(candidate)

            created_h: list[dict] = []
            created_c: list[dict] = []
            if not dry_run:
                edge_repo = EdgeRepository(db)
                repo = EntryRepository(db)
                created_h = self._create_child_nodes(
                    entry,
                    heuristics,
                    entry_repo=repo,
                    edge_repo=edge_repo,
                    entry_type=EntryType.heuristic,
                    skill_level=SkillLevel.L3,
                    relation=EdgeRelation.heuristic_for,
                    extra_tags=["heuristic"],
                )
                created_c = self._create_child_nodes(
                    entry,
                    constraints,
                    entry_repo=repo,
                    edge_repo=edge_repo,
                    entry_type=EntryType.constraint,
                    skill_level=SkillLevel.L4,
                    relation=EdgeRelation.constraint_on,
                    extra_tags=["constraint", "failure-mode"],
                )

                # Denormalise constraint slugs on parent.
                # NOTE(review): only the database row is updated here; the
                # in-memory graph copy of the parent is not refreshed.
                if created_c:
                    entry.metadata.failure_modes = list(
                        dict.fromkeys(
                            entry.metadata.failure_modes + [c["slug"] for c in created_c]
                        )
                    )
                    repo.update(entry)

            return {
                "entry": entry.slug,
                "dry_run": dry_run,
                "candidate_heuristics": heuristics,
                "candidate_constraints": constraints,
                "created_heuristics": created_h,
                "created_constraints": created_c,
            }

    def _create_child_nodes(
        self,
        parent: Entry,
        items: list[dict],
        *,
        entry_repo: EntryRepository,
        edge_repo: EdgeRepository,
        entry_type: EntryType,
        skill_level,
        relation,
        extra_tags: list[str],
    ) -> list[dict]:
        """Persist one child entry plus a child-to-parent edge per candidate.

        Each child is written to the database and added to the in-memory
        graph. Returns ``{"id", "slug", "title"}`` for every saved child.
        """
        from core.schemas.edge import Edge

        created: list[dict] = []
        for item in items:
            child = Entry(
                title=f"{parent.title} — {item['title']}"[:200],
                content=item["content"],
                entry_type=entry_type,
                tags=list(dict.fromkeys(parent.tags + extra_tags)),
                metadata=EntryMetadata(
                    skill_level=skill_level,
                    source_provenance=f"extracted_from:{parent.slug}",
                    applicability={"parent": parent.slug},
                ),
            )
            saved = entry_repo.create(child)
            edge = Edge(
                source_id=saved.id,
                target_id=parent.id,
                relation=relation,
            )
            edge_repo.create(edge)
            self._graph.add_entry(saved)
            self._graph.add_edge(edge)
            created.append({"id": saved.id, "slug": saved.slug, "title": saved.title})
        return created
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
def _split_markdown_sections(text: str) -> list[tuple[str, str]]:
|
|
264
|
+
"""Naive markdown splitter: returns list of (heading, body) pairs.
|
|
265
|
+
|
|
266
|
+
Recognises ``#``-style headings (any level). Text before the first heading
|
|
267
|
+
is associated with heading ``""``.
|
|
268
|
+
"""
|
|
269
|
+
import re
|
|
270
|
+
|
|
271
|
+
sections: list[tuple[str, str]] = []
|
|
272
|
+
current_heading = ""
|
|
273
|
+
current_body: list[str] = []
|
|
274
|
+
for line in (text or "").splitlines():
|
|
275
|
+
m = re.match(r"^\s*#{1,6}\s+(.+?)\s*$", line)
|
|
276
|
+
if m:
|
|
277
|
+
sections.append((current_heading, "\n".join(current_body)))
|
|
278
|
+
current_heading = m.group(1)
|
|
279
|
+
current_body = []
|
|
280
|
+
else:
|
|
281
|
+
current_body.append(line)
|
|
282
|
+
sections.append((current_heading, "\n".join(current_body)))
|
|
283
|
+
return sections
|
|
File without changes
|
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
"""Orchestrator — routes a user request to the right agent or pipeline.
|
|
2
|
+
|
|
3
|
+
The orchestrator is a thin LLM-driven router that:
|
|
4
|
+
1. Reads the user's request (with optional graph context).
|
|
5
|
+
2. Decides which agent(s) to invoke and with what parameters.
|
|
6
|
+
3. Calls them one after another and returns the model's summary of the results.
|
|
7
|
+
|
|
8
|
+
Supported targets:
|
|
9
|
+
- ``graph`` — GraphAgent (add/update/link knowledge)
|
|
10
|
+
- ``review`` — ReviewAgent (audit & clean existing nodes)
|
|
11
|
+
|
|
12
|
+
The orchestrator does NOT do graph work itself; it delegates entirely.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import json
|
|
18
|
+
import os
|
|
19
|
+
from typing import Callable
|
|
20
|
+
|
|
21
|
+
from openai import OpenAI
|
|
22
|
+
|
|
23
|
+
from core.graph.graph import KnowDoGraph
|
|
24
|
+
|
|
25
|
+
# Last-resort model name: used only when no ``model`` argument is passed and
# neither ORCHESTRATOR_MODEL nor GRAPH_AGENT_MODEL is set in the environment.
_DEFAULT_MODEL = "qwen-plus"
|
|
26
|
+
|
|
27
|
+
_SYSTEM_PROMPT = """You are the orchestrator for the Know-Do Graph system.
|
|
28
|
+
Your only job is to decide which agent(s) should handle the user's request and
|
|
29
|
+
call them with the right parameters.
|
|
30
|
+
|
|
31
|
+
Available agents / pipelines
|
|
32
|
+
-----------------------------
|
|
33
|
+
1. ``run_graph_agent(message)``
|
|
34
|
+
Use for: adding new knowledge, searching/updating/linking nodes, enriching
|
|
35
|
+
content from the web, answering questions about graph content.
|
|
36
|
+
|
|
37
|
+
2. ``run_review_agent(instructions, batch_size)``
|
|
38
|
+
Use for: auditing existing nodes, cleaning titles/tags/aliases, merging
|
|
39
|
+
duplicates, checking graph quality, improving coverage.
|
|
40
|
+
batch_size controls how many nodes are reviewed per session (default 5).
|
|
41
|
+
|
|
42
|
+
Decision rules
|
|
43
|
+
--------------
|
|
44
|
+
- If the request is about *adding, updating, or querying* knowledge → graph agent.
|
|
45
|
+
- If the request is about *cleaning, reviewing, fixing, auditing* the graph → review agent.
|
|
46
|
+
- If both apply (e.g. "add new nodes and then review the related area") → call both in order.
|
|
47
|
+
- If the intent is ambiguous, prefer the graph agent for content tasks and the
|
|
48
|
+
review agent for quality/maintenance tasks.
|
|
49
|
+
|
|
50
|
+
Always call at least one agent. Never answer directly without delegating.
|
|
51
|
+
After all agent calls are complete, give a concise summary of what was done.
|
|
52
|
+
"""
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class OrchestratorAgent:
    """Routes requests to GraphAgent and/or ReviewAgent.

    Parameters
    ----------
    graph:
        Shared KnowDoGraph instance.
    model:
        LLM model for the orchestrator's routing decision. Falls back to the
        ``ORCHESTRATOR_MODEL`` then ``GRAPH_AGENT_MODEL`` environment
        variables, then to the module default.
    on_step:
        Optional callback ``(event, data)`` forwarded to sub-agents for CLI display.
    read_only:
        Forwarded to GraphAgent. Review sessions are refused while this is
        set, because ReviewAgent has no read-only mode and its sessions
        modify the graph.

    Raises
    ------
    KeyError
        From the constructor when ``OPENAI_API_KEY`` is not set.
    """

    # Upper bound on model round-trips per chat() call.
    _MAX_ITERATIONS = 8
    # Batch size used when the model omits or garbles ``batch_size``.
    _DEFAULT_BATCH_SIZE = 5

    # Tool schemas offered to the routing model; built once, not per call.
    _TOOLS = [
        {
            "type": "function",
            "function": {
                "name": "run_graph_agent",
                "description": (
                    "Delegate a task to the GraphAgent. Use for adding, updating, "
                    "searching, linking, or enriching knowledge nodes."
                ),
                "parameters": {
                    "type": "object",
                    "properties": {
                        "message": {
                            "type": "string",
                            "description": "Full task description for the graph agent",
                        },
                    },
                    "required": ["message"],
                },
            },
        },
        {
            "type": "function",
            "function": {
                "name": "run_review_agent",
                "description": (
                    "Delegate a quality-review session to the ReviewAgent. "
                    "Use for auditing, cleaning, deduplicating, or fixing existing nodes."
                ),
                "parameters": {
                    "type": "object",
                    "properties": {
                        "instructions": {
                            "type": "string",
                            "description": "Specific review focus (leave empty for general review)",
                            "default": "",
                        },
                        "batch_size": {
                            "type": "integer",
                            "description": "Number of nodes to review in this session",
                            "default": 5,
                        },
                    },
                },
            },
        },
    ]

    def __init__(
        self,
        graph: KnowDoGraph,
        model: str | None = None,
        on_step: Callable[[str, dict], None] | None = None,
        read_only: bool = False,
    ) -> None:
        self._graph = graph
        self._model = model or os.environ.get(
            "ORCHESTRATOR_MODEL",
            os.environ.get("GRAPH_AGENT_MODEL", _DEFAULT_MODEL),
        )
        self._on_step = on_step
        self._read_only = read_only
        self._client = OpenAI(
            api_key=os.environ["OPENAI_API_KEY"],
            base_url=os.environ.get("OPENAI_API_BASE"),
        )
        self._history: list[dict] = [{"role": "system", "content": _SYSTEM_PROMPT}]

    # ------------------------------------------------------------------
    # Public
    # ------------------------------------------------------------------

    def chat(self, user_message: str) -> str:
        """Route a message and return the combined agent response.

        The user message and the final answer are appended to the
        conversation history. If routing raises, everything this call added
        to the history is removed before the exception propagates, so a
        failed turn cannot leave a tool call without its tool response.
        """
        checkpoint = len(self._history)
        self._history.append({"role": "user", "content": user_message})
        try:
            result = self._run_loop()
        except BaseException:
            del self._history[checkpoint:]
            raise
        self._history.append({"role": "assistant", "content": result})
        return result

    def reset(self) -> None:
        """Drop the conversation, keeping only the system prompt."""
        self._history = [self._history[0]]

    # ------------------------------------------------------------------
    # Internal loop
    # ------------------------------------------------------------------

    def _emit(self, event: str, data: dict) -> None:
        """Forward a progress event to the ``on_step`` callback, if one is set."""
        if self._on_step:
            self._on_step(event, data)

    def _run_loop(self) -> str:
        """Query the routing model until it answers without tool calls.

        Each requested tool call is executed in order and its result is
        appended to the history as a ``tool`` message. Returns the model's
        final text, or a fixed notice once the iteration cap is reached.
        """
        for iteration in range(1, self._MAX_ITERATIONS + 1):
            self._emit("orchestrator_thinking", {"iteration": iteration})

            response = self._client.chat.completions.create(
                model=self._model,
                messages=self._history,
                tools=self._TOOLS,
                tool_choice="auto",
            )
            message = response.choices[0].message

            if not message.tool_calls:
                return message.content or ""

            self._history.append(message.model_dump(exclude_unset=True))

            for tc in message.tool_calls:
                result = self._handle_tool_call(tc.function.name, tc.function.arguments)
                self._history.append(
                    {
                        "role": "tool",
                        "tool_call_id": tc.id,
                        "content": json.dumps({"result": result}, default=str),
                    }
                )

        return "Orchestrator reached maximum iterations."

    def _handle_tool_call(self, name: str, arguments_json: str | None) -> str:
        """Parse one tool call's arguments and dispatch it.

        Unparseable or non-object arguments are reported back to the model as
        the tool result instead of running an agent with empty input.
        """
        try:
            kwargs = json.loads(arguments_json or "{}")
        except json.JSONDecodeError as exc:
            return f"Error: tool arguments were not valid JSON ({exc})."
        if not isinstance(kwargs, dict):
            return "Error: tool arguments must be a JSON object."

        self._emit("route", {"agent": name, "args": kwargs})
        return self._dispatch(name, kwargs)

    @classmethod
    def _coerce_batch_size(cls, value) -> int:
        """Turn a model-supplied batch size into an integer of at least 1.

        Missing, null or non-numeric values fall back to the default.
        """
        try:
            size = int(value)
        except (TypeError, ValueError, OverflowError):
            return cls._DEFAULT_BATCH_SIZE
        return max(1, size)

    def _dispatch(self, name: str, kwargs: dict) -> str:
        """Call the sub-agent named *name* with arguments taken from *kwargs*.

        Missing or null arguments fall back to an empty string, or to the
        default batch size. Unknown names yield an ``"Unknown agent: ..."``
        string.
        """
        if not isinstance(kwargs, dict):
            kwargs = {}
        if name == "run_graph_agent":
            return self._run_graph_agent(kwargs.get("message") or "")
        if name == "run_review_agent":
            return self._run_review_agent(
                kwargs.get("instructions") or "",
                self._coerce_batch_size(kwargs.get("batch_size")),
            )
        return f"Unknown agent: {name}"

    def _run_graph_agent(self, message: str) -> str:
        """Run a fresh GraphAgent on *message* and return its reply."""
        # Imported here, as in the original, rather than at module import time.
        from agents.graph_agent.agent import GraphAgent

        agent = GraphAgent(
            graph=self._graph,
            model=self._model,
            on_step=self._on_step,
            read_only=self._read_only,
        )
        return agent.chat(message)

    def _run_review_agent(self, instructions: str, batch_size: int) -> str:
        """Run one ReviewAgent session and return its summary.

        Returns a refusal notice without running anything when the
        orchestrator is in read-only mode.
        """
        if self._read_only:
            return (
                "Review agent not run: the orchestrator is in read-only mode "
                "and review sessions modify the graph."
            )

        from agents.review_agent.agent import ReviewAgent

        agent = ReviewAgent(
            graph=self._graph,
            model=self._model,
            batch_size=batch_size,
            on_step=self._on_step,
        )
        return agent.run_review(instructions=instructions)
|
|
File without changes
|
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
"""ReviewAgent — LLM-driven agent that incrementally audits and cleans the graph.
|
|
2
|
+
|
|
3
|
+
The agent never receives the full graph at once. Instead it:
|
|
4
|
+
1. Checks overall graph health via ``get_graph_summary``.
|
|
5
|
+
2. Samples a small batch of under-reviewed nodes via ``sample_nodes_for_review``.
|
|
6
|
+
3. Inspects each node and its local neighbourhood with ``inspect_node``.
|
|
7
|
+
4. Applies targeted fixes: normalise titles, move acronyms to aliases,
|
|
8
|
+
fix tags, merge near-duplicates, add missing edges.
|
|
9
|
+
5. Marks each reviewed node with ``mark_reviewed`` so the next session
|
|
10
|
+
continues where this one left off.
|
|
11
|
+
|
|
12
|
+
Configuration:
|
|
13
|
+
OPENAI_API_KEY — required
|
|
14
|
+
OPENAI_API_BASE — optional base URL override
|
|
15
|
+
REVIEW_AGENT_MODEL — model name (defaults to GRAPH_AGENT_MODEL or "qwen-plus")
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
import json
|
|
21
|
+
import os
|
|
22
|
+
from typing import Any, Callable
|
|
23
|
+
|
|
24
|
+
from openai import OpenAI
|
|
25
|
+
|
|
26
|
+
from core.graph.graph import KnowDoGraph
|
|
27
|
+
from agents.review_agent.tools import REVIEW_TOOL_DISPATCH, REVIEW_TOOL_SCHEMAS
|
|
28
|
+
|
|
29
|
+
# Last-resort model name: used only when no ``model`` argument is passed and
# neither REVIEW_AGENT_MODEL nor GRAPH_AGENT_MODEL is set in the environment.
_DEFAULT_MODEL = "qwen-plus"
|
|
30
|
+
|
|
31
|
+
_SYSTEM_PROMPT = """You are a knowledge-graph quality reviewer for the Know-Do Graph system.
|
|
32
|
+
|
|
33
|
+
Your role is to incrementally audit existing nodes (entries) for quality issues and
|
|
34
|
+
fix them. You work in focused batches — you do NOT need to review the entire graph
|
|
35
|
+
in one session.
|
|
36
|
+
|
|
37
|
+
## Quality criteria
|
|
38
|
+
1. **Title hygiene**
|
|
39
|
+
- Titles must be concise (3–7 words).
|
|
40
|
+
- Remove parenthetical acronyms/aliases from titles — put them in `aliases` instead.
|
|
41
|
+
Example: "Density Functional Theory (DFT)" → title "Density Functional Theory",
|
|
42
|
+
aliases ["DFT", "ab initio DFT"].
|
|
43
|
+
- Remove redundant tool-name prefixes from capability titles.
|
|
44
|
+
Example: "RDKit Molecular Fingerprint Generation" → "Molecular Fingerprint Generation"
|
|
45
|
+
(keep "rdkit" as a tag and ensure a `dependency` edge to the RDKit tool node).
|
|
46
|
+
|
|
47
|
+
2. **Tag normalisation**
|
|
48
|
+
- All tags must be lowercase and hyphenated (e.g. "machine-learning", "rdkit").
|
|
49
|
+
- Remove capitalised duplicates (e.g. "RDKit" when "rdkit" already present).
|
|
50
|
+
- Tags should be domain-specific and meaningful — remove generic filler tags.
|
|
51
|
+
|
|
52
|
+
3. **Duplicate / alias detection**
|
|
53
|
+
- If two nodes represent the same concept, merge them with `merge_entries`.
|
|
54
|
+
- If one node is a more specific variant of another, add a `derived_from` or
|
|
55
|
+
`refinement_of` edge rather than merging.
|
|
56
|
+
|
|
57
|
+
4. **Edge completeness**
|
|
58
|
+
- If a node uses a tool or library, ensure a `dependency` edge exists to that
|
|
59
|
+
tool's node.
|
|
60
|
+
- If a node is a specific application of a broader capability, add a
|
|
61
|
+
`derived_from` or `prerequisite` edge.
|
|
62
|
+
|
|
63
|
+
## Your workflow each session
|
|
64
|
+
1. Call `get_graph_summary` to see overall health and review coverage.
|
|
65
|
+
2. Call `sample_nodes_for_review` to get a batch (default 5).
|
|
66
|
+
3. For each sampled node: call `inspect_node`, assess quality, apply fixes if needed.
|
|
67
|
+
4. After inspecting (and optionally fixing) each node, call `mark_reviewed`.
|
|
68
|
+
5. Summarise what you found and fixed.
|
|
69
|
+
|
|
70
|
+
Keep changes conservative — prefer targeted fixes over large rewrites.
|
|
71
|
+
"""
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
class ReviewAgent:
    """LLM-powered agent that reviews and cleans the Know-Do Graph incrementally.

    Parameters
    ----------
    graph:
        The shared ``KnowDoGraph`` instance.
    model:
        Model identifier forwarded to the OpenAI client. Falls back to the
        ``REVIEW_AGENT_MODEL`` then ``GRAPH_AGENT_MODEL`` environment
        variables, then to the module default.
    batch_size:
        Number of nodes to review per ``run_review`` call.
    on_step:
        Optional callback ``(event, data)`` that receives ``thinking``,
        ``tool_call`` and ``tool_result`` events.

    Raises
    ------
    KeyError
        From the constructor when ``OPENAI_API_KEY`` is not set.
    """

    # Upper bound on model round-trips per session.
    _MAX_ITERATIONS = 30

    def __init__(
        self,
        graph: KnowDoGraph,
        model: str | None = None,
        batch_size: int = 5,
        on_step: Callable[[str, dict], None] | None = None,
    ) -> None:
        self._graph = graph
        self._batch_size = batch_size
        self._on_step = on_step
        self._model = model or os.environ.get(
            "REVIEW_AGENT_MODEL",
            os.environ.get("GRAPH_AGENT_MODEL", _DEFAULT_MODEL),
        )
        self._client = OpenAI(
            api_key=os.environ["OPENAI_API_KEY"],
            base_url=os.environ.get("OPENAI_API_BASE"),
        )

    # ------------------------------------------------------------------
    # Public interface
    # ------------------------------------------------------------------

    def run_review(self, instructions: str = "") -> str:
        """Run one review session and return a summary of findings and fixes.

        Args:
            instructions: Optional review focus. When empty, the model is
                told to apply every quality criterion in the system prompt.
        """
        focus = instructions or "Apply all quality criteria from your instructions."
        user_msg = f"Please review a batch of {self._batch_size} nodes. " + focus
        return self._run_loop(self._new_history(user_msg))

    def chat(self, user_message: str) -> str:
        """Single-turn interactive review conversation (stateless)."""
        return self._run_loop(self._new_history(user_message))

    # ------------------------------------------------------------------
    # Internal agentic loop
    # ------------------------------------------------------------------

    @staticmethod
    def _new_history(user_message: str) -> list[dict]:
        """Build a fresh conversation: the system prompt plus one user message."""
        return [
            {"role": "system", "content": _SYSTEM_PROMPT},
            {"role": "user", "content": user_message},
        ]

    @staticmethod
    def _parse_tool_arguments(arguments_json: str | None) -> tuple[dict, str | None]:
        """Decode a tool call's argument string.

        Returns:
            ``(kwargs, None)`` on success, where an empty string, ``None`` or
            JSON ``null`` yields ``{}``; otherwise ``({}, error_message)``
            when the text is not valid JSON or decodes to something other
            than an object.
        """
        if not arguments_json:
            return {}, None
        try:
            parsed = json.loads(arguments_json)
        except json.JSONDecodeError as exc:
            return {}, f"Bad arguments JSON: {exc}"
        if parsed is None:
            return {}, None
        if not isinstance(parsed, dict):
            return {}, (
                "Bad arguments JSON: expected a JSON object, "
                f"got {type(parsed).__name__}"
            )
        return parsed, None

    def _run_loop(self, history: list[dict]) -> str:
        """Drive the model/tool loop until the model answers without tool calls.

        *history* is extended in place with the assistant's tool-call
        messages and one ``tool`` message per call. Returns the model's final
        text, or a fixed notice once the iteration cap is reached.
        """
        for iteration in range(1, self._MAX_ITERATIONS + 1):
            if self._on_step:
                self._on_step("thinking", {"iteration": iteration})

            response = self._client.chat.completions.create(
                model=self._model,
                messages=history,
                tools=REVIEW_TOOL_SCHEMAS,
                tool_choice="auto",
            )
            message = response.choices[0].message

            if not message.tool_calls:
                return message.content or ""

            history.append(message.model_dump(exclude_unset=True))

            for tc in message.tool_calls:
                tool_name = tc.function.name
                if self._on_step:
                    # Unparseable arguments are shown as {}; the real error
                    # reaches the model through _dispatch's result below.
                    shown, _ = self._parse_tool_arguments(tc.function.arguments)
                    shown.pop("graph", None)
                    self._on_step("tool_call", {"name": tool_name, "args": shown})

                result = self._dispatch(tool_name, tc.function.arguments)

                if self._on_step:
                    self._on_step("tool_result", {"name": tool_name, "result": result})

                history.append(
                    {
                        "role": "tool",
                        "tool_call_id": tc.id,
                        "content": json.dumps(result, default=str),
                    }
                )

        return "Review agent reached maximum iterations without a final answer."

    def _dispatch(self, name: str, arguments_json: str) -> Any:
        """Invoke the review tool *name* with JSON-encoded keyword arguments.

        The shared graph is always injected as the ``graph`` keyword,
        overriding any value the model supplied. Failures never raise:
        unknown tools, bad arguments and exceptions raised by the tool are
        all returned as ``{"error": ...}`` so the model can react to them.
        """
        func = REVIEW_TOOL_DISPATCH.get(name)
        if func is None:
            return {"error": f"Unknown tool: {name}"}

        kwargs, problem = self._parse_tool_arguments(arguments_json)
        if problem is not None:
            return {"error": problem}

        kwargs["graph"] = self._graph
        try:
            return func(**kwargs)
        except Exception as exc:  # noqa: BLE001
            return {"error": str(exc)}
|