know-do-graph 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agents/__init__.py +0 -0
- agents/extraction_agent/__init__.py +0 -0
- agents/extraction_agent/agent.py +170 -0
- agents/graph_agent/__init__.py +5 -0
- agents/graph_agent/agent.py +373 -0
- agents/graph_agent/tools.py +2106 -0
- agents/maintenance_agent/__init__.py +0 -0
- agents/maintenance_agent/agent.py +283 -0
- agents/orchestrator/__init__.py +0 -0
- agents/orchestrator/agent.py +217 -0
- agents/review_agent/__init__.py +0 -0
- agents/review_agent/agent.py +188 -0
- agents/review_agent/tools.py +472 -0
- api/__init__.py +0 -0
- api/main.py +136 -0
- api/routes/__init__.py +0 -0
- api/routes/agent.py +81 -0
- api/routes/entries.py +411 -0
- api/routes/graph.py +132 -0
- api/routes/mem.py +179 -0
- api/routes/remote.py +815 -0
- api/routes/remote_sync.py +230 -0
- api/routes/retrieve.py +88 -0
- core/__init__.py +0 -0
- core/app_state.py +9 -0
- core/events.py +84 -0
- core/extraction/__init__.py +0 -0
- core/extraction/wikilink_parser.py +48 -0
- core/graph/__init__.py +0 -0
- core/graph/graph.py +204 -0
- core/memory/__init__.py +0 -0
- core/memory/memgraph.py +458 -0
- core/resources/starter.db +0 -0
- core/retrieval/__init__.py +0 -0
- core/retrieval/embedder.py +122 -0
- core/retrieval/fusion.py +52 -0
- core/retrieval/progressive.py +399 -0
- core/retrieval/retrieval.py +346 -0
- core/retrieval/vector_store.py +91 -0
- core/schemas/__init__.py +0 -0
- core/schemas/edge.py +46 -0
- core/schemas/entry.py +388 -0
- core/storage/__init__.py +0 -0
- core/storage/database.py +104 -0
- core/storage/models.py +66 -0
- core/storage/repository.py +243 -0
- core/sync/__init__.py +20 -0
- core/sync/autolink.py +301 -0
- core/sync/db_merge.py +297 -0
- core/sync/db_watcher.py +84 -0
- core/sync/remote_sync.py +345 -0
- examples/__init__.py +0 -0
- examples/example_entries.py +206 -0
- examples/pymatgen_interface_examples.py +811 -0
- frontend/dist/assets/index-BLfo7ZZu.css +1 -0
- frontend/dist/assets/index-G-mYbZ9R.js +83 -0
- frontend/dist/assets/index-G-mYbZ9R.js.map +1 -0
- frontend/dist/index.html +92 -0
- know_do_graph-0.1.0.dist-info/METADATA +765 -0
- know_do_graph-0.1.0.dist-info/RECORD +63 -0
- know_do_graph-0.1.0.dist-info/WHEEL +4 -0
- know_do_graph-0.1.0.dist-info/entry_points.txt +2 -0
- main.py +944 -0
|
@@ -0,0 +1,2106 @@
|
|
|
1
|
+
"""Tool definitions for the GraphAgent.
|
|
2
|
+
|
|
3
|
+
Each function corresponds to an OpenAI function-calling tool. All functions
|
|
4
|
+
receive the live ``KnowDoGraph`` instance via the module-level ``_graph``
|
|
5
|
+
variable which is set once by ``GraphAgent.__init__``.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import json
|
|
11
|
+
import re
|
|
12
|
+
from typing import Any
|
|
13
|
+
|
|
14
|
+
# ---------------------------------------------------------------------------
|
|
15
|
+
# Helpers
|
|
16
|
+
# ---------------------------------------------------------------------------
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _slug(title: str) -> str:
|
|
20
|
+
slug = title.lower().strip()
|
|
21
|
+
slug = re.sub(r"[^\w\s-]", "", slug)
|
|
22
|
+
slug = re.sub(r"[\s_]+", "-", slug)
|
|
23
|
+
return re.sub(r"-+", "-", slug).strip("-")
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
# Tokens that suggest a title is a *concrete instance* of a more general skill
|
|
27
|
+
# (chemical formulas, two-formula interfaces, "build X molecule", etc.).
|
|
28
|
+
# Used by create_entry's soft abstraction check.
|
|
29
|
+
_CONCRETE_INSTANCE_PATTERNS = [
|
|
30
|
+
re.compile(r"\b[A-Z][a-zA-Z]*\d+[A-Za-z]*\d*\b"), # CH4, H2O, TiO2, SrTiO3
|
|
31
|
+
re.compile(r"\b[A-Z][A-Za-z0-9]+/[A-Z][A-Za-z0-9]+\b"), # TiO2/SrTiO3
|
|
32
|
+
re.compile(r"^build\s+[A-Z][a-zA-Z]*\d+"), # "Build H2O" (formula must contain a digit)
|
|
33
|
+
]
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _looks_overly_specific(title: str) -> bool:
    """Return True when *title* matches any concrete-instance pattern.

    The patterns live in ``_CONCRETE_INSTANCE_PATTERNS``; the first match
    decides the result.
    """
    for pattern in _CONCRETE_INSTANCE_PATTERNS:
        if pattern.search(title):
            return True
    return False
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _check_generalization(title: str, db: Any) -> dict:
    """Run the soft abstraction check for a proposed entry *title*.

    Combines the ``_looks_overly_specific`` title heuristic with a search
    for existing entries matching *title* (at most five candidates from
    ``RetrievalEngine.search_entries``).

    Args:
        title: Proposed title of the entry about to be created.
        db: Open database session handed to ``RetrievalEngine``.

    Returns:
        A dict with three keys:

        * ``needs_generalization`` - True when the title looks overly
          specific or the search returned at least one candidate.
        * ``similar`` - one ``{"id", "title", "type"}`` dict per candidate.
        * ``suggestion`` - a short hint for the caller; empty string when
          nothing was flagged.
    """
    from core.retrieval.retrieval import RetrievalEngine
    from core import app_state

    engine = RetrievalEngine(db, app_state.graph)
    candidates = engine.search_entries(query=title, limit=5)
    overly_specific = _looks_overly_specific(title)

    hints = []
    if overly_specific:
        hints.append(
            "Title mentions a concrete formula/material; consider a more general title."
        )
    if candidates:
        hints.append(
            "Similar entries already exist; consider reusing, merging into, or aliasing one of them."
        )

    return {
        "needs_generalization": overly_specific or bool(candidates),
        "similar": [{"id": e.id, "title": e.title, "type": e.entry_type.value} for e in candidates],
        # The documented contract includes this key; it used to be missing.
        "suggestion": " ".join(hints),
    }
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
# ---------------------------------------------------------------------------
|
|
62
|
+
# Node / Entry tools
|
|
63
|
+
# ---------------------------------------------------------------------------
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def create_entry(
    title: str,
    content: str = "",
    entry_type: str = "generic",
    tags: list[str] | None = None,
    aliases: list[str] | None = None,
    source_provenance: str | None = None,
    graph: Any = None,
) -> dict:
    """Create a new knowledge entry (node) in the graph.

    Performs a soft abstraction check via ``_check_generalization``: when it
    flags *title*, the entry is still created, but with
    ``metadata.needs_generalization = True``, and the response lists the
    similar candidates so the agent can decide to merge or rename instead.

    Args:
        title: Title of the new entry.
        content: Body text of the entry.
        entry_type: Value of ``EntryType``; an unknown value yields an
            error dict instead of raising.
        tags: Optional tags (defaults to an empty list).
        aliases: Optional aliases (defaults to an empty list).
        source_provenance: Optional provenance stored in the metadata.
        graph: In-memory graph to add the saved entry to; skipped if None.

    Returns:
        ``{"id", "slug", "title", "needs_generalization",
        "similar_existing"}`` on success, or ``{"error": ...}`` when
        *entry_type* is not a valid ``EntryType``.
    """
    from core.schemas.entry import Entry, EntryMetadata, EntryType
    from core.storage.database import SessionLocal
    from core.storage.repository import EntryRepository

    # Validate before touching the database so a bad type costs nothing and
    # is reported in the same error-dict style as list_nodes_by_type.
    try:
        resolved_type = EntryType(entry_type)
    except ValueError:
        return {"error": f"Unknown entry_type '{entry_type}'"}

    with SessionLocal() as db:
        check = _check_generalization(title, db)
        meta = EntryMetadata(
            source_provenance=source_provenance,
            needs_generalization=check["needs_generalization"],
        )
        entry = Entry(
            title=title,
            content=content,
            entry_type=resolved_type,
            tags=tags or [],
            aliases=aliases or [],
            metadata=meta,
        )
        saved = EntryRepository(db).create(entry)
        if graph is not None:
            graph.add_entry(saved)
        return {
            "id": saved.id,
            "slug": saved.slug,
            "title": saved.title,
            "needs_generalization": check["needs_generalization"],
            "similar_existing": check["similar"],
        }
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def update_entry(
    entry_id: str,
    title: str | None = None,
    content: str | None = None,
    entry_type: str | None = None,
    tags: list[str] | None = None,
    aliases: list[str] | None = None,
    graph: Any = None,
) -> dict:
    """Update fields on an existing entry.

    Only arguments that are not None are applied. *entry_id* is resolved
    through ``RetrievalEngine.resolve_identifier``.

    Args:
        entry_id: Identifier of the entry to update.
        title: New title, if given.
        content: New content, if given; ``refresh_refs()`` is called on the
            entry afterwards.
        entry_type: New ``EntryType`` value, if given; an unknown value
            yields an error dict instead of raising.
        tags: Replacement tag list, if given.
        aliases: Replacement alias list, if given.
        graph: In-memory graph; falls back to ``app_state.graph``.

    Returns:
        ``{"id", "slug", "title"}`` on success, otherwise ``{"error": ...}``.
    """
    from core import app_state
    from core.retrieval.retrieval import RetrievalEngine
    from core.schemas.entry import EntryType
    from core.storage.database import SessionLocal
    from core.storage.repository import EntryRepository

    # Validate the type first so an invalid value never leaves the entry
    # half-modified and is reported like list_nodes_by_type does.
    new_type = None
    if entry_type is not None:
        try:
            new_type = EntryType(entry_type)
        except ValueError:
            return {"error": f"Unknown entry_type '{entry_type}'"}

    g = graph or app_state.graph
    with SessionLocal() as db:
        engine = RetrievalEngine(db, g)
        entry = engine.resolve_identifier(entry_id)
        if entry is None:
            return {"error": f"Entry '{entry_id}' not found."}
        if title is not None:
            entry.title = title
        if content is not None:
            entry.content = content
            entry.refresh_refs()
        if new_type is not None:
            entry.entry_type = new_type
        if tags is not None:
            entry.tags = tags
        if aliases is not None:
            entry.aliases = aliases
        saved = EntryRepository(db).update(entry)
        if not saved:
            return {"error": "Update failed."}
        # Use the resolved graph (as add_script_to_entry does) so the
        # in-memory node is refreshed even when `graph` was not passed.
        if g is not None:
            g.add_entry(saved)  # upsert node attributes
        return {"id": saved.id, "slug": saved.slug, "title": saved.title}
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def delete_entry(entry_id: str, graph: Any = None) -> dict:
    """Delete the entry *entry_id* through ``EntryRepository.delete``.

    When the repository reports success and *graph* is given, the entry is
    also removed from the in-memory graph. Edge cleanup is presumably done
    by the repository / graph layer (not visible here).

    Returns:
        ``{"deleted": <repository result>, "entry_id": entry_id}``.
    """
    from core.storage.database import SessionLocal
    from core.storage.repository import EntryRepository

    with SessionLocal() as session:
        was_deleted = EntryRepository(session).delete(entry_id)
        if was_deleted and graph is not None:
            graph.remove_entry(entry_id)
        outcome = {"deleted": was_deleted, "entry_id": entry_id}
    return outcome
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def search_entries(query: str, limit: int = 10, mode: str = "hybrid", graph: Any = None) -> list[dict]:
    """Search entries for *query* and return compact summaries.

    Delegates to ``RetrievalEngine.search_entries`` with the given *limit*
    and *mode*; *graph* falls back to ``app_state.graph``.

    Returns:
        One ``{"id", "slug", "title", "type", "tags"}`` dict per hit.
    """
    from core import app_state
    from core.retrieval.retrieval import RetrievalEngine
    from core.storage.database import SessionLocal

    active_graph = graph or app_state.graph
    summaries: list[dict] = []
    with SessionLocal() as session:
        hits = RetrievalEngine(session, active_graph).search_entries(query=query, limit=limit, mode=mode)
        for hit in hits:
            summaries.append(
                {
                    "id": hit.id,
                    "slug": hit.slug,
                    "title": hit.title,
                    "type": hit.entry_type.value,
                    "tags": hit.tags,
                }
            )
    return summaries
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def get_entry(identifier: str, graph: Any = None) -> dict:
    """Look up one entry via ``RetrievalEngine.resolve_identifier``.

    *graph* falls back to ``app_state.graph``.

    Returns:
        A dict with the entry's id, slug, title, type, tags, aliases,
        content, refs, source and status, or ``{"error": ...}`` when the
        identifier does not resolve.
    """
    from core import app_state
    from core.retrieval.retrieval import RetrievalEngine
    from core.storage.database import SessionLocal

    active_graph = graph or app_state.graph
    with SessionLocal() as session:
        found = RetrievalEngine(session, active_graph).resolve_identifier(identifier)
        if found is None:
            return {"error": f"Entry '{identifier}' not found."}
        meta = found.metadata
        payload = {
            "id": found.id,
            "slug": found.slug,
            "title": found.title,
            "type": found.entry_type.value,
            "tags": found.tags,
            "aliases": found.aliases,
            "content": found.content,
            "refs": found.internal_refs,
            "source": meta.source_provenance,
            "status": meta.refinement_status.value,
        }
        return payload
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
def list_entries(limit: int = 20, graph: Any = None) -> list[dict]:
    """Return up to *limit* entries as compact summaries.

    Delegates to ``RetrievalEngine.list_entries``; *graph* falls back to
    ``app_state.graph``.

    Returns:
        One ``{"id", "slug", "title", "type"}`` dict per entry.
    """
    from core import app_state
    from core.retrieval.retrieval import RetrievalEngine
    from core.storage.database import SessionLocal

    active_graph = graph or app_state.graph
    rows: list[dict] = []
    with SessionLocal() as session:
        for item in RetrievalEngine(session, active_graph).list_entries(limit=limit):
            rows.append(
                {"id": item.id, "slug": item.slug, "title": item.title, "type": item.entry_type.value}
            )
    return rows
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
# ---------------------------------------------------------------------------
|
|
223
|
+
# Edge tools
|
|
224
|
+
# ---------------------------------------------------------------------------
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
def create_edge(
    source_id: str,
    target_id: str,
    relation: str = "related_to",
    weight: float = 1.0,
    graph: Any = None,
) -> dict:
    """Create a directed edge from one existing entry to another.

    ``source_id`` and ``target_id`` are resolved with
    ``RetrievalEngine.resolve_identifier``. If either endpoint does not
    resolve, nothing is written and an ``edge_endpoint_not_found`` error is
    returned listing the unresolved identifiers, so the agent can fix its
    arguments or create the missing node first. An edge whose endpoints
    resolve to the same entry is rejected as a self-loop.

    A *relation* that is not a valid ``EdgeRelation`` silently falls back
    to ``EdgeRelation.wikilink``.

    Returns:
        ``{"id", "source_id", "target_id", "relation"}`` for the stored
        edge, or an error dict.
    """
    from core.retrieval.retrieval import RetrievalEngine
    from core.schemas.edge import Edge, EdgeRelation
    from core.storage.database import SessionLocal
    from core.storage.repository import EdgeRepository

    try:
        edge_relation = EdgeRelation(relation)
    except ValueError:
        edge_relation = EdgeRelation.wikilink

    with SessionLocal() as session:
        resolver = RetrievalEngine(session, graph)
        source_entry = resolver.resolve_identifier(source_id)
        target_entry = resolver.resolve_identifier(target_id)

        unresolved = [
            raw_id
            for raw_id, resolved in ((source_id, source_entry), (target_id, target_entry))
            if resolved is None
        ]
        if unresolved:
            return {
                "error": "edge_endpoint_not_found",
                "missing": unresolved,
                "hint": "Resolve source_id and target_id to existing entries (use search_entries / get_entry) or call create_entry first.",
            }
        if source_entry.id == target_entry.id:
            return {"error": "self_loop_rejected", "entry_id": source_entry.id}

        stored = EdgeRepository(session).create(
            Edge(
                source_id=source_entry.id,
                target_id=target_entry.id,
                relation=edge_relation,
                weight=weight,
            )
        )
        if graph is not None:
            graph.add_edge(stored)
        return {
            "id": stored.id,
            "source_id": stored.source_id,
            "target_id": stored.target_id,
            "relation": stored.relation.value,
        }
|
|
281
|
+
|
|
282
|
+
|
|
283
|
+
def delete_edge(edge_id: str, graph: Any = None) -> dict:
    """Delete an edge by its ID.

    The edge row is loaded first so its endpoints are known before it is
    deleted; they are needed to remove the edge from the in-memory graph.

    Args:
        edge_id: Primary key of the edge to delete.
        graph: In-memory graph to update; skipped if None.

    Returns:
        ``{"deleted": <repository result>, "edge_id": edge_id}``, or
        ``{"error": ...}`` when no edge with that ID exists.
    """
    from core.storage.database import SessionLocal
    from core.storage.models import EdgeModel
    from core.storage.repository import EdgeRepository

    with SessionLocal() as db:
        model = db.get(EdgeModel, edge_id)
        if model is None:
            return {"error": f"Edge '{edge_id}' not found."}
        # Capture the endpoints before the row is deleted.
        src_id, tgt_id = model.source_id, model.target_id
        deleted = EdgeRepository(db).delete(edge_id)
        if deleted and graph is not None:
            graph.remove_edge(src_id, tgt_id)
        return {"deleted": deleted, "edge_id": edge_id}
|
|
299
|
+
|
|
300
|
+
|
|
301
|
+
def get_neighbors(entry_id: str, direction: str = "both", graph: Any = None) -> list[dict]:
    """Return the graph's neighbors of *entry_id*.

    Forwards to ``get_neighbors`` on *graph* (or ``app_state.graph`` when
    *graph* is falsy) and returns its result unchanged.
    """
    from core import app_state

    active_graph = graph or app_state.graph
    return active_graph.get_neighbors(entry_id, direction=direction)
|
|
308
|
+
|
|
309
|
+
|
|
310
|
+
# ---------------------------------------------------------------------------
|
|
311
|
+
# Graph-level tools
|
|
312
|
+
# ---------------------------------------------------------------------------
|
|
313
|
+
|
|
314
|
+
|
|
315
|
+
def graph_stats(graph: Any = None) -> dict:
    """Return whatever ``stats()`` reports for the active graph.

    The active graph is *graph*, or ``app_state.graph`` when *graph* is
    falsy.
    """
    from core import app_state

    active_graph = graph if graph else app_state.graph
    summary = active_graph.stats()
    return summary
|
|
321
|
+
|
|
322
|
+
|
|
323
|
+
def resolve_wikilinks(graph: Any = None) -> dict:
    """Run ``ExtractionAgent.resolve_wikilinks`` on the active graph.

    Returns:
        ``{"edges_created": <value returned by the agent>}``.
    """
    from core import app_state
    from agents.extraction_agent.agent import ExtractionAgent

    active_graph = graph or app_state.graph
    created = ExtractionAgent(active_graph).resolve_wikilinks()
    return {"edges_created": created}
|
|
332
|
+
|
|
333
|
+
|
|
334
|
+
def remove_dangling_edges(graph: Any = None) -> dict:
    """Run ``MaintenanceAgent.remove_dangling_edges`` on the active graph.

    Returns:
        ``{"edges_removed": <value returned by the agent>}``.
    """
    from core import app_state
    from agents.maintenance_agent.agent import MaintenanceAgent

    active_graph = graph or app_state.graph
    removed = MaintenanceAgent(active_graph).remove_dangling_edges()
    return {"edges_removed": removed}
|
|
343
|
+
|
|
344
|
+
|
|
345
|
+
# ---------------------------------------------------------------------------
|
|
346
|
+
# Web / URL tools
|
|
347
|
+
# ---------------------------------------------------------------------------
|
|
348
|
+
|
|
349
|
+
|
|
350
|
+
def fetch_url(url: str, timeout: int = 15) -> dict:
    """Fetch the text content of a URL and return it so the agent can read it.

    Only ``http`` and ``https`` URLs are fetched. The URL comes from the
    agent, and the ``urllib`` fallback would otherwise also open ``file://``
    and other schemes. Uses ``httpx`` if it can be imported, otherwise
    ``urllib``. The returned text is capped at 20000 characters (httpx) or
    20000 bytes before decoding (urllib).

    Args:
        url: Absolute http(s) URL to fetch.
        timeout: Timeout in seconds passed to the HTTP client.

    Returns:
        ``{"url", "status_code", "text"}`` on success, or
        ``{"url", "error"}`` when the scheme is rejected or the request
        fails. This function never raises.
    """
    from urllib.parse import urlsplit

    headers = {"User-Agent": "KnowDoGraph/1.0"}
    try:
        scheme = urlsplit(url).scheme.lower()
        if scheme not in ("http", "https"):
            return {
                "url": url,
                "error": f"Unsupported URL scheme '{scheme}'; only http and https are allowed.",
            }

        # Keep the ImportError handling limited to the import itself so an
        # ImportError raised while performing the request is not mistaken
        # for "httpx is not installed".
        try:
            import httpx
        except ImportError:
            httpx = None

        if httpx is not None:
            with httpx.Client(follow_redirects=True, timeout=timeout) as client:
                resp = client.get(url, headers=headers)
            return {"url": url, "status_code": resp.status_code, "text": resp.text[:20000]}

        import urllib.request

        req = urllib.request.Request(url, headers=headers)
        # Scheme was validated above, so urlopen cannot reach file:// etc.
        with urllib.request.urlopen(req, timeout=timeout) as resp:  # noqa: S310
            body = resp.read(20000).decode("utf-8", errors="replace")
            return {"url": url, "status_code": resp.status, "text": body}
    except Exception as exc:
        # Tool boundary: report every failure to the agent as data.
        return {"url": url, "error": str(exc)}
|
|
369
|
+
|
|
370
|
+
|
|
371
|
+
def web_search(query: str, max_results: int = 5) -> list[dict]:
    """Run a DuckDuckGo text search via ``duckduckgo_search.DDGS``.

    Returns:
        One ``{"title", "url", "snippet"}`` dict per result (missing fields
        become empty strings), or a single-element list ``[{"error": ...}]``
        when anything fails, including the package not being importable.
    """
    try:
        from duckduckgo_search import DDGS

        with DDGS() as client:
            raw_hits = list(client.text(query, max_results=max_results))

        formatted = []
        for hit in raw_hits:
            formatted.append(
                {
                    "title": hit.get("title", ""),
                    "url": hit.get("href", ""),
                    "snippet": hit.get("body", ""),
                }
            )
        return formatted
    except Exception as exc:
        return [{"error": str(exc)}]
|
|
384
|
+
|
|
385
|
+
|
|
386
|
+
# ---------------------------------------------------------------------------
|
|
387
|
+
# Node-discovery / graph-intelligence tools
|
|
388
|
+
# ---------------------------------------------------------------------------
|
|
389
|
+
|
|
390
|
+
|
|
391
|
+
def find_similar_nodes(title: str, limit: int = 8, mode: str = "hybrid", graph: Any = None) -> list[dict]:
    """Search existing nodes using *title* as the query.

    Intended to be called before creating a node, so the caller can decide
    whether to reuse an existing entry, add an alias, or create a new node.
    Delegates to ``RetrievalEngine.search_entries``; *graph* falls back to
    ``app_state.graph``.

    Returns:
        One ``{"id", "slug", "title", "type", "tags", "aliases"}`` dict per
        candidate.
    """
    from core import app_state
    from core.retrieval.retrieval import RetrievalEngine
    from core.storage.database import SessionLocal

    active_graph = graph or app_state.graph
    candidates: list[dict] = []
    with SessionLocal() as session:
        matches = RetrievalEngine(session, active_graph).search_entries(query=title, limit=limit, mode=mode)
        for match in matches:
            candidates.append(
                {
                    "id": match.id,
                    "slug": match.slug,
                    "title": match.title,
                    "type": match.entry_type.value,
                    "tags": match.tags,
                    "aliases": match.aliases,
                }
            )
    return candidates
|
|
417
|
+
|
|
418
|
+
|
|
419
|
+
def get_graph_overview(sample_size: int = 15, graph: Any = None) -> dict:
    """Return a high-level overview of the graph without dumping every node.

    Includes:
    - ``stats``: whatever ``stats()`` reports for the active graph
    - ``type_distribution``: count of entries per entry type
    - ``sample_nodes``: a random sample of entries (to check naming conventions)
    - ``top_connected``: the five nodes with the highest degree

    Use this to orient yourself before deciding how to add or restructure nodes.

    Args:
        sample_size: Number of entries to sample; values below zero are
            treated as zero and values above the number of loaded entries
            are capped.
        graph: In-memory graph; falls back to ``app_state.graph``.

    Note:
        At most 2000 entries are loaded, so the distribution and sample
        cover only those entries on larger graphs.
    """
    import heapq
    import random
    from collections import Counter

    from core import app_state
    from core.retrieval.retrieval import RetrievalEngine
    from core.storage.database import SessionLocal

    g = graph or app_state.graph
    stats = g.stats()

    with SessionLocal() as db:
        engine = RetrievalEngine(db, g)
        all_entries = engine.list_entries(limit=2000)

        type_dist = dict(Counter(e.entry_type.value for e in all_entries))
        # random.sample raises ValueError for a negative k, so clamp both ends.
        sample_count = max(0, min(sample_size, len(all_entries)))
        sample = random.sample(all_entries, sample_count)
        sample_titles = [
            {"id": e.id, "title": e.title, "type": e.entry_type.value, "tags": e.tags}
            for e in sample
        ]
        id_to_entry = {e.id: e for e in all_entries}

    # Top connected nodes (by total degree in the in-memory graph).
    top_nodes: list[dict] = []
    try:
        degree_map = dict(g._g.degree())  # type: ignore[attr-defined]
        top_ids = heapq.nlargest(5, degree_map, key=degree_map.__getitem__)
        top_nodes = [
            {
                "id": nid,
                # Nodes without a loaded entry are reported with a "?" title.
                "title": id_to_entry[nid].title if nid in id_to_entry else "?",
                "degree": degree_map[nid],
            }
            for nid in top_ids
        ]
    except Exception:
        # Deliberate best effort: this relies on the graph's private `_g`
        # attribute, and the overview is still useful without the ranking.
        top_nodes = []

    return {
        "stats": stats,
        "type_distribution": type_dist,
        "sample_nodes": sample_titles,
        "top_connected": top_nodes,
    }
|
|
467
|
+
|
|
468
|
+
|
|
469
|
+
def list_nodes_by_type(entry_type: str, limit: int = 50, graph: Any = None) -> list[dict]:
    """List up to *limit* nodes whose type is *entry_type*.

    Delegates to ``RetrievalEngine.search_entries`` filtered by entry type;
    *graph* falls back to ``app_state.graph``.

    Returns:
        One ``{"id", "slug", "title", "tags", "aliases"}`` dict per node, or
        ``[{"error": ...}]`` when *entry_type* is not a valid ``EntryType``.
    """
    from core import app_state
    from core.retrieval.retrieval import RetrievalEngine
    from core.schemas.entry import EntryType
    from core.storage.database import SessionLocal

    active_graph = graph or app_state.graph
    with SessionLocal() as session:
        searcher = RetrievalEngine(session, active_graph)
        try:
            wanted_type = EntryType(entry_type)
        except ValueError:
            return [{"error": f"Unknown entry_type '{entry_type}'"}]

        nodes = []
        for node in searcher.search_entries(entry_type=wanted_type, limit=limit):
            nodes.append(
                {
                    "id": node.id,
                    "slug": node.slug,
                    "title": node.title,
                    "tags": node.tags,
                    "aliases": node.aliases,
                }
            )
        return nodes
|
|
488
|
+
|
|
489
|
+
|
|
490
|
+
def merge_entries(
    primary_id: str,
    duplicate_id: str,
    merge_aliases: bool = True,
    merge_tags: bool = True,
    graph: Any = None,
) -> dict:
    """Merge *duplicate_id* into *primary_id*.

    Edges that point to or from the duplicate are re-targeted to the
    primary. Edges that directly connect the two entries would become
    self-loops, so they are deleted instead of being left pointing at the
    removed duplicate. The duplicate's aliases (plus its title) and tags are
    optionally merged into the primary, and the duplicate entry is deleted.

    Use this to consolidate redundant nodes identified during review.

    Args:
        primary_id: Identifier of the entry to keep.
        duplicate_id: Identifier of the entry to fold in and delete.
        merge_aliases: Add the duplicate's aliases and title to the
            primary's aliases (order-preserving, de-duplicated).
        merge_tags: Add the duplicate's tags to the primary's tags.
        graph: In-memory graph; falls back to ``app_state.graph``.

    Returns:
        ``{"merged": True, "primary_id", "removed_duplicate_id",
        "edges_retargeted"}`` on success, or ``{"error": ...}`` when either
        identifier does not resolve or both resolve to the same entry.
    """
    from core import app_state
    from core.retrieval.retrieval import RetrievalEngine
    from core.storage.database import SessionLocal
    from core.storage.models import EdgeModel, EntryModel
    from core.storage.repository import EntryRepository

    g = graph or app_state.graph
    with SessionLocal() as db:
        engine = RetrievalEngine(db, g)
        primary = engine.resolve_identifier(primary_id)
        duplicate = engine.resolve_identifier(duplicate_id)
        if primary is None:
            return {"error": f"Primary entry '{primary_id}' not found."}
        if duplicate is None:
            return {"error": f"Duplicate entry '{duplicate_id}' not found."}
        if primary.id == duplicate.id:
            return {"error": "primary_id and duplicate_id refer to the same entry."}

        keep_id, drop_id = primary.id, duplicate.id

        # Re-target edges; drop the ones that would turn into self-loops.
        edges_retargeted = 0
        for edge_model in db.query(EdgeModel).filter(EdgeModel.target_id == drop_id).all():
            if edge_model.source_id == keep_id:
                db.delete(edge_model)
            else:
                edge_model.target_id = keep_id
                edges_retargeted += 1
        for edge_model in db.query(EdgeModel).filter(EdgeModel.source_id == drop_id).all():
            if edge_model.target_id == keep_id:
                db.delete(edge_model)
            else:
                edge_model.source_id = keep_id
                edges_retargeted += 1

        # Merge metadata into primary (dict.fromkeys keeps first-seen order).
        if merge_aliases:
            primary.aliases = list(
                dict.fromkeys(primary.aliases + duplicate.aliases + [duplicate.title])
            )
        if merge_tags:
            primary.tags = list(dict.fromkeys(primary.tags + duplicate.tags))

        EntryRepository(db).update(primary)

        # Delete the duplicate entry model directly.
        dup_model = db.get(EntryModel, drop_id)
        if dup_model:
            db.delete(dup_model)
        db.commit()

    # Refresh in-memory graph.
    # NOTE(review): only the primary node is re-added here; re-targeted
    # edges are not pushed into the in-memory graph. Confirm that the graph
    # is reloaded from the database elsewhere.
    if g is not None:
        g.remove_entry(drop_id)
        with SessionLocal() as db2:
            refreshed = RetrievalEngine(db2, g).get_entry_by_id(keep_id)
            if refreshed:
                g.add_entry(refreshed)

    return {
        "merged": True,
        "primary_id": keep_id,
        "removed_duplicate_id": drop_id,
        "edges_retargeted": edges_retargeted,
    }
|
|
566
|
+
|
|
567
|
+
|
|
568
|
+
# ---------------------------------------------------------------------------
|
|
569
|
+
# Script entry tools
|
|
570
|
+
# ---------------------------------------------------------------------------
|
|
571
|
+
|
|
572
|
+
|
|
573
|
+
def create_script_entry(
    title: str,
    code: str,
    language: str = "python",
    requirements: list[str] | None = None,
    description: str = "",
    tags: list[str] | None = None,
    aliases: list[str] | None = None,
    filename: str | None = None,
    source_provenance: str | None = None,
    graph: Any = None,
) -> dict:
    """DEPRECATED - no longer creates anything.

    The signature is kept so existing callers do not break, but every
    argument is ignored and the function always returns an ``{"error": ...}``
    dict directing the caller to ``add_script_to_entry``.
    """
    deprecation_notice = (
        "create_script_entry is deprecated. Use add_script_to_entry to attach "
        "scripts directly to the procedure or capability node they belong to. "
        "This keeps script code out of the content field that agents read by default."
    )
    return {"error": deprecation_notice}
|
|
598
|
+
|
|
599
|
+
|
|
600
|
+
def add_script_to_entry(
    entry_id: str,
    code: str,
    filename: str | None = None,
    language: str = "python",
    requirements: list[str] | None = None,
    description: str = "",
    graph: Any = None,
) -> dict:
    """Attach an executable script directly to an existing entry.

    The script is stored as a ``NodeAsset`` in the entry's ``assets`` list
    under the ``ASSET_FOLDER_SCRIPTS`` folder, separate from the
    human-readable ``content`` field. An existing script asset with the same
    filename is replaced; otherwise the script is added alongside the others.

    Args:
        entry_id: Identifier of the entry to attach the script to.
        code: Script source, stored as the asset content.
        filename: Asset filename; defaults to the slug of the entry title
            plus the extension for *language* (``script`` is used when the
            slug is empty).
        language: Language name, stored on the asset and used to pick the
            default extension.
        requirements: Package dependencies (defaults to an empty list).
        description: Short description stored on the asset.
        graph: In-memory graph; falls back to ``app_state.graph``.

    Returns:
        ``{"id", "title", "scripts"}`` where ``scripts`` summarises each item
        of the updated entry's ``scripts`` attribute, or ``{"error": ...}``
        when the entry is not found or the update fails.
    """
    from core import app_state
    from core.retrieval.retrieval import RetrievalEngine
    from core.schemas.entry import NodeAsset, ASSET_FOLDER_SCRIPTS
    from core.storage.database import SessionLocal
    from core.storage.repository import EntryRepository

    g = graph or app_state.graph
    with SessionLocal() as db:
        engine = RetrievalEngine(db, g)
        entry = engine.resolve_identifier(entry_id)
        if entry is None:
            return {"error": f"Entry '{entry_id}' not found."}

        # A title made only of punctuation slugs to "", which would give a
        # bare extension such as ".py"; use a neutral stem instead.
        resolved_filename = filename or ((_slug(entry.title) or "script") + _ext_for_language(language))
        # Replace existing asset at scripts/<filename> if present
        entry.assets = [
            a for a in entry.assets
            if not (a.folder == ASSET_FOLDER_SCRIPTS and a.filename == resolved_filename)
        ]
        entry.assets.append(NodeAsset(
            folder=ASSET_FOLDER_SCRIPTS,
            filename=resolved_filename,
            kind="file",
            content=code,
            language=language,
            requirements=requirements or [],
            description=description,
        ))
        updated = EntryRepository(db).update(entry)

        # update() can return a falsy result (update_entry guards it too);
        # report that instead of failing on `updated.id` below.
        if not updated:
            return {"error": "Update failed."}

        if g is not None:
            g.add_entry(updated)

        return {
            "id": updated.id,
            "title": updated.title,
            "scripts": [
                {"filename": s.filename, "language": s.language, "requirements": s.requirements}
                for s in updated.scripts
            ],
        }
|
|
657
|
+
|
|
658
|
+
|
|
659
|
+
def _ext_for_language(language: str) -> str:
|
|
660
|
+
"""Map language name to a file extension."""
|
|
661
|
+
mapping = {
|
|
662
|
+
"python": ".py",
|
|
663
|
+
"py": ".py",
|
|
664
|
+
"bash": ".sh",
|
|
665
|
+
"shell": ".sh",
|
|
666
|
+
"sh": ".sh",
|
|
667
|
+
"julia": ".jl",
|
|
668
|
+
"javascript": ".js",
|
|
669
|
+
"js": ".js",
|
|
670
|
+
"typescript": ".ts",
|
|
671
|
+
"ts": ".ts",
|
|
672
|
+
"r": ".r",
|
|
673
|
+
"matlab": ".m",
|
|
674
|
+
"ruby": ".rb",
|
|
675
|
+
"rust": ".rs",
|
|
676
|
+
"go": ".go",
|
|
677
|
+
"c": ".c",
|
|
678
|
+
"cpp": ".cpp",
|
|
679
|
+
"c++": ".cpp",
|
|
680
|
+
}
|
|
681
|
+
return mapping.get(language.lower(), ".txt")
|
|
682
|
+
|
|
683
|
+
|
|
684
|
+
def add_asset_to_entry(
    entry_id: str,
    folder: str,
    filename: str,
    content: str = "",
    kind: str = "file",
    language: str | None = None,
    mime_type: str | None = None,
    description: str = "",
    requirements: list[str] | None = None,
    graph: Any = None,
) -> dict:
    """Attach a generic *asset* to an entry inside a named folder.

    Each entry behaves like a small folder containing typed assets. Conventional
    folders are ``scripts``, ``references``, ``docs``, ``examples``, ``data``,
    ``notes``; any other folder name is allowed. An asset already stored under
    the same folder and filename is replaced.

    Parameters
    ----------
    entry_id:
        Identifier passed to ``RetrievalEngine.resolve_identifier``.
    folder:
        Sub-folder name (e.g. ``"scripts"``, ``"references"``, ``"docs"``).
    filename:
        File name within the folder (may contain a sub-path like
        ``"examples/relax.py"``).
    content:
        File body for ``kind="file"`` / ``"text"``, or the URL for
        ``kind="link"``.
    kind:
        One of ``"file"`` (binary/code download), ``"text"`` (inline markdown
        / notes), or ``"link"`` (external reference; ``content`` is a URL).
    language:
        Programming language hint for syntax / mime detection (``"python"`` …).
    mime_type:
        Optional MIME type, forwarded to ``NodeAsset`` unchanged.
    description:
        Short human-readable description shown in the UI.
    requirements:
        Package dependencies (for runnable scripts).
    graph:
        Graph to refresh after the update; falls back to ``app_state.graph``.

    Returns
    -------
    dict
        ``id``, ``title``, ``asset`` and ``total_assets`` on success, or
        ``{"error": message}`` when the entry cannot be resolved, ``NodeAsset``
        rejects the arguments with ``ValueError``, or the repository returns
        no updated entry.

    The asset becomes addressable as
    ``GET /entries/{entry_id}/assets/{folder}/{filename}``.
    """
    from core import app_state
    from core.retrieval.retrieval import RetrievalEngine
    from core.schemas.entry import NodeAsset
    from core.storage.database import SessionLocal
    from core.storage.repository import EntryRepository

    g = graph or app_state.graph
    with SessionLocal() as db:
        engine = RetrievalEngine(db, g)
        entry = engine.resolve_identifier(entry_id)
        if entry is None:
            return {"error": f"Entry '{entry_id}' not found."}
        try:
            asset = NodeAsset(
                folder=folder,
                filename=filename,
                kind=kind,
                content=content,
                language=language,
                mime_type=mime_type,
                description=description,
                requirements=requirements or [],
            )
        except ValueError as exc:
            return {"error": str(exc)}
        # Replace existing asset at same folder/filename. The comparison uses
        # the values stored on the constructed asset rather than the raw args.
        entry.assets = [
            a for a in entry.assets
            if not (a.folder == asset.folder and a.filename == asset.filename)
        ]
        entry.assets.append(asset)
        updated = EntryRepository(db).update(entry)

        # The repository result is treated as optional (the graph refresh was
        # already guarded on it); report the failure instead of crashing on
        # ``updated.id`` below.
        if not updated:
            return {"error": f"Entry '{entry_id}' could not be updated."}

        if g is not None:
            g.add_entry(updated)

        return {
            "id": updated.id,
            "title": updated.title,
            "asset": {
                "folder": asset.folder,
                "filename": asset.filename,
                "kind": asset.kind,
                "download_url": f"/entries/{updated.id}/assets/{asset.folder}/{asset.filename}",
            },
            "total_assets": len(updated.assets),
        }
|
|
772
|
+
|
|
773
|
+
|
|
774
|
+
def list_assets(identifier: str, folder: str | None = None, graph: Any = None) -> dict:
    """Return metadata for the assets of one entry, bucketed by folder.

    When ``folder`` is given, only assets whose folder equals the lower-cased
    ``folder`` are included (e.g. ``"references"``). Asset bodies are never
    returned; each item carries a ``download_url`` and the content length as
    ``size`` instead.

    Returns ``{"error": ...}`` if ``identifier`` cannot be resolved, otherwise
    a dict with ``id``, ``slug``, ``title``, ``folders`` and ``total``.
    """
    from core import app_state
    from core.retrieval.retrieval import RetrievalEngine
    from core.storage.database import SessionLocal

    active_graph = graph or app_state.graph
    with SessionLocal() as session:
        entry = RetrievalEngine(session, active_graph).resolve_identifier(identifier)
        if entry is None:
            return {"error": f"Entry '{identifier}' not found."}

        wanted = folder.lower() if folder else None
        by_folder: dict[str, list[dict]] = {}
        for asset in entry.assets:
            if wanted is not None and asset.folder != wanted:
                continue
            summary = {
                "filename": asset.filename,
                "kind": asset.kind,
                "language": asset.language,
                "description": asset.description,
                "size": len(asset.content or ""),
                "download_url": f"/entries/{entry.id}/assets/{asset.folder}/{asset.filename}",
            }
            if asset.folder not in by_folder:
                by_folder[asset.folder] = []
            by_folder[asset.folder].append(summary)

        asset_count = 0
        for items in by_folder.values():
            asset_count += len(items)

        return {
            "id": entry.id,
            "slug": entry.slug,
            "title": entry.title,
            "folders": by_folder,
            "total": asset_count,
        }
|
|
810
|
+
|
|
811
|
+
|
|
812
|
+
def get_script(identifier: str, filename: str | None = None, graph: Any = None) -> dict:
    """Describe the scripts attached to an entry without returning their code.

    With ``filename`` set, only the script of that exact name is reported;
    without it, every attached script is listed. Source code is fetched
    separately through each item's ``download_url``.

    Returns ``{"error": ...}`` when the entry is unknown, has no scripts, or
    has no script called ``filename``.
    """
    from core import app_state
    from core.retrieval.retrieval import RetrievalEngine
    from core.storage.database import SessionLocal

    active_graph = graph or app_state.graph
    with SessionLocal() as session:
        entry = RetrievalEngine(session, active_graph).resolve_identifier(identifier)
        if entry is None:
            return {"error": f"Entry '{identifier}' not found."}
        if not entry.scripts:
            return {"error": f"Entry '{identifier}' has no attached scripts."}

        selected = entry.scripts
        if filename:
            selected = [script for script in selected if script.filename == filename]
            if not selected:
                return {"error": f"No script named '{filename}' on entry '{identifier}'."}

        script_rows = []
        for script in selected:
            script_rows.append({
                "filename": script.filename,
                "language": script.language,
                "requirements": script.requirements,
                "description": script.description,
                "download_url": f"/entries/{entry.id}/scripts/{script.filename}",
            })

        return {
            "id": entry.id,
            "slug": entry.slug,
            "title": entry.title,
            "scripts": script_rows,
        }
|
|
851
|
+
|
|
852
|
+
|
|
853
|
+
def list_scripts(limit: int = 50, graph: Any = None) -> list[dict]:
    """Summarise entries that carry at least one directly attached script.

    The engine is asked for ``max(limit * 5, 500)`` entries, those without
    scripts are dropped, and the first ``limit`` survivors are returned with
    each script's filename, language, requirements and download URL.
    """
    from core import app_state
    from core.retrieval.retrieval import RetrievalEngine
    from core.storage.database import SessionLocal

    active_graph = graph or app_state.graph
    with SessionLocal() as session:
        pool = RetrievalEngine(session, active_graph).list_entries(limit=max(limit * 5, 500))
        scripted = [item for item in pool if item.scripts]

        summaries = []
        for item in scripted[:limit]:
            script_rows = []
            for script in item.scripts:
                script_rows.append({
                    "filename": script.filename,
                    "language": script.language,
                    "requirements": script.requirements,
                    "download_url": f"/entries/{item.id}/scripts/{script.filename}",
                })
            summaries.append({
                "id": item.id,
                "slug": item.slug,
                "title": item.title,
                "tags": item.tags,
                "scripts": script_rows,
            })
        return summaries
|
|
882
|
+
|
|
883
|
+
|
|
884
|
+
# ---------------------------------------------------------------------------
|
|
885
|
+
# Material interface tools
|
|
886
|
+
# ---------------------------------------------------------------------------
|
|
887
|
+
|
|
888
|
+
|
|
889
|
+
def build_material_interface_workflow(
|
|
890
|
+
material_a: str,
|
|
891
|
+
material_b: str,
|
|
892
|
+
method: str = "slab_stacking",
|
|
893
|
+
description: str = "",
|
|
894
|
+
tags: list[str] | None = None,
|
|
895
|
+
graph: Any = None,
|
|
896
|
+
) -> dict:
|
|
897
|
+
"""DEPRECATED — produces overly specific per-material-pair nodes.
|
|
898
|
+
|
|
899
|
+
Use the generic ``Material interface construction`` capability node plus
|
|
900
|
+
``Slab-stacking procedure`` (or similar) and add the specific pair as either
|
|
901
|
+
(a) a `data` example linked via ``provenance``, or (b) a parameterised note
|
|
902
|
+
in the procedure's content. Returns an error directing the agent to do so.
|
|
903
|
+
"""
|
|
904
|
+
return {
|
|
905
|
+
"error": (
|
|
906
|
+
"build_material_interface_workflow is deprecated because it creates "
|
|
907
|
+
"one node per material pair. Instead: ensure a generic "
|
|
908
|
+
"'Material interface construction' capability and a "
|
|
909
|
+
"'Slab-stacking procedure' node exist (create them if missing), "
|
|
910
|
+
f"then add a single data entry for the {material_a}/{material_b} "
|
|
911
|
+
"instance with provenance edges to those generic nodes."
|
|
912
|
+
),
|
|
913
|
+
"suggested_generic_titles": [
|
|
914
|
+
"Material interface construction",
|
|
915
|
+
f"{method.replace('_', ' ').title()} procedure",
|
|
916
|
+
],
|
|
917
|
+
}
|
|
918
|
+
|
|
919
|
+
|
|
920
|
+
# ---------------------------------------------------------------------------
|
|
921
|
+
# Verification feedback tools
|
|
922
|
+
# ---------------------------------------------------------------------------
|
|
923
|
+
|
|
924
|
+
|
|
925
|
+
def submit_feedback(
    entry_id: str,
    verdict: str,
    note: str = "",
    evidence: str = "",
    agent_id: str = "unknown",
    graph: Any = None,
) -> dict:
    """Record correctness feedback on a node and update its verification_status.

    Parameters
    ----------
    entry_id:
        Identifier passed to ``RetrievalEngine.resolve_identifier``.
    verdict:
        One of ``works`` (→ self_tested), ``peer_works`` (→ peer_reviewed),
        ``bugged`` (→ bugged), ``deprecated`` (→ deprecated), ``unclear``
        (records the note without changing status). Matching ignores case
        and surrounding whitespace; the normalised form is what gets logged.
    note, evidence:
        Free-text describing the test/observation. Stored in the entry's
        ``metadata.feedback_log`` for the maintenance agent to consult.
    agent_id:
        Identifier of the agent or human that submitted the feedback.
    graph:
        Graph handed to the retrieval engine; falls back to
        ``app_state.graph``.

    Returns
    -------
    dict
        ``entry_id``, ``verification_status`` (the status value, or ``None``
        when the entry has no status set) and ``feedback_count``; or
        ``{"error": message}`` for an unknown verdict or entry.
    """
    from datetime import datetime, timezone
    from core import app_state
    from core.retrieval.retrieval import RetrievalEngine
    from core.schemas.entry import VerificationStatus
    from core.storage.database import SessionLocal
    from core.storage.repository import EntryRepository

    verdict_to_status = {
        "works": VerificationStatus.self_tested,
        "peer_works": VerificationStatus.peer_reviewed,
        "bugged": VerificationStatus.bugged,
        "deprecated": VerificationStatus.deprecated,
        "unclear": None,
    }
    # Tool callers are free-form; accept "Works" / " bugged " as well.
    if isinstance(verdict, str):
        verdict = verdict.strip().lower()
    if verdict not in verdict_to_status:
        return {"error": f"verdict must be one of {sorted(verdict_to_status)}"}

    g = graph or app_state.graph
    with SessionLocal() as db:
        engine = RetrievalEngine(db, g)
        entry = engine.resolve_identifier(entry_id)
        if entry is None:
            return {"error": f"Entry '{entry_id}' not found."}

        entry.metadata.feedback_log.append({
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "agent_id": agent_id,
            "verdict": verdict,
            "note": note,
            "evidence": evidence,
        })
        new_status = verdict_to_status[verdict]
        if new_status is not None:
            entry.metadata.verification_status = new_status
        EntryRepository(db).update(entry)

        # An "unclear" verdict leaves the status untouched, so it may still be
        # unset here; report None rather than failing on ``None.value``.
        current_status = entry.metadata.verification_status
        return {
            "entry_id": entry.id,
            "verification_status": current_status.value if current_status is not None else None,
            "feedback_count": len(entry.metadata.feedback_log),
        }
|
|
988
|
+
|
|
989
|
+
|
|
990
|
+
def list_by_verification(
    status: str = "unverified",
    limit: int = 50,
    graph: Any = None,
) -> list[dict]:
    """Return up to ``limit`` nodes whose verification status value equals ``status``.

    The engine is asked for ``max(limit * 5, 500)`` entries; entries without a
    verification status never match. Each result carries id, slug, title,
    entry type, status value and the size of the feedback log.
    """
    from core import app_state
    from core.retrieval.retrieval import RetrievalEngine
    from core.storage.database import SessionLocal

    active_graph = graph or app_state.graph
    with SessionLocal() as session:
        pool = RetrievalEngine(session, active_graph).list_entries(limit=max(limit * 5, 500))

        hits = []
        for item in pool:
            current = item.metadata.verification_status
            if current and current.value == status:
                hits.append(item)

        summaries = []
        for item in hits[:limit]:
            summaries.append({
                "id": item.id,
                "slug": item.slug,
                "title": item.title,
                "type": item.entry_type.value,
                "verification_status": item.metadata.verification_status.value,
                "feedback_count": len(item.metadata.feedback_log),
            })
        return summaries
|
|
1019
|
+
|
|
1020
|
+
|
|
1021
|
+
def list_needs_generalization(limit: int = 50, graph: Any = None) -> list[dict]:
    """Return up to ``limit`` nodes whose ``metadata.needs_generalization`` flag is set.

    The engine is asked for ``max(limit * 5, 500)`` entries, which are then
    filtered on the flag. Each result carries id, slug, title and entry type.
    """
    from core import app_state
    from core.retrieval.retrieval import RetrievalEngine
    from core.storage.database import SessionLocal

    active_graph = graph or app_state.graph
    with SessionLocal() as session:
        pool = RetrievalEngine(session, active_graph).list_entries(limit=max(limit * 5, 500))
        flagged = [item for item in pool if item.metadata.needs_generalization]

        summaries = []
        for item in flagged[:limit]:
            summaries.append({
                "id": item.id,
                "slug": item.slug,
                "title": item.title,
                "type": item.entry_type.value,
            })
        return summaries
|
|
1036
|
+
|
|
1037
|
+
|
|
1038
|
+
def create_material_entry(
    formula: str,
    crystal_system: str = "",
    space_group: str = "",
    description: str = "",
    tags: list[str] | None = None,
    source_provenance: str | None = None,
    graph: Any = None,
) -> dict:
    """Create a structured *material* entry for a crystal or compound.

    Stores formula, crystal system, space group, and description in a standardised
    content template so the entry is immediately useful for downstream interface
    workflows and agent reasoning.

    Parameters
    ----------
    formula:
        Chemical formula; used as the entry title and content heading.
    crystal_system, space_group, description:
        Optional details; each is written into the content only when non-empty.
    tags:
        Extra tags appended after the fixed ``material`` and ``crystal`` tags;
        duplicates are dropped while keeping first-seen order.
    source_provenance:
        Stored on the entry metadata unchanged.
    graph:
        Graph to register the new entry in; falls back to ``app_state.graph``
        like the other tools in this module.

    Returns
    -------
    dict
        ``id``, ``slug`` and ``title`` of the saved entry, plus ``"type": "data"``.
    """
    from core import app_state
    from core.schemas.entry import Entry, EntryMetadata, EntryType
    from core.storage.database import SessionLocal
    from core.storage.repository import EntryRepository

    content_lines = [f"## {formula}\n"]
    if crystal_system:
        content_lines.append(f"- **Crystal system**: {crystal_system}")
    if space_group:
        content_lines.append(f"- **Space group**: {space_group}")
    if description:
        content_lines.append(f"\n{description}")
    content_lines.append(
        "\n### Usage\n"
        f"This material can be used as a component in [[{formula}/X Interface]] workflows."
    )

    entry = Entry(
        title=formula,
        content="\n".join(content_lines),
        entry_type=EntryType.data,
        # dict.fromkeys de-duplicates while preserving order.
        tags=list(dict.fromkeys(["material", "crystal"] + (tags or []))),
        metadata=EntryMetadata(source_provenance=source_provenance),
    )
    # Without this fallback an entry created with no explicit ``graph`` was
    # persisted but never added to the shared application graph.
    g = graph or app_state.graph
    with SessionLocal() as db:
        saved = EntryRepository(db).create(entry)
        if g is not None:
            g.add_entry(saved)
        return {"id": saved.id, "slug": saved.slug, "title": saved.title, "type": "data"}
|
|
1081
|
+
|
|
1082
|
+
|
|
1083
|
+
def attach_script_to_entry(
    entry_id: str,
    script_id: str,
    relation: str = "implements",
    graph: Any = None,
) -> dict:
    """DEPRECATED — always returns an error; no edge or node is created.

    This tool used to link standalone script nodes to entries via edges.
    Call ``add_script_to_entry`` instead, which stores the script directly on
    the target node. All arguments are accepted but ignored.
    """
    message = (
        "attach_script_to_entry is deprecated. Use add_script_to_entry to attach "
        + "scripts directly to the target entry instead of creating a separate script node."
    )
    return {"error": message}
|
|
1099
|
+
|
|
1100
|
+
|
|
1101
|
+
# ---------------------------------------------------------------------------
|
|
1102
|
+
# Hierarchical-memory tools (L3 heuristics / L4 constraints)
|
|
1103
|
+
# ---------------------------------------------------------------------------
|
|
1104
|
+
|
|
1105
|
+
|
|
1106
|
+
def _create_sidecar(
    *,
    skill_id: str,
    title: str,
    content: str,
    entry_type_value: str,
    skill_level_value: str,
    edge_relation_value: str,
    tags: list[str] | None,
    applicability: dict | None,
    update_failure_modes: bool,
    graph: Any,
) -> dict:
    """Shared helper for create_heuristic / create_constraint.

    Resolves *skill_id*, creates a new entry of the requested type and skill
    level, and links it to the skill with an edge pointing from the new entry
    to the skill. The new entry's provenance is recorded as
    ``sidecar_of:<skill slug>``.

    Parameters
    ----------
    skill_id:
        Identifier of the skill, passed to ``RetrievalEngine.resolve_identifier``.
    title, content, tags:
        Fields of the new entry (``tags`` defaults to an empty list).
    entry_type_value, skill_level_value, edge_relation_value:
        Raw values converted with ``EntryType``, ``SkillLevel`` and
        ``EdgeRelation`` respectively.
    applicability:
        Stored on the new entry's metadata (defaults to an empty dict).
    update_failure_modes:
        When true and the entry type is ``"constraint"``, the new slug is
        appended to the skill's ``metadata.failure_modes`` if not already there.
    graph:
        Graph to update; falls back to ``app_state.graph``.

    Returns
    -------
    dict
        ``id``, ``slug``, ``title``, ``level``, ``attached_to`` and
        ``relation``; or ``{"error": message}`` when the skill is not found.
    """
    from core import app_state
    from core.retrieval.retrieval import RetrievalEngine
    from core.schemas.edge import Edge, EdgeRelation
    from core.schemas.entry import Entry, EntryMetadata, EntryType, SkillLevel
    from core.storage.database import SessionLocal
    from core.storage.repository import EdgeRepository, EntryRepository

    g = graph or app_state.graph
    with SessionLocal() as db:
        engine = RetrievalEngine(db, g)
        skill_entry = engine.resolve_identifier(skill_id)
        if skill_entry is None:
            return {"error": f"Skill '{skill_id}' not found."}

        meta = EntryMetadata(
            skill_level=SkillLevel(skill_level_value),
            applicability=applicability or {},
            source_provenance=f"sidecar_of:{skill_entry.slug}",
        )
        entry = Entry(
            title=title,
            content=content,
            entry_type=EntryType(entry_type_value),
            tags=tags or [],
            metadata=meta,
        )
        saved = EntryRepository(db).create(entry)
        edge = Edge(
            source_id=saved.id,
            target_id=skill_entry.id,
            relation=EdgeRelation(edge_relation_value),
        )
        # Keep the repository's result so the graph gets the edge as stored,
        # matching how decompose_capability registers its edge.
        saved_edge = EdgeRepository(db).create(edge)

        if update_failure_modes and entry_type_value == "constraint":
            if saved.slug not in skill_entry.metadata.failure_modes:
                skill_entry.metadata.failure_modes.append(saved.slug)
                EntryRepository(db).update(skill_entry)

        if g is not None:
            g.add_entry(saved)
            g.add_edge(saved_edge)
        return {
            "id": saved.id,
            "slug": saved.slug,
            "title": saved.title,
            "level": skill_level_value,
            "attached_to": skill_entry.slug,
            "relation": edge_relation_value,
        }
|
|
1170
|
+
|
|
1171
|
+
|
|
1172
|
+
def create_heuristic(
    skill: str,
    title: str,
    content: str,
    tags: list[str] | None = None,
    domain: str | None = None,
    confidence: float | None = None,
    papers: list[str] | None = None,
    graph: Any = None,
) -> dict:
    """Add an L3 heuristic node and attach it to an L1/L2 *skill*.

    A heuristic is conditional, empirical guidance ("cooling rate strongly
    affects sp2/sp3 ratio") rather than a universal truth. Prefer this tool
    over burying such guidance in a capability's content.

    ``domain``, ``confidence`` and ``papers`` are collected into the node's
    applicability dict, each only when provided. The node is linked to
    *skill* through a ``heuristic_for`` edge; the skill's failure modes are
    not touched.
    """
    scope: dict = {"domain": domain} if domain else {}
    if confidence is not None:
        scope["confidence"] = float(confidence)
    if papers:
        scope["papers"] = list(papers)

    sidecar_args = {
        "skill_id": skill,
        "title": title,
        "content": content,
        "entry_type_value": "heuristic",
        "skill_level_value": "L3",
        "edge_relation_value": "heuristic_for",
        "tags": tags,
        "applicability": scope,
        "update_failure_modes": False,
        "graph": graph,
    }
    return _create_sidecar(**sidecar_args)
|
|
1209
|
+
|
|
1210
|
+
|
|
1211
|
+
def create_constraint(
    skill: str,
    title: str,
    content: str,
    tags: list[str] | None = None,
    domain: str | None = None,
    severity: str | None = None,
    papers: list[str] | None = None,
    graph: Any = None,
) -> dict:
    """Add an L4 constraint / failure-mode node and attach it to an L1/L2 *skill*.

    A constraint captures a known limitation, instability region or failure
    pattern ("unsuitable for bond-breaking processes").

    ``domain``, ``severity`` and ``papers`` are collected into the node's
    applicability dict, each only when provided. The node is linked to
    *skill* through a ``constraint_on`` edge, and the helper is asked to
    append the new slug to ``skill.metadata.failure_modes`` for quick planner
    access.
    """
    scope: dict = {"domain": domain} if domain else {}
    if severity:
        scope["severity"] = severity
    if papers:
        scope["papers"] = list(papers)

    sidecar_args = {
        "skill_id": skill,
        "title": title,
        "content": content,
        "entry_type_value": "constraint",
        "skill_level_value": "L4",
        "edge_relation_value": "constraint_on",
        "tags": tags,
        "applicability": scope,
        "update_failure_modes": True,
        "graph": graph,
    }
    return _create_sidecar(**sidecar_args)
|
|
1247
|
+
|
|
1248
|
+
|
|
1249
|
+
def decompose_capability(
    capability: str,
    procedure: str,
    graph: Any = None,
) -> dict:
    """Link an L1 *capability* to an L2 *procedure* with a ``decomposes_to`` edge.

    Each argument is the id, slug or alias of an existing node. The edge runs
    from the capability to the procedure and records that the procedure is
    one executable decomposition of the capability; a capability may have
    several.

    Returns ``{"error": ...}`` if either node cannot be resolved (the
    capability is reported first), otherwise the new edge id, both slugs and
    the relation name.
    """
    from core import app_state
    from core.retrieval.retrieval import RetrievalEngine
    from core.schemas.edge import Edge, EdgeRelation
    from core.storage.database import SessionLocal
    from core.storage.repository import EdgeRepository

    active_graph = graph or app_state.graph
    with SessionLocal() as session:
        resolver = RetrievalEngine(session, active_graph)
        cap_entry = resolver.resolve_identifier(capability)
        proc_entry = resolver.resolve_identifier(procedure)

        lookups = (
            ("Capability", capability, cap_entry),
            ("Procedure", procedure, proc_entry),
        )
        for label, raw_identifier, found in lookups:
            if found is None:
                return {"error": f"{label} '{raw_identifier}' not found."}

        new_edge = Edge(
            source_id=cap_entry.id,
            target_id=proc_entry.id,
            relation=EdgeRelation.decomposes_to,
        )
        persisted = EdgeRepository(session).create(new_edge)
        if active_graph is not None:
            active_graph.add_edge(persisted)

        outcome = {"edge_id": persisted.id}
        outcome["capability"] = cap_entry.slug
        outcome["procedure"] = proc_entry.slug
        outcome["relation"] = "decomposes_to"
        return outcome
|
|
1285
|
+
|
|
1286
|
+
|
|
1287
|
+
def retrieve_plan(goal: str, k: int = 5, include_l2: bool = True, graph: Any = None) -> list[dict]:
    """Stage-1 retrieval: planner-level skills (L1, optionally L2) for *goal*.

    Delegates to ``ProgressiveRetriever.plan`` and returns a light summary
    (id, slug, title, entry type, tags) per result. Heuristics and
    constraints are not part of this stage; once a candidate is chosen, get
    them via ``retrieve_heuristics`` / ``retrieve_constraints``.
    """
    from core import app_state
    from core.retrieval.progressive import ProgressiveRetriever
    from core.storage.database import SessionLocal

    active_graph = graph or app_state.graph
    with SessionLocal() as session:
        retriever = ProgressiveRetriever(session, active_graph)
        hits = retriever.plan(goal=goal, k=k, include_l2=include_l2)

        summaries = []
        for hit in hits:
            summaries.append({
                "id": hit.id,
                "slug": hit.slug,
                "title": hit.title,
                "entry_type": hit.entry_type.value,
                "tags": hit.tags,
            })
        return summaries
|
|
1312
|
+
|
|
1313
|
+
|
|
1314
|
+
def retrieve_heuristics(skill: str, k: int = 5, graph: Any = None) -> list[dict]:
    """Stage-2 retrieval: the L3 heuristics attached to *skill*.

    Delegates to ``ProgressiveRetriever.heuristics_for`` and returns id, slug,
    title, full content and applicability metadata for each result.
    """
    from core import app_state
    from core.retrieval.progressive import ProgressiveRetriever
    from core.storage.database import SessionLocal

    active_graph = graph or app_state.graph
    with SessionLocal() as session:
        retriever = ProgressiveRetriever(session, active_graph)
        hits = retriever.heuristics_for(skill, k=k)

        summaries = []
        for hit in hits:
            summaries.append({
                "id": hit.id,
                "slug": hit.slug,
                "title": hit.title,
                "content": hit.content,
                "applicability": hit.metadata.applicability,
            })
        return summaries
|
|
1334
|
+
|
|
1335
|
+
|
|
1336
|
+
def retrieve_constraints(skill: str, k: int = 5, graph: Any = None) -> list[dict]:
    """Stage-3 retrieval: the L4 constraints / failure modes recorded for *skill*.

    Delegates to ``ProgressiveRetriever.constraints_for`` and returns id,
    slug, title, full content and applicability metadata for each result.
    """
    from core import app_state
    from core.retrieval.progressive import ProgressiveRetriever
    from core.storage.database import SessionLocal

    active_graph = graph or app_state.graph
    with SessionLocal() as session:
        retriever = ProgressiveRetriever(session, active_graph)
        hits = retriever.constraints_for(skill, k=k)

        summaries = []
        for hit in hits:
            summaries.append({
                "id": hit.id,
                "slug": hit.slug,
                "title": hit.title,
                "content": hit.content,
                "applicability": hit.metadata.applicability,
            })
        return summaries
|
|
1356
|
+
|
|
1357
|
+
|
|
1358
|
+
# ---------------------------------------------------------------------------
|
|
1359
|
+
# OpenAI tool schema definitions
|
|
1360
|
+
# ---------------------------------------------------------------------------
|
|
1361
|
+
|
|
1362
|
+
# Closed vocabularies that several schemas repeat.  They are kept as tuples
# and copied with ``list(...)`` at each use so that no two schemas ever share
# one mutable list object.
_ENTRY_TYPE_NAMES = (
    "capability", "procedure", "workflow", "tool", "repository",
    "environment", "dependency", "data", "analytical", "memory",
    "heuristic", "constraint", "generic",
)
_SEARCH_MODE_NAMES = ("hybrid", "semantic", "keyword")


def _string_list(description=None) -> dict:
    """Return the JSON-schema fragment for an array of strings."""
    fragment = {"type": "array", "items": {"type": "string"}}
    if description is not None:
        fragment["description"] = description
    return fragment


def _tool_schema(name: str, description: str, properties=None, required=None) -> dict:
    """Wrap a parameter description in the ``{"type": "function", ...}`` envelope.

    ``required`` is only emitted when given, so tools without mandatory
    arguments carry no ``required`` key at all.
    """
    parameters = {
        "type": "object",
        "properties": {} if properties is None else properties,
    }
    if required is not None:
        parameters["required"] = list(required)
    return {
        "type": "function",
        "function": {
            "name": name,
            "description": description,
            "parameters": parameters,
        },
    }


# One function-tool schema per tool exposed to the model.
TOOL_SCHEMAS: list[dict] = [
    # ------------------------------------------------------------------ #
    # Entry CRUD and search                                               #
    # ------------------------------------------------------------------ #
    _tool_schema(
        name="create_entry",
        description="Create a new knowledge entry (node) in the graph.",
        properties={
            "title": {"type": "string", "description": "Entry title"},
            "content": {"type": "string", "description": "Entry body (wiki text, markdown)"},
            "entry_type": {
                "type": "string",
                "enum": list(_ENTRY_TYPE_NAMES),
                "description": "Semantic type of this entry",
            },
            "tags": _string_list("List of tags"),
            "aliases": _string_list("Alternative names / synonyms for this entry"),
            "source_provenance": {"type": "string", "description": "URL or path this entry was sourced from"},
        },
        required=["title"],
    ),
    _tool_schema(
        name="update_entry",
        description="Update fields on an existing entry by its ID or slug.",
        properties={
            "entry_id": {"type": "string", "description": "Entry ID or slug"},
            "title": {"type": "string"},
            "content": {"type": "string"},
            "entry_type": {
                "type": "string",
                "enum": list(_ENTRY_TYPE_NAMES),
            },
            "tags": _string_list(),
            "aliases": _string_list("Alternative names / synonyms"),
        },
        required=["entry_id"],
    ),
    _tool_schema(
        name="delete_entry",
        description="Delete an entry (node) and all its edges by ID.",
        properties={
            "entry_id": {"type": "string", "description": "Entry ID"},
        },
        required=["entry_id"],
    ),
    _tool_schema(
        name="get_entry",
        description="Retrieve full details of a single entry by ID or slug.",
        properties={
            "identifier": {"type": "string", "description": "Entry ID or slug"},
        },
        required=["identifier"],
    ),
    _tool_schema(
        name="search_entries",
        description=(
            "Search for entries using hybrid semantic + keyword retrieval. "
            "The default 'hybrid' mode fuses embedding-based vector similarity (ANN) "
            "with keyword scoring via Reciprocal Rank Fusion, then re-ranks by "
            "verification trust and usage count. "
            "Use 'semantic' when you want conceptually/thematically similar results "
            "even if the exact words differ (e.g. paraphrases, synonyms, related domains). "
            "Use 'keyword' for exact title, acronym, or tag lookups. "
            "Strategy tip: if the first search misses, retry with a different mode or "
            "a rephrased / more general query."
        ),
        properties={
            "query": {"type": "string"},
            "limit": {"type": "integer", "default": 10},
            "mode": {
                "type": "string",
                "enum": list(_SEARCH_MODE_NAMES),
                "default": "hybrid",
                "description": (
                    "hybrid: keyword + embedding ANN fused (default). "
                    "semantic: embedding-only, best for conceptual similarity. "
                    "keyword: exact text match, best for known titles or acronyms."
                ),
            },
        },
        required=["query"],
    ),
    _tool_schema(
        name="list_entries",
        description="List entries in the graph (returns id, slug, title, type).",
        properties={
            "limit": {"type": "integer", "default": 20},
        },
    ),
    # ------------------------------------------------------------------ #
    # Edges and graph maintenance                                         #
    # ------------------------------------------------------------------ #
    _tool_schema(
        name="create_edge",
        description="Create a directed edge (relationship) between two entries.",
        properties={
            "source_id": {"type": "string", "description": "Source entry ID"},
            "target_id": {"type": "string", "description": "Target entry ID"},
            "relation": {
                "type": "string",
                "enum": [
                    "dependency", "compatible_with", "alternative_to", "related_workflow",
                    "generated_from", "memory_of", "refinement_of", "derived_from",
                    "warning_about", "cited_by", "wikilink", "prerequisite", "replacement",
                    "execution_pathway", "transformation", "provenance", "compatibility",
                    "implements", "uses", "documents",
                ],
                "description": "Semantic relation type",
            },
            "weight": {"type": "number", "default": 1.0},
        },
        required=["source_id", "target_id"],
    ),
    _tool_schema(
        name="delete_edge",
        description="Delete an edge by its ID.",
        properties={
            "edge_id": {"type": "string"},
        },
        required=["edge_id"],
    ),
    _tool_schema(
        name="get_neighbors",
        description="Get entries directly connected to a given entry.",
        properties={
            "entry_id": {"type": "string"},
            "direction": {
                "type": "string",
                "enum": ["out", "in", "both"],
                "default": "both",
            },
        },
        required=["entry_id"],
    ),
    _tool_schema(
        name="graph_stats",
        description="Return node count, edge count, and DAG status of the graph.",
    ),
    _tool_schema(
        name="resolve_wikilinks",
        description="Scan all entry content for [[wikilinks]] and create edges for resolved matches.",
    ),
    _tool_schema(
        name="remove_dangling_edges",
        description="Remove edges whose source or target entry no longer exists.",
    ),
    # ------------------------------------------------------------------ #
    # Web access                                                          #
    # ------------------------------------------------------------------ #
    _tool_schema(
        name="fetch_url",
        description="Fetch and return the text content of any URL (web page, API endpoint, documentation site, etc.). Use this when the user provides a specific URL or when you need to read a page in full rather than just search snippets.",
        properties={
            "url": {"type": "string", "description": "The URL to fetch"},
            "timeout": {"type": "integer", "default": 15, "description": "Request timeout in seconds"},
        },
        required=["url"],
    ),
    _tool_schema(
        name="web_search",
        description="Search the web via DuckDuckGo and return titles, URLs and snippets.",
        properties={
            "query": {"type": "string", "description": "Search query"},
            "max_results": {"type": "integer", "default": 5},
        },
        required=["query"],
    ),
    # ------------------------------------------------------------------ #
    # Deduplication and orientation                                       #
    # ------------------------------------------------------------------ #
    _tool_schema(
        name="find_similar_nodes",
        description=(
            "Find existing nodes that are semantically or lexically similar to a proposed title. "
            "Uses hybrid embedding + keyword search by default. "
            "ALWAYS call this before creating a new node to avoid duplicates — "
            "try both the specific title AND a generalised version. "
            "If the default mode returns poor results, retry with mode='semantic' to catch "
            "conceptually equivalent nodes that use different wording, or mode='keyword' "
            "to find exact-title/acronym matches. "
            "Returns id, slug, title, type, tags, and aliases for each candidate."
        ),
        properties={
            "title": {"type": "string", "description": "The proposed node title or concept to check"},
            "limit": {"type": "integer", "default": 8},
            "mode": {
                "type": "string",
                "enum": list(_SEARCH_MODE_NAMES),
                "default": "hybrid",
                "description": (
                    "hybrid: keyword + embedding ANN fused (default). "
                    "semantic: embedding-only, best for conceptual/paraphrase matching. "
                    "keyword: exact text match, best for known titles or acronyms."
                ),
            },
        },
        required=["title"],
    ),
    _tool_schema(
        name="get_graph_overview",
        description=(
            "Get a high-level overview of the graph: stats, type distribution, "
            "a random sample of node titles, and the most connected nodes. "
            "Use this to orient yourself before adding or restructuring content."
        ),
        properties={
            "sample_size": {"type": "integer", "default": 15, "description": "Number of random nodes to sample"},
        },
    ),
    _tool_schema(
        name="list_nodes_by_type",
        description="List all nodes of a specific entry type (capability, tool, procedure, etc.).",
        properties={
            "entry_type": {
                "type": "string",
                "enum": list(_ENTRY_TYPE_NAMES),
            },
            "limit": {"type": "integer", "default": 50},
        },
        required=["entry_type"],
    ),
    _tool_schema(
        name="merge_entries",
        description=(
            "Merge a duplicate node into a primary node. "
            "Re-targets all edges, optionally merges aliases and tags, then deletes the duplicate. "
            "Use when two nodes represent the same concept."
        ),
        properties={
            "primary_id": {"type": "string", "description": "ID or slug of the entry to keep"},
            "duplicate_id": {"type": "string", "description": "ID or slug of the entry to remove"},
            "merge_aliases": {"type": "boolean", "default": True, "description": "Add duplicate's title and aliases to primary's aliases"},
            "merge_tags": {"type": "boolean", "default": True, "description": "Merge duplicate's tags into primary"},
        },
        required=["primary_id", "duplicate_id"],
    ),
    # ------------------------------------------------------------------ #
    # Hierarchical-memory tools (L1–L4)                                   #
    # ------------------------------------------------------------------ #
    _tool_schema(
        name="create_heuristic",
        description=(
            "Create an L3 heuristic node attached to an L1 capability or L2 procedure. "
            "Use this for conditional empirical guidance (e.g. 'cooling rate strongly affects "
            "sp2/sp3 ratio'), NOT for universal truths. Wires a 'heuristic_for' edge."
        ),
        properties={
            "skill": {"type": "string", "description": "Target skill id/slug/alias (L1 or L2)"},
            "title": {"type": "string", "description": "Short heuristic title"},
            "content": {"type": "string", "description": "Detailed heuristic description"},
            "tags": _string_list(),
            "domain": {"type": "string", "description": "Applicable domain (e.g. 'amorphous-carbon', 'spintronics')"},
            "confidence": {"type": "number", "description": "0.0–1.0 confidence in this heuristic"},
            "papers": _string_list("Supporting paper URLs/DOIs"),
        },
        required=["skill", "title", "content"],
    ),
    _tool_schema(
        name="create_constraint",
        description=(
            "Create an L4 constraint / failure-mode node attached to an L1/L2 skill. "
            "Use for known limitations, instability regions, or failure patterns (e.g. "
            "'unsuitable for bond-breaking processes'). Wires a 'constraint_on' edge and "
            "denormalises the constraint slug into the skill's metadata.failure_modes list."
        ),
        properties={
            "skill": {"type": "string", "description": "Target skill id/slug/alias"},
            "title": {"type": "string"},
            "content": {"type": "string"},
            "tags": _string_list(),
            "domain": {"type": "string"},
            "severity": {"type": "string", "description": "low | medium | high"},
            "papers": _string_list(),
        },
        required=["skill", "title", "content"],
    ),
    _tool_schema(
        name="decompose_capability",
        description=(
            "Wire a 'decomposes_to' edge from an L1 capability to an L2 procedure that "
            "implements it. Use to record executable decompositions of high-level skills."
        ),
        properties={
            "capability": {"type": "string", "description": "L1 capability id/slug"},
            "procedure": {"type": "string", "description": "L2 procedure id/slug"},
        },
        required=["capability", "procedure"],
    ),
    _tool_schema(
        name="retrieve_plan",
        description=(
            "Stage-1 progressive retrieval: return planner-level skills (L1 capabilities, "
            "optionally L2 procedures) for a goal. Excludes heuristics/constraints — fetch "
            "those on demand via retrieve_heuristics / retrieve_constraints once a candidate "
            "is selected."
        ),
        properties={
            "goal": {"type": "string", "description": "Free-text description of the task"},
            "k": {"type": "integer", "default": 5},
            "include_l2": {"type": "boolean", "default": True},
        },
        required=["goal"],
    ),
    _tool_schema(
        name="retrieve_heuristics",
        description=(
            "Stage-2 progressive retrieval: L3 heuristics attached to a selected skill. "
            "Falls back to semantic search over L3 nodes if no edges exist."
        ),
        properties={
            "skill": {"type": "string"},
            "k": {"type": "integer", "default": 5},
        },
        required=["skill"],
    ),
    _tool_schema(
        name="retrieve_constraints",
        description=(
            "Stage-3 progressive retrieval: L4 constraints / failure modes attached to a "
            "selected skill. Use this when the verifier reports an error or when execution "
            "uncertainty is high."
        ),
        properties={
            "skill": {"type": "string"},
            "k": {"type": "integer", "default": 5},
        },
        required=["skill"],
    ),
    # ------------------------------------------------------------------ #
    # Script management tools                                             #
    # ------------------------------------------------------------------ #
    _tool_schema(
        name="add_script_to_entry",
        description=(
            "Attach an executable script directly to an existing entry. "
            "The script is stored separately from the human-readable content field, "
            "so agents reading the entry will not accidentally consume large script bodies. "
            "A single entry can hold multiple scripts (e.g. two helper modules for one skill). "
            "Use for Python, bash, Julia, or any other runnable script."
        ),
        properties={
            "entry_id": {"type": "string", "description": "ID or slug of the target entry"},
            "code": {"type": "string", "description": "Full source code of the script"},
            "filename": {"type": "string", "description": "Suggested filename (e.g. relax.py). Defaults to slug + extension."},
            "language": {
                "type": "string",
                "description": "Programming language (e.g. python, bash, julia, r)",
                "default": "python",
            },
            "requirements": _string_list("Package dependencies (e.g. ['ase', 'numpy'])"),
            "description": {"type": "string", "description": "Short description of what the script does"},
        },
        required=["entry_id", "code"],
    ),
    _tool_schema(
        name="get_script",
        description=(
            "List the scripts attached to an entry (metadata only — no code). "
            "Returns filenames, language, requirements, and download URLs. "
            "Use the download URL to fetch the actual source code."
        ),
        properties={
            "identifier": {"type": "string", "description": "Entry ID or slug"},
            "filename": {"type": "string", "description": "Filter to a specific script filename (optional)"},
        },
        required=["identifier"],
    ),
    _tool_schema(
        name="list_scripts",
        description="List all entries that have scripts directly attached, with filenames and download URLs.",
        properties={
            "limit": {"type": "integer", "default": 50},
        },
    ),
    # ------------------------------------------------------------------ #
    # Generic asset (folder-style) tools                                  #
    # ------------------------------------------------------------------ #
    _tool_schema(
        name="add_asset_to_entry",
        description=(
            "Attach a generic asset (script, reference, doc, example, data file, "
            "external link, free-form note) to an entry in a named folder. "
            "Each node is treated as a small folder of typed assets addressable "
            "as `[entry]/[folder]/[filename]`. "
            "Conventional folders: scripts, references, docs, examples, data, notes "
            "(custom folder names are also accepted). "
            "Use kind='link' for URLs (content = the URL), 'text' for inline "
            "markdown/notes, 'file' for downloadable bodies."
        ),
        properties={
            "entry_id": {"type": "string", "description": "ID or slug of the target entry"},
            "folder": {
                "type": "string",
                "description": "Sub-folder name (e.g. scripts, references, docs, examples, data, notes)",
            },
            "filename": {
                "type": "string",
                "description": "Filename inside the folder (may include a sub-path like 'inputs/lj.in')",
            },
            "content": {
                "type": "string",
                "description": "Body for file/text assets; the URL itself for link assets",
            },
            "kind": {
                "type": "string",
                "enum": ["file", "link", "text"],
                "default": "file",
            },
            "language": {"type": "string", "description": "Optional language hint (python, bash, markdown, …)"},
            "mime_type": {"type": "string", "description": "Optional MIME override"},
            "description": {"type": "string"},
            "requirements": _string_list("Package dependencies (for runnable scripts)"),
        },
        required=["entry_id", "folder", "filename"],
    ),
    _tool_schema(
        name="list_assets",
        description=(
            "List the assets attached to an entry, grouped by folder. "
            "Optionally filter to a single folder. Returns metadata + download URLs; "
            "fetch bodies via the download URL."
        ),
        properties={
            "identifier": {"type": "string", "description": "Entry ID or slug"},
            "folder": {"type": "string", "description": "Optional folder name filter"},
        },
        required=["identifier"],
    ),
    # ------------------------------------------------------------------ #
    # Material interface tools                                            #
    # ------------------------------------------------------------------ #
    _tool_schema(
        name="create_material_entry",
        description=(
            "Create a structured data entry for a crystal or compound (uses entry_type=data), "
            "recording its formula, crystal system, space group, and description "
            "in a standardised template suitable for downstream interface workflows."
        ),
        properties={
            "formula": {"type": "string", "description": "Chemical formula or material name (e.g. 'TiO2', 'GaN')"},
            "crystal_system": {"type": "string", "description": "e.g. cubic, tetragonal, hexagonal"},
            "space_group": {"type": "string", "description": "Hermann-Mauguin symbol or number (e.g. 'Fm-3m', '225')"},
            "description": {"type": "string"},
            "tags": _string_list(),
            "source_provenance": {"type": "string"},
        },
        required=["formula"],
    ),
    _tool_schema(
        name="build_material_interface_workflow",
        description=(
            "DEPRECATED. Returns an error explaining how to use the generic "
            "'Material interface construction' capability + slab-stacking procedure "
            "instead of creating one node per material pair."
        ),
        properties={
            "material_a": {"type": "string"},
            "material_b": {"type": "string"},
            "method": {"type": "string", "default": "slab_stacking"},
            "description": {"type": "string"},
            "tags": _string_list(),
        },
        required=["material_a", "material_b"],
    ),
    # ------------------------------------------------------------------ #
    # Verification / feedback tools                                       #
    # ------------------------------------------------------------------ #
    _tool_schema(
        name="submit_feedback",
        description=(
            "Record correctness feedback on a node. Updates verification_status "
            "and appends to the node's feedback_log. Use after testing a node, "
            "or when an external agent reports success/failure. "
            "verdict: works | peer_works | bugged | deprecated | unclear."
        ),
        properties={
            "entry_id": {"type": "string", "description": "Node ID or slug"},
            "verdict": {
                "type": "string",
                "enum": ["works", "peer_works", "bugged", "deprecated", "unclear"],
            },
            "note": {"type": "string", "description": "Short human-readable note"},
            "evidence": {"type": "string", "description": "Output, error message, or link"},
            "agent_id": {"type": "string", "description": "Identifier of the reporter"},
        },
        required=["entry_id", "verdict"],
    ),
    _tool_schema(
        name="list_by_verification",
        description=(
            "List nodes filtered by verification_status. "
            "Use to find unverified or bugged nodes that need attention."
        ),
        properties={
            "status": {
                "type": "string",
                "enum": [
                    "unverified", "self_tested", "peer_reviewed",
                    "community_tested", "bugged", "deprecated",
                ],
                "default": "unverified",
            },
            "limit": {"type": "integer", "default": 50},
        },
    ),
    _tool_schema(
        name="list_needs_generalization",
        description=(
            "List nodes flagged as overly specific by the abstraction check. "
            "These are candidates for merging into a generic capability."
        ),
        properties={"limit": {"type": "integer", "default": 50}},
    ),
]
|
|
2066
|
+
|
|
2067
|
+
# Map function name → callable.
# Keys are the tool names a caller supplies; values are the module-level
# functions that implement them.
TOOL_DISPATCH: dict[str, Any] = {
    # Entry CRUD and search
    "create_entry": create_entry,
    "update_entry": update_entry,
    "delete_entry": delete_entry,
    "get_entry": get_entry,
    "search_entries": search_entries,
    "list_entries": list_entries,
    # Edges and graph maintenance
    "create_edge": create_edge,
    "delete_edge": delete_edge,
    "get_neighbors": get_neighbors,
    "graph_stats": graph_stats,
    "resolve_wikilinks": resolve_wikilinks,
    "remove_dangling_edges": remove_dangling_edges,
    # Web access
    "fetch_url": fetch_url,
    "web_search": web_search,
    # Deduplication and orientation
    "find_similar_nodes": find_similar_nodes,
    "get_graph_overview": get_graph_overview,
    "list_nodes_by_type": list_nodes_by_type,
    "merge_entries": merge_entries,
    # Scripts and assets
    # NOTE(review): "create_script_entry" has no matching entry in
    # TOOL_SCHEMAS — presumably kept for callers that dispatch by name
    # without advertising the tool; confirm.
    "create_script_entry": create_script_entry,
    "add_script_to_entry": add_script_to_entry,
    "get_script": get_script,
    "list_scripts": list_scripts,
    "add_asset_to_entry": add_asset_to_entry,
    "list_assets": list_assets,
    # Material interface tools
    "build_material_interface_workflow": build_material_interface_workflow,
    "create_material_entry": create_material_entry,
    # NOTE(review): "attach_script_to_entry" likewise has no schema in
    # TOOL_SCHEMAS — confirm whether it is intentionally unadvertised.
    "attach_script_to_entry": attach_script_to_entry,
    # Verification / feedback tools
    "submit_feedback": submit_feedback,
    "list_by_verification": list_by_verification,
    "list_needs_generalization": list_needs_generalization,
    # Hierarchical-memory tools (L1–L4 / progressive retrieval)
    "create_heuristic": create_heuristic,
    "create_constraint": create_constraint,
    "decompose_capability": decompose_capability,
    "retrieve_plan": retrieve_plan,
    "retrieve_heuristics": retrieve_heuristics,
    "retrieve_constraints": retrieve_constraints,
}
|