know-do-graph 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. agents/__init__.py +0 -0
  2. agents/extraction_agent/__init__.py +0 -0
  3. agents/extraction_agent/agent.py +170 -0
  4. agents/graph_agent/__init__.py +5 -0
  5. agents/graph_agent/agent.py +373 -0
  6. agents/graph_agent/tools.py +2106 -0
  7. agents/maintenance_agent/__init__.py +0 -0
  8. agents/maintenance_agent/agent.py +283 -0
  9. agents/orchestrator/__init__.py +0 -0
  10. agents/orchestrator/agent.py +217 -0
  11. agents/review_agent/__init__.py +0 -0
  12. agents/review_agent/agent.py +188 -0
  13. agents/review_agent/tools.py +472 -0
  14. api/__init__.py +0 -0
  15. api/main.py +136 -0
  16. api/routes/__init__.py +0 -0
  17. api/routes/agent.py +81 -0
  18. api/routes/entries.py +411 -0
  19. api/routes/graph.py +132 -0
  20. api/routes/mem.py +179 -0
  21. api/routes/remote.py +815 -0
  22. api/routes/remote_sync.py +230 -0
  23. api/routes/retrieve.py +88 -0
  24. core/__init__.py +0 -0
  25. core/app_state.py +9 -0
  26. core/events.py +84 -0
  27. core/extraction/__init__.py +0 -0
  28. core/extraction/wikilink_parser.py +48 -0
  29. core/graph/__init__.py +0 -0
  30. core/graph/graph.py +204 -0
  31. core/memory/__init__.py +0 -0
  32. core/memory/memgraph.py +458 -0
  33. core/resources/starter.db +0 -0
  34. core/retrieval/__init__.py +0 -0
  35. core/retrieval/embedder.py +122 -0
  36. core/retrieval/fusion.py +52 -0
  37. core/retrieval/progressive.py +399 -0
  38. core/retrieval/retrieval.py +346 -0
  39. core/retrieval/vector_store.py +91 -0
  40. core/schemas/__init__.py +0 -0
  41. core/schemas/edge.py +46 -0
  42. core/schemas/entry.py +388 -0
  43. core/storage/__init__.py +0 -0
  44. core/storage/database.py +104 -0
  45. core/storage/models.py +66 -0
  46. core/storage/repository.py +243 -0
  47. core/sync/__init__.py +20 -0
  48. core/sync/autolink.py +301 -0
  49. core/sync/db_merge.py +297 -0
  50. core/sync/db_watcher.py +84 -0
  51. core/sync/remote_sync.py +345 -0
  52. examples/__init__.py +0 -0
  53. examples/example_entries.py +206 -0
  54. examples/pymatgen_interface_examples.py +811 -0
  55. frontend/dist/assets/index-BLfo7ZZu.css +1 -0
  56. frontend/dist/assets/index-G-mYbZ9R.js +83 -0
  57. frontend/dist/assets/index-G-mYbZ9R.js.map +1 -0
  58. frontend/dist/index.html +92 -0
  59. know_do_graph-0.1.0.dist-info/METADATA +765 -0
  60. know_do_graph-0.1.0.dist-info/RECORD +63 -0
  61. know_do_graph-0.1.0.dist-info/WHEEL +4 -0
  62. know_do_graph-0.1.0.dist-info/entry_points.txt +2 -0
  63. main.py +944 -0
@@ -0,0 +1,2106 @@
1
+ """Tool definitions for the GraphAgent.
2
+
3
+ Each function corresponds to an OpenAI function-calling tool. All functions
4
+ receive the live ``KnowDoGraph`` instance via the module-level ``_graph``
5
+ variable which is set once by ``GraphAgent.__init__``.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import json
11
+ import re
12
+ from typing import Any
13
+
14
+ # ---------------------------------------------------------------------------
15
+ # Helpers
16
+ # ---------------------------------------------------------------------------
17
+
18
+
19
+ def _slug(title: str) -> str:
20
+ slug = title.lower().strip()
21
+ slug = re.sub(r"[^\w\s-]", "", slug)
22
+ slug = re.sub(r"[\s_]+", "-", slug)
23
+ return re.sub(r"-+", "-", slug).strip("-")
24
+
25
+
26
+ # Tokens that suggest a title is a *concrete instance* of a more general skill
27
+ # (chemical formulas, two-formula interfaces, "build X molecule", etc.).
28
+ # Used by create_entry's soft abstraction check.
29
+ _CONCRETE_INSTANCE_PATTERNS = [
30
+ re.compile(r"\b[A-Z][a-zA-Z]*\d+[A-Za-z]*\d*\b"), # CH4, H2O, TiO2, SrTiO3
31
+ re.compile(r"\b[A-Z][A-Za-z0-9]+/[A-Z][A-Za-z0-9]+\b"), # TiO2/SrTiO3
32
+ re.compile(r"^build\s+[A-Z][a-zA-Z]*\d+"), # "Build H2O" (formula must contain a digit)
33
+ ]
34
+
35
+
36
def _looks_overly_specific(title: str) -> bool:
    """Heuristic: report whether *title* matches any concrete-instance pattern.

    Returns ``True`` as soon as one of ``_CONCRETE_INSTANCE_PATTERNS`` is
    found anywhere in *title*, ``False`` when none of them match.
    """
    for pattern in _CONCRETE_INSTANCE_PATTERNS:
        if pattern.search(title):
            return True
    return False
39
+
40
+
41
def _check_generalization(title: str, db: Any) -> dict:
    """Soft abstraction check for a proposed entry *title*.

    Searches the existing entries for *title* (at most 5 hits) through a
    ``RetrievalEngine`` built on *db* and ``app_state.graph``.  The title is
    flagged when it looks overly specific (see ``_looks_overly_specific``) or
    when the search returned any candidate at all — those candidates are
    likely a generic ancestor or a near-duplicate.

    Returns ``{"needs_generalization": bool, "similar": [...]}`` where each
    item of ``similar`` holds the candidate's ``id``, ``title`` and ``type``.
    """
    from core.retrieval.retrieval import RetrievalEngine
    from core import app_state

    engine = RetrievalEngine(db, app_state.graph)
    candidates = engine.search_entries(query=title, limit=5)
    flag = _looks_overly_specific(title) or bool(candidates)
    return {
        "needs_generalization": flag,
        "similar": [{"id": e.id, "title": e.title, "type": e.entry_type.value} for e in candidates],
    }
59
+
60
+
61
+ # ---------------------------------------------------------------------------
62
+ # Node / Entry tools
63
+ # ---------------------------------------------------------------------------
64
+
65
+
66
def create_entry(
    title: str,
    content: str = "",
    entry_type: str = "generic",
    tags: list[str] | None = None,
    aliases: list[str] | None = None,
    source_provenance: str | None = None,
    graph: Any = None,
) -> dict:
    """Persist a new knowledge entry (node) and mirror it into *graph* if given.

    Before saving, ``_check_generalization`` is run on *title*.  Its
    ``needs_generalization`` flag is stored in the entry metadata and echoed
    in the response together with the similar existing entries it found, so
    the agent can decide to merge or rename instead.

    Returns a dict with ``id``, ``slug``, ``title``, ``needs_generalization``
    and ``similar_existing``.  ``EntryType(entry_type)`` is not guarded here,
    so an unrecognised *entry_type* propagates whatever that constructor
    raises.
    """
    from core.schemas.entry import Entry, EntryMetadata, EntryType
    from core.storage.database import SessionLocal
    from core.storage.repository import EntryRepository

    with SessionLocal() as session:
        check = _check_generalization(title, session)
        flagged = check["needs_generalization"]

        metadata = EntryMetadata(
            source_provenance=source_provenance,
            needs_generalization=flagged,
        )
        new_entry = Entry(
            title=title,
            content=content,
            entry_type=EntryType(entry_type),
            tags=tags or [],
            aliases=aliases or [],
            metadata=metadata,
        )
        saved = EntryRepository(session).create(new_entry)

        # Only an explicitly supplied graph is updated in memory.
        if graph is not None:
            graph.add_entry(saved)

        response = {"id": saved.id, "slug": saved.slug, "title": saved.title}
        response["needs_generalization"] = flagged
        response["similar_existing"] = check["similar"]
        return response
111
+
112
+
113
def update_entry(
    entry_id: str,
    title: str | None = None,
    content: str | None = None,
    entry_type: str | None = None,
    tags: list[str] | None = None,
    aliases: list[str] | None = None,
    graph: Any = None,
) -> dict:
    """Update fields on an existing entry.

    *entry_id* is resolved through ``RetrievalEngine.resolve_identifier``.
    Only arguments that are not ``None`` are applied; changing *content* also
    calls ``entry.refresh_refs()``.

    After a successful save the entry is upserted into the in-memory graph:
    the explicitly passed *graph* when there is one, otherwise
    ``app_state.graph`` (the same fallback used to resolve the entry), so the
    live graph does not go stale when the caller omits *graph*.

    Returns ``{"id", "slug", "title"}`` on success, or ``{"error": ...}`` when
    the entry cannot be found or the repository update yields nothing.
    """
    from core import app_state
    from core.retrieval.retrieval import RetrievalEngine
    from core.schemas.entry import EntryType
    from core.storage.database import SessionLocal
    from core.storage.repository import EntryRepository

    g = graph or app_state.graph
    # Graph to keep in sync: an explicit argument always wins, otherwise fall
    # back to the shared application graph.
    live_graph = graph if graph is not None else app_state.graph

    with SessionLocal() as db:
        engine = RetrievalEngine(db, g)
        entry = engine.resolve_identifier(entry_id)
        if entry is None:
            return {"error": f"Entry '{entry_id}' not found."}

        if title is not None:
            entry.title = title
        if content is not None:
            entry.content = content
            entry.refresh_refs()
        if entry_type is not None:
            entry.entry_type = EntryType(entry_type)
        if tags is not None:
            entry.tags = tags
        if aliases is not None:
            entry.aliases = aliases

        saved = EntryRepository(db).update(entry)
        if not saved:
            return {"error": "Update failed."}

        if live_graph is not None:
            live_graph.add_entry(saved)  # upsert node attributes
        return {"id": saved.id, "slug": saved.slug, "title": saved.title}
150
+
151
+
152
def delete_entry(entry_id: str, graph: Any = None) -> dict:
    """Delete an entry through ``EntryRepository.delete``.

    *entry_id* is passed to the repository as-is (no slug/alias resolution
    happens in this function).  When the repository reports a deletion and a
    *graph* was supplied, the node is also removed from that graph.

    Returns ``{"deleted": <repository result>, "entry_id": entry_id}``.
    """
    from core.storage.database import SessionLocal
    from core.storage.repository import EntryRepository

    with SessionLocal() as session:
        was_deleted = EntryRepository(session).delete(entry_id)
        if graph is not None and was_deleted:
            graph.remove_entry(entry_id)
        outcome = {"deleted": was_deleted, "entry_id": entry_id}
        return outcome
162
+
163
+
164
def search_entries(query: str, limit: int = 10, mode: str = "hybrid", graph: Any = None) -> list[dict]:
    """Search entries and return lightweight summaries of the hits.

    Delegates to ``RetrievalEngine.search_entries`` with the given *query*,
    *limit* and *mode* (``"hybrid"`` by default), using *graph* or, when that
    is falsy, ``app_state.graph``.

    Each result dict carries ``id``, ``slug``, ``title``, ``type`` and
    ``tags``.
    """
    from core import app_state
    from core.retrieval.retrieval import RetrievalEngine
    from core.storage.database import SessionLocal

    active_graph = graph or app_state.graph
    with SessionLocal() as session:
        hits = RetrievalEngine(session, active_graph).search_entries(
            query=query, limit=limit, mode=mode
        )
        summaries = []
        for hit in hits:
            summaries.append(
                {
                    "id": hit.id,
                    "slug": hit.slug,
                    "title": hit.title,
                    "type": hit.entry_type.value,
                    "tags": hit.tags,
                }
            )
        return summaries
178
+
179
+
180
def get_entry(identifier: str, graph: Any = None) -> dict:
    """Look up one entry and return its main fields.

    *identifier* is resolved with ``RetrievalEngine.resolve_identifier``
    (documented by the original author as accepting an ID, slug, or alias).

    Returns ``{"error": ...}`` when nothing resolves; otherwise a dict with
    ``id``, ``slug``, ``title``, ``type``, ``tags``, ``aliases``, ``content``,
    ``refs``, ``source`` and ``status``.
    """
    from core import app_state
    from core.retrieval.retrieval import RetrievalEngine
    from core.storage.database import SessionLocal

    active_graph = graph or app_state.graph
    with SessionLocal() as session:
        found = RetrievalEngine(session, active_graph).resolve_identifier(identifier)
        if found is None:
            return {"error": f"Entry '{identifier}' not found."}

        payload = {
            "id": found.id,
            "slug": found.slug,
            "title": found.title,
            "type": found.entry_type.value,
            "tags": found.tags,
            "aliases": found.aliases,
            "content": found.content,
            "refs": found.internal_refs,
        }
        # Provenance and refinement status live on the entry's metadata.
        payload["source"] = found.metadata.source_provenance
        payload["status"] = found.metadata.refinement_status.value
        return payload
204
+
205
+
206
def list_entries(limit: int = 20, graph: Any = None) -> list[dict]:
    """Return up to *limit* entries as ``{"id", "slug", "title", "type"}`` dicts.

    The listing comes from ``RetrievalEngine.list_entries``; *graph* falls
    back to ``app_state.graph`` when falsy.
    """
    from core import app_state
    from core.retrieval.retrieval import RetrievalEngine
    from core.storage.database import SessionLocal

    active_graph = graph or app_state.graph
    with SessionLocal() as session:
        listed = RetrievalEngine(session, active_graph).list_entries(limit=limit)
        rows = []
        for item in listed:
            rows.append(
                {
                    "id": item.id,
                    "slug": item.slug,
                    "title": item.title,
                    "type": item.entry_type.value,
                }
            )
        return rows
220
+
221
+
222
+ # ---------------------------------------------------------------------------
223
+ # Edge tools
224
+ # ---------------------------------------------------------------------------
225
+
226
+
227
def create_edge(
    source_id: str,
    target_id: str,
    relation: str = "related_to",
    weight: float = 1.0,
    graph: Any = None,
) -> dict:
    """Create a directed edge between two existing entries.

    ``source_id`` and ``target_id`` are each resolved against the database via
    ``RetrievalEngine.resolve_identifier`` (real ID, slug, or alias according
    to the original author).  Nothing is written — neither to the DB nor to
    the in-memory graph — unless both endpoints resolve; this avoids the
    ghost "grey" placeholder nodes that networkx would otherwise auto-create.

    Returns
    -------
    * ``{"error": "edge_endpoint_not_found", "missing": [...], "hint": ...}``
      when one or both identifiers do not resolve,
    * ``{"error": "self_loop_rejected", "entry_id": ...}`` when both resolve
      to the same entry,
    * otherwise ``{"id", "source_id", "target_id", "relation"}`` of the saved
      edge.

    A *relation* string that ``EdgeRelation`` rejects is silently replaced by
    ``EdgeRelation.wikilink``.
    """
    from core.retrieval.retrieval import RetrievalEngine
    from core.schemas.edge import Edge, EdgeRelation
    from core.storage.database import SessionLocal
    from core.storage.repository import EdgeRepository

    try:
        edge_relation = EdgeRelation(relation)
    except ValueError:
        # Unknown relation names fall back to the wikilink relation.
        edge_relation = EdgeRelation.wikilink

    with SessionLocal() as session:
        resolver = RetrievalEngine(session, graph)
        source_entry = resolver.resolve_identifier(source_id)
        target_entry = resolver.resolve_identifier(target_id)

        unresolved = [
            raw_id
            for raw_id, resolved in ((source_id, source_entry), (target_id, target_entry))
            if resolved is None
        ]
        if unresolved:
            return {
                "error": "edge_endpoint_not_found",
                "missing": unresolved,
                "hint": "Resolve source_id and target_id to existing entries (use search_entries / get_entry) or call create_entry first.",
            }

        if source_entry.id == target_entry.id:
            return {"error": "self_loop_rejected", "entry_id": source_entry.id}

        new_edge = Edge(
            source_id=source_entry.id,
            target_id=target_entry.id,
            relation=edge_relation,
            weight=weight,
        )
        saved = EdgeRepository(session).create(new_edge)
        if graph is not None:
            graph.add_edge(saved)

        return {
            "id": saved.id,
            "source_id": saved.source_id,
            "target_id": saved.target_id,
            "relation": saved.relation.value,
        }
281
+
282
+
283
def delete_edge(edge_id: str, graph: Any = None) -> dict:
    """Delete an edge by its ID.

    The edge row is loaded first so its endpoints are known; after the
    repository deletes it, the matching edge is removed from *graph* when one
    was supplied.

    Returns ``{"error": ...}`` if no edge with *edge_id* exists, otherwise
    ``{"deleted": <repository result>, "edge_id": edge_id}``.
    """
    from core.storage.database import SessionLocal
    from core.storage.models import EdgeModel
    from core.storage.repository import EdgeRepository

    with SessionLocal() as db:
        model = db.get(EdgeModel, edge_id)
        if model is None:
            return {"error": f"Edge '{edge_id}' not found."}

        # Capture the endpoints before the row is deleted; the in-memory graph
        # removes edges by (source, target) rather than by edge ID.
        src_id, tgt_id = model.source_id, model.target_id
        deleted = EdgeRepository(db).delete(edge_id)
        if deleted and graph is not None:
            graph.remove_edge(src_id, tgt_id)
        return {"deleted": deleted, "edge_id": edge_id}
299
+
300
+
301
def get_neighbors(entry_id: str, direction: str = "both", graph: Any = None) -> list[dict]:
    """Return the neighbours of *entry_id* as reported by the graph.

    The call is forwarded unchanged to ``get_neighbors`` on *graph* (or on
    ``app_state.graph`` when *graph* is falsy), passing *direction* through.
    """
    from core import app_state

    active_graph = graph or app_state.graph
    return active_graph.get_neighbors(entry_id, direction=direction)
308
+
309
+
310
+ # ---------------------------------------------------------------------------
311
+ # Graph-level tools
312
+ # ---------------------------------------------------------------------------
313
+
314
+
315
def graph_stats(graph: Any = None) -> dict:
    """Return whatever ``stats()`` reports for the active graph.

    The active graph is *graph*, or ``app_state.graph`` when *graph* is falsy.
    """
    from core import app_state

    active_graph = graph or app_state.graph
    summary = active_graph.stats()
    return summary
321
+
322
+
323
def resolve_wikilinks(graph: Any = None) -> dict:
    """Run the extraction agent's wikilink resolution over the graph.

    Builds an ``ExtractionAgent`` on *graph* (or ``app_state.graph``) and
    returns the number it reports as ``{"edges_created": count}``.
    """
    from core import app_state
    from agents.extraction_agent.agent import ExtractionAgent

    active_graph = graph or app_state.graph
    created = ExtractionAgent(active_graph).resolve_wikilinks()
    return {"edges_created": created}
332
+
333
+
334
def remove_dangling_edges(graph: Any = None) -> dict:
    """Ask the maintenance agent to remove dangling edges.

    Builds a ``MaintenanceAgent`` on *graph* (or ``app_state.graph``) and
    returns the number it reports as ``{"edges_removed": count}``.
    """
    from core import app_state
    from agents.maintenance_agent.agent import MaintenanceAgent

    active_graph = graph or app_state.graph
    removed = MaintenanceAgent(active_graph).remove_dangling_edges()
    return {"edges_removed": removed}
343
+
344
+
345
+ # ---------------------------------------------------------------------------
346
+ # Web / URL tools
347
+ # ---------------------------------------------------------------------------
348
+
349
+
350
def fetch_url(url: str, timeout: int = 15) -> dict:
    """Fetch the text content of a URL and return it so the agent can read it.

    Only ``http`` and ``https`` URLs are fetched.  The URL comes from an agent
    and the ``urllib`` fallback would otherwise happily open ``file://`` or
    ``ftp://`` locations, so any other scheme is rejected up front.

    Uses ``httpx`` if it can be imported, otherwise falls back to ``urllib``.

    Returns
    -------
    ``{"url", "status_code", "text"}`` on success (text capped at 20000
    characters for ``httpx`` / 20000 bytes for ``urllib``), or
    ``{"url", "error"}`` when the scheme is not allowed or the request fails.
    The function never raises for request failures.
    """
    try:
        from urllib.parse import urlsplit

        scheme = urlsplit(url).scheme.lower()
        if scheme not in ("http", "https"):
            return {
                "url": url,
                "error": f"Unsupported URL scheme '{scheme}': only http and https are allowed.",
            }

        headers = {"User-Agent": "KnowDoGraph/1.0"}
        try:
            import httpx
        except ImportError:
            # httpx is optional; the standard library is the fallback.
            import urllib.request

            req = urllib.request.Request(url, headers=headers)
            with urllib.request.urlopen(req, timeout=timeout) as resp:  # noqa: S310
                body = resp.read(20000).decode("utf-8", errors="replace")
                return {"url": url, "status_code": resp.status, "text": body}

        with httpx.Client(follow_redirects=True, timeout=timeout) as client:
            resp = client.get(url, headers=headers)
        return {"url": url, "status_code": resp.status_code, "text": resp.text[:20000]}
    except Exception as exc:
        # Best-effort tool: report the failure to the agent instead of raising.
        return {"url": url, "error": str(exc)}
369
+
370
+
371
def web_search(query: str, max_results: int = 5) -> list[dict]:
    """Search the web with DuckDuckGo (``duckduckgo_search.DDGS``).

    Returns one ``{"title", "url", "snippet"}`` dict per hit.  Any failure —
    including the package not being installed — is reported as a single-item
    list ``[{"error": <message>}]`` rather than raised.
    """
    try:
        from duckduckgo_search import DDGS

        with DDGS() as client:
            hits = list(client.text(query, max_results=max_results))

        snippets = []
        for hit in hits:
            snippets.append(
                {
                    "title": hit.get("title", ""),
                    "url": hit.get("href", ""),
                    "snippet": hit.get("body", ""),
                }
            )
        return snippets
    except Exception as exc:
        return [{"error": str(exc)}]
384
+
385
+
386
+ # ---------------------------------------------------------------------------
387
+ # Node-discovery / graph-intelligence tools
388
+ # ---------------------------------------------------------------------------
389
+
390
+
391
def find_similar_nodes(title: str, limit: int = 8, mode: str = "hybrid", graph: Any = None) -> list[dict]:
    """Search existing nodes using *title* as the query.

    Intended to be called before creating a node, to spot duplicates and
    decide between reusing an entry, adding an alias, or creating a new node.
    The search itself is ``RetrievalEngine.search_entries(query=title, ...)``.

    Each candidate is returned as a dict with ``id``, ``slug``, ``title``,
    ``type``, ``tags`` and ``aliases``.
    """
    from core import app_state
    from core.retrieval.retrieval import RetrievalEngine
    from core.storage.database import SessionLocal

    active_graph = graph or app_state.graph
    with SessionLocal() as session:
        matches = RetrievalEngine(session, active_graph).search_entries(
            query=title, limit=limit, mode=mode
        )
        candidates = []
        for match in matches:
            candidates.append(
                {
                    "id": match.id,
                    "slug": match.slug,
                    "title": match.title,
                    "type": match.entry_type.value,
                    "tags": match.tags,
                    "aliases": match.aliases,
                }
            )
        return candidates
417
+
418
+
419
def get_graph_overview(sample_size: int = 15, graph: Any = None) -> dict:
    """Summarise the graph without dumping every node.

    The result has four keys:

    - ``stats``: whatever ``graph.stats()`` returns
    - ``type_distribution``: entry-type value -> count, over up to 2000
      listed entries
    - ``sample_nodes``: a random sample of at most *sample_size* entries
      (``id``, ``title``, ``type``, ``tags``), useful for checking naming
      conventions
    - ``top_connected``: the five highest-degree nodes of the in-memory graph
      (``id``, ``title``, ``degree``); left empty if the degree lookup fails

    Use this to get oriented before adding or restructuring nodes.
    """
    import random
    from collections import Counter

    from core import app_state
    from core.retrieval.retrieval import RetrievalEngine
    from core.storage.database import SessionLocal

    active_graph = graph or app_state.graph
    stats = active_graph.stats()

    with SessionLocal() as session:
        all_entries = RetrievalEngine(session, active_graph).list_entries(limit=2000)

    type_counts = Counter()
    for item in all_entries:
        type_counts[item.entry_type.value] += 1

    picked = random.sample(all_entries, min(sample_size, len(all_entries)))
    sample_nodes = []
    for item in picked:
        sample_nodes.append(
            {"id": item.id, "title": item.title, "type": item.entry_type.value, "tags": item.tags}
        )

    # Top connected nodes (by total degree in the in-memory graph).
    top_connected: list[dict] = []
    try:
        # Reaches into the wrapper's private ``_g`` attribute; this is
        # best-effort, so any failure simply leaves the list empty.
        degree_map = dict(active_graph._g.degree())  # type: ignore[attr-defined]
        ranked = sorted(degree_map.items(), key=lambda pair: pair[1], reverse=True)[:5]
        titles_by_id = {item.id: item.title for item in all_entries}
        top_connected = [
            {"id": node_id, "title": titles_by_id.get(node_id, "?"), "degree": degree}
            for node_id, degree in ranked
        ]
    except Exception:
        pass

    return {
        "stats": stats,
        "type_distribution": dict(type_counts),
        "sample_nodes": sample_nodes,
        "top_connected": top_connected,
    }
467
+
468
+
469
def list_nodes_by_type(entry_type: str, limit: int = 50, graph: Any = None) -> list[dict]:
    """List nodes of one entry type.

    *entry_type* must be a valid ``EntryType`` value; otherwise a single-item
    list ``[{"error": ...}]`` is returned.  Matching nodes come from
    ``RetrievalEngine.search_entries(entry_type=..., limit=limit)`` and are
    returned as dicts with ``id``, ``slug``, ``title``, ``tags`` and
    ``aliases``.
    """
    from core import app_state
    from core.retrieval.retrieval import RetrievalEngine
    from core.schemas.entry import EntryType
    from core.storage.database import SessionLocal

    active_graph = graph or app_state.graph
    with SessionLocal() as session:
        engine = RetrievalEngine(session, active_graph)
        try:
            wanted_type = EntryType(entry_type)
        except ValueError:
            return [{"error": f"Unknown entry_type '{entry_type}'"}]

        rows = []
        for node in engine.search_entries(entry_type=wanted_type, limit=limit):
            rows.append(
                {
                    "id": node.id,
                    "slug": node.slug,
                    "title": node.title,
                    "tags": node.tags,
                    "aliases": node.aliases,
                }
            )
        return rows
488
+
489
+
490
def merge_entries(
    primary_id: str,
    duplicate_id: str,
    merge_aliases: bool = True,
    merge_tags: bool = True,
    graph: Any = None,
) -> dict:
    """Merge *duplicate_id* into *primary_id* and delete the duplicate.

    Steps performed:

    1. Both identifiers are resolved; an ``{"error": ...}`` dict is returned
       if either is missing or both resolve to the same entry.
    2. Edges touching the duplicate are re-pointed at the primary.  Edges
       that run directly between primary and duplicate would become
       self-loops, so they are deleted instead of being left pointing at the
       soon-to-be-removed duplicate.
    3. If *merge_aliases* is true, the duplicate's aliases and title are
       appended to the primary's aliases (order-preserving de-duplication);
       if *merge_tags* is true, the tags are merged the same way.
    4. The duplicate entry row is deleted and the transaction committed.
    5. The in-memory graph drops the duplicate node and re-adds a freshly
       loaded copy of the primary.

    Returns ``{"merged": True, "primary_id", "removed_duplicate_id",
    "edges_retargeted", "edges_removed"}`` on success.

    Use this to consolidate redundant nodes identified during review.
    """
    from core import app_state
    from core.retrieval.retrieval import RetrievalEngine
    from core.storage.database import SessionLocal
    from core.storage.models import EdgeModel, EntryModel
    from core.storage.repository import EntryRepository

    g = graph or app_state.graph
    with SessionLocal() as db:
        engine = RetrievalEngine(db, g)
        primary = engine.resolve_identifier(primary_id)
        duplicate = engine.resolve_identifier(duplicate_id)
        if primary is None:
            return {"error": f"Primary entry '{primary_id}' not found."}
        if duplicate is None:
            return {"error": f"Duplicate entry '{duplicate_id}' not found."}
        if primary.id == duplicate.id:
            return {"error": "primary_id and duplicate_id refer to the same entry."}

        edges_retargeted = 0
        edges_removed = 0

        # Incoming edges: X -> duplicate becomes X -> primary.
        incoming = db.query(EdgeModel).filter(EdgeModel.target_id == duplicate.id).all()
        for edge_model in incoming:
            if edge_model.source_id == primary.id:
                # primary -> duplicate would turn into a self-loop; drop it.
                db.delete(edge_model)
                edges_removed += 1
            else:
                edge_model.target_id = primary.id
                edges_retargeted += 1

        # Outgoing edges: duplicate -> Y becomes primary -> Y.
        outgoing = db.query(EdgeModel).filter(EdgeModel.source_id == duplicate.id).all()
        for edge_model in outgoing:
            if edge_model.target_id == primary.id:
                # duplicate -> primary would turn into a self-loop; drop it.
                db.delete(edge_model)
                edges_removed += 1
            else:
                edge_model.source_id = primary.id
                edges_retargeted += 1

        # Merge metadata into primary; dict.fromkeys keeps first-seen order.
        if merge_aliases:
            primary.aliases = list(
                dict.fromkeys(primary.aliases + duplicate.aliases + [duplicate.title])
            )
        if merge_tags:
            primary.tags = list(dict.fromkeys(primary.tags + duplicate.tags))

        EntryRepository(db).update(primary)

        # Delete the duplicate entry model directly.
        dup_model = db.get(EntryModel, duplicate.id)
        if dup_model:
            db.delete(dup_model)
        # Commit unconditionally so the edge changes are persisted even if the
        # duplicate row had already disappeared.
        db.commit()

    # Refresh in-memory graph.
    # NOTE(review): retargeted edges are not re-added to the in-memory graph
    # here; presumably a later reload/sync restores them — confirm.
    if g is not None:
        g.remove_entry(duplicate.id)
        with SessionLocal() as db2:
            refreshed = RetrievalEngine(db2, g).get_entry_by_id(primary.id)
            if refreshed:
                g.add_entry(refreshed)

    return {
        "merged": True,
        "primary_id": primary.id,
        "removed_duplicate_id": duplicate.id,
        "edges_retargeted": edges_retargeted,
        "edges_removed": edges_removed,
    }
566
+
567
+
568
+ # ---------------------------------------------------------------------------
569
+ # Script entry tools
570
+ # ---------------------------------------------------------------------------
571
+
572
+
573
def create_script_entry(
    title: str,
    code: str,
    language: str = "python",
    requirements: list[str] | None = None,
    description: str = "",
    tags: list[str] | None = None,
    aliases: list[str] | None = None,
    filename: str | None = None,
    source_provenance: str | None = None,
    graph: Any = None,
) -> dict:
    """DEPRECATED — formerly created a standalone script node.

    The function no longer creates anything: every argument is ignored and an
    ``{"error": ...}`` dict is returned that points the caller to
    ``add_script_to_entry``.  It is kept only for backward compatibility and
    will be removed in a future version.
    """
    deprecation_notice = (
        "create_script_entry is deprecated. Use add_script_to_entry to attach "
        "scripts directly to the procedure or capability node they belong to. "
        "This keeps script code out of the content field that agents read by default."
    )
    return {"error": deprecation_notice}
598
+
599
+
600
def add_script_to_entry(
    entry_id: str,
    code: str,
    filename: str | None = None,
    language: str = "python",
    requirements: list[str] | None = None,
    description: str = "",
    graph: Any = None,
) -> dict:
    """Attach an executable script directly to an existing entry.

    The script is stored as a ``NodeAsset`` of kind ``"file"`` in the entry's
    ``assets`` list under the scripts folder (``ASSET_FOLDER_SCRIPTS``), i.e.
    separate from the human-readable ``content`` field, so agents reading the
    entry do not accidentally consume large script bodies.  A node can hold
    any number of scripts; attaching one with the filename of an existing
    script replaces that script.

    When *filename* is omitted it is derived from the slugified entry title
    plus the extension for *language* (see ``_ext_for_language``).

    Returns the updated entry ``id``, ``title`` and a summary (``filename``,
    ``language``, ``requirements``) of all attached scripts, or
    ``{"error": ...}`` if the entry is not found or the update fails.
    """
    from core import app_state
    from core.retrieval.retrieval import RetrievalEngine
    from core.schemas.entry import NodeAsset, ASSET_FOLDER_SCRIPTS
    from core.storage.database import SessionLocal
    from core.storage.repository import EntryRepository

    g = graph or app_state.graph
    with SessionLocal() as db:
        engine = RetrievalEngine(db, g)
        entry = engine.resolve_identifier(entry_id)
        if entry is None:
            return {"error": f"Entry '{entry_id}' not found."}

        resolved_filename = filename or (_slug(entry.title) + _ext_for_language(language))
        # Replace existing asset at scripts/<filename> if present.
        entry.assets = [
            a for a in entry.assets
            if not (a.folder == ASSET_FOLDER_SCRIPTS and a.filename == resolved_filename)
        ]
        entry.assets.append(NodeAsset(
            folder=ASSET_FOLDER_SCRIPTS,
            filename=resolved_filename,
            kind="file",
            content=code,
            language=language,
            requirements=requirements or [],
            description=description,
        ))
        updated = EntryRepository(db).update(entry)

    # A falsy result means nothing was saved; report it instead of failing on
    # attribute access below.
    if not updated:
        return {"error": "Update failed."}

    if g is not None:
        g.add_entry(updated)

    return {
        "id": updated.id,
        "title": updated.title,
        "scripts": [
            {"filename": s.filename, "language": s.language, "requirements": s.requirements}
            for s in updated.scripts
        ],
    }
657
+
658
+
659
+ def _ext_for_language(language: str) -> str:
660
+ """Map language name to a file extension."""
661
+ mapping = {
662
+ "python": ".py",
663
+ "py": ".py",
664
+ "bash": ".sh",
665
+ "shell": ".sh",
666
+ "sh": ".sh",
667
+ "julia": ".jl",
668
+ "javascript": ".js",
669
+ "js": ".js",
670
+ "typescript": ".ts",
671
+ "ts": ".ts",
672
+ "r": ".r",
673
+ "matlab": ".m",
674
+ "ruby": ".rb",
675
+ "rust": ".rs",
676
+ "go": ".go",
677
+ "c": ".c",
678
+ "cpp": ".cpp",
679
+ "c++": ".cpp",
680
+ }
681
+ return mapping.get(language.lower(), ".txt")
682
+
683
+
684
def add_asset_to_entry(
    entry_id: str,
    folder: str,
    filename: str,
    content: str = "",
    kind: str = "file",
    language: str | None = None,
    mime_type: str | None = None,
    description: str = "",
    requirements: list[str] | None = None,
    graph: Any = None,
) -> dict:
    """Attach a generic *asset* to an entry inside a named folder.

    Each entry behaves like a small folder containing typed assets. Conventional
    folders are ``scripts``, ``references``, ``docs``, ``examples``, ``data``,
    ``notes``; any other folder name is allowed.  An existing asset with the
    same folder and filename is replaced.

    Parameters
    ----------
    entry_id:
        Identifier of the entry, resolved via
        ``RetrievalEngine.resolve_identifier``.
    folder:
        Sub-folder name (e.g. ``"scripts"``, ``"references"``, ``"docs"``).
    filename:
        File name within the folder (may contain a sub-path like
        ``"examples/relax.py"``).
    content:
        File body for ``kind="file"`` / ``"text"``, or the URL for
        ``kind="link"``.
    kind:
        One of ``"file"`` (binary/code download), ``"text"`` (inline markdown
        / notes), or ``"link"`` (external reference; ``content`` is a URL).
    language:
        Programming language hint for syntax / mime detection (``"python"`` …).
    mime_type:
        Passed through to ``NodeAsset`` unchanged.
    description:
        Short human-readable description shown in the UI.
    requirements:
        Package dependencies (for runnable scripts).

    Returns
    -------
    ``{"id", "title", "asset": {...}, "total_assets"}`` on success, or
    ``{"error": ...}`` when the entry is not found, ``NodeAsset`` rejects the
    arguments with a ``ValueError``, or the update fails.

    The asset becomes addressable as
    ``GET /entries/{entry_id}/assets/{folder}/{filename}``.
    """
    from core import app_state
    from core.retrieval.retrieval import RetrievalEngine
    from core.schemas.entry import NodeAsset
    from core.storage.database import SessionLocal
    from core.storage.repository import EntryRepository

    g = graph or app_state.graph
    with SessionLocal() as db:
        engine = RetrievalEngine(db, g)
        entry = engine.resolve_identifier(entry_id)
        if entry is None:
            return {"error": f"Entry '{entry_id}' not found."}
        try:
            asset = NodeAsset(
                folder=folder,
                filename=filename,
                kind=kind,
                content=content,
                language=language,
                mime_type=mime_type,
                description=description,
                requirements=requirements or [],
            )
        except ValueError as exc:
            return {"error": str(exc)}

        # Replace existing asset at same folder/filename.  The comparison uses
        # the values on the constructed asset rather than the raw arguments.
        entry.assets = [
            a for a in entry.assets
            if not (a.folder == asset.folder and a.filename == asset.filename)
        ]
        entry.assets.append(asset)
        updated = EntryRepository(db).update(entry)

    # A falsy result means nothing was saved; report it instead of failing on
    # attribute access below.
    if not updated:
        return {"error": "Update failed."}

    if g is not None:
        g.add_entry(updated)

    return {
        "id": updated.id,
        "title": updated.title,
        "asset": {
            "folder": asset.folder,
            "filename": asset.filename,
            "kind": asset.kind,
            "download_url": f"/entries/{updated.id}/assets/{asset.folder}/{asset.filename}",
        },
        "total_assets": len(updated.assets),
    }
772
+
773
+
774
def list_assets(identifier: str, folder: str | None = None, graph: Any = None) -> dict:
    """List the assets attached to an entry, grouped by folder.

    Pass *folder* to keep only assets whose folder equals ``folder.lower()``
    (e.g. ``"references"``).  Only metadata is returned — ``filename``,
    ``kind``, ``language``, ``description``, ``size`` (length of the stored
    content) and a ``download_url`` for fetching the body.

    Returns ``{"error": ...}`` if the entry cannot be resolved, otherwise
    ``{"id", "slug", "title", "folders", "total"}``.
    """
    from core import app_state
    from core.retrieval.retrieval import RetrievalEngine
    from core.storage.database import SessionLocal

    active_graph = graph or app_state.graph
    with SessionLocal() as session:
        entry = RetrievalEngine(session, active_graph).resolve_identifier(identifier)
        if entry is None:
            return {"error": f"Entry '{identifier}' not found."}

        # An empty or missing folder argument means "no filter".
        wanted_folder = folder.lower() if folder else None

        by_folder: dict[str, list[dict]] = {}
        for asset in entry.assets:
            if wanted_folder is not None and asset.folder != wanted_folder:
                continue
            info = {
                "filename": asset.filename,
                "kind": asset.kind,
                "language": asset.language,
                "description": asset.description,
                "size": len(asset.content or ""),
                "download_url": f"/entries/{entry.id}/assets/{asset.folder}/{asset.filename}",
            }
            if asset.folder not in by_folder:
                by_folder[asset.folder] = []
            by_folder[asset.folder].append(info)

        total = 0
        for items in by_folder.values():
            total += len(items)

        return {
            "id": entry.id,
            "slug": entry.slug,
            "title": entry.title,
            "folders": by_folder,
            "total": total,
        }
810
+
811
+
812
def get_script(identifier: str, filename: str | None = None, graph: Any = None) -> dict:
    """Describe the scripts attached to an entry (metadata only, no source code).

    Parameters
    ----------
    identifier:
        Handed to ``RetrievalEngine.resolve_identifier`` to locate the entry.
    filename:
        When given, only scripts whose ``filename`` matches exactly are
        described; when omitted, every attached script is described.
    graph:
        Graph given to the retrieval engine; a falsy value falls back to
        ``app_state.graph``.

    Returns
    -------
    dict
        ``{"error": ...}`` if the entry is unknown, has no scripts, or has no
        script with the requested filename. Otherwise the entry's
        id/slug/title plus a ``scripts`` list carrying a download URL for
        each script.
    """
    from core import app_state
    from core.retrieval.retrieval import RetrievalEngine
    from core.storage.database import SessionLocal

    active_graph = graph or app_state.graph
    with SessionLocal() as session:
        entry = RetrievalEngine(session, active_graph).resolve_identifier(identifier)
        if entry is None:
            return {"error": f"Entry '{identifier}' not found."}
        if not entry.scripts:
            return {"error": f"Entry '{identifier}' has no attached scripts."}

        selected = entry.scripts
        if filename:
            selected = [script for script in selected if script.filename == filename]
            if not selected:
                return {"error": f"No script named '{filename}' on entry '{identifier}'."}

        described = []
        for script in selected:
            described.append(
                {
                    "filename": script.filename,
                    "language": script.language,
                    "requirements": script.requirements,
                    "description": script.description,
                    "download_url": f"/entries/{entry.id}/scripts/{script.filename}",
                }
            )

        return {
            "id": entry.id,
            "slug": entry.slug,
            "title": entry.title,
            "scripts": described,
        }
851
+
852
+
853
def list_scripts(limit: int = 50, graph: Any = None) -> list[dict]:
    """Return entries that carry directly attached scripts, with download URLs.

    Parameters
    ----------
    limit:
        Upper bound on the number of entries returned (applied as a slice
        after filtering).
    graph:
        Graph given to the retrieval engine; a falsy value falls back to
        ``app_state.graph``.

    Returns
    -------
    list[dict]
        One item per entry with id, slug, title, tags and a ``scripts`` list
        (filename, language, requirements, download URL).
    """
    from core import app_state
    from core.retrieval.retrieval import RetrievalEngine
    from core.storage.database import SessionLocal

    active_graph = graph or app_state.graph
    with SessionLocal() as session:
        retrieval = RetrievalEngine(session, active_graph)
        # Over-fetch, because the script filter is applied after the listing.
        pool_size = max(limit * 5, 500)
        with_scripts = [
            entry for entry in retrieval.list_entries(limit=pool_size) if entry.scripts
        ]

        summaries: list[dict] = []
        for entry in with_scripts[:limit]:
            script_rows = [
                {
                    "filename": script.filename,
                    "language": script.language,
                    "requirements": script.requirements,
                    "download_url": f"/entries/{entry.id}/scripts/{script.filename}",
                }
                for script in entry.scripts
            ]
            summaries.append(
                {
                    "id": entry.id,
                    "slug": entry.slug,
                    "title": entry.title,
                    "tags": entry.tags,
                    "scripts": script_rows,
                }
            )
        return summaries
882
+
883
+
884
+ # ---------------------------------------------------------------------------
885
+ # Material interface tools
886
+ # ---------------------------------------------------------------------------
887
+
888
+
889
+ def build_material_interface_workflow(
890
+ material_a: str,
891
+ material_b: str,
892
+ method: str = "slab_stacking",
893
+ description: str = "",
894
+ tags: list[str] | None = None,
895
+ graph: Any = None,
896
+ ) -> dict:
897
+ """DEPRECATED — produces overly specific per-material-pair nodes.
898
+
899
+ Use the generic ``Material interface construction`` capability node plus
900
+ ``Slab-stacking procedure`` (or similar) and add the specific pair as either
901
+ (a) a `data` example linked via ``provenance``, or (b) a parameterised note
902
+ in the procedure's content. Returns an error directing the agent to do so.
903
+ """
904
+ return {
905
+ "error": (
906
+ "build_material_interface_workflow is deprecated because it creates "
907
+ "one node per material pair. Instead: ensure a generic "
908
+ "'Material interface construction' capability and a "
909
+ "'Slab-stacking procedure' node exist (create them if missing), "
910
+ f"then add a single data entry for the {material_a}/{material_b} "
911
+ "instance with provenance edges to those generic nodes."
912
+ ),
913
+ "suggested_generic_titles": [
914
+ "Material interface construction",
915
+ f"{method.replace('_', ' ').title()} procedure",
916
+ ],
917
+ }
918
+
919
+
920
+ # ---------------------------------------------------------------------------
921
+ # Verification feedback tools
922
+ # ---------------------------------------------------------------------------
923
+
924
+
925
def submit_feedback(
    entry_id: str,
    verdict: str,
    note: str = "",
    evidence: str = "",
    agent_id: str = "unknown",
    graph: Any = None,
) -> dict:
    """Record correctness feedback on a node and update its verification_status.

    Parameters
    ----------
    entry_id:
        Identifier resolved through ``RetrievalEngine.resolve_identifier``.
    verdict:
        One of ``works`` (→ self_tested), ``peer_works`` (→ peer_reviewed),
        ``bugged`` (→ bugged), ``deprecated`` (→ deprecated), ``unclear``
        (records the note without changing status).
    note, evidence:
        Free-text describing the test/observation. Stored in the entry's
        ``metadata.feedback_log`` for the maintenance agent to consult.
    agent_id:
        Identifier of the agent or human that submitted the feedback.
    graph:
        Graph given to the retrieval engine and refreshed after the update;
        a falsy value falls back to ``app_state.graph``.

    Returns
    -------
    dict
        ``{"error": ...}`` for an unknown verdict or entry. Otherwise
        ``entry_id``, the resulting ``verification_status`` value (``None``
        when the entry has no status set), and ``feedback_count``.
    """
    from datetime import datetime, timezone
    from core import app_state
    from core.retrieval.retrieval import RetrievalEngine
    from core.schemas.entry import VerificationStatus
    from core.storage.database import SessionLocal
    from core.storage.repository import EntryRepository

    verdict_to_status = {
        "works": VerificationStatus.self_tested,
        "peer_works": VerificationStatus.peer_reviewed,
        "bugged": VerificationStatus.bugged,
        "deprecated": VerificationStatus.deprecated,
        "unclear": None,
    }
    if verdict not in verdict_to_status:
        return {"error": f"verdict must be one of {sorted(verdict_to_status)}"}

    g = graph or app_state.graph
    with SessionLocal() as db:
        engine = RetrievalEngine(db, g)
        entry = engine.resolve_identifier(entry_id)
        if entry is None:
            return {"error": f"Entry '{entry_id}' not found."}

        entry.metadata.feedback_log.append({
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "agent_id": agent_id,
            "verdict": verdict,
            "note": note,
            "evidence": evidence,
        })
        new_status = verdict_to_status[verdict]
        if new_status is not None:
            entry.metadata.verification_status = new_status
        updated = EntryRepository(db).update(entry)

        # Keep the in-memory graph in step with the stored entry, the same
        # way the other entry-updating tools in this module do.
        if g is not None and updated:
            g.add_entry(updated)

        # An "unclear" verdict leaves the status untouched, so it may still
        # be unset here; report None instead of failing on ``.value``.
        current_status = entry.metadata.verification_status
        return {
            "entry_id": entry.id,
            "verification_status": (
                current_status.value if current_status is not None else None
            ),
            "feedback_count": len(entry.metadata.feedback_log),
        }
988
+
989
+
990
def list_by_verification(
    status: str = "unverified",
    limit: int = 50,
    graph: Any = None,
) -> list[dict]:
    """Return nodes whose verification_status value equals *status*.

    Parameters
    ----------
    status:
        Status value to match (unverified | bugged | ...).
    limit:
        Upper bound on the number of nodes returned (applied as a slice
        after filtering).
    graph:
        Graph given to the retrieval engine; a falsy value falls back to
        ``app_state.graph``.

    Returns
    -------
    list[dict]
        id, slug, title, type, verification_status and feedback_count for
        each matching node. Entries without a status are never matched.
    """
    from core import app_state
    from core.retrieval.retrieval import RetrievalEngine
    from core.storage.database import SessionLocal

    active_graph = graph or app_state.graph
    with SessionLocal() as session:
        retrieval = RetrievalEngine(session, active_graph)
        # Over-fetch, because the status filter is applied after the listing.
        pool = retrieval.list_entries(limit=max(limit * 5, 500))

        selected = []
        for entry in pool:
            current = entry.metadata.verification_status
            if not current or current.value != status:
                continue
            selected.append(entry)

        rows: list[dict] = []
        for entry in selected[:limit]:
            rows.append(
                {
                    "id": entry.id,
                    "slug": entry.slug,
                    "title": entry.title,
                    "type": entry.entry_type.value,
                    "verification_status": entry.metadata.verification_status.value,
                    "feedback_count": len(entry.metadata.feedback_log),
                }
            )
        return rows
1019
+
1020
+
1021
def list_needs_generalization(limit: int = 50, graph: Any = None) -> list[dict]:
    """Return nodes whose metadata has ``needs_generalization`` set.

    Parameters
    ----------
    limit:
        Upper bound on the number of nodes returned (applied as a slice
        after filtering).
    graph:
        Graph given to the retrieval engine; a falsy value falls back to
        ``app_state.graph``.

    Returns
    -------
    list[dict]
        id, slug, title and type for each flagged node.
    """
    from core import app_state
    from core.retrieval.retrieval import RetrievalEngine
    from core.storage.database import SessionLocal

    active_graph = graph or app_state.graph
    with SessionLocal() as session:
        retrieval = RetrievalEngine(session, active_graph)
        # Over-fetch, because the flag filter is applied after the listing.
        pool = retrieval.list_entries(limit=max(limit * 5, 500))
        flagged = [entry for entry in pool if entry.metadata.needs_generalization]

        rows: list[dict] = []
        for entry in flagged[:limit]:
            rows.append(
                {
                    "id": entry.id,
                    "slug": entry.slug,
                    "title": entry.title,
                    "type": entry.entry_type.value,
                }
            )
        return rows
1036
+
1037
+
1038
def create_material_entry(
    formula: str,
    crystal_system: str = "",
    space_group: str = "",
    description: str = "",
    tags: list[str] | None = None,
    source_provenance: str | None = None,
    graph: Any = None,
) -> dict:
    """Create a structured *material* entry for a crystal or compound.

    Stores formula, crystal system, space group, and description in a standardised
    content template so the entry is immediately useful for downstream interface
    workflows and agent reasoning.

    Parameters
    ----------
    formula:
        Used as the entry title and as the content heading.
    crystal_system, space_group, description:
        Optional details; each is added to the content only when non-empty.
    tags:
        Extra tags appended after the built-in ``material`` and ``crystal``
        tags; duplicates are dropped while keeping first-seen order.
    source_provenance:
        Stored on the entry's metadata.
    graph:
        Graph to add the saved entry to; a falsy value falls back to
        ``app_state.graph``, as in the other tools of this module.

    Returns
    -------
    dict
        id, slug and title of the saved entry, plus ``"type": "data"``.
    """
    from core import app_state
    from core.schemas.entry import Entry, EntryMetadata, EntryType
    from core.storage.database import SessionLocal
    from core.storage.repository import EntryRepository

    content_lines = [f"## {formula}\n"]
    if crystal_system:
        content_lines.append(f"- **Crystal system**: {crystal_system}")
    if space_group:
        content_lines.append(f"- **Space group**: {space_group}")
    if description:
        content_lines.append(f"\n{description}")
    content_lines.append(
        "\n### Usage\n"
        f"This material can be used as a component in [[{formula}/X Interface]] workflows."
    )

    entry = Entry(
        title=formula,
        content="\n".join(content_lines),
        entry_type=EntryType.data,
        # dict.fromkeys de-duplicates while preserving order.
        tags=list(dict.fromkeys(["material", "crystal"] + (tags or []))),
        metadata=EntryMetadata(source_provenance=source_provenance),
    )

    # Fall back to the application-wide graph so the new node is mirrored
    # in memory even when the caller does not pass one explicitly.
    g = graph or app_state.graph
    with SessionLocal() as db:
        saved = EntryRepository(db).create(entry)
        if g is not None:
            g.add_entry(saved)
    return {"id": saved.id, "slug": saved.slug, "title": saved.title, "type": "data"}
1081
+
1082
+
1083
def attach_script_to_entry(
    entry_id: str,
    script_id: str,
    relation: str = "implements",
    graph: Any = None,
) -> dict:
    """DEPRECATED — formerly linked standalone script nodes via edges.

    Every argument is ignored; the call always returns an ``error`` payload
    that points the caller to ``add_script_to_entry``, which attaches scripts
    directly to a node.
    """
    deprecation_notice = (
        "attach_script_to_entry is deprecated. Use add_script_to_entry to attach "
        "scripts directly to the target entry instead of creating a separate script node."
    )
    return dict(error=deprecation_notice)
1099
+
1100
+
1101
+ # ---------------------------------------------------------------------------
1102
+ # Hierarchical-memory tools (L3 heuristics / L4 constraints)
1103
+ # ---------------------------------------------------------------------------
1104
+
1105
+
1106
def _create_sidecar(
    *,
    skill_id: str,
    title: str,
    content: str,
    entry_type_value: str,
    skill_level_value: str,
    edge_relation_value: str,
    tags: list[str] | None,
    applicability: dict | None,
    update_failure_modes: bool,
    graph: Any,
) -> dict:
    """Shared helper for create_heuristic / create_constraint.

    Resolves the target skill, creates a new entry of the requested type and
    skill level, and links it to the skill with an edge running from the new
    entry to the skill.

    Parameters
    ----------
    skill_id:
        Identifier resolved through ``RetrievalEngine.resolve_identifier``.
    title, content, tags:
        Stored on the new entry (``tags`` defaults to an empty list).
    entry_type_value, skill_level_value, edge_relation_value:
        Raw values converted with ``EntryType``, ``SkillLevel`` and
        ``EdgeRelation`` respectively.
    applicability:
        Stored on the new entry's metadata (defaults to an empty dict).
    update_failure_modes:
        When true and the new entry is a ``constraint``, its slug is appended
        to the skill's ``metadata.failure_modes`` if not already present.
    graph:
        Graph to mirror the changes into; a falsy value falls back to
        ``app_state.graph``.

    Returns
    -------
    dict
        ``{"error": ...}`` when the skill is not found, otherwise id, slug,
        title and level of the new entry plus the skill slug and relation.
    """
    from core import app_state
    from core.retrieval.retrieval import RetrievalEngine
    from core.schemas.edge import Edge, EdgeRelation
    from core.schemas.entry import Entry, EntryMetadata, EntryType, SkillLevel
    from core.storage.database import SessionLocal
    from core.storage.repository import EdgeRepository, EntryRepository

    g = graph or app_state.graph
    with SessionLocal() as db:
        engine = RetrievalEngine(db, g)
        skill_entry = engine.resolve_identifier(skill_id)
        if skill_entry is None:
            return {"error": f"Skill '{skill_id}' not found."}

        meta = EntryMetadata(
            skill_level=SkillLevel(skill_level_value),
            applicability=applicability or {},
            source_provenance=f"sidecar_of:{skill_entry.slug}",
        )
        entry = Entry(
            title=title,
            content=content,
            entry_type=EntryType(entry_type_value),
            tags=tags or [],
            metadata=meta,
        )
        saved = EntryRepository(db).create(entry)
        edge = Edge(
            source_id=saved.id,
            target_id=skill_entry.id,
            relation=EdgeRelation(edge_relation_value),
        )
        # Keep the repository's return value so the graph receives the
        # persisted edge, as decompose_capability does.
        saved_edge = EdgeRepository(db).create(edge)

        updated_skill = None
        if update_failure_modes and entry_type_value == "constraint":
            if saved.slug not in skill_entry.metadata.failure_modes:
                skill_entry.metadata.failure_modes.append(saved.slug)
                updated_skill = EntryRepository(db).update(skill_entry)

        if g is not None:
            g.add_entry(saved)
            g.add_edge(saved_edge)
            # Refresh the skill node too when its failure_modes changed, so
            # the in-memory graph does not keep the stale metadata.
            if updated_skill:
                g.add_entry(updated_skill)
        return {
            "id": saved.id,
            "slug": saved.slug,
            "title": saved.title,
            "level": skill_level_value,
            "attached_to": skill_entry.slug,
            "relation": edge_relation_value,
        }
1170
+
1171
+
1172
def create_heuristic(
    skill: str,
    title: str,
    content: str,
    tags: list[str] | None = None,
    domain: str | None = None,
    confidence: float | None = None,
    papers: list[str] | None = None,
    graph: Any = None,
) -> dict:
    """Attach a new L3 heuristic node to an L1/L2 *skill*.

    A heuristic is conditional, empirical guidance ("cooling rate strongly
    affects sp2/sp3 ratio") rather than a universal truth. Prefer this over
    burying such guidance inside a capability's content.

    ``domain``, ``confidence`` (coerced to ``float``) and ``papers`` are
    collected into the node's applicability mapping when provided. The node
    is created through ``_create_sidecar`` with a ``heuristic_for`` relation,
    and that helper's result is returned unchanged.
    """
    scope: dict = {}
    if domain:
        scope.update(domain=domain)
    if confidence is not None:
        scope.update(confidence=float(confidence))
    if papers:
        scope.update(papers=list(papers))

    sidecar_spec = {
        "skill_id": skill,
        "title": title,
        "content": content,
        "entry_type_value": "heuristic",
        "skill_level_value": "L3",
        "edge_relation_value": "heuristic_for",
        "tags": tags,
        "applicability": scope,
        "update_failure_modes": False,
        "graph": graph,
    }
    return _create_sidecar(**sidecar_spec)
1209
+
1210
+
1211
def create_constraint(
    skill: str,
    title: str,
    content: str,
    tags: list[str] | None = None,
    domain: str | None = None,
    severity: str | None = None,
    papers: list[str] | None = None,
    graph: Any = None,
) -> dict:
    """Attach a new L4 constraint / failure-mode node to an L1/L2 *skill*.

    A constraint captures a known limitation, instability region, or failure
    pattern ("unsuitable for bond-breaking processes").

    ``domain``, ``severity`` and ``papers`` are collected into the node's
    applicability mapping when provided. The node is created through
    ``_create_sidecar`` with a ``constraint_on`` relation and with
    ``update_failure_modes`` enabled, so the new slug is also recorded in
    ``skill.metadata.failure_modes`` for quick planner access. That helper's
    result is returned unchanged.
    """
    scope: dict = {}
    if domain:
        scope.update(domain=domain)
    if severity:
        scope.update(severity=severity)
    if papers:
        scope.update(papers=list(papers))

    sidecar_spec = {
        "skill_id": skill,
        "title": title,
        "content": content,
        "entry_type_value": "constraint",
        "skill_level_value": "L4",
        "edge_relation_value": "constraint_on",
        "tags": tags,
        "applicability": scope,
        "update_failure_modes": True,
        "graph": graph,
    }
    return _create_sidecar(**sidecar_spec)
1247
+
1248
+
1249
def decompose_capability(
    capability: str,
    procedure: str,
    graph: Any = None,
) -> dict:
    """Link an L1 *capability* to an L2 *procedure* with a ``decomposes_to`` edge.

    Both arguments identify existing nodes (id/slug/alias) and are resolved
    through ``RetrievalEngine.resolve_identifier``. The edge records that
    *procedure* is one executable decomposition of *capability*; a capability
    may have several.

    Returns ``{"error": ...}`` when either node cannot be resolved, otherwise
    the new edge id together with both slugs and the relation name. A falsy
    ``graph`` falls back to ``app_state.graph``.
    """
    from core import app_state
    from core.retrieval.retrieval import RetrievalEngine
    from core.schemas.edge import Edge, EdgeRelation
    from core.storage.database import SessionLocal
    from core.storage.repository import EdgeRepository

    active_graph = graph or app_state.graph
    with SessionLocal() as session:
        retrieval = RetrievalEngine(session, active_graph)
        # Resolve both ends first, then report the first one that is missing.
        parent = retrieval.resolve_identifier(capability)
        child = retrieval.resolve_identifier(procedure)
        if parent is None:
            return {"error": f"Capability '{capability}' not found."}
        if child is None:
            return {"error": f"Procedure '{procedure}' not found."}

        link = Edge(
            source_id=parent.id,
            target_id=child.id,
            relation=EdgeRelation.decomposes_to,
        )
        stored_link = EdgeRepository(session).create(link)
        if active_graph is not None:
            active_graph.add_edge(stored_link)

        outcome = {"edge_id": stored_link.id}
        outcome["capability"] = parent.slug
        outcome["procedure"] = child.slug
        outcome["relation"] = "decomposes_to"
        return outcome
1285
+
1286
+
1287
def retrieve_plan(goal: str, k: int = 5, include_l2: bool = True, graph: Any = None) -> list[dict]:
    """Stage-1 retrieval: planner-level skills (L1, optionally L2) for *goal*.

    Delegates to ``ProgressiveRetriever.plan``. Heuristics and constraints are
    not part of this stage — fetch them with ``retrieve_heuristics`` /
    ``retrieve_constraints`` once a candidate is selected.

    Returns one dict per result with id, slug, title, entry_type and tags.
    A falsy ``graph`` falls back to ``app_state.graph``.
    """
    from core import app_state
    from core.retrieval.progressive import ProgressiveRetriever
    from core.storage.database import SessionLocal

    active_graph = graph or app_state.graph
    with SessionLocal() as session:
        retriever = ProgressiveRetriever(session, active_graph)
        candidates = retriever.plan(goal=goal, k=k, include_l2=include_l2)

        rows: list[dict] = []
        for skill_entry in candidates:
            rows.append(
                {
                    "id": skill_entry.id,
                    "slug": skill_entry.slug,
                    "title": skill_entry.title,
                    "entry_type": skill_entry.entry_type.value,
                    "tags": skill_entry.tags,
                }
            )
        return rows
1312
+
1313
+
1314
def retrieve_heuristics(skill: str, k: int = 5, graph: Any = None) -> list[dict]:
    """Stage-2 retrieval: the L3 heuristics attached to *skill*.

    Delegates to ``ProgressiveRetriever.heuristics_for`` and returns one dict
    per result with id, slug, title, content and applicability. A falsy
    ``graph`` falls back to ``app_state.graph``.
    """
    from core import app_state
    from core.retrieval.progressive import ProgressiveRetriever
    from core.storage.database import SessionLocal

    active_graph = graph or app_state.graph
    with SessionLocal() as session:
        retriever = ProgressiveRetriever(session, active_graph)
        found = retriever.heuristics_for(skill, k=k)

        rows: list[dict] = []
        for heuristic in found:
            rows.append(
                {
                    "id": heuristic.id,
                    "slug": heuristic.slug,
                    "title": heuristic.title,
                    "content": heuristic.content,
                    "applicability": heuristic.metadata.applicability,
                }
            )
        return rows
1334
+
1335
+
1336
def retrieve_constraints(skill: str, k: int = 5, graph: Any = None) -> list[dict]:
    """Stage-3 retrieval: the L4 constraints / failure modes for *skill*.

    Delegates to ``ProgressiveRetriever.constraints_for`` and returns one dict
    per result with id, slug, title, content and applicability. A falsy
    ``graph`` falls back to ``app_state.graph``.
    """
    from core import app_state
    from core.retrieval.progressive import ProgressiveRetriever
    from core.storage.database import SessionLocal

    active_graph = graph or app_state.graph
    with SessionLocal() as session:
        retriever = ProgressiveRetriever(session, active_graph)
        found = retriever.constraints_for(skill, k=k)

        rows: list[dict] = []
        for constraint in found:
            rows.append(
                {
                    "id": constraint.id,
                    "slug": constraint.slug,
                    "title": constraint.title,
                    "content": constraint.content,
                    "applicability": constraint.metadata.applicability,
                }
            )
        return rows
1356
+
1357
+
1358
+ # ---------------------------------------------------------------------------
1359
+ # OpenAI tool schema definitions
1360
+ # ---------------------------------------------------------------------------
1361
+
1362
+ TOOL_SCHEMAS: list[dict] = [
1363
+ {
1364
+ "type": "function",
1365
+ "function": {
1366
+ "name": "create_entry",
1367
+ "description": "Create a new knowledge entry (node) in the graph.",
1368
+ "parameters": {
1369
+ "type": "object",
1370
+ "properties": {
1371
+ "title": {"type": "string", "description": "Entry title"},
1372
+ "content": {"type": "string", "description": "Entry body (wiki text, markdown)"},
1373
+ "entry_type": {
1374
+ "type": "string",
1375
+ "enum": ["capability", "procedure", "workflow", "tool", "repository",
1376
+ "environment", "dependency", "data", "analytical", "memory", "heuristic", "constraint", "generic"],
1377
+ "description": "Semantic type of this entry",
1378
+ },
1379
+ "tags": {"type": "array", "items": {"type": "string"}, "description": "List of tags"},
1380
+ "aliases": {"type": "array", "items": {"type": "string"}, "description": "Alternative names / synonyms for this entry"},
1381
+ "source_provenance": {"type": "string", "description": "URL or path this entry was sourced from"},
1382
+ },
1383
+ "required": ["title"],
1384
+ },
1385
+ },
1386
+ },
1387
+ {
1388
+ "type": "function",
1389
+ "function": {
1390
+ "name": "update_entry",
1391
+ "description": "Update fields on an existing entry by its ID or slug.",
1392
+ "parameters": {
1393
+ "type": "object",
1394
+ "properties": {
1395
+ "entry_id": {"type": "string", "description": "Entry ID or slug"},
1396
+ "title": {"type": "string"},
1397
+ "content": {"type": "string"},
1398
+ "entry_type": {
1399
+ "type": "string",
1400
+ "enum": ["capability", "procedure", "workflow", "tool", "repository",
1401
+ "environment", "dependency", "data", "analytical", "memory", "heuristic", "constraint", "generic"],
1402
+ },
1403
+ "tags": {"type": "array", "items": {"type": "string"}},
1404
+ "aliases": {"type": "array", "items": {"type": "string"}, "description": "Alternative names / synonyms"},
1405
+ },
1406
+ "required": ["entry_id"],
1407
+ },
1408
+ },
1409
+ },
1410
+ {
1411
+ "type": "function",
1412
+ "function": {
1413
+ "name": "delete_entry",
1414
+ "description": "Delete an entry (node) and all its edges by ID.",
1415
+ "parameters": {
1416
+ "type": "object",
1417
+ "properties": {
1418
+ "entry_id": {"type": "string", "description": "Entry ID"},
1419
+ },
1420
+ "required": ["entry_id"],
1421
+ },
1422
+ },
1423
+ },
1424
+ {
1425
+ "type": "function",
1426
+ "function": {
1427
+ "name": "get_entry",
1428
+ "description": "Retrieve full details of a single entry by ID or slug.",
1429
+ "parameters": {
1430
+ "type": "object",
1431
+ "properties": {
1432
+ "identifier": {"type": "string", "description": "Entry ID or slug"},
1433
+ },
1434
+ "required": ["identifier"],
1435
+ },
1436
+ },
1437
+ },
1438
+ {
1439
+ "type": "function",
1440
+ "function": {
1441
+ "name": "search_entries",
1442
+ "description": (
1443
+ "Search for entries using hybrid semantic + keyword retrieval. "
1444
+ "The default 'hybrid' mode fuses embedding-based vector similarity (ANN) "
1445
+ "with keyword scoring via Reciprocal Rank Fusion, then re-ranks by "
1446
+ "verification trust and usage count. "
1447
+ "Use 'semantic' when you want conceptually/thematically similar results "
1448
+ "even if the exact words differ (e.g. paraphrases, synonyms, related domains). "
1449
+ "Use 'keyword' for exact title, acronym, or tag lookups. "
1450
+ "Strategy tip: if the first search misses, retry with a different mode or "
1451
+ "a rephrased / more general query."
1452
+ ),
1453
+ "parameters": {
1454
+ "type": "object",
1455
+ "properties": {
1456
+ "query": {"type": "string"},
1457
+ "limit": {"type": "integer", "default": 10},
1458
+ "mode": {
1459
+ "type": "string",
1460
+ "enum": ["hybrid", "semantic", "keyword"],
1461
+ "default": "hybrid",
1462
+ "description": (
1463
+ "hybrid: keyword + embedding ANN fused (default). "
1464
+ "semantic: embedding-only, best for conceptual similarity. "
1465
+ "keyword: exact text match, best for known titles or acronyms."
1466
+ ),
1467
+ },
1468
+ },
1469
+ "required": ["query"],
1470
+ },
1471
+ },
1472
+ },
1473
+ {
1474
+ "type": "function",
1475
+ "function": {
1476
+ "name": "list_entries",
1477
+ "description": "List entries in the graph (returns id, slug, title, type).",
1478
+ "parameters": {
1479
+ "type": "object",
1480
+ "properties": {
1481
+ "limit": {"type": "integer", "default": 20},
1482
+ },
1483
+ },
1484
+ },
1485
+ },
1486
+ {
1487
+ "type": "function",
1488
+ "function": {
1489
+ "name": "create_edge",
1490
+ "description": "Create a directed edge (relationship) between two entries.",
1491
+ "parameters": {
1492
+ "type": "object",
1493
+ "properties": {
1494
+ "source_id": {"type": "string", "description": "Source entry ID"},
1495
+ "target_id": {"type": "string", "description": "Target entry ID"},
1496
+ "relation": {
1497
+ "type": "string",
1498
+ "enum": ["dependency", "compatible_with", "alternative_to", "related_workflow",
1499
+ "generated_from", "memory_of", "refinement_of", "derived_from",
1500
+ "warning_about", "cited_by", "wikilink", "prerequisite", "replacement",
1501
+ "execution_pathway", "transformation", "provenance", "compatibility",
1502
+ "implements", "uses", "documents"],
1503
+ "description": "Semantic relation type",
1504
+ },
1505
+ "weight": {"type": "number", "default": 1.0},
1506
+ },
1507
+ "required": ["source_id", "target_id"],
1508
+ },
1509
+ },
1510
+ },
1511
+ {
1512
+ "type": "function",
1513
+ "function": {
1514
+ "name": "delete_edge",
1515
+ "description": "Delete an edge by its ID.",
1516
+ "parameters": {
1517
+ "type": "object",
1518
+ "properties": {
1519
+ "edge_id": {"type": "string"},
1520
+ },
1521
+ "required": ["edge_id"],
1522
+ },
1523
+ },
1524
+ },
1525
+ {
1526
+ "type": "function",
1527
+ "function": {
1528
+ "name": "get_neighbors",
1529
+ "description": "Get entries directly connected to a given entry.",
1530
+ "parameters": {
1531
+ "type": "object",
1532
+ "properties": {
1533
+ "entry_id": {"type": "string"},
1534
+ "direction": {
1535
+ "type": "string",
1536
+ "enum": ["out", "in", "both"],
1537
+ "default": "both",
1538
+ },
1539
+ },
1540
+ "required": ["entry_id"],
1541
+ },
1542
+ },
1543
+ },
1544
+ {
1545
+ "type": "function",
1546
+ "function": {
1547
+ "name": "graph_stats",
1548
+ "description": "Return node count, edge count, and DAG status of the graph.",
1549
+ "parameters": {"type": "object", "properties": {}},
1550
+ },
1551
+ },
1552
+ {
1553
+ "type": "function",
1554
+ "function": {
1555
+ "name": "resolve_wikilinks",
1556
+ "description": "Scan all entry content for [[wikilinks]] and create edges for resolved matches.",
1557
+ "parameters": {"type": "object", "properties": {}},
1558
+ },
1559
+ },
1560
+ {
1561
+ "type": "function",
1562
+ "function": {
1563
+ "name": "remove_dangling_edges",
1564
+ "description": "Remove edges whose source or target entry no longer exists.",
1565
+ "parameters": {"type": "object", "properties": {}},
1566
+ },
1567
+ },
1568
+ {
1569
+ "type": "function",
1570
+ "function": {
1571
+ "name": "fetch_url",
1572
+ "description": "Fetch and return the text content of any URL (web page, API endpoint, documentation site, etc.). Use this when the user provides a specific URL or when you need to read a page in full rather than just search snippets.",
1573
+ "parameters": {
1574
+ "type": "object",
1575
+ "properties": {
1576
+ "url": {"type": "string", "description": "The URL to fetch"},
1577
+ "timeout": {"type": "integer", "default": 15, "description": "Request timeout in seconds"},
1578
+ },
1579
+ "required": ["url"],
1580
+ },
1581
+ },
1582
+ },
1583
+ {
1584
+ "type": "function",
1585
+ "function": {
1586
+ "name": "web_search",
1587
+ "description": "Search the web via DuckDuckGo and return titles, URLs and snippets.",
1588
+ "parameters": {
1589
+ "type": "object",
1590
+ "properties": {
1591
+ "query": {"type": "string", "description": "Search query"},
1592
+ "max_results": {"type": "integer", "default": 5},
1593
+ },
1594
+ "required": ["query"],
1595
+ },
1596
+ },
1597
+ },
1598
+ {
1599
+ "type": "function",
1600
+ "function": {
1601
+ "name": "find_similar_nodes",
1602
+ "description": (
1603
+ "Find existing nodes that are semantically or lexically similar to a proposed title. "
1604
+ "Uses hybrid embedding + keyword search by default. "
1605
+ "ALWAYS call this before creating a new node to avoid duplicates — "
1606
+ "try both the specific title AND a generalised version. "
1607
+ "If the default mode returns poor results, retry with mode='semantic' to catch "
1608
+ "conceptually equivalent nodes that use different wording, or mode='keyword' "
1609
+ "to find exact-title/acronym matches. "
1610
+ "Returns id, slug, title, type, tags, and aliases for each candidate."
1611
+ ),
1612
+ "parameters": {
1613
+ "type": "object",
1614
+ "properties": {
1615
+ "title": {"type": "string", "description": "The proposed node title or concept to check"},
1616
+ "limit": {"type": "integer", "default": 8},
1617
+ "mode": {
1618
+ "type": "string",
1619
+ "enum": ["hybrid", "semantic", "keyword"],
1620
+ "default": "hybrid",
1621
+ "description": (
1622
+ "hybrid: keyword + embedding ANN fused (default). "
1623
+ "semantic: embedding-only, best for conceptual/paraphrase matching. "
1624
+ "keyword: exact text match, best for known titles or acronyms."
1625
+ ),
1626
+ },
1627
+ },
1628
+ "required": ["title"],
1629
+ },
1630
+ },
1631
+ },
1632
+ {
1633
+ "type": "function",
1634
+ "function": {
1635
+ "name": "get_graph_overview",
1636
+ "description": (
1637
+ "Get a high-level overview of the graph: stats, type distribution, "
1638
+ "a random sample of node titles, and the most connected nodes. "
1639
+ "Use this to orient yourself before adding or restructuring content."
1640
+ ),
1641
+ "parameters": {
1642
+ "type": "object",
1643
+ "properties": {
1644
+ "sample_size": {"type": "integer", "default": 15, "description": "Number of random nodes to sample"},
1645
+ },
1646
+ },
1647
+ },
1648
+ },
1649
+ {
1650
+ "type": "function",
1651
+ "function": {
1652
+ "name": "list_nodes_by_type",
1653
+ "description": "List all nodes of a specific entry type (capability, tool, procedure, etc.).",
1654
+ "parameters": {
1655
+ "type": "object",
1656
+ "properties": {
1657
+ "entry_type": {
1658
+ "type": "string",
1659
+ "enum": ["capability", "procedure", "workflow", "tool", "repository",
1660
+ "environment", "dependency", "data", "analytical", "memory", "heuristic", "constraint", "generic"],
1661
+ },
1662
+ "limit": {"type": "integer", "default": 50},
1663
+ },
1664
+ "required": ["entry_type"],
1665
+ },
1666
+ },
1667
+ },
1668
+ {
1669
+ "type": "function",
1670
+ "function": {
1671
+ "name": "merge_entries",
1672
+ "description": (
1673
+ "Merge a duplicate node into a primary node. "
1674
+ "Re-targets all edges, optionally merges aliases and tags, then deletes the duplicate. "
1675
+ "Use when two nodes represent the same concept."
1676
+ ),
1677
+ "parameters": {
1678
+ "type": "object",
1679
+ "properties": {
1680
+ "primary_id": {"type": "string", "description": "ID or slug of the entry to keep"},
1681
+ "duplicate_id": {"type": "string", "description": "ID or slug of the entry to remove"},
1682
+ "merge_aliases": {"type": "boolean", "default": True, "description": "Add duplicate's title and aliases to primary's aliases"},
1683
+ "merge_tags": {"type": "boolean", "default": True, "description": "Merge duplicate's tags into primary"},
1684
+ },
1685
+ "required": ["primary_id", "duplicate_id"],
1686
+ },
1687
+ },
1688
+ },
1689
+ # ------------------------------------------------------------------ #
1690
+ # Hierarchical-memory tools (L1–L4) #
1691
+ # ------------------------------------------------------------------ #
1692
+ {
1693
+ "type": "function",
1694
+ "function": {
1695
+ "name": "create_heuristic",
1696
+ "description": (
1697
+ "Create an L3 heuristic node attached to an L1 capability or L2 procedure. "
1698
+ "Use this for conditional empirical guidance (e.g. 'cooling rate strongly affects "
1699
+ "sp2/sp3 ratio'), NOT for universal truths. Wires a 'heuristic_for' edge."
1700
+ ),
1701
+ "parameters": {
1702
+ "type": "object",
1703
+ "properties": {
1704
+ "skill": {"type": "string", "description": "Target skill id/slug/alias (L1 or L2)"},
1705
+ "title": {"type": "string", "description": "Short heuristic title"},
1706
+ "content": {"type": "string", "description": "Detailed heuristic description"},
1707
+ "tags": {"type": "array", "items": {"type": "string"}},
1708
+ "domain": {"type": "string", "description": "Applicable domain (e.g. 'amorphous-carbon', 'spintronics')"},
1709
+ "confidence": {"type": "number", "description": "0.0–1.0 confidence in this heuristic"},
1710
+ "papers": {"type": "array", "items": {"type": "string"}, "description": "Supporting paper URLs/DOIs"},
1711
+ },
1712
+ "required": ["skill", "title", "content"],
1713
+ },
1714
+ },
1715
+ },
1716
+ {
1717
+ "type": "function",
1718
+ "function": {
1719
+ "name": "create_constraint",
1720
+ "description": (
1721
+ "Create an L4 constraint / failure-mode node attached to an L1/L2 skill. "
1722
+ "Use for known limitations, instability regions, or failure patterns (e.g. "
1723
+ "'unsuitable for bond-breaking processes'). Wires a 'constraint_on' edge and "
1724
+ "denormalises the constraint slug into the skill's metadata.failure_modes list."
1725
+ ),
1726
+ "parameters": {
1727
+ "type": "object",
1728
+ "properties": {
1729
+ "skill": {"type": "string", "description": "Target skill id/slug/alias"},
1730
+ "title": {"type": "string"},
1731
+ "content": {"type": "string"},
1732
+ "tags": {"type": "array", "items": {"type": "string"}},
1733
+ "domain": {"type": "string"},
1734
+ "severity": {"type": "string", "description": "low | medium | high"},
1735
+ "papers": {"type": "array", "items": {"type": "string"}},
1736
+ },
1737
+ "required": ["skill", "title", "content"],
1738
+ },
1739
+ },
1740
+ },
1741
+ {
1742
+ "type": "function",
1743
+ "function": {
1744
+ "name": "decompose_capability",
1745
+ "description": (
1746
+ "Wire a 'decomposes_to' edge from an L1 capability to an L2 procedure that "
1747
+ "implements it. Use to record executable decompositions of high-level skills."
1748
+ ),
1749
+ "parameters": {
1750
+ "type": "object",
1751
+ "properties": {
1752
+ "capability": {"type": "string", "description": "L1 capability id/slug"},
1753
+ "procedure": {"type": "string", "description": "L2 procedure id/slug"},
1754
+ },
1755
+ "required": ["capability", "procedure"],
1756
+ },
1757
+ },
1758
+ },
1759
+ {
1760
+ "type": "function",
1761
+ "function": {
1762
+ "name": "retrieve_plan",
1763
+ "description": (
1764
+ "Stage-1 progressive retrieval: return planner-level skills (L1 capabilities, "
1765
+ "optionally L2 procedures) for a goal. Excludes heuristics/constraints — fetch "
1766
+ "those on demand via retrieve_heuristics / retrieve_constraints once a candidate "
1767
+ "is selected."
1768
+ ),
1769
+ "parameters": {
1770
+ "type": "object",
1771
+ "properties": {
1772
+ "goal": {"type": "string", "description": "Free-text description of the task"},
1773
+ "k": {"type": "integer", "default": 5},
1774
+ "include_l2": {"type": "boolean", "default": True},
1775
+ },
1776
+ "required": ["goal"],
1777
+ },
1778
+ },
1779
+ },
1780
+ {
1781
+ "type": "function",
1782
+ "function": {
1783
+ "name": "retrieve_heuristics",
1784
+ "description": (
1785
+ "Stage-2 progressive retrieval: L3 heuristics attached to a selected skill. "
1786
+ "Falls back to semantic search over L3 nodes if no edges exist."
1787
+ ),
1788
+ "parameters": {
1789
+ "type": "object",
1790
+ "properties": {
1791
+ "skill": {"type": "string"},
1792
+ "k": {"type": "integer", "default": 5},
1793
+ },
1794
+ "required": ["skill"],
1795
+ },
1796
+ },
1797
+ },
1798
+ {
1799
+ "type": "function",
1800
+ "function": {
1801
+ "name": "retrieve_constraints",
1802
+ "description": (
1803
+ "Stage-3 progressive retrieval: L4 constraints / failure modes attached to a "
1804
+ "selected skill. Use this when the verifier reports an error or when execution "
1805
+ "uncertainty is high."
1806
+ ),
1807
+ "parameters": {
1808
+ "type": "object",
1809
+ "properties": {
1810
+ "skill": {"type": "string"},
1811
+ "k": {"type": "integer", "default": 5},
1812
+ },
1813
+ "required": ["skill"],
1814
+ },
1815
+ },
1816
+ },
1817
+ # ------------------------------------------------------------------ #
1818
+ # Script management tools #
1819
+ # ------------------------------------------------------------------ #
1820
+ {
1821
+ "type": "function",
1822
+ "function": {
1823
+ "name": "add_script_to_entry",
1824
+ "description": (
1825
+ "Attach an executable script directly to an existing entry. "
1826
+ "The script is stored separately from the human-readable content field, "
1827
+ "so agents reading the entry will not accidentally consume large script bodies. "
1828
+ "A single entry can hold multiple scripts (e.g. two helper modules for one skill). "
1829
+ "Use for Python, bash, Julia, or any other runnable script."
1830
+ ),
1831
+ "parameters": {
1832
+ "type": "object",
1833
+ "properties": {
1834
+ "entry_id": {"type": "string", "description": "ID or slug of the target entry"},
1835
+ "code": {"type": "string", "description": "Full source code of the script"},
1836
+ "filename": {"type": "string", "description": "Suggested filename (e.g. relax.py). Defaults to slug + extension."},
1837
+ "language": {
1838
+ "type": "string",
1839
+ "description": "Programming language (e.g. python, bash, julia, r)",
1840
+ "default": "python",
1841
+ },
1842
+ "requirements": {
1843
+ "type": "array",
1844
+ "items": {"type": "string"},
1845
+ "description": "Package dependencies (e.g. ['ase', 'numpy'])",
1846
+ },
1847
+ "description": {"type": "string", "description": "Short description of what the script does"},
1848
+ },
1849
+ "required": ["entry_id", "code"],
1850
+ },
1851
+ },
1852
+ },
1853
+ {
1854
+ "type": "function",
1855
+ "function": {
1856
+ "name": "get_script",
1857
+ "description": (
1858
+ "List the scripts attached to an entry (metadata only — no code). "
1859
+ "Returns filenames, language, requirements, and download URLs. "
1860
+ "Use the download URL to fetch the actual source code."
1861
+ ),
1862
+ "parameters": {
1863
+ "type": "object",
1864
+ "properties": {
1865
+ "identifier": {"type": "string", "description": "Entry ID or slug"},
1866
+ "filename": {"type": "string", "description": "Filter to a specific script filename (optional)"},
1867
+ },
1868
+ "required": ["identifier"],
1869
+ },
1870
+ },
1871
+ },
1872
+ {
1873
+ "type": "function",
1874
+ "function": {
1875
+ "name": "list_scripts",
1876
+ "description": "List all entries that have scripts directly attached, with filenames and download URLs.",
1877
+ "parameters": {
1878
+ "type": "object",
1879
+ "properties": {
1880
+ "limit": {"type": "integer", "default": 50},
1881
+ },
1882
+ },
1883
+ },
1884
+ },
1885
+ # ------------------------------------------------------------------ #
1886
+ # Generic asset (folder-style) tools #
1887
+ # ------------------------------------------------------------------ #
1888
+ {
1889
+ "type": "function",
1890
+ "function": {
1891
+ "name": "add_asset_to_entry",
1892
+ "description": (
1893
+ "Attach a generic asset (script, reference, doc, example, data file, "
1894
+ "external link, free-form note) to an entry in a named folder. "
1895
+ "Each node is treated as a small folder of typed assets addressable "
1896
+ "as `[entry]/[folder]/[filename]`. "
1897
+ "Conventional folders: scripts, references, docs, examples, data, notes "
1898
+ "(custom folder names are also accepted). "
1899
+ "Use kind='link' for URLs (content = the URL), 'text' for inline "
1900
+ "markdown/notes, 'file' for downloadable bodies."
1901
+ ),
1902
+ "parameters": {
1903
+ "type": "object",
1904
+ "properties": {
1905
+ "entry_id": {"type": "string", "description": "ID or slug of the target entry"},
1906
+ "folder": {
1907
+ "type": "string",
1908
+ "description": "Sub-folder name (e.g. scripts, references, docs, examples, data, notes)",
1909
+ },
1910
+ "filename": {
1911
+ "type": "string",
1912
+ "description": "Filename inside the folder (may include a sub-path like 'inputs/lj.in')",
1913
+ },
1914
+ "content": {
1915
+ "type": "string",
1916
+ "description": "Body for file/text assets; the URL itself for link assets",
1917
+ },
1918
+ "kind": {
1919
+ "type": "string",
1920
+ "enum": ["file", "link", "text"],
1921
+ "default": "file",
1922
+ },
1923
+ "language": {"type": "string", "description": "Optional language hint (python, bash, markdown, …)"},
1924
+ "mime_type": {"type": "string", "description": "Optional MIME override"},
1925
+ "description": {"type": "string"},
1926
+ "requirements": {
1927
+ "type": "array",
1928
+ "items": {"type": "string"},
1929
+ "description": "Package dependencies (for runnable scripts)",
1930
+ },
1931
+ },
1932
+ "required": ["entry_id", "folder", "filename"],
1933
+ },
1934
+ },
1935
+ },
1936
+ {
1937
+ "type": "function",
1938
+ "function": {
1939
+ "name": "list_assets",
1940
+ "description": (
1941
+ "List the assets attached to an entry, grouped by folder. "
1942
+ "Optionally filter to a single folder. Returns metadata + download URLs; "
1943
+ "fetch bodies via the download URL."
1944
+ ),
1945
+ "parameters": {
1946
+ "type": "object",
1947
+ "properties": {
1948
+ "identifier": {"type": "string", "description": "Entry ID or slug"},
1949
+ "folder": {"type": "string", "description": "Optional folder name filter"},
1950
+ },
1951
+ "required": ["identifier"],
1952
+ },
1953
+ },
1954
+ },
1955
+ # ------------------------------------------------------------------ #
1956
+ # Material interface tools #
1957
+ # ------------------------------------------------------------------ #
1958
+ {
1959
+ "type": "function",
1960
+ "function": {
1961
+ "name": "create_material_entry",
1962
+ "description": (
1963
+ "Create a structured data entry for a crystal or compound (uses entry_type=data), "
1964
+ "recording its formula, crystal system, space group, and description "
1965
+ "in a standardised template suitable for downstream interface workflows."
1966
+ ),
1967
+ "parameters": {
1968
+ "type": "object",
1969
+ "properties": {
1970
+ "formula": {"type": "string", "description": "Chemical formula or material name (e.g. 'TiO2', 'GaN')"},
1971
+ "crystal_system": {"type": "string", "description": "e.g. cubic, tetragonal, hexagonal"},
1972
+ "space_group": {"type": "string", "description": "Hermann-Mauguin symbol or number (e.g. 'Fm-3m', '225')"},
1973
+ "description": {"type": "string"},
1974
+ "tags": {"type": "array", "items": {"type": "string"}},
1975
+ "source_provenance": {"type": "string"},
1976
+ },
1977
+ "required": ["formula"],
1978
+ },
1979
+ },
1980
+ },
1981
+ {
1982
+ "type": "function",
1983
+ "function": {
1984
+ "name": "build_material_interface_workflow",
1985
+ "description": (
1986
+ "DEPRECATED. Returns an error explaining how to use the generic "
1987
+ "'Material interface construction' capability + slab-stacking procedure "
1988
+ "instead of creating one node per material pair."
1989
+ ),
1990
+ "parameters": {
1991
+ "type": "object",
1992
+ "properties": {
1993
+ "material_a": {"type": "string"},
1994
+ "material_b": {"type": "string"},
1995
+ "method": {"type": "string", "default": "slab_stacking"},
1996
+ "description": {"type": "string"},
1997
+ "tags": {"type": "array", "items": {"type": "string"}},
1998
+ },
1999
+ "required": ["material_a", "material_b"],
2000
+ },
2001
+ },
2002
+ },
2003
+ {
2004
+ "type": "function",
2005
+ "function": {
2006
+ "name": "submit_feedback",
2007
+ "description": (
2008
+ "Record correctness feedback on a node. Updates verification_status "
2009
+ "and appends to the node's feedback_log. Use after testing a node, "
2010
+ "or when an external agent reports success/failure. "
2011
+ "verdict: works | peer_works | bugged | deprecated | unclear."
2012
+ ),
2013
+ "parameters": {
2014
+ "type": "object",
2015
+ "properties": {
2016
+ "entry_id": {"type": "string", "description": "Node ID or slug"},
2017
+ "verdict": {
2018
+ "type": "string",
2019
+ "enum": ["works", "peer_works", "bugged", "deprecated", "unclear"],
2020
+ },
2021
+ "note": {"type": "string", "description": "Short human-readable note"},
2022
+ "evidence": {"type": "string", "description": "Output, error message, or link"},
2023
+ "agent_id": {"type": "string", "description": "Identifier of the reporter"},
2024
+ },
2025
+ "required": ["entry_id", "verdict"],
2026
+ },
2027
+ },
2028
+ },
2029
+ {
2030
+ "type": "function",
2031
+ "function": {
2032
+ "name": "list_by_verification",
2033
+ "description": (
2034
+ "List nodes filtered by verification_status. "
2035
+ "Use to find unverified or bugged nodes that need attention."
2036
+ ),
2037
+ "parameters": {
2038
+ "type": "object",
2039
+ "properties": {
2040
+ "status": {
2041
+ "type": "string",
2042
+ "enum": ["unverified", "self_tested", "peer_reviewed",
2043
+ "community_tested", "bugged", "deprecated"],
2044
+ "default": "unverified",
2045
+ },
2046
+ "limit": {"type": "integer", "default": 50},
2047
+ },
2048
+ },
2049
+ },
2050
+ },
2051
+ {
2052
+ "type": "function",
2053
+ "function": {
2054
+ "name": "list_needs_generalization",
2055
+ "description": (
2056
+ "List nodes flagged as overly specific by the abstraction check. "
2057
+ "These are candidates for merging into a generic capability."
2058
+ ),
2059
+ "parameters": {
2060
+ "type": "object",
2061
+ "properties": {"limit": {"type": "integer", "default": 50}},
2062
+ },
2063
+ },
2064
+ },
2065
+ ]
2066
+
2067
# Dispatch table: tool name (str) -> the module-level callable registered
# under that name. For every tool schema visible alongside this table, the key
# used here is identical to the schema's "name" field.
TOOL_DISPATCH: dict[str, Any] = {
    # --- entries ---------------------------------------------------------
    "create_entry": create_entry,
    "update_entry": update_entry,
    "delete_entry": delete_entry,
    "get_entry": get_entry,
    "search_entries": search_entries,
    "list_entries": list_entries,
    # --- edges / graph ---------------------------------------------------
    "create_edge": create_edge,
    "delete_edge": delete_edge,
    "get_neighbors": get_neighbors,
    "graph_stats": graph_stats,
    "resolve_wikilinks": resolve_wikilinks,
    "remove_dangling_edges": remove_dangling_edges,
    # --- web -------------------------------------------------------------
    "fetch_url": fetch_url,
    "web_search": web_search,
    # --- overview / de-duplication ---------------------------------------
    "find_similar_nodes": find_similar_nodes,
    "get_graph_overview": get_graph_overview,
    "list_nodes_by_type": list_nodes_by_type,
    "merge_entries": merge_entries,
    # --- scripts ---------------------------------------------------------
    # NOTE(review): no schema named "create_script_entry" is visible in the
    # part of the schema list next to this table — confirm it is declared
    # earlier in the list.
    "create_script_entry": create_script_entry,
    "add_script_to_entry": add_script_to_entry,
    "get_script": get_script,
    "list_scripts": list_scripts,
    # --- generic assets --------------------------------------------------
    "add_asset_to_entry": add_asset_to_entry,
    "list_assets": list_assets,
    # --- material interface ----------------------------------------------
    # The schema for build_material_interface_workflow describes the tool as
    # DEPRECATED; it is still registered here.
    "build_material_interface_workflow": build_material_interface_workflow,
    "create_material_entry": create_material_entry,
    # NOTE(review): likewise, no "attach_script_to_entry" schema is visible
    # nearby — confirm it is exposed to the model.
    "attach_script_to_entry": attach_script_to_entry,
    # --- feedback / verification -----------------------------------------
    "submit_feedback": submit_feedback,
    "list_by_verification": list_by_verification,
    "list_needs_generalization": list_needs_generalization,
    # --- hierarchical memory (L1–L4) / progressive retrieval -------------
    "create_heuristic": create_heuristic,
    "create_constraint": create_constraint,
    "decompose_capability": decompose_capability,
    "retrieve_plan": retrieve_plan,
    "retrieve_heuristics": retrieve_heuristics,
    "retrieve_constraints": retrieve_constraints,
}