know-do-graph 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. agents/__init__.py +0 -0
  2. agents/extraction_agent/__init__.py +0 -0
  3. agents/extraction_agent/agent.py +170 -0
  4. agents/graph_agent/__init__.py +5 -0
  5. agents/graph_agent/agent.py +373 -0
  6. agents/graph_agent/tools.py +2106 -0
  7. agents/maintenance_agent/__init__.py +0 -0
  8. agents/maintenance_agent/agent.py +283 -0
  9. agents/orchestrator/__init__.py +0 -0
  10. agents/orchestrator/agent.py +217 -0
  11. agents/review_agent/__init__.py +0 -0
  12. agents/review_agent/agent.py +188 -0
  13. agents/review_agent/tools.py +472 -0
  14. api/__init__.py +0 -0
  15. api/main.py +136 -0
  16. api/routes/__init__.py +0 -0
  17. api/routes/agent.py +81 -0
  18. api/routes/entries.py +411 -0
  19. api/routes/graph.py +132 -0
  20. api/routes/mem.py +179 -0
  21. api/routes/remote.py +815 -0
  22. api/routes/remote_sync.py +230 -0
  23. api/routes/retrieve.py +88 -0
  24. core/__init__.py +0 -0
  25. core/app_state.py +9 -0
  26. core/events.py +84 -0
  27. core/extraction/__init__.py +0 -0
  28. core/extraction/wikilink_parser.py +48 -0
  29. core/graph/__init__.py +0 -0
  30. core/graph/graph.py +204 -0
  31. core/memory/__init__.py +0 -0
  32. core/memory/memgraph.py +458 -0
  33. core/resources/starter.db +0 -0
  34. core/retrieval/__init__.py +0 -0
  35. core/retrieval/embedder.py +122 -0
  36. core/retrieval/fusion.py +52 -0
  37. core/retrieval/progressive.py +399 -0
  38. core/retrieval/retrieval.py +346 -0
  39. core/retrieval/vector_store.py +91 -0
  40. core/schemas/__init__.py +0 -0
  41. core/schemas/edge.py +46 -0
  42. core/schemas/entry.py +388 -0
  43. core/storage/__init__.py +0 -0
  44. core/storage/database.py +104 -0
  45. core/storage/models.py +66 -0
  46. core/storage/repository.py +243 -0
  47. core/sync/__init__.py +20 -0
  48. core/sync/autolink.py +301 -0
  49. core/sync/db_merge.py +297 -0
  50. core/sync/db_watcher.py +84 -0
  51. core/sync/remote_sync.py +345 -0
  52. examples/__init__.py +0 -0
  53. examples/example_entries.py +206 -0
  54. examples/pymatgen_interface_examples.py +811 -0
  55. frontend/dist/assets/index-BLfo7ZZu.css +1 -0
  56. frontend/dist/assets/index-G-mYbZ9R.js +83 -0
  57. frontend/dist/assets/index-G-mYbZ9R.js.map +1 -0
  58. frontend/dist/index.html +92 -0
  59. know_do_graph-0.1.0.dist-info/METADATA +765 -0
  60. know_do_graph-0.1.0.dist-info/RECORD +63 -0
  61. know_do_graph-0.1.0.dist-info/WHEEL +4 -0
  62. know_do_graph-0.1.0.dist-info/entry_points.txt +2 -0
  63. main.py +944 -0
api/routes/remote.py ADDED
@@ -0,0 +1,815 @@
1
+ """Remote agent access routes.
2
+
3
+ Exposes a simplified, agent-friendly interface for remote clients to:
4
+ - Chat with the orchestrator agent (with optional multi-turn session history)
5
+ - Search and query the knowledge graph
6
+ - Submit feedback / memory traces
7
+ - Browse graph stats
8
+
9
+ All endpoints live under the ``/remote`` prefix.
10
+
11
+ The root GET / and GET /remote both return a plain-text instruction sheet
12
+ so that ``curl http://<host>:<port>/`` immediately tells any client how to
13
+ interact with the server.
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ import os
19
+ import textwrap
20
+ import uuid
21
+ from typing import Optional
22
+
23
+ from fastapi import APIRouter, Depends, HTTPException, Request
24
+ from fastapi.responses import PlainTextResponse
25
+ from pydantic import BaseModel
26
+ from sqlalchemy.orm import Session
27
+
28
+ from core.app_state import graph as _graph
29
+ from core.memory.memgraph import MemGraph
30
+ from core.retrieval.progressive import ProgressiveRetriever
31
+ from core.retrieval.retrieval import RetrievalEngine
32
+ from core.schemas.edge import EdgeRelation
33
+ from core.schemas.entry import EntryType
34
+ from core.storage.database import get_db
35
+
36
# Router that collects every endpoint defined in this module. No prefix is set
# here; the module docstring says the routes are served under ``/remote``, so
# the prefix is presumably applied where this router is included — TODO confirm.
router = APIRouter()

# ── In-memory session store ───────────────────────────────────────────────────
# Maps session_id → list of OpenAI-format message dicts (history after system prompt)
# remote_chat reads and overwrites an entry on every call and
# remote_clear_session removes it. The store is a plain module-level dict, so
# it is held only in this process's memory.
_sessions: dict[str, list[dict]] = {}
41
+
42
+ # ── Instruction text ──────────────────────────────────────────────────────────
43
+ _INSTRUCTIONS_TEMPLATE = textwrap.dedent(
44
+ """\
45
+ ╔══════════════════════════════════════════════════════════════════════╗
46
+ ║ Know-Do Graph — Remote Agent Access ║
47
+ ╚══════════════════════════════════════════════════════════════════════╝
48
+
49
+ A wiki-native executable knowledge graph with LLM agents.
50
+ Remote agents and humans can query the graph, chat with agents, and
51
+ submit feedback over plain HTTP.
52
+
53
+ ═══ QUICK START ═════════════════════════════════════════════════════
54
+
55
+ # Chat with the graph agent (one-shot):
56
+ curl -X POST http://{host}/remote/chat \\
57
+ -H "Content-Type: application/json" \\
58
+ -d '{{"message": "What entries are in the graph?"}}'
59
+
60
+ # Multi-turn chat — use a session_id to retain history across calls:
61
+ curl -X POST http://{host}/remote/chat \\
62
+ -H "Content-Type: application/json" \\
63
+ -d '{{"message": "Tell me about procedures", "session_id": "agent-01"}}'
64
+
65
+ curl -X POST http://{host}/remote/chat \\
66
+ -H "Content-Type: application/json" \\
67
+ -d '{{"message": "Now show me the dependencies", "session_id": "agent-01"}}'
68
+
69
+ # Search the knowledge graph:
70
+ curl "http://{host}/remote/search?q=relaxation&limit=5"
71
+
72
+ # Filter by entry type (capability | procedure | workflow | tool | ...):
73
+ curl "http://{host}/remote/search?entry_type=procedure"
74
+
75
+ # Get a specific entry by ID, slug, or alias:
76
+ curl "http://{host}/remote/entry/<id-or-slug>"
77
+
78
+ # Get related entries (BFS traversal, default depth=1):
79
+ curl "http://{host}/remote/entry/<id>/related?depth=2"
80
+
81
+ # Progressive disclosure — when an entry has attached L3/L4 sidecar
82
+ # nodes, the entry response includes a `progressive_hints` block. These
83
+ # endpoints behave like /remote/search but are scoped to the L3/L4
84
+ # nodes attached to the watched entry. Returns summaries — fetch full
85
+ # content via /remote/entry/<id>:
86
+ curl "http://{host}/remote/entry/<id>/heuristics?q=keywords&limit=10"
87
+ curl "http://{host}/remote/entry/<id>/constraints?q=keywords&limit=10"
88
+
89
+ # Graph stats + full node/edge dump:
90
+ curl "http://{host}/remote/graph"
91
+
92
+ # Submit feedback or observations:
93
+ curl -X POST http://{host}/remote/feedback \\
94
+ -H "Content-Type: application/json" \\
95
+ -d '{{"session_id": "agent-01", "content": "Entry X needs more detail", "tags": ["feedback"]}}'
96
+
97
+ # Report that you have TESTED a node and it works (or is bugged).
98
+ # This updates the entry's verification_status and appends to its feedback_log.
99
+ # verdict: works | peer_works | bugged | deprecated | unclear
100
+ curl -X POST http://{host}/entries/<id-or-slug>/feedback \\
101
+ -H "Content-Type: application/json" \\
102
+ -d '{{"verdict": "works", "note": "ran on H2O, converged", "agent_id": "matcreator-01"}}'
103
+
104
+ # Or do both in one call — store a memory trace AND update the entry:
105
+ curl -X POST http://{host}/remote/feedback \\
106
+ -H "Content-Type: application/json" \\
107
+ -d '{{"session_id": "agent-01", "content": "MACE relaxation diverged on Cu",
108
+ "entry_id": "mace-relaxation", "verdict": "bugged",
109
+ "agent_id": "matcreator-01"}}'
110
+
111
+ ═══ SUBMIT KNOWLEDGE FOR LATER DISTILLATION ══════════════════════
112
+
113
+ # Deposit a plain-text summary or context dump into the inbox:
114
+ curl -X POST http://{host}/remote/submit \\
115
+ -H "Content-Type: application/json" \\
116
+ -d '{{"title": "MACE geometry optimisation walkthrough",
117
+ "content": "...",
118
+ "agent_id": "matcreator-01"}}'
119
+
120
+ # Deposit an OpenAI-style conversation transcript:
121
+ curl -X POST http://{host}/remote/submit \\
122
+ -H "Content-Type: application/json" \\
123
+ -d '{{"title": "ASE relaxation session",
124
+ "format": "openai",
125
+ "messages": [{{"role":"user","content":"..."}},
126
+ {{"role":"assistant","content":"..."}}],
127
+ "agent_id": "matcreator-01"}}'
128
+
129
+ # Check what is waiting in the inbox:
130
+ curl "http://{host}/remote/inbox"
131
+
132
+ # Trigger distillation — the graph agent processes the inbox and creates nodes:
133
+ curl -X POST http://{host}/remote/distill \\
134
+ -H "Content-Type: application/json" \\
135
+ -d '{{}}'
136
+
137
+ # Dry-run: preview the distillation prompt without touching the graph:
138
+ curl -X POST http://{host}/remote/distill \\
139
+ -H "Content-Type: application/json" \\
140
+ -d '{{"dry_run": true}}'
141
+
142
+ # Clear session history:
143
+ curl -X DELETE http://{host}/remote/session/agent-01
144
+
145
+ ═══ ENDPOINTS ═══════════════════════════════════════════════════════
146
+
147
+ GET / — This instruction sheet (plain text)
148
+ GET /remote — Same instruction sheet
149
+ GET /health — Server health + graph stats (JSON)
150
+ GET /docs — Interactive API explorer (OpenAPI)
151
+
152
+ POST /remote/chat — Chat with the orchestrator agent (read-only)
153
+ GET /remote/search — Search entries
154
+ GET /remote/graph — Graph stats + full node/edge list
155
+ GET /remote/entry/{{id}} — Entry by ID, slug, or alias
156
+ GET /remote/entry/{{id}}/related — Related entries (BFS)
157
+ GET /remote/entry/{{id}}/heuristics — Attached L3 heuristics (experience); scoped search, supports q/tags/limit
158
+ GET /remote/entry/{{id}}/constraints — Attached L4 constraints (limits); scoped search, supports q/tags/limit
159
+ POST /remote/feedback — Free-form trace; optionally also updates an entry
160
+ POST /entries/{{id}}/feedback — Direct verification feedback on a node
161
+ GET /entries/{{id}}/download — Download a script entry's source code
162
+ DELETE /remote/session/{{id}} — Clear a session's chat history
163
+
164
+ POST /remote/submit — Deposit raw knowledge into the inbox
165
+ GET /remote/inbox — List pending inbox submissions
166
+ POST /remote/distill — Run graph agent to convert inbox into nodes
167
+
168
+ ═══ NODE VERIFICATION ═══════════════════════════════════════════════
169
+
170
+ Every entry has a `verification_status` (unverified by default). When you
171
+ use a skill/procedure node and verify it works (or find it broken),
172
+ POST to /entries/{{id}}/feedback so the graph learns. Verdicts:
173
+ works | peer_works | bugged | deprecated | unclear
174
+
175
+ ═══ CHAT REQUEST BODY ═══════════════════════════════════════════════
176
+
177
+ {{
178
+ "message": "Your question or instruction", // required
179
+ "session_id": "optional-id-for-multi-turn", // optional
180
+ "model": "optional-model-override" // optional
181
+ }}
182
+
183
+ Response: {{"response": "...", "session_id": "..."}}
184
+
185
+ ═══ FEEDBACK REQUEST BODY ═══════════════════════════════════════════
186
+
187
+ {{
188
+ "session_id": "your-session-id", // required
189
+ "content": "Your observation or feedback", // required
190
+ "tags": ["optional", "tags"], // optional
191
+ "success": true | false | null // optional
192
+ }}
193
+
194
+ ═══ SEARCH PARAMETERS ═══════════════════════════════════════════════
195
+
196
+ q — free-text query (title, content, tags)
197
+ tags — comma-separated tag filter e.g. tags=python,simulation
198
+ entry_type — one of: capability, procedure, workflow, tool, repository,
199
+ environment, dependency, data, analytical, memory, generic
200
+ limit — max results (default 20)
201
+
202
+ Search returns a compact summary per entry — id, title, slug,
203
+ entry_type, tags, aliases, and a short content snippet. Use
204
+ GET /remote/entry/<id-or-slug> to fetch the full content of any
205
+ hit you want to inspect.
206
+
207
+ ═══ NOTES ═══════════════════════════════════════════════════════════
208
+
209
+ • For full CRUD access use /entries, /graph, /mem, and /agent routes.
210
+ • Human-readable graph explorer: http://{host}/ui
211
+ • Full API reference: http://{host}/docs
212
+ """
213
+ )
214
+
215
+
216
def _render_instructions(request: Request) -> str:
    """Fill the instruction template with the host the client addressed.

    The value of the request's ``host`` header replaces every ``{host}``
    placeholder in ``_INSTRUCTIONS_TEMPLATE``; ``localhost:8000`` is used
    when the header is missing.
    """
    advertised_host = request.headers.get("host", "localhost:8000")
    rendered = _INSTRUCTIONS_TEMPLATE.format(host=advertised_host)
    return rendered
219
+
220
+
221
+ # ── Pydantic request models ───────────────────────────────────────────────────
222
+
223
+
224
class ChatRequest(BaseModel):
    """Request body for ``POST /remote/chat``."""

    # Text handed to the agent's ``chat`` call (required).
    message: str
    # Key into the in-memory session store; remote_chat generates a UUID when
    # this is omitted or empty.
    session_id: Optional[str] = None
    # Forwarded unchanged as the agent's ``model`` argument.
    model: Optional[str] = None
228
+
229
+
230
class FeedbackRequest(BaseModel):
    """Request body for ``POST /remote/feedback``."""

    # Session whose MemGraph receives the trace (required).
    session_id: str
    # Free-form feedback text; also reused as the ``note`` when an entry is updated.
    content: str
    # Tags stored with the trace.
    tags: list[str] = []
    # Optional outcome flag stored with the trace.
    success: Optional[bool] = None
    # When set, the feedback also updates the named entry's verification_status
    # via the same mechanism as POST /entries/{id}/feedback.
    # remote_feedback only does this when BOTH entry_id and verdict are set.
    entry_id: Optional[str] = None
    verdict: Optional[str] = None # works | peer_works | bugged | deprecated | unclear
    # Reported as the feedback author; remote_feedback falls back to session_id.
    agent_id: Optional[str] = None
240
+
241
+
242
class SubmitRequest(BaseModel):
    """Payload for POST /remote/submit.

    External agents use this to deposit raw knowledge (text, conversation
    transcripts, summaries) into the graph's inbox for later distillation.

    At least one of ``content`` or ``messages`` must be provided;
    ``remote_submit`` answers 422 when both are empty.
    """
    # Groups submissions. When omitted, remote_submit stores the submission in
    # the shared inbox session rather than generating a new id.
    session_id: Optional[str] = None
    title: Optional[str] = None # short label for what this submission is about
    content: Optional[str] = None # plain-text content or summary
    # Structured message arrays — supply one of these *instead of* content when
    # you have a conversation transcript.
    messages: Optional[list[dict]] = None # OpenAI / AutoGen format messages list
    # remote_submit only ingests ``messages`` when format is "openai" or
    # "autogen"; any other value is handled as plain text.
    format: str = "text" # "text" | "openai" | "autogen"
    tags: list[str] = []
    # Identifies the submitting agent; stored as an extra "agent:<id>" tag.
    agent_id: Optional[str] = None
259
+
260
+
261
class DistillRequest(BaseModel):
    """Payload for POST /remote/distill."""
    # If given, distil only that session's inbox; otherwise remote_distill
    # walks every session returned by MemGraph.list_sessions().
    session_id: Optional[str] = None
    model: Optional[str] = None # LLM model override for the distillation agent
    dry_run: bool = False # if True, return the prompt without running the agent
266
+
267
+
268
+ # ── Routes ────────────────────────────────────────────────────────────────────
269
+
270
+
271
@router.get(
    "",
    response_class=PlainTextResponse,
    summary="Remote agent instructions",
    tags=["remote"],
)
@router.get(
    "/",
    response_class=PlainTextResponse,
    include_in_schema=False,
)
def remote_instructions(request: Request) -> PlainTextResponse:
    """Serve the plain-text instruction sheet for remote agents and humans.

    Registered on both the empty path and ``/`` of this router; only the
    empty-path registration is included in the OpenAPI schema.
    """
    sheet = _render_instructions(request)
    return PlainTextResponse(sheet)
285
+
286
+
287
@router.post(
    "/chat",
    summary="Chat with the orchestrator agent",
    tags=["remote"],
)
def remote_chat(body: ChatRequest) -> dict:
    """Relay one message to a read-only OrchestratorAgent and return its reply.

    A fresh agent is built for every request. When ``body.session_id`` names a
    session already present in the in-memory store, the stored messages are
    appended to the new agent's history before the message is sent. When no
    session id is supplied, a UUID4 string is generated and returned so the
    caller can continue the conversation.

    Raises:
        HTTPException: 503 when ``OPENAI_API_KEY`` is unset or empty.
    """
    api_key = os.environ.get("OPENAI_API_KEY")
    if not api_key:
        raise HTTPException(
            status_code=503,
            detail="OPENAI_API_KEY is not configured on this server.",
        )

    # Imported inside the function rather than at module import time.
    from agents.orchestrator.agent import OrchestratorAgent

    session_id = body.session_id if body.session_id else str(uuid.uuid4())
    agent = OrchestratorAgent(graph=_graph, model=body.model, read_only=True)

    # Replay the earlier turns of this session (stored without the system prompt).
    earlier_turns = _sessions.get(session_id)
    if earlier_turns is not None:
        agent._history.extend(earlier_turns)

    reply = agent.chat(body.message)

    # Keep everything except index 0, which is treated as the agent's own
    # system prompt, so the next request can replay it.
    _sessions[session_id] = list(agent._history[1:])

    return {"response": reply, "session_id": session_id}
320
+
321
+
322
@router.get(
    "/search",
    summary="Search the knowledge graph",
    tags=["remote"],
)
def remote_search(
    q: Optional[str] = None,
    tags: Optional[str] = None,
    entry_type: Optional[EntryType] = None,
    limit: int = 20,
    db: Session = Depends(get_db),
) -> list[dict]:
    """Search entries with optional tag and entry-type filters.

    Args:
        q: Free-text query, forwarded as ``query`` to the retrieval engine.
        tags: Comma-separated list, e.g. ``tags=python,simulation``. Each item
            is stripped of surrounding whitespace, and blank items (from a
            trailing or doubled comma) are discarded.
        entry_type: Restrict results to one entry type.
        limit: Maximum number of results requested from the engine.
        db: Database session injected by FastAPI.

    Returns:
        One compact summary dict per hit, as built by ``_summarize_entry``.
    """
    engine = RetrievalEngine(db, _graph)

    tag_list = None
    if tags:
        # ``"a,,b".split(",")`` contains an empty string; passing that on as a
        # tag filter is never what the caller meant. If nothing usable is
        # left, fall back to "no tag filter" (None).
        stripped = (raw.strip() for raw in tags.split(","))
        tag_list = [tag for tag in stripped if tag] or None

    results = engine.search_entries(query=q, tags=tag_list, entry_type=entry_type, limit=limit)
    return [_summarize_entry(e) for e in results]
342
+
343
+
344
# Metadata fields that are internal / dev-only and not useful to remote agents.
# _clean_entry pops each of these keys from an entry's ``metadata`` dict before
# the entry is returned by a remote endpoint.
_METADATA_INTERNAL_KEYS = {
    # Sync / maintenance
    "remote_source",
    "custom",
    "feedback_log",
    "needs_generalization",
    "extraction_method",
    "refinement_status",
    "review_count",
    "modify_count",
    "last_reviewed_at",
    # Timestamps — not actionable for consumers
    "timestamp",
}
359
+
360
+
361
+ def _strip_empty(d: dict) -> dict:
362
+ """Recursively remove None values and empty containers from a dict."""
363
+ out = {}
364
+ for k, v in d.items():
365
+ if v is None:
366
+ continue
367
+ if isinstance(v, dict):
368
+ v = _strip_empty(v)
369
+ if not v:
370
+ continue
371
+ elif isinstance(v, list) and len(v) == 0:
372
+ continue
373
+ out[k] = v
374
+ return out
375
+
376
+
377
def _clean_entry(entry) -> dict:
    """Serialise *entry* for remote consumers, minus internal and empty fields.

    The entry is dumped with ``model_dump(mode="json")``. The top-level keys
    ``internal_refs``, ``scripts`` and ``assets`` are then removed, every key
    listed in ``_METADATA_INTERNAL_KEYS`` is removed from ``metadata``, and
    null/empty values are stripped via ``_strip_empty``. The result always
    has a ``metadata`` key, even when it is an empty dict.
    """
    payload = entry.model_dump(mode="json")

    # Top-level fields that are never exposed remotely.
    for hidden_field in ("internal_refs", "scripts", "assets"):
        payload.pop(hidden_field, None)

    # Keep only the metadata sub-fields that are not flagged as internal.
    raw_meta = payload.get("metadata") or {}
    public_meta = {
        name: value
        for name, value in raw_meta.items()
        if name not in _METADATA_INTERNAL_KEYS
    }
    payload["metadata"] = _strip_empty(public_meta)

    # Stripping removes an empty metadata dict, so put the key back afterwards.
    payload = _strip_empty(payload)
    payload.setdefault("metadata", {})
    return payload
397
+
398
+
399
+ def _summarize_entry(entry, snippet_words: int = 40) -> dict:
400
+ """Return a lightweight summary of an entry for search-result listings.
401
+
402
+ Includes only identifiers, type/tags, and a short content snippet so that
403
+ agents can decide which entries to fetch in full via ``/remote/entry/<id>``.
404
+ """
405
+ content = (entry.content or "").strip()
406
+ # Skip YAML frontmatter when present so the snippet shows real prose.
407
+ if content.startswith("---"):
408
+ end = content.find("\n---", 3)
409
+ if end != -1:
410
+ content = content[end + 4 :].lstrip()
411
+ # Drop heading markers / blank lines from the very top.
412
+ lines = [ln for ln in content.splitlines() if ln.strip()]
413
+ body = " ".join(lines)
414
+ words = body.split()
415
+ snippet = " ".join(words[:snippet_words])
416
+ if len(words) > snippet_words:
417
+ snippet += " …"
418
+
419
+ return {
420
+ "id": str(entry.id),
421
+ "title": entry.title,
422
+ "slug": entry.slug,
423
+ "entry_type": entry.entry_type.value if hasattr(entry.entry_type, "value") else entry.entry_type,
424
+ "tags": list(entry.tags or []),
425
+ "aliases": list(getattr(entry, "aliases", []) or []),
426
+ "snippet": snippet,
427
+ }
428
+
429
+
430
@router.get(
    "/graph",
    summary="Graph statistics and full node/edge list",
    tags=["remote"],
)
def remote_graph_overview() -> dict:
    """Return the graph's stats merged with a dump of every node and edge.

    The result starts from ``_graph.stats()`` and adds two keys: ``nodes``
    (one dict per node: its id plus its attribute dict) and ``edges`` (one
    dict per edge: ``source``, ``target`` plus its attribute dict).
    """
    raw_graph = _graph._g  # underlying graph object held by the wrapper

    overview = {**_graph.stats()}
    overview["nodes"] = [
        {"id": node_id, **attrs} for node_id, attrs in raw_graph.nodes(data=True)
    ]
    overview["edges"] = [
        {"source": src, "target": dst, **attrs}
        for src, dst, attrs in raw_graph.edges(data=True)
    ]
    return overview
443
+
444
+
445
@router.get(
    "/entry/{entry_id}",
    summary="Get an entry by ID, slug, or alias",
    tags=["remote"],
)
def remote_get_entry(entry_id: str, db: Session = Depends(get_db)) -> dict:
    """Return one cleaned entry plus a ``progressive_hints`` block.

    ``entry_id`` is resolved through ``RetrievalEngine.resolve_identifier``.
    The cleaned entry dict is extended with ``progressive_hints``, built from
    ``ProgressiveRetriever.count_attached`` for the resolved entry's id. This
    lets the caller see how many attached heuristics/constraints exist, and
    where to fetch them, without loading their bodies.

    Raises:
        HTTPException: 404 when the identifier does not resolve to an entry.
    """
    entry = RetrievalEngine(db, _graph).resolve_identifier(entry_id)
    if not entry:
        raise HTTPException(status_code=404, detail="Entry not found")

    payload = _clean_entry(entry)

    # Counts are looked up by the resolved id, not by the identifier the caller used.
    attached_counts = ProgressiveRetriever(db, _graph).count_attached(entry.id)
    payload["progressive_hints"] = _build_progressive_hints(entry.id, attached_counts)
    return payload
470
+
471
+
472
+ def _build_progressive_hints(entry_id: str, counts: dict) -> dict:
473
+ """Build the progressive-disclosure hint block for a single entry.
474
+
475
+ Only counts nodes directly attached via ``heuristic_for`` /
476
+ ``constraint_on`` / ``warning_about`` edges — i.e. the L3/L4 sidecars
477
+ of the **currently watched node**, not the whole graph.
478
+ """
479
+ h = int(counts.get("heuristics", 0))
480
+ c = int(counts.get("constraints", 0))
481
+ hints: dict = {
482
+ "heuristics_count": h,
483
+ "constraints_count": c,
484
+ "heuristics_url": f"/remote/entry/{entry_id}/heuristics",
485
+ "constraints_url": f"/remote/entry/{entry_id}/constraints",
486
+ }
487
+ if h or c:
488
+ hints["message"] = (
489
+ f"This entry has {h} attached heuristic(s) (operational experience) "
490
+ f"and {c} attached constraint(s) (known limitations / failure modes) "
491
+ f"that may guide your subsequent use. "
492
+ f"These endpoints behave like /remote/search but are scoped to the "
493
+ f"nodes attached to THIS entry — pass ?q=<keywords> to narrow down "
494
+ f"(otherwise the top results by usage are returned):\n"
495
+ f" GET /remote/entry/{entry_id}/heuristics?q=...&limit=10\n"
496
+ f" GET /remote/entry/{entry_id}/constraints?q=...&limit=10"
497
+ )
498
+ return hints
499
+
500
+
501
@router.get(
    "/entry/{entry_id}/heuristics",
    summary="Search L3 heuristics attached to an entry",
    tags=["remote"],
)
def remote_get_heuristics(
    entry_id: str,
    q: Optional[str] = None,
    tags: Optional[str] = None,
    limit: int = 10,
    mode: str = "hybrid",
    db: Session = Depends(get_db),
) -> dict:
    """Search the L3 heuristics (operational experience) attached to *entry_id*.

    The search is delegated to ``ProgressiveRetriever.search_attached`` with
    ``kind="heuristics"``, so only nodes attached to this entry are considered.

    Args:
        entry_id: Identifier of the watched entry; must resolve via
            ``RetrievalEngine.resolve_identifier``.
        q: Optional free-text query.
        tags: Comma-separated tag filter. Items are stripped, and blank items
            (from a trailing or doubled comma) are discarded.
        limit: Maximum number of results requested (default 10).
        mode: Ranking mode forwarded to the retriever — ``hybrid`` (default),
            ``semantic`` or ``keyword``.
        db: Database session injected by FastAPI.

    Returns:
        A dict with ``total_attached``, ``returned``, ``query`` and
        ``results``. Results are summaries, not full bodies — use
        ``GET /remote/entry/<id>`` to fetch the full content of a hit.

    Raises:
        HTTPException: 404 when the identifier does not resolve to an entry.
    """
    engine = RetrievalEngine(db, _graph)
    if not engine.resolve_identifier(entry_id):
        raise HTTPException(status_code=404, detail="Entry not found")

    tag_list = None
    if tags:
        # Drop empty items such as those produced by "a,,b" or "a,"; if
        # nothing usable remains, apply no tag filter at all.
        stripped = (raw.strip() for raw in tags.split(","))
        tag_list = [tag for tag in stripped if tag] or None

    retriever = ProgressiveRetriever(db, _graph)
    # NOTE(review): the caller-supplied identifier is forwarded as ``skill``,
    # not the resolved entry's id — presumably search_attached resolves
    # slugs/aliases itself; confirm.
    results, total = retriever.search_attached(
        skill=entry_id,
        kind="heuristics",
        query=q,
        tags=tag_list,
        limit=limit,
        mode=mode,
    )
    return {
        "total_attached": total,
        "returned": len(results),
        "query": q,
        "results": [_summarize_entry(e) for e in results],
    }
548
+
549
+
550
@router.get(
    "/entry/{entry_id}/constraints",
    summary="Search L4 constraints attached to an entry",
    tags=["remote"],
)
def remote_get_constraints(
    entry_id: str,
    q: Optional[str] = None,
    tags: Optional[str] = None,
    limit: int = 10,
    mode: str = "hybrid",
    db: Session = Depends(get_db),
) -> dict:
    """Search the L4 constraints / failure modes attached to *entry_id*.

    The search is delegated to ``ProgressiveRetriever.search_attached`` with
    ``kind="constraints"``. The query parameters have the same meaning as on
    the ``/heuristics`` endpoint.

    Args:
        entry_id: Identifier of the watched entry; must resolve via
            ``RetrievalEngine.resolve_identifier``.
        q: Optional free-text query.
        tags: Comma-separated tag filter. Items are stripped, and blank items
            (from a trailing or doubled comma) are discarded.
        limit: Maximum number of results requested (default 10).
        mode: Ranking mode forwarded to the retriever.
        db: Database session injected by FastAPI.

    Returns:
        A dict with ``total_attached``, ``returned``, ``query`` and
        ``results``. Results are summaries; fetch full content via
        ``GET /remote/entry/<id>``.

    Raises:
        HTTPException: 404 when the identifier does not resolve to an entry.
    """
    engine = RetrievalEngine(db, _graph)
    if not engine.resolve_identifier(entry_id):
        raise HTTPException(status_code=404, detail="Entry not found")

    tag_list = None
    if tags:
        # Drop empty items such as those produced by "a,,b" or "a,"; if
        # nothing usable remains, apply no tag filter at all.
        stripped = (raw.strip() for raw in tags.split(","))
        tag_list = [tag for tag in stripped if tag] or None

    retriever = ProgressiveRetriever(db, _graph)
    # NOTE(review): the caller-supplied identifier is forwarded as ``skill``,
    # not the resolved entry's id — presumably search_attached resolves
    # slugs/aliases itself; confirm.
    results, total = retriever.search_attached(
        skill=entry_id,
        kind="constraints",
        query=q,
        tags=tag_list,
        limit=limit,
        mode=mode,
    )
    return {
        "total_attached": total,
        "returned": len(results),
        "query": q,
        "results": [_summarize_entry(e) for e in results],
    }
589
+
590
+
591
@router.get(
    "/entry/{entry_id}/related",
    summary="Get entries related to an entry via BFS",
    tags=["remote"],
)
def remote_get_related(
    entry_id: str,
    depth: int = 1,
    relation: Optional[str] = None,
    db: Session = Depends(get_db),
) -> list[dict]:
    """Return the entries reachable from ``entry_id`` within ``depth`` hops.

    Args:
        entry_id: Entry id, looked up with ``RetrievalEngine.get_entry_by_id``.
        depth: Maximum number of hops, forwarded to ``get_related_entries``.
        relation: Optional edge-relation name (e.g. ``dependency``,
            ``wikilink``) used to filter the traversal; it must be a valid
            ``EdgeRelation`` value.
        db: Database session injected by FastAPI.

    Returns:
        The related entries, each cleaned with ``_clean_entry``.

    Raises:
        HTTPException: 404 when the entry does not exist; 422 when
            ``relation`` is not a valid ``EdgeRelation`` value.
    """
    engine = RetrievalEngine(db, _graph)
    if not engine.get_entry_by_id(entry_id):
        raise HTTPException(status_code=404, detail="Entry not found")

    rel: Optional[EdgeRelation] = None
    if relation:
        try:
            rel = EdgeRelation(relation)
        except ValueError as exc:
            # An unknown relation name is a client error; without this guard
            # the ValueError would surface as an HTTP 500.
            raise HTTPException(
                status_code=422,
                detail=f"Unknown relation: {relation!r}",
            ) from exc

    results = engine.get_related_entries(entry_id, depth=depth, relation=rel)
    return [_clean_entry(e) for e in results]
612
+
613
+
614
@router.post(
    "/feedback",
    status_code=201,
    summary="Submit feedback as a memory trace (and optionally update an entry)",
    tags=["remote"],
)
def remote_feedback(body: FeedbackRequest) -> dict:
    """Record feedback as a MemGraph trace and optionally update an entry.

    The trace is always stored in the MemGraph of ``body.session_id``. Only
    when both ``entry_id`` and ``verdict`` are supplied is ``submit_feedback``
    also called for that entry, using the feedback text as the note and
    ``agent_id`` (falling back to ``session_id``) as the author. Its return
    value is reported under ``entry_feedback``, so an external agent can do
    both with a single call.
    """
    mem = MemGraph(session_id=body.session_id)
    trace = mem.add(content=body.content, tags=body.tags, success=body.success)
    result: dict = {"id": trace.id, "session_id": body.session_id, "stored": True}

    # Without both an entry and a verdict there is nothing further to update.
    if not (body.entry_id and body.verdict):
        return result

    # Imported inside the function rather than at module import time.
    from agents.graph_agent.tools import submit_feedback

    result["entry_feedback"] = submit_feedback(
        entry_id=body.entry_id,
        verdict=body.verdict,
        note=body.content,
        evidence="",
        agent_id=body.agent_id or body.session_id,
        graph=_graph,
    )
    return result
645
+
646
+
647
@router.delete(
    "/session/{session_id}",
    status_code=204,
    summary="Clear a session's chat history",
    tags=["remote"],
)
def remote_clear_session(session_id: str) -> None:
    """Remove the in-memory conversation history for the given session.

    Only the chat history kept in ``_sessions`` is touched. An unknown
    ``session_id`` is ignored (``pop`` is given a default), so the endpoint
    responds 204 whether or not the session existed.
    """
    _sessions.pop(session_id, None)
656
+
657
+
658
# ── Inbox / distillation ──────────────────────────────────────────────────────

# Fallback MemGraph session: remote_submit stores a submission here when the
# request carries no session_id of its own.
_INBOX_SESSION = "inbox" # MemGraph session used for all submit() entries
# Tag added to every submission; remote_inbox and remote_distill select
# entries that carry it and are not yet promoted.
_INBOX_TAG = "pending-distillation"
662
+
663
+
664
@router.post(
    "/submit",
    status_code=201,
    summary="Submit raw knowledge for later distillation into the graph",
    tags=["remote"],
)
def remote_submit(body: SubmitRequest) -> dict:
    """Deposit raw content from an external agent into the knowledge inbox.

    The submission is stored as a ``MemEntry`` (tagged ``pending-distillation``)
    and is *not* immediately added to the graph. A human or automated agent
    can later call ``POST /remote/distill`` to process the inbox.

    Supported formats
    -----------------
    * ``format="text"`` (default) — plain ``content`` string.
    * ``format="openai"`` — ``messages`` list of OpenAI-style dicts.
    * ``format="autogen"`` — ``messages`` list of AutoGen-style dicts.

    When ``session_id`` is omitted, the shared inbox session is used. An
    ``agent:<agent_id>`` tag is added when ``agent_id`` is given, and a
    ``# <title>`` heading is prepended to the stored content when ``title``
    is given.

    Returns:
        ``{"submitted": True, "ids": [...], "session_id": ..., "tag": ...}``.

    Raises:
        HTTPException: 422 when neither ``content`` nor ``messages`` is
            provided, or when only ``messages`` is provided but ``format`` is
            not ``openai``/``autogen``. In the second case the transcript
            would otherwise be dropped and an empty entry stored.
    """
    if not body.content and not body.messages:
        raise HTTPException(status_code=422, detail="Provide 'content' or 'messages'.")

    is_transcript = body.format in ("openai", "autogen")
    if body.messages and not body.content and not is_transcript:
        # Only the transcript branch below reads ``messages``; refuse the
        # request instead of silently discarding the caller's data.
        raise HTTPException(
            status_code=422,
            detail="'messages' requires format 'openai' or 'autogen'.",
        )

    session_id = body.session_id or _INBOX_SESSION
    mem = MemGraph(session_id=session_id)

    extra_tags = [*body.tags, _INBOX_TAG]
    if body.agent_id:
        extra_tags.append(f"agent:{body.agent_id}")

    if is_transcript and body.messages:
        if body.format == "openai":
            entries = mem.ingest_openai_messages(body.messages, tags=extra_tags, as_single_trace=True)
        else:
            entries = mem.ingest_autogen_messages(body.messages, tags=extra_tags, as_single_trace=True)
        # Prepend a title line if given, then persist the in-place edit.
        if body.title and entries:
            entries[0].content = f"# {body.title}\n\n{entries[0].content}"
            mem._save()
        ids = [e.id for e in entries]
    else:
        content = body.content or ""
        if body.title:
            content = f"# {body.title}\n\n{content}"
        entry = mem.add(content=content, tags=extra_tags)
        ids = [entry.id]

    return {"submitted": True, "ids": ids, "session_id": session_id, "tag": _INBOX_TAG}
712
+
713
+
714
@router.get(
    "/inbox",
    summary="List pending knowledge submissions awaiting distillation",
    tags=["remote"],
)
def remote_inbox(session_id: Optional[str] = None, limit: int = 50) -> list[dict]:
    """List MemEntries tagged ``pending-distillation`` that are not yet promoted.

    With ``session_id`` only that session is inspected; otherwise every
    session reported by ``MemGraph.list_sessions()`` is scanned. Rows are
    sorted by their ISO-formatted ``created_at`` string, and at most ``limit``
    rows are returned.

    Each row's ``title`` is the first line of the content with leading ``#``
    and space characters removed; ``preview`` is the first 300 characters.
    """
    session_ids = [session_id] if session_id else MemGraph.list_sessions()

    pending_rows: list[dict] = []
    for sid in session_ids:
        for item in MemGraph(session_id=sid).list():
            # Skip anything that is not an inbox item or was already promoted.
            if _INBOX_TAG not in item.tags or item.promoted:
                continue
            text = item.content
            pending_rows.append({
                "id": item.id,
                "session_id": item.session_id,
                "title": (text.splitlines()[0].lstrip("# ") if text else ""),
                "preview": text[:300] if text else "",
                "tags": item.tags,
                "created_at": item.created_at.isoformat(),
                "source_format": item.source_format,
            })

    pending_rows.sort(key=lambda row: row["created_at"])
    return pending_rows[:limit]
743
+
744
+
745
@router.post(
    "/distill",
    summary="Distil pending inbox submissions into knowledge graph nodes",
    tags=["remote"],
)
def remote_distill(body: DistillRequest) -> dict:
    """Run the GraphAgent over all pending inbox submissions.

    Every un-promoted entry tagged ``pending-distillation`` (in
    ``body.session_id`` if given, otherwise in all sessions) is placed into
    one prompt that asks the agent to extract reusable nodes and
    relationships and add them to the graph. After the agent returns, every
    collected entry is marked ``promoted`` so it is not distilled again.

    Set ``dry_run=true`` to get the prompt back without running the agent.
    ``OPENAI_API_KEY`` is only required when the agent is actually run: an
    empty inbox or a dry run is answered without it.

    Returns:
        * empty inbox — ``{"distilled": 0, "message": ...}``
        * dry run — ``{"dry_run": True, "pending_count": n, "prompt": ...}``
        * otherwise — ``{"distilled": n, "response": <agent reply>}``

    Raises:
        HTTPException: 503 when the agent must run but ``OPENAI_API_KEY`` is
            unset or empty.
    """
    sessions = [body.session_id] if body.session_id else MemGraph.list_sessions()

    # Collect all un-promoted inbox entries as (mem, entry_id, content).
    pending: list[tuple[MemGraph, str, str]] = []
    for sid in sessions:
        mem = MemGraph(session_id=sid)
        for e in mem.list():
            if _INBOX_TAG in e.tags and not e.promoted:
                pending.append((mem, e.id, e.content))

    if not pending:
        return {"distilled": 0, "message": "Inbox is empty — nothing to distill."}

    # Build a prompt that presents all submissions to the GraphAgent.
    blocks = [
        f"--- Submission {i} (id: {eid}) ---\n{content}"
        for i, (_, eid, content) in enumerate(pending, 1)
    ]
    combined = "\n\n".join(blocks)

    prompt = (
        "The following raw knowledge submissions were sent by external agents. "
        "Please extract the reusable capabilities, procedures, tools, and "
        "relationships they describe, and add them to the knowledge graph as "
        "properly structured nodes. Follow the abstraction rules: create generic "
        "nodes, not overly-specific instances. Skip anything that is conversational "
        "filler or not worth a standalone node. After processing, briefly list what "
        "was created.\n\n"
        + combined
    )

    if body.dry_run:
        return {"dry_run": True, "pending_count": len(pending), "prompt": prompt}

    # Check the key only now: the paths above never call the LLM.
    if not os.environ.get("OPENAI_API_KEY"):
        raise HTTPException(status_code=503, detail="OPENAI_API_KEY not configured.")

    # Imported inside the function rather than at module import time.
    from agents.graph_agent.agent import GraphAgent

    agent = GraphAgent(graph=_graph, model=body.model)
    response = agent.chat(prompt)

    # Mark all processed entries as promoted. This runs only when
    # ``agent.chat`` returned without raising.
    for mem, eid, _ in pending:
        mem.mark_promoted(eid, target_id="distilled")

    return {
        "distilled": len(pending),
        "response": response,
    }
815
+