hexus 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
hexus/__init__.py ADDED
@@ -0,0 +1,1855 @@
1
+ """hexus — Postgres + hexus memory provider for hermes-agent.
2
+ #
3
+ # Forked from andreab67/hermes-hexus (BSD-3-Clause).
4
+ #
5
+ # Mirrors hermes-agent's built-in `memory` tool entries (MEMORY.md / USER.md
6
+ # in tools/memory_tool.py) into a single Postgres table, adds 384-dim
7
+ # embeddings for semantic recall, and scopes by `agent_identity` so each
8
+ # named agent (marketing / sales / trading / incident / …) has its own
9
+ # theme.
10
+ #
11
+ # Design philosophy: this is a STORAGE LAYER for hermes-agent's native
12
+ # memory model, not a new memory model. We don't invent facts, entities,
13
+ # trust scores, deriver pipelines, or dialectic synthesis. We give the
14
+ # built-in `memory` tool a durable Postgres backing + semantic search,
15
+ # nothing more. Honcho went heavy and exploded; this stays lean.
16
+ #
17
+ # v0.4.0 (hexus fork) — embeddings are produced locally by
18
+ # sentence-transformers all-MiniLM-L6-v2 (see hexus.embedder) by
19
+ # default. The HTTP-embed path from upstream is preserved as a fallback
20
+ # for operators with an existing Ollama / OpenAI-compatible endpoint
21
+ # (configure `embed_url` in plugin config to use it).
22
+ #
23
+ # Config in $HERMES_HOME/config.yaml under plugins.hexus:
24
+ #
25
+ # plugins:
26
+ # hexus:
27
+ # dsn: "dbname=hermes_memory user=hermes host=/var/run/postgresql"
28
+ # # No embed_url → use the local sentence-transformers model
29
+ # embed_url: null
30
+ # embed_model: "sentence-transformers/all-MiniLM-L6-v2"
31
+ # prefetch_limit: 5
32
+ # min_similarity: 0.30
33
+ # embed_on_write: true
34
+ # scope_default: "current" # 'current' | 'all'
35
+ # embed_eager_load: false # set true to load BERT at init
36
+ #
37
+ # Tools exposed: `recall_memory` (one explicit search tool). All built-in
38
+ # memory writes (add/replace/remove) are mirrored automatically via the
39
+ # on_memory_write hook — no agent-facing change.
40
+ """
41
+
42
+ from __future__ import annotations
43
+
44
+ import json
45
+ import logging
46
+ import os
47
+ import re
48
+ from pathlib import Path
49
+ from typing import Any, Dict, List, Optional
50
+
51
+ try:
52
+ from agent.memory_provider import MemoryProvider
53
+ from tools.registry import tool_error
54
+ from hermes_cli.config import cfg_get
55
+ except (
56
+ ImportError
57
+ ): # pragma: no cover - standalone smoke tests do not install hermes-agent
58
+ MemoryProvider = None # type: ignore[assignment]
59
+ tool_error = None # type: ignore[assignment]
60
+ cfg_get = None # type: ignore[assignment]
61
+
62
+ from .embed import embed, EmbeddingError
63
+ from .store import MemoryStore
64
+ import hashlib
65
+ from .writer import AsyncWriter, _PendingWrite
66
+ from .pipeline.router import ContentRouter
67
+
68
+
69
+ # Boilerplate / acknowledgement-only turns that are not worth embedding or
70
+ # storing. Case-insensitive whole-string match after strip. Combined with
71
+ # a length floor (default 40 chars) in _is_noise.
72
+ _NOISE_RE = re.compile(
73
+ r"^("
74
+ r"ok(ay)?|thanks?( you)?|thx|ty|np|"
75
+ r"yes|no|sure|got it|done|cool|nice|great|"
76
+ r"continue|please|exit|cancel|stop|quit|"
77
+ r"yeah|yep|nope|alright"
78
+ r")[\s\.\!\?]*$",
79
+ re.IGNORECASE,
80
+ )
81
+
82
+ logger = logging.getLogger(__name__)
83
+
84
+ if os.environ.get("HEXUS_DEBUG", "").lower() in ("1", "true", "yes", "on"):
85
+ logger.setLevel(logging.DEBUG)
86
+
87
+
88
+ # ---------------------------------------------------------------------------
89
+ # Tool schemas — recall, entity-graph, feedback, summary and stats tools
90
+ # ---------------------------------------------------------------------------
91
+
92
def _schema_prop(json_type, description, **extra):
    """Return one JSON-schema property: type, description, then any extras."""
    return {"type": json_type, "description": description, **extra}


def _schema_scope(description="Theme scope: 'current', 'all', or a named agent."):
    """Return a fresh ``scope`` property (string, default 'current')."""
    return _schema_prop("string", description, default="current")


def _schema_limit(default=5):
    """Return a fresh ``limit`` property whose text advertises a 1-20 range."""
    return _schema_prop(
        "integer", f"Max results (1-20, default {default}).", default=default
    )


def _tool_schema(name, description, properties, required=None):
    """Assemble a tool schema dict.

    ``required=None`` omits the "required" key entirely; any other value
    (including an empty list) is emitted as a list.
    """
    parameters = {"type": "object", "properties": properties}
    if required is not None:
        parameters["required"] = list(required)
    return {"name": name, "description": description, "parameters": parameters}


# Search over captured chat turns.
RECALL_CONVERSATION_SCHEMA = _tool_schema(
    "recall_conversation",
    (
        "Semantic search over past chat turns (every substantive "
        "user/assistant exchange across all sessions). Use this when "
        "the user references something you discussed earlier — last week, "
        "yesterday, in another session — and you need the actual turn "
        "text, not just a durable memory entry. Returns top-K matching "
        "turns with role, content, session_id, and timestamp.\n\n"
        "SCOPES: 'current' (your theme — default), 'session' (current "
        "session only), 'all' (every theme).\n\n"
        "Skip for in-session continuity (already in your context). Skip "
        "for durable facts (use recall_memory instead — that's the "
        "MEMORY.md / USER.md entries the agent decided to remember)."
    ),
    {
        "query": _schema_prop("string", "Free-text query describing what to recall."),
        "scope": _schema_scope(
            "Theme scope: 'current', 'session', 'all', or a named agent."
        ),
        "limit": _schema_limit(),
    },
    required=["query"],
)


# Search over durable memory entries.
RECALL_MEMORY_SCHEMA = _tool_schema(
    "recall_memory",
    (
        "Semantic search over durable memory entries (the same entries the "
        "built-in `memory` tool writes to MEMORY.md / USER.md, stored "
        "durably in Postgres with embeddings).\n\n"
        "WHEN TO USE: when the answer might be in a past memory entry that "
        "is NOT already in your system prompt's memory block — older "
        "entries, or entries from another named agent. The current scope's "
        "recent entries are already injected ambient; only use this tool "
        "for deeper / cross-scope recall.\n\n"
        "SCOPES:\n"
        " 'current' — your own theme (default; e.g. 'marketing')\n"
        " 'all' — across all agent themes\n"
        " '<name>' — a specific theme: 'marketing', 'sales', 'trading', 'incident', …"
    ),
    {
        "query": _schema_prop("string", "Free-text query describing what to recall."),
        "scope": _schema_scope(),
        # Written out literally: "enum" sits between "type" and "description".
        "target": {
            "type": "string",
            "enum": ["memory", "user", "both"],
            "description": "Which store to search. Default 'both'.",
            "default": "both",
        },
        "limit": _schema_limit(),
        "min_confidence": _schema_prop(
            "number",
            "Minimum confidence ratio of confirm/(confirm+reject) (0..1). Default 0.",
            default=0.0,
        ),
    },
    required=["query"],
)


# Search over subagent delegation tasks and results.
RECALL_DELEGATION_SCHEMA = _tool_schema(
    "recall_delegation",
    (
        "Semantic search over subagent delegation tasks and results. "
        "Use this when you need to recall what tasks were delegated to subagents "
        "and what results they returned in previous steps or sessions."
    ),
    {
        "query": _schema_prop(
            "string",
            "Free-text query describing what delegation task/result to recall.",
        ),
        "scope": _schema_scope(),
        "limit": _schema_limit(),
    },
    required=["query"],
)


# Direct co-occurrence neighbours of one entity.
ENTITY_GRAPH_SCHEMA = _tool_schema(
    "entity_graph",
    (
        "Retrieve entities that co-occur with a target entity. "
        "Helps you find related topics, systems, or concepts in memory."
    ),
    {
        "entity_type": _schema_prop(
            "string", "The type of starting entity (e.g. 'docker_image', 'url')."
        ),
        "entity_value": _schema_prop(
            "string", "The specific entity value (e.g. 'postgres', 'github.com')."
        ),
        "scope": _schema_scope(),
        "limit": _schema_limit(),
    },
    required=["entity_type", "entity_value"],
)

# Multi-hop walk over the co-occurrence graph.
GRAPH_WALK_SCHEMA = _tool_schema(
    "graph_walk",
    (
        "Traverse the co-occurrence graph up to N hops away from a start entity. "
        "Lets you trace multi-hop connections and related concepts."
    ),
    {
        "entity_type": _schema_prop("string", "Type of starting entity."),
        "entity_value": _schema_prop("string", "Value of starting entity."),
        "scope": _schema_scope(),
        "max_depth": _schema_prop(
            "integer", "Maximum hops to walk (1-5, default 2).", default=2
        ),
        "limit": _schema_limit(),
    },
    required=["entity_type", "entity_value"],
)

# Clusters of heavily co-occurring entities; no argument is mandatory.
COMMON_TOPICS_SCHEMA = _tool_schema(
    "common_topics",
    "Retrieve clusters/cliques of heavily co-occurring entities.",
    {
        "scope": _schema_scope(),
        "min_strength": _schema_prop(
            "integer", "Minimum count of co-occurrences.", default=2
        ),
        "limit": _schema_limit(10),
    },
    required=[],
)

# Feedback tools: bump confirm_count / reject_count on one entry.
CONFIRM_MEMORY_SCHEMA = _tool_schema(
    "confirm_memory",
    "Increment confirm_count in metadata JSONB for the given entry ID to confirm its relevance.",
    {
        "id": _schema_prop(
            "integer", "The integer ID of the memory entry to confirm."
        ),
    },
    required=["id"],
)

REJECT_MEMORY_SCHEMA = _tool_schema(
    "reject_memory",
    "Increment reject_count in metadata JSONB for the given entry ID to flag it as noise.",
    {
        "id": _schema_prop("integer", "The integer ID of the memory entry to reject."),
    },
    required=["id"],
)

# Centroid-based session summary.
SUMMARIZE_SESSION_SCHEMA = _tool_schema(
    "summarize_session",
    "Compute the vector centroid of a session's turns and find the K closest turns.",
    {
        "session_id": _schema_prop("string", "The session identifier to summarize."),
        "limit": _schema_limit(),
    },
    required=["session_id"],
)

# Fetch the uncompressed content of one entry.
HEADROOM_RETRIEVE_SCHEMA = _tool_schema(
    "headroom_retrieve",
    (
        "Retrieve the original full content of a memory entry by its integer ID. "
        "Use this when a memory entry returned by recall_memory contains a compressed/truncated "
        "version of the content, and you need to inspect the full detail."
    ),
    {
        "id": _schema_prop(
            "integer", "The integer ID of the memory entry to retrieve."
        ),
    },
    required=["id"],
)

# Takes no arguments; the "required" key is intentionally absent here.
MEMORY_STATS_SCHEMA = _tool_schema(
    "memory_stats",
    "Return metrics from Hexus database and background async queue stats.",
    {},
)
382
+
383
+
384
+ # ---------------------------------------------------------------------------
385
+ # Config
386
+
387
+ # ---------------------------------------------------------------------------
388
+
389
# Built-in plugin settings. HexusMemoryProvider.__init__ overlays the
# caller-supplied config on top of this mapping.
DEFAULTS = {
    # --- connection -------------------------------------------------------
    "dsn": "dbname=hermes_memory user=hermes host=/var/run/postgresql connect_timeout=5",
    # --- embeddings -------------------------------------------------------
    # None selects the local sentence-transformers model (see
    # hexus.embedder.LocalBertEmbedder). An HTTP URL such as
    # "http://ollama:11434" selects the OpenAI-compatible /v1/embeddings +
    # Ollama-native /api/embed dispatch in embed.py instead.
    "embed_url": None,
    # sentence-transformers checkpoint name; MiniLM-L6-v2 is 384-dim. On the
    # HTTP path it is only sent in the OpenAI-compatible request body (the
    # Ollama-native path uses whatever the server is configured with).
    "embed_model": "sentence-transformers/all-MiniLM-L6-v2",
    # --- recall -----------------------------------------------------------
    "prefetch_limit": 5,
    "min_similarity": 0.30,
    "embed_on_write": True,
    "scope_default": "current",
    # --- background writer ------------------------------------------------
    "write_queue_maxsize": 256,
    # v0.1.1 — bulk sync MEMORY.md / USER.md on init
    "bulk_sync_on_init": True,
    # v0.2 — conversation turn capture
    "sync_turns": True,
    # Turns whose stripped length is below this are dropped as noise.
    "turn_min_chars": 40,
    # v0.4.0 — expected embedding dimension. Local BERT is 384; an HTTP
    # embedder must also produce 384-dim vectors unless the operator
    # overrides this value. The embed layer validates the dimension at
    # HTTP-response time so a misconfigured model fails fast.
    "expected_dim": 384,
    # v0.4.0 — load the local embedder during initialize() instead of on the
    # first embed call. Off by default to keep import + init fast.
    "embed_eager_load": False,
    # --- entity extraction --------------------------------------------------
    "entity_extractor_enabled": True,
    "entity_extractor_patterns": None,
    # --- webhooks -----------------------------------------------------------
    "webhook_url": None,
    "webhook_secret": None,
}
427
+
428
+
429
+ def _load_plugin_config() -> dict:
430
+ try:
431
+ from hermes_constants import get_hermes_home
432
+
433
+ config_path = get_hermes_home() / "config.yaml"
434
+ if not config_path.exists():
435
+ return {}
436
+ import yaml
437
+
438
+ with open(config_path, encoding="utf-8-sig") as fh:
439
+ data = yaml.safe_load(fh) or {}
440
+ if cfg_get is None:
441
+ return {}
442
+ expanded = _expand_config_vars(
443
+ cfg_get(data, "plugins", "hexus", default={}) or {}
444
+ )
445
+ return expanded if isinstance(expanded, dict) else {}
446
+ except Exception: # noqa: BLE001
447
+ return {}
448
+
449
+
450
+ def _expand_config_vars(obj):
451
+ """Expand env references in plugin config values.
452
+
453
+ Hermes's normal config loader expands plain ``${VAR}`` references, but
454
+ this plugin reads the YAML directly so it can run before Hermes has
455
+ necessarily loaded the expanded config. Support both plain references and
456
+ the shell-style forms already used by the homelab config:
457
+
458
+ ``${VAR}`` → env value or unchanged placeholder
459
+ ``${VAR:-default}`` → env value or default
460
+ ``${VAR:?message}`` → env value or ValueError
461
+ """
462
+ if isinstance(obj, str):
463
+ return _ENV_REF_RE.sub(_expand_env_match, obj)
464
+ if isinstance(obj, dict):
465
+ return {key: _expand_config_vars(value) for key, value in obj.items()}
466
+ if isinstance(obj, list):
467
+ return [_expand_config_vars(value) for value in obj]
468
+ return obj
469
+
470
+
471
+ _ENV_REF_RE = re.compile(r"\$\{([^}:]+)(?::([\-?])((?:[^}]|\\})+))?\}")
472
+
473
+
474
+ def _expand_env_match(match: re.Match[str]) -> str:
475
+ name = match.group(1)
476
+ op = match.group(2)
477
+ payload = match.group(3) or ""
478
+
479
+ value = os.environ.get(name)
480
+ if value is not None:
481
+ return value
482
+ if op == "-":
483
+ return payload.replace("\\}", "}")
484
+ if op == "?":
485
+ message = payload.replace("\\}", "}")
486
+ raise ValueError(f"missing required environment variable {name}: {message}")
487
+ return match.group(0)
488
+
489
+
490
+ # ---------------------------------------------------------------------------
491
+ # Provider
492
+ # ---------------------------------------------------------------------------
493
+
494
+
495
+ class HexusMemoryProvider(MemoryProvider or object):
496
+ """Postgres mirror of built-in memory entries, with semantic recall."""
497
+
498
def __init__(self, config: dict | None = None):
    """Create an unconnected provider; initialize() does the real setup.

    Args:
        config: Plugin settings layered over ``DEFAULTS``; ``None`` means
            defaults only.

    Raises:
        RuntimeError: When the Hermes Agent ``MemoryProvider`` base class
            could not be imported at module load.
    """
    if MemoryProvider is None:
        raise RuntimeError(
            "HexusMemoryProvider requires Hermes Agent internals; "
            "install this package inside Hermes Agent to use the provider."
        )

    merged = dict(DEFAULTS)
    merged.update(config or {})
    self._config = merged

    # Session / scope identity, filled in by initialize().
    self._session_id: str = ""
    self._agent_identity: str = "default"
    self._hermes_home: Optional[str] = None

    # Storage and background writer, created by initialize().
    self._store: Optional[MemoryStore] = None
    self._writer: Optional[AsyncWriter] = None
    self._healthy = False
    self._embed_warned = False

    # Last seen mtime per tracked markdown file (memory files and skills).
    self._last_md_mtimes: Dict[str, float] = {}
    self._content_router = ContentRouter()
514
+
515
@property
def name(self) -> str:
    """Provider name; always the literal ``"hexus"``."""
    return "hexus"
518
+
519
+ # -- Lifecycle -----------------------------------------------------------
520
+
521
def is_available(self) -> bool:
    """Return True when the ``psycopg`` package can be imported."""
    try:
        import psycopg  # noqa: F401
    except ImportError:
        return False
    return True
528
+
529
def initialize(self, session_id: str, **kwargs) -> None:
    """Bind the provider to a session and (re)build its store and writer.

    Args:
        session_id: Identifier of the session being started.
        **kwargs: Optional ``hermes_home``, ``gateway_session_key``,
            ``agent_identity`` and ``agent_workspace`` values.

    Schema or health failures are logged and leave the provider marked
    unhealthy rather than raising.
    """
    self._session_id = session_id
    self._hermes_home = kwargs.get("hermes_home")

    # Theme scoping — the first truthy candidate wins:
    #   1. gateway_session_key: the explicit scope signal from the
    #      `X-Hermes-Session-Key` header on API requests. It outranks the
    #      profile because the gateway always reports
    #      agent_identity='default' for API traffic; otherwise every API
    #      caller would collapse into one shared 'default' scope.
    #   2. agent_identity, unless it is the auto-default sentinel
    #      'default' (an explicit profile such as
    #      `hermes --profile marketing`).
    #   3. agent_workspace: shared workspace name from some platforms.
    #   4. agent_identity again, so 'default' is accepted when nothing
    #      else was supplied.
    #   5. the literal 'default' as the last-resort bucket.
    raw_identity = kwargs.get("agent_identity")
    profile_identity = raw_identity if raw_identity != "default" else None
    self._agent_identity = (
        kwargs.get("gateway_session_key")
        or profile_identity
        or kwargs.get("agent_workspace")
        or raw_identity
        or "default"
    )
    logger.debug(
        "hexus: initialized HexusMemoryProvider. session_id=%s, hermes_home=%s, agent_identity=%s, healthy_initially=%s",
        self._session_id,
        self._hermes_home,
        self._agent_identity,
        self._healthy,
    )

    # Re-initialization guard (v0.3.1): the gateway reuses one registered
    # provider across sessions. Tear down the previous writer + pool
    # first, otherwise every re-init abandons a ConnectionPool whose
    # connection lingers in Postgres until idle_session_timeout (the
    # v0.3.0 connection leak).
    if not (self._store is None and self._writer is None):
        self.shutdown()

    cfg = self._config
    self._store = MemoryStore(
        cfg["dsn"],
        entity_extractor_enabled=cfg.get("entity_extractor_enabled", True),
        entity_extractor_patterns=cfg.get("entity_extractor_patterns"),
    )
    try:
        # Verify-only at runtime: the migration is applied out-of-band by
        # an admin (see the plugin README install step).
        self._store.ensure_schema()
        report = self._store.health()
        self._healthy = bool(report.get("ok"))
        if not self._healthy:
            logger.warning("hexus unhealthy on init: %s", report.get("error"))
    except MemoryStore.SchemaNotApplied as exc:
        logger.error("hexus schema not applied — %s", exc)
        self._healthy = False
    except Exception as exc:  # noqa: BLE001
        logger.warning("hexus init failed: %s", exc)
        self._healthy = False

    # Bounded-queue background writer. It keeps on_memory_write and
    # sync_turn from blocking the agent loop on a slow embed or DB write.
    self._writer = AsyncWriter(
        self._worker,
        maxsize=int(cfg.get("write_queue_maxsize", 256)),
    )

    # v0.4.0 — optionally warm the local embedder now, so the cold start
    # is not paid on the first embed call. Only applies to the local
    # model, i.e. when no embed_url is configured.
    wants_eager_load = cfg.get("embed_eager_load", False)
    if wants_eager_load and not cfg.get("embed_url"):
        try:
            from .embedder import get_default_embedder, DEFAULT_MODEL

            embedder = get_default_embedder(
                model_name=cfg.get("embed_model") or DEFAULT_MODEL
            )
            embedder.ensure_loaded()
        except Exception as exc:  # noqa: BLE001
            logger.warning("hexus eager embed load failed: %s", exc)

    # v0.1.1 — import existing MEMORY.md / USER.md content so entries that
    # predate the plugin, and direct file edits, are visible too.
    if self._healthy and cfg.get("bulk_sync_on_init", True):
        self._bulk_sync_from_disk(self._hermes_home)

    # Announce the new session to the configured webhook, if any.
    webhook_url = cfg.get("webhook_url")
    if webhook_url:
        from .webhook.dispatcher import dispatch_webhook

        dispatch_webhook(
            url=webhook_url,
            secret=cfg.get("webhook_secret"),
            event="session_new",
            payload={
                "session_id": self._session_id,
                "agent_identity": self._agent_identity,
            },
        )
639
+
640
def shutdown(self) -> None:
    """Drain the background writer, close the store and mark unhealthy.

    Safe to call when nothing was initialized. Cleanup is wrapped in
    try/finally: if the writer's shutdown raises, the store is still
    closed and all references are still cleared, then the exception
    propagates. Previously such a failure skipped closing the store and
    left its reference in place.
    """
    try:
        # Drain in-flight writes first. self._store must stay set while
        # the writer drains, because _worker drops items when it is None.
        if self._writer:
            self._writer.shutdown(timeout=5.0)
    finally:
        self._writer = None
        try:
            # Then close the pool the writer was draining into.
            if self._store:
                self._store.close()
        finally:
            self._store = None
            self._healthy = False
650
+
651
def on_session_switch(self, new_session_id: str, **kwargs) -> None:
    """Record ``new_session_id`` as the current session.

    Only the stored session id changes; extra keyword arguments are
    accepted and ignored.
    """
    self._session_id = new_session_id
    logger.debug("hexus: on_session_switch to session_id=%s", new_session_id)
654
+
655
def on_turn_start(self, turn_number: int, message: str, **kwargs) -> None:
    """Hook run at the start of each turn.

    While the provider is healthy, re-syncs the memory markdown files and
    the skill files if they changed on disk. Does nothing beyond logging
    when unhealthy.
    """
    message_len = len(message) if message else 0
    logger.debug(
        "hexus: on_turn_start. turn_number=%d, message_len=%d",
        turn_number,
        message_len,
    )
    if not self._healthy:
        return
    self._check_and_sync_markdown_files()
    self._sync_skills_from_disk()
665
+
666
def _check_and_sync_markdown_files(self) -> None:
    """Re-run the bulk sync when MEMORY.md or USER.md changed on disk.

    A file counts as changed when it has not been seen before or its mtime
    is newer than the recorded one. If the Hermes home directory is
    unknown and cannot be resolved, the check is skipped silently.
    """
    if not self._hermes_home:
        try:
            from hermes_constants import get_hermes_home

            self._hermes_home = str(get_hermes_home())
        except Exception:
            return

    memories_dir = Path(self._hermes_home) / "memories"
    dirty = False
    for fname in ("MEMORY.md", "USER.md"):
        candidate = memories_dir / fname
        if not candidate.exists():
            continue
        try:
            mtime = os.path.getmtime(candidate)
            recorded = self._last_md_mtimes.get(fname)
            if recorded is None or mtime > recorded:
                dirty = True
                self._last_md_mtimes[fname] = mtime
        except Exception as exc:
            logger.debug("hexus failed to get mtime for %s: %s", fname, exc)

    if not dirty:
        return
    logger.info(
        "hexus: detected local changes in memory markdown files, starting sync"
    )
    self._bulk_sync_from_disk(self._hermes_home)
697
+
698
def _sync_skills_from_disk(self) -> None:
    """Store new or modified ``SKILL.md`` files found under ``<home>/skills``.

    Each non-empty skill file whose mtime is newer than the recorded one
    is embedded and added to the store under target ``memory``, with its
    content prefixed by ``[Skill: <parent folder name>]``. A failure on one
    file is logged at debug level and does not stop the scan; that file's
    mtime is not recorded, so it is retried on the next call.
    """
    if not (self._store and self._hermes_home):
        logger.debug(
            "hexus: skills sync skipped. store_exists=%s, hermes_home=%s",
            self._store is not None,
            self._hermes_home,
        )
        return

    skills_dir = Path(self._hermes_home) / "skills"
    logger.debug("hexus: scanning skills directory: %s", skills_dir)
    if not skills_dir.exists():
        logger.debug("hexus: skills directory does not exist: %s", skills_dir)
        return

    for skill_file in skills_dir.rglob("SKILL.md"):
        try:
            mtime = os.path.getmtime(skill_file)
            rel_path = str(skill_file.relative_to(skills_dir))

            # Skip files already processed at this (or a newer) mtime.
            recorded = self._last_md_mtimes.get(rel_path)
            if recorded is not None and mtime <= recorded:
                logger.debug(
                    "hexus: skill '%s' unmodified (mtime: %f), skipping sync",
                    rel_path,
                    mtime,
                )
                continue

            content = skill_file.read_text(encoding="utf-8")
            if not content.strip():
                logger.debug("hexus: skill '%s' content is empty", rel_path)
                continue

            # The skill is named after the folder holding SKILL.md.
            skill_name = skill_file.parent.name
            logger.debug("hexus: syncing skill '%s' (mtime: %f)", skill_name, mtime)

            # The embedding and hash are computed from the raw file
            # content; the stored text carries the "[Skill: …]" prefix.
            embedding = self._maybe_embed(content)
            digest = hashlib.sha256(content.encode("utf-8")).digest()
            skill_metadata = {
                "source": "skill_sync",
                "skill_name": skill_name,
                "file_path": str(skill_file),
            }
            self._store.add(
                agent_identity=self._agent_identity,
                target="memory",
                content=f"[Skill: {skill_name}] {content}",
                embedding=embedding,
                metadata=skill_metadata,
                compressed=None,
                content_hash=digest,
            )

            self._last_md_mtimes[rel_path] = mtime
            logger.info("hexus: synced skill '%s' from disk", skill_name)
        except Exception as exc:
            logger.debug("hexus failed to sync skill %s: %s", skill_file, exc)
760
+
761
+ # -- System prompt + ambient recall --------------------------------------
762
+
763
def system_prompt_block(self) -> str:
    """Return the short hexus status text for the system prompt.

    Returns:
        ``""`` when the provider is unhealthy or has no store; an
        "Empty store" notice when the total entry count is zero (a failed
        count is treated as zero); otherwise a line giving the entry count
        for the current theme and the total across all themes.
    """
    if not (self._healthy and self._store):
        return ""

    try:
        scoped = self._store.count(agent_identity=self._agent_identity)
        total = self._store.count()
    except Exception:  # noqa: BLE001
        scoped, total = 0, 0

    header = "# hexus memory\n"
    if total == 0:
        return header + (
            "Active. Empty store. Use the built-in `memory` tool to save "
            "durable notes — entries are mirrored to Postgres with "
            "embeddings for semantic recall across sessions."
        )
    return header + (
        f"Active. {scoped} entries for '{self._agent_identity}', "
        f"{total} total across all themes. "
        "Use `recall_memory(query, scope='all'|'<theme>')` for deeper / "
        "cross-theme recall beyond what's in the built-in memory block."
    )
785
+
786
def prefetch(self, query: str, *, session_id: str = "") -> str:
    """Return a recall block for ``query``, scoped to the current theme.

    Args:
        query: Text to embed and search with.
        session_id: Accepted but not used by this method.

    Returns:
        ``""`` when the provider is unhealthy, the query is empty, the
        embed or search step fails, or nothing matches. Otherwise a
        heading plus one bullet per hit: score, target, and the content
        flattened to one line and cut to 280 characters.
    """
    if not (self._healthy and self._store and query):
        return ""

    cfg = self._config
    try:
        query_vec = embed(
            query,
            base_url=cfg["embed_url"],
            model=cfg["embed_model"],
        )
    except EmbeddingError as exc:
        logger.debug("hexus prefetch embed failed: %s", exc)
        return ""

    # The search is limited to the current agent_identity so one theme's
    # entries do not leak into another theme's ambient recall.
    try:
        hits = self._store.search(
            query_embedding=query_vec,
            agent_identity=self._agent_identity,
            limit=int(cfg.get("prefetch_limit", 5)),
            min_similarity=float(cfg.get("min_similarity", 0.30)),
        )
    except Exception as exc:  # noqa: BLE001
        logger.debug("hexus prefetch query failed: %s", exc)
        return ""
    if not hits:
        return ""

    out = [f"## Recall (hexus, {self._agent_identity})"]
    for hit in hits:
        score = hit.get("score") or 0.0
        target = hit.get("target") or "?"
        snippet = (hit.get("content") or "").strip().replace("\n", " ")
        if len(snippet) > 280:
            snippet = snippet[:280] + "…"
        out.append(f"- [{score:.2f}] ({target}) {snippet}")
    return "\n".join(out)
823
+
824
+ # -- Turn capture (v0.2) -------------------------------------------------
825
+
826
def sync_turn(
    self,
    user_content: str,
    assistant_content: str,
    *,
    session_id: str = "",
) -> None:
    """Queue a (user, assistant) turn pair for storage.

    Each side is enqueued separately with action ``"turn"`` — user first,
    then assistant. Empty sides and sides that ``_is_noise`` rejects are
    skipped. Nothing is queued when the provider is unhealthy, has no
    writer, or ``sync_turns`` is disabled in the config.

    Args:
        user_content: Text of the user message.
        assistant_content: Text of the assistant reply.
        session_id: Session to tag the turn with; falls back to the
            provider's session id, then to ``"default"``.
    """
    if not (self._healthy and self._writer):
        return
    cfg = self._config
    if not cfg.get("sync_turns", True):
        return

    effective_sid = session_id or self._session_id or "default"
    length_floor = int(cfg.get("turn_min_chars", 40))

    sides = (("user", user_content), ("assistant", assistant_content))
    for role, text in sides:
        if not text or self._is_noise(text, min_chars=length_floor):
            continue
        self._writer.enqueue(
            action="turn",
            agent_identity=self._agent_identity,
            # Synthetic target; the worker dispatches on the action.
            target="conversations",
            content=text,
            extra={"role": role, "session_id": effective_sid},
            metadata={"session_id": effective_sid},
        )
860
+
861
+ @staticmethod
862
+ def _is_noise(content: str, *, min_chars: int) -> bool:
863
+ """True for short / boilerplate content we don't want in recall."""
864
+ stripped = (content or "").strip()
865
+ if not stripped:
866
+ return True
867
+ if len(stripped) < min_chars:
868
+ return True
869
+ if _NOISE_RE.match(stripped):
870
+ return True
871
+ return False
872
+
873
+ # -- Built-in memory mirror (THE main integration point) ----------------
874
+
875
def on_memory_write(
    self,
    action: str,
    target: str,
    content: str,
    metadata: Optional[Dict[str, Any]] = None,
) -> None:
    """Queue a built-in ``memory`` tool write for mirroring (non-blocking).

    The write is only enqueued here; embedding and the database write
    happen in the background writer.

    Args:
        action: ``"add"``, ``"replace"`` or ``"remove"``; anything else is
            ignored with a debug log.
        target: ``"memory"`` or ``"user"``; anything else is ignored with
            a debug log.
        content: Entry text.
        metadata: Optional caller metadata. It is copied, not mutated, and
            gains a ``session_id`` when it has none. Its ``old_text`` (or
            ``replaces``) value, if truthy, is forwarded as
            ``extra["old_text"]``.
    """
    if not (self._healthy and self._writer):
        return
    if target not in ("memory", "user"):
        logger.debug("hexus ignoring unsupported target: %r", target)
        return
    if action not in ("add", "replace", "remove"):
        logger.debug("hexus ignoring unknown action: %r", action)
        return

    meta = dict(metadata) if metadata else {}
    if "session_id" not in meta:
        meta["session_id"] = self._session_id

    previous_text = meta.get("old_text") or meta.get("replaces")
    extra = {"old_text": str(previous_text)} if previous_text else {}

    self._writer.enqueue(
        action=action,
        agent_identity=self._agent_identity,
        target=target,
        content=content,
        extra=extra,
        metadata=meta,
    )
910
+
911
+ def _worker(self, item: "_PendingWrite") -> None:
912
+ """Drain-thread worker: embed + DB write for a single queued item.
913
+
914
+ Must NOT raise — the AsyncWriter logs + survives if we do, but
915
+ we still want failures to degrade gracefully (drop the write,
916
+ keep the queue moving).
917
+ """
918
+ if not self._store:
919
+ return
920
+ try:
921
+ if item.action == "add":
922
+ compressed = self._content_router.maybe_compress(item.content)
923
+ hash_target = compressed if compressed is not None else item.content
924
+ content_hash = hashlib.sha256(hash_target.encode("utf-8")).digest()
925
+ vec = self._maybe_embed(item.content)
926
+ self._store.add(
927
+ agent_identity=item.agent_identity,
928
+ target=item.target,
929
+ content=item.content,
930
+ embedding=vec,
931
+ metadata=item.metadata,
932
+ compressed=compressed,
933
+ content_hash=content_hash,
934
+ )
935
+ if self._config.get("webhook_url"):
936
+ from .webhook.dispatcher import dispatch_webhook
937
+
938
+ dispatch_webhook(
939
+ url=self._config.get("webhook_url"),
940
+ secret=self._config.get("webhook_secret"),
941
+ event="memory_retain",
942
+ payload={
943
+ "agent_identity": item.agent_identity,
944
+ "target": item.target,
945
+ "content": item.content,
946
+ "metadata": item.metadata,
947
+ },
948
+ )
949
+ elif item.action == "replace":
950
+ compressed = self._content_router.maybe_compress(item.content)
951
+ hash_target = compressed if compressed is not None else item.content
952
+ content_hash = hashlib.sha256(hash_target.encode("utf-8")).digest()
953
+ old_text = item.extra.get("old_text")
954
+ vec = self._maybe_embed(item.content)
955
+ if old_text:
956
+ n = self._store.replace(
957
+ agent_identity=item.agent_identity,
958
+ target=item.target,
959
+ old_text=old_text,
960
+ new_content=item.content,
961
+ new_embedding=vec,
962
+ compressed=compressed,
963
+ content_hash=content_hash,
964
+ )
965
+ if n == 0:
966
+ # Nothing matched — degrade to add (built-in wrote
967
+ # the new entry to disk; mirror it).
968
+ self._store.add(
969
+ agent_identity=item.agent_identity,
970
+ target=item.target,
971
+ content=item.content,
972
+ embedding=vec,
973
+ metadata=item.metadata,
974
+ compressed=compressed,
975
+ content_hash=content_hash,
976
+ )
977
+ else:
978
+ # No old_text in metadata → can't locate prior row;
979
+ # add the new content so we don't lose it.
980
+ self._store.add(
981
+ agent_identity=item.agent_identity,
982
+ target=item.target,
983
+ content=item.content,
984
+ embedding=vec,
985
+ metadata=item.metadata,
986
+ compressed=compressed,
987
+ content_hash=content_hash,
988
+ )
989
+ if self._config.get("webhook_url"):
990
+ from .webhook.dispatcher import dispatch_webhook
991
+
992
+ dispatch_webhook(
993
+ url=self._config.get("webhook_url"),
994
+ secret=self._config.get("webhook_secret"),
995
+ event="memory_retain",
996
+ payload={
997
+ "agent_identity": item.agent_identity,
998
+ "target": item.target,
999
+ "content": item.content,
1000
+ "metadata": item.metadata,
1001
+ },
1002
+ )
1003
+ elif item.action == "remove":
1004
+ self._store.remove(
1005
+ agent_identity=item.agent_identity,
1006
+ target=item.target,
1007
+ old_text=item.content,
1008
+ )
1009
+ if self._config.get("webhook_url"):
1010
+ from .webhook.dispatcher import dispatch_webhook
1011
+
1012
+ dispatch_webhook(
1013
+ url=self._config.get("webhook_url"),
1014
+ secret=self._config.get("webhook_secret"),
1015
+ event="memory_forget",
1016
+ payload={
1017
+ "agent_identity": item.agent_identity,
1018
+ "target": item.target,
1019
+ "content": item.content,
1020
+ },
1021
+ )
1022
+ elif item.action == "turn":
1023
+ role = item.extra.get("role") or "user"
1024
+ sid = item.extra.get("session_id") or "default"
1025
+ vec = self._maybe_embed(item.content)
1026
+ self._store.append_turn(
1027
+ session_id=sid,
1028
+ agent_identity=item.agent_identity,
1029
+ role=role,
1030
+ content=item.content,
1031
+ embedding=vec,
1032
+ metadata=item.metadata,
1033
+ )
1034
+ elif item.action == "delegation":
1035
+ parent_sid = item.metadata.get("parent_session_id") or "default"
1036
+ child_sid = item.extra.get("child_session_id") or "default"
1037
+ combined_text = (
1038
+ f"Task: {item.content}\nResult: {item.extra.get('result', '')}"
1039
+ )
1040
+ vec = self._maybe_embed(combined_text)
1041
+ self._store.record_delegation(
1042
+ parent_session_id=parent_sid,
1043
+ child_session_id=child_sid,
1044
+ agent_identity=item.agent_identity,
1045
+ task=item.content,
1046
+ result=item.extra.get("result") or "",
1047
+ embedding=vec,
1048
+ metadata=item.metadata,
1049
+ )
1050
+ elif item.action == "summarize_session_hook":
1051
+ summary_text = self._generate_session_summary(item.content)
1052
+ if summary_text:
1053
+ vec = self._maybe_embed(summary_text)
1054
+ self._store.add(
1055
+ agent_identity=item.agent_identity,
1056
+ target="memory",
1057
+ content=f"[Session Summary for {item.metadata.get('session_id', '')}] {summary_text}",
1058
+ embedding=vec,
1059
+ metadata={
1060
+ "session_id": item.metadata.get("session_id"),
1061
+ "source": "session_summarizer",
1062
+ },
1063
+ compressed=None,
1064
+ content_hash=hashlib.sha256(
1065
+ summary_text.encode("utf-8")
1066
+ ).digest(),
1067
+ )
1068
+ except Exception as exc: # noqa: BLE001
1069
+ logger.debug(
1070
+ "hexus worker (%s/%s/%s) failed: %s",
1071
+ item.action,
1072
+ item.agent_identity,
1073
+ item.target,
1074
+ str(exc)[:200],
1075
+ )
1076
+
1077
+ def _generate_session_summary(self, messages_json: str) -> Optional[str]:
1078
+ import urllib.request
1079
+
1080
+ try:
1081
+ messages = json.loads(messages_json)
1082
+ if not messages:
1083
+ return None
1084
+
1085
+ # Format history
1086
+ history = []
1087
+ for m in messages:
1088
+ role = m.get("role", "unknown")
1089
+ content = m.get("content", "")
1090
+ if content:
1091
+ history.append(f"{role.upper()}: {content}")
1092
+
1093
+ if not history:
1094
+ return None
1095
+
1096
+ history_str = "\n".join(history)
1097
+
1098
+ api_base = os.environ.get("LLM_API_BASE") or "http://headroom:8787/v1"
1099
+ # Try to get LITELLM_MASTER_KEY or HEADROOM_INTERNAL_TOKEN
1100
+ api_key = os.environ.get("HEADROOM_INTERNAL_TOKEN") or os.environ.get(
1101
+ "LITELLM_MASTER_KEY"
1102
+ )
1103
+
1104
+ url = f"{api_base.rstrip('/')}/chat/completions"
1105
+ summary_model = os.environ.get("HEXUS_SUMMARY_MODEL")
1106
+ if not summary_model:
1107
+ raise ValueError(
1108
+ "HEXUS_SUMMARY_MODEL environment variable is not configured."
1109
+ )
1110
+ payload = {
1111
+ "model": summary_model,
1112
+ "messages": [
1113
+ {
1114
+ "role": "system",
1115
+ "content": (
1116
+ "You are an assistant that summarizes conversation transcripts. "
1117
+ "Write a highly concise, 1-2 sentence summary of the key outcomes, "
1118
+ "user preferences, and facts learned during this session. "
1119
+ "Do not include boilerplate like 'In this session...' or 'The user...'"
1120
+ ),
1121
+ },
1122
+ {"role": "user", "content": f"Transcript:\n{history_str}"},
1123
+ ],
1124
+ "temperature": 0.3,
1125
+ "max_tokens": 150,
1126
+ }
1127
+
1128
+ logger.debug(
1129
+ "hexus: generating session summary. url=%s, model=%s, api_key_configured=%s, history_turns=%d",
1130
+ url,
1131
+ summary_model,
1132
+ api_key is not None,
1133
+ len(history),
1134
+ )
1135
+
1136
+ req = urllib.request.Request(
1137
+ url,
1138
+ data=json.dumps(payload).encode("utf-8"),
1139
+ headers={"Content-Type": "application/json"},
1140
+ )
1141
+ if api_key:
1142
+ req.add_header("Authorization", f"Bearer {api_key}")
1143
+
1144
+ with urllib.request.urlopen(req, timeout=15) as resp:
1145
+ resp_data = json.loads(resp.read().decode("utf-8"))
1146
+ summary_content = resp_data["choices"][0]["message"]["content"].strip()
1147
+ logger.debug(
1148
+ "hexus: session summary successfully generated: %s", summary_content
1149
+ )
1150
+ return summary_content
1151
+ except Exception as e:
1152
+ logger.debug("hexus failed to generate session summary via LLM: %s", e)
1153
+ return None
1154
+
1155
+ # -- Observability Hooks (M4) --------------------------------------------
1156
+
1157
def on_delegation(
    self,
    task: str,
    result: str,
    *,
    child_session_id: str = "",
    **kwargs,
) -> None:
    """Queue a record of a finished subagent delegation.

    Enqueues a ``delegation`` write carrying the task as content, the
    result and child session id as extras, and both session ids as
    metadata. Extra keyword arguments are accepted and ignored. No-op
    when the provider is unhealthy or has no writer.
    """
    if not (self._healthy and self._writer):
        return

    self._writer.enqueue(
        action="delegation",
        agent_identity=self._agent_identity,
        target="delegations",
        content=task,
        extra={"result": result, "child_session_id": child_session_id},
        metadata={
            "parent_session_id": self._session_id,
            "child_session_id": child_session_id,
        },
    )
1182
+
1183
def on_pre_compress(self, messages: List[Dict[str, Any]]) -> str:
    """Queue a system-role marker turn noting that compaction is about to run.

    The marker records how many messages are being compacted and for
    which session. Always returns an empty string; nothing is enqueued
    when the provider is unhealthy, has no writer, or ``messages`` is
    empty.
    """
    if self._healthy and self._writer and messages:
        count = len(messages)
        marker = f"[CONTEXT COMPACTION] Compacting {count} turns from session {self._session_id}."
        self._writer.enqueue(
            action="turn",
            agent_identity=self._agent_identity,
            target="conversations",
            content=marker,
            extra={"role": "system", "session_id": self._session_id},
            metadata={"is_compaction": True, "message_count": count},
        )
    return ""
1198
+
1199
def on_session_end(self, messages: List[Dict[str, Any]]) -> None:
    """Record the end of a session and schedule its summary.

    Steps, in order:

    1. Enqueue a system-role ``[SESSION ENDED]`` marker turn.
    2. When at least two messages are present, enqueue a
       ``summarize_session_hook`` item carrying the JSON-encoded
       transcript.
    3. When ``webhook_url`` is configured, dispatch a ``session_end``
       webhook.

    No-op when the provider is unhealthy or has no writer.
    """
    if not self._healthy or not self._writer:
        return

    end_info = f"[SESSION ENDED] Session {self._session_id} has ended."
    self._writer.enqueue(
        action="turn",
        agent_identity=self._agent_identity,
        target="conversations",
        content=end_info,
        extra={"role": "system", "session_id": self._session_id},
        metadata={"session_end": True},
    )

    if messages and len(messages) >= 2:
        # default=str is deliberate: the transcript only feeds a
        # best-effort text summary, so a value JSON cannot encode is
        # stringified rather than raising TypeError out of this hook
        # (which would also skip the session_end webhook below).
        transcript = json.dumps(messages, default=str)
        self._writer.enqueue(
            action="summarize_session_hook",
            agent_identity=self._agent_identity,
            target="memory",
            content=transcript,
            metadata={"session_id": self._session_id},
        )

    # Dispatch session_end webhook event
    if self._config.get("webhook_url"):
        from .webhook.dispatcher import dispatch_webhook

        dispatch_webhook(
            url=self._config.get("webhook_url"),
            secret=self._config.get("webhook_secret"),
            event="session_end",
            payload={
                "session_id": self._session_id,
                "agent_identity": self._agent_identity,
            },
        )
1236
+
1237
+ # -- Bulk sync (v0.1.1) --------------------------------------------------
1238
+
1239
+ def _bulk_sync_from_disk(self, hermes_home: Optional[str]) -> None:
1240
+ """Import MEMORY.md + USER.md entries from disk into memory_entries.
1241
+
1242
+ Called by initialize(). Runs synchronously (not via async writer)
1243
+ so the table is warm before the first turn's prefetch. Cheap on
1244
+ re-init: an existence pre-check skips already-imported entries
1245
+ without re-embedding.
1246
+ """
1247
+ if not self._store:
1248
+ return
1249
+ if not hermes_home:
1250
+ # Fall back to hermes_constants if the runtime didn't pass it.
1251
+ try:
1252
+ from hermes_constants import get_hermes_home
1253
+
1254
+ hermes_home = str(get_hermes_home())
1255
+ except Exception: # noqa: BLE001
1256
+ return
1257
+
1258
+ memories_dir = Path(hermes_home) / "memories"
1259
+ embed_fn = self._make_embed_fn()
1260
+
1261
+ for target, fname in (("memory", "MEMORY.md"), ("user", "USER.md")):
1262
+ fpath = memories_dir / fname
1263
+ try:
1264
+ result = self._store.bulk_upsert_md(
1265
+ agent_identity=self._agent_identity,
1266
+ target=target,
1267
+ file_path=fpath,
1268
+ embed_fn=embed_fn,
1269
+ )
1270
+ # Store modification time to prevent immediate re-sync
1271
+ if fpath.exists():
1272
+ self._last_md_mtimes[fname] = os.path.getmtime(fpath)
1273
+ if result.get("inserted"):
1274
+ logger.info(
1275
+ "hexus bulk-sync %s: parsed=%d inserted=%d skipped=%d",
1276
+ fname,
1277
+ result.get("parsed", 0),
1278
+ result.get("inserted", 0),
1279
+ result.get("skipped", 0),
1280
+ )
1281
+ except Exception as exc: # noqa: BLE001
1282
+ logger.warning("hexus bulk-sync %s failed: %s", fname, exc)
1283
+
1284
+ def _make_embed_fn(self):
1285
+ """Return a closure over the configured embed endpoint, or None."""
1286
+ if not self._config.get("embed_on_write", True):
1287
+ return None
1288
+ base_url = self._config["embed_url"]
1289
+ model = self._config["embed_model"]
1290
+
1291
+ def _fn(text: str):
1292
+ return embed(text, base_url=base_url, model=model)
1293
+
1294
+ return _fn
1295
+
1296
+ # -- Tool surface --------------------------------------------------------
1297
+
1298
def get_tool_schemas(self) -> List[Dict[str, Any]]:
    """Return the schemas of the tools this provider exposes, in a fixed order."""
    schemas: List[Dict[str, Any]] = []
    # Recall tools.
    schemas.append(RECALL_MEMORY_SCHEMA)
    schemas.append(RECALL_CONVERSATION_SCHEMA)
    schemas.append(RECALL_DELEGATION_SCHEMA)
    # Graph / topic tools.
    schemas.append(ENTITY_GRAPH_SCHEMA)
    schemas.append(GRAPH_WALK_SCHEMA)
    schemas.append(COMMON_TOPICS_SCHEMA)
    # Feedback, summary, retrieval, and stats tools.
    schemas.append(CONFIRM_MEMORY_SCHEMA)
    schemas.append(REJECT_MEMORY_SCHEMA)
    schemas.append(SUMMARIZE_SESSION_SCHEMA)
    schemas.append(HEADROOM_RETRIEVE_SCHEMA)
    schemas.append(MEMORY_STATS_SCHEMA)
    return schemas
1312
+
1313
def handle_tool_call(self, tool_name: str, args: Dict[str, Any], **kwargs) -> str:
    """Route a tool invocation and return its result as a string.

    Every tool except ``recall_memory`` is forwarded to its
    ``_handle_*`` method. ``recall_memory`` is served here: the query is
    embedded, the store is searched, and up to ``limit`` (clamped to
    1..20) rows come back as a JSON object with ``results`` and
    ``count``. Embedding and database failures are reported in an
    ``error`` field alongside an empty result list.
    """
    delegated = (
        ("recall_conversation", "_handle_recall_conversation"),
        ("recall_delegation", "_handle_recall_delegation"),
        ("entity_graph", "_handle_entity_graph"),
        ("graph_walk", "_handle_graph_walk"),
        ("common_topics", "_handle_common_topics"),
        ("confirm_memory", "_handle_confirm_memory"),
        ("reject_memory", "_handle_reject_memory"),
        ("summarize_session", "_handle_summarize_session"),
        ("headroom_retrieve", "_handle_headroom_retrieve"),
        ("memory_stats", "_handle_memory_stats"),
    )
    for name, method_name in delegated:
        if tool_name == name:
            return getattr(self, method_name)(args)
    if tool_name != "recall_memory":
        return tool_error(f"Unknown tool: {tool_name}")

    if not (self._healthy and self._store):
        return json.dumps({"results": [], "count": 0, "error": "hexus unavailable"})

    query = (args.get("query") or "").strip()
    if not query:
        return tool_error("Missing required arg: query")

    try:
        requested = int(args.get("limit", 5))
    except (TypeError, ValueError):
        limit = 5
    else:
        limit = max(1, min(requested, 20))

    # Scope resolution: 'current' → my agent_identity; 'all' → no filter;
    # anything else → treat as explicit theme name.
    scope = (
        args.get("scope") or self._config.get("scope_default") or "current"
    ).strip()
    agent_filter: Optional[str]
    if scope == "all":
        agent_filter = None
    elif scope == "current":
        agent_filter = self._agent_identity
    else:
        agent_filter = scope

    # Target resolution: 'memory'/'user'/'both'.
    target_arg = (args.get("target") or "both").strip()
    if target_arg not in ("both", "memory", "user"):
        return tool_error(f"Invalid target: {target_arg!r}")
    target_filter: Optional[str] = None if target_arg == "both" else target_arg

    try:
        min_confidence = float(args.get("min_confidence", 0.0))
    except (TypeError, ValueError):
        min_confidence = 0.0

    try:
        vec = embed(
            query,
            base_url=self._config["embed_url"],
            model=self._config["embed_model"],
        )
    except EmbeddingError as exc:
        return json.dumps({"results": [], "count": 0, "error": f"embed: {exc}"})

    try:
        rows = self._store.search(
            query_embedding=vec,
            agent_identity=agent_filter,
            target=target_filter,
            limit=limit,
            min_confidence=min_confidence,
        )
    except Exception as exc:  # noqa: BLE001
        return json.dumps({"results": [], "count": 0, "error": f"db: {exc}"})

    def _as_result(row):
        # Prefer the update timestamp; fall back to creation time.
        stamp = row.get("updated_at") or row.get("created_at")
        return {
            "id": row.get("id"),
            "agent_identity": row.get("agent_identity"),
            "target": row.get("target"),
            "ts": stamp.isoformat() if stamp else None,
            "score": round(float(row.get("score") or 0.0), 4),
            "content": (row.get("content") or "")[:2000],
        }

    results = [_as_result(row) for row in rows]
    return json.dumps({"results": results, "count": len(results)})
1406
+
1407
+ def _handle_recall_conversation(self, args: Dict[str, Any]) -> str:
1408
+ """Tool handler for recall_conversation over the conversations table."""
1409
+ if not self._healthy or not self._store:
1410
+ return json.dumps({"results": [], "count": 0, "error": "hexus unavailable"})
1411
+
1412
+ query = (args.get("query") or "").strip()
1413
+ if not query:
1414
+ return tool_error("Missing required arg: query")
1415
+ try:
1416
+ limit = max(1, min(int(args.get("limit", 5)), 20))
1417
+ except (TypeError, ValueError):
1418
+ limit = 5
1419
+
1420
+ scope = (args.get("scope") or "current").strip()
1421
+ agent_filter: Optional[str] = None
1422
+ session_filter: Optional[str] = None
1423
+ if scope == "current":
1424
+ agent_filter = self._agent_identity
1425
+ elif scope == "session":
1426
+ session_filter = self._session_id or None
1427
+ elif scope == "all":
1428
+ pass # no filters
1429
+ else:
1430
+ agent_filter = scope # treat as a specific theme name
1431
+
1432
+ try:
1433
+ vec = embed(
1434
+ query,
1435
+ base_url=self._config["embed_url"],
1436
+ model=self._config["embed_model"],
1437
+ )
1438
+ except EmbeddingError as exc:
1439
+ return json.dumps({"results": [], "count": 0, "error": f"embed: {exc}"})
1440
+
1441
+ try:
1442
+ rows = self._store.search_turns(
1443
+ query_embedding=vec,
1444
+ agent_identity=agent_filter,
1445
+ session_id=session_filter,
1446
+ limit=limit,
1447
+ )
1448
+ except Exception as exc: # noqa: BLE001
1449
+ return json.dumps({"results": [], "count": 0, "error": f"db: {exc}"})
1450
+
1451
+ results = []
1452
+ for r in rows:
1453
+ ts = r.get("ts")
1454
+ results.append(
1455
+ {
1456
+ "id": r.get("id"),
1457
+ "agent_identity": r.get("agent_identity"),
1458
+ "session_id": r.get("session_id"),
1459
+ "role": r.get("role"),
1460
+ "ts": ts.isoformat() if ts else None,
1461
+ "score": round(float(r.get("score") or 0.0), 4),
1462
+ "content": (r.get("content") or "")[:2000],
1463
+ }
1464
+ )
1465
+ return json.dumps({"results": results, "count": len(results)})
1466
+
1467
+ def _handle_recall_delegation(self, args: Dict[str, Any]) -> str:
1468
+ """Tool handler for recall_delegation over the delegations table."""
1469
+ if not self._healthy or not self._store:
1470
+ return json.dumps({"results": [], "count": 0, "error": "hexus unavailable"})
1471
+
1472
+ query = (args.get("query") or "").strip()
1473
+ if not query:
1474
+ return tool_error("Missing required arg: query")
1475
+ try:
1476
+ limit = max(1, min(int(args.get("limit", 5)), 20))
1477
+ except (TypeError, ValueError):
1478
+ limit = 5
1479
+
1480
+ scope = (args.get("scope") or "current").strip()
1481
+ agent_filter: Optional[str] = None
1482
+ if scope == "current":
1483
+ agent_filter = self._agent_identity
1484
+ elif scope == "all":
1485
+ pass # no filters
1486
+ else:
1487
+ agent_filter = scope # treat as a specific theme name
1488
+
1489
+ try:
1490
+ vec = embed(
1491
+ query,
1492
+ base_url=self._config["embed_url"],
1493
+ model=self._config["embed_model"],
1494
+ )
1495
+ except EmbeddingError as exc:
1496
+ return json.dumps({"results": [], "count": 0, "error": f"embed: {exc}"})
1497
+
1498
+ try:
1499
+ rows = self._store.search_delegations(
1500
+ query_embedding=vec,
1501
+ agent_identity=agent_filter,
1502
+ limit=limit,
1503
+ )
1504
+ except Exception as exc: # noqa: BLE001
1505
+ return json.dumps({"results": [], "count": 0, "error": f"db: {exc}"})
1506
+
1507
+ results = []
1508
+ for r in rows:
1509
+ ts = r.get("ts")
1510
+ results.append(
1511
+ {
1512
+ "id": r.get("id"),
1513
+ "parent_session_id": r.get("parent_session_id"),
1514
+ "child_session_id": r.get("child_session_id"),
1515
+ "agent_identity": r.get("agent_identity"),
1516
+ "task": r.get("task"),
1517
+ "result": r.get("result"),
1518
+ "ts": ts.isoformat() if ts else None,
1519
+ "score": round(float(r.get("score") or 0.0), 4),
1520
+ }
1521
+ )
1522
+ return json.dumps({"results": results, "count": len(results)})
1523
+
1524
+ def _handle_entity_graph(self, args: Dict[str, Any]) -> str:
1525
+ """Tool handler for entity_graph."""
1526
+ if not self._healthy or not self._store:
1527
+ return json.dumps({"error": "hexus unavailable"})
1528
+
1529
+ entity_type = (args.get("entity_type") or "").strip()
1530
+ if not entity_type:
1531
+ return tool_error("Missing required arg: entity_type")
1532
+ entity_value = (args.get("entity_value") or "").strip()
1533
+ if not entity_value:
1534
+ return tool_error("Missing required arg: entity_value")
1535
+
1536
+ try:
1537
+ limit = max(1, min(int(args.get("limit", 5)), 20))
1538
+ except (TypeError, ValueError):
1539
+ limit = 5
1540
+
1541
+ scope = (args.get("scope") or "current").strip()
1542
+ agent_filter: Optional[str] = None
1543
+ if scope == "current":
1544
+ agent_filter = self._agent_identity
1545
+ elif scope == "all":
1546
+ pass
1547
+ else:
1548
+ agent_filter = scope
1549
+
1550
+ try:
1551
+ res = self._store.entity_graph(
1552
+ entity_type=entity_type,
1553
+ entity_value=entity_value,
1554
+ agent_identity=agent_filter,
1555
+ limit=limit,
1556
+ )
1557
+ return json.dumps(res)
1558
+ except Exception as exc:
1559
+ return json.dumps({"error": f"db: {exc}"})
1560
+
1561
+ def _handle_graph_walk(self, args: Dict[str, Any]) -> str:
1562
+ """Tool handler for graph_walk."""
1563
+ if not self._healthy or not self._store:
1564
+ return json.dumps({"error": "hexus unavailable"})
1565
+
1566
+ entity_type = (args.get("entity_type") or "").strip()
1567
+ if not entity_type:
1568
+ return tool_error("Missing required arg: entity_type")
1569
+ entity_value = (args.get("entity_value") or "").strip()
1570
+ if not entity_value:
1571
+ return tool_error("Missing required arg: entity_value")
1572
+
1573
+ try:
1574
+ max_depth = max(1, min(int(args.get("max_depth", 2)), 5))
1575
+ except (TypeError, ValueError):
1576
+ max_depth = 2
1577
+
1578
+ try:
1579
+ limit = max(1, min(int(args.get("limit", 5)), 20))
1580
+ except (TypeError, ValueError):
1581
+ limit = 5
1582
+
1583
+ scope = (args.get("scope") or "current").strip()
1584
+ agent_filter: Optional[str] = None
1585
+ if scope == "current":
1586
+ agent_filter = self._agent_identity
1587
+ elif scope == "all":
1588
+ pass
1589
+ else:
1590
+ agent_filter = scope
1591
+
1592
+ try:
1593
+ res = self._store.graph_walk(
1594
+ entity_type=entity_type,
1595
+ entity_value=entity_value,
1596
+ agent_identity=agent_filter,
1597
+ max_depth=max_depth,
1598
+ limit=limit,
1599
+ )
1600
+ return json.dumps({"results": res})
1601
+ except Exception as exc:
1602
+ return json.dumps({"error": f"db: {exc}"})
1603
+
1604
+ def _handle_common_topics(self, args: Dict[str, Any]) -> str:
1605
+ """Tool handler for common_topics."""
1606
+ if not self._healthy or not self._store:
1607
+ return json.dumps({"error": "hexus unavailable"})
1608
+
1609
+ try:
1610
+ min_strength = max(1, int(args.get("min_strength", 2)))
1611
+ except (TypeError, ValueError):
1612
+ min_strength = 2
1613
+
1614
+ try:
1615
+ limit = max(1, min(int(args.get("limit", 10)), 20))
1616
+ except (TypeError, ValueError):
1617
+ limit = 10
1618
+
1619
+ scope = (args.get("scope") or "current").strip()
1620
+ agent_filter: Optional[str] = None
1621
+ if scope == "current":
1622
+ agent_filter = self._agent_identity
1623
+ elif scope == "all":
1624
+ pass
1625
+ else:
1626
+ agent_filter = scope
1627
+
1628
+ try:
1629
+ res = self._store.common_topics(
1630
+ agent_identity=agent_filter,
1631
+ min_strength=min_strength,
1632
+ limit=limit,
1633
+ )
1634
+ return json.dumps({"results": res})
1635
+ except Exception as exc:
1636
+ return json.dumps({"error": f"db: {exc}"})
1637
+
1638
+ def _handle_confirm_memory(self, args: Dict[str, Any]) -> str:
1639
+ """Tool handler for confirm_memory."""
1640
+ if not self._healthy or not self._store:
1641
+ return json.dumps({"error": "hexus unavailable"})
1642
+
1643
+ entry_id = args.get("id")
1644
+ if entry_id is None:
1645
+ return tool_error("Missing required arg: id")
1646
+ try:
1647
+ entry_id = int(entry_id)
1648
+ except (TypeError, ValueError):
1649
+ return tool_error("id must be an integer")
1650
+
1651
+ try:
1652
+ success = self._store.confirm_entry(entry_id)
1653
+ return json.dumps({"id": entry_id, "success": success})
1654
+ except Exception as exc:
1655
+ return json.dumps({"error": f"db: {exc}"})
1656
+
1657
+ def _handle_reject_memory(self, args: Dict[str, Any]) -> str:
1658
+ """Tool handler for reject_memory."""
1659
+ if not self._healthy or not self._store:
1660
+ return json.dumps({"error": "hexus unavailable"})
1661
+
1662
+ entry_id = args.get("id")
1663
+ if entry_id is None:
1664
+ return tool_error("Missing required arg: id")
1665
+ try:
1666
+ entry_id = int(entry_id)
1667
+ except (TypeError, ValueError):
1668
+ return tool_error("id must be an integer")
1669
+
1670
+ try:
1671
+ success = self._store.reject_entry(entry_id)
1672
+ return json.dumps({"id": entry_id, "success": success})
1673
+ except Exception as exc:
1674
+ return json.dumps({"error": f"db: {exc}"})
1675
+
1676
+ def _handle_summarize_session(self, args: Dict[str, Any]) -> str:
1677
+ """Tool handler for summarize_session."""
1678
+ if not self._healthy or not self._store:
1679
+ return json.dumps({"error": "hexus unavailable"})
1680
+
1681
+ session_id = (args.get("session_id") or "").strip()
1682
+ if not session_id:
1683
+ return tool_error("Missing required arg: session_id")
1684
+
1685
+ try:
1686
+ limit = max(1, min(int(args.get("limit", 5)), 20))
1687
+ except (TypeError, ValueError):
1688
+ limit = 5
1689
+
1690
+ try:
1691
+ res = self._store.summarize_session(
1692
+ session_id=session_id,
1693
+ limit=limit,
1694
+ )
1695
+ return json.dumps(res)
1696
+ except Exception as exc:
1697
+ return json.dumps({"error": f"db: {exc}"})
1698
+
1699
+ def _handle_headroom_retrieve(self, args: Dict[str, Any]) -> str:
1700
+ """Tool handler for headroom_retrieve."""
1701
+ if not self._healthy or not self._store:
1702
+ return json.dumps({"error": "hexus unavailable"})
1703
+
1704
+ entry_id = args.get("id")
1705
+ if entry_id is None:
1706
+ return tool_error("Missing required arg: id")
1707
+ try:
1708
+ entry_id = int(entry_id)
1709
+ except (TypeError, ValueError):
1710
+ return tool_error("id must be an integer")
1711
+
1712
+ try:
1713
+ content = self._store.fetch_full(entry_id)
1714
+ if content is None:
1715
+ return json.dumps({"id": entry_id, "found": False, "content": None})
1716
+ return json.dumps({"id": entry_id, "found": True, "content": content})
1717
+ except Exception as exc:
1718
+ return json.dumps({"error": f"db: {exc}"})
1719
+
1720
+ def _handle_memory_stats(self, args: Dict[str, Any]) -> str:
1721
+ """Tool handler for memory_stats."""
1722
+ if not self._healthy or not self._store:
1723
+ return json.dumps({"error": "hexus unavailable"})
1724
+ try:
1725
+ from mcp_server.tools import memory_stats
1726
+
1727
+ res = memory_stats(self._store, args)
1728
+ return json.dumps(res)
1729
+ except Exception as exc:
1730
+ return json.dumps({"error": f"stats check failed: {exc}"})
1731
+
1732
+ # -- Setup hooks ---------------------------------------------------------
1733
+
1734
def get_config_schema(self) -> List[Dict[str, Any]]:
    """Describe the plugin's configuration keys.

    Each entry has ``key``, ``description`` and ``default``, plus
    optional ``required`` and ``choices``. Defaults are taken from
    ``DEFAULTS`` where one exists there.

    ``embed_url`` is optional: per the module header, embeddings are
    produced by the local sentence-transformers model unless an HTTP
    endpoint is configured. The same header specifies 384-dim
    embeddings, which the ``embed_model`` description reflects.
    """
    return [
        {
            "key": "dsn",
            "description": "Postgres DSN (psycopg connection string)",
            "default": DEFAULTS["dsn"],
            "required": True,
        },
        {
            "key": "embed_url",
            "description": (
                "Optional embedding endpoint base URL (OpenAI-compatible or Ollama native); "
                "leave empty to use the local sentence-transformers model"
            ),
            "default": DEFAULTS["embed_url"],
            "required": False,
        },
        {
            "key": "embed_model",
            "description": "Embedding model name (must return 384-dim vectors)",
            "default": DEFAULTS["embed_model"],
        },
        {
            "key": "prefetch_limit",
            "description": "Max ambient recall results injected per turn",
            "default": str(DEFAULTS["prefetch_limit"]),
        },
        {
            "key": "min_similarity",
            "description": "Cosine similarity cutoff for ambient prefetch (0.0–1.0)",
            "default": str(DEFAULTS["min_similarity"]),
        },
        {
            "key": "embed_on_write",
            "description": "Compute embedding on each write; turn off for text-only mode",
            "default": "true",
            "choices": ["true", "false"],
        },
        {
            "key": "scope_default",
            "description": "Default scope for recall_memory when caller omits it",
            "default": DEFAULTS["scope_default"],
            "choices": ["current", "all"],
        },
        {
            "key": "write_queue_maxsize",
            "description": "Bounded async-writer queue size; full = oldest writes drop with a warning",
            "default": str(DEFAULTS["write_queue_maxsize"]),
        },
        {
            "key": "bulk_sync_on_init",
            "description": "Import MEMORY.md / USER.md content from disk on agent init (v0.1.1)",
            "default": "true",
            "choices": ["true", "false"],
        },
        {
            "key": "sync_turns",
            "description": "Capture every substantive (user, assistant) turn pair into the conversations table",
            "default": "true",
            "choices": ["true", "false"],
        },
        {
            "key": "turn_min_chars",
            "description": "Turns shorter than this (after strip) are treated as boilerplate and skipped",
            "default": str(DEFAULTS["turn_min_chars"]),
        },
        {
            "key": "webhook_url",
            "description": "Custom HTTP URL to dispatch webhook payloads upon lifecycle events",
            "default": "",
            "required": False,
        },
        {
            "key": "webhook_secret",
            "description": "Secret token for signing webhook payloads with HMAC-SHA256",
            "default": "",
            "required": False,
        },
    ]
1810
+
1811
def save_config(self, values: Dict[str, Any], hermes_home: str) -> None:
    """Write ``values`` under ``plugins.hexus`` in ``<hermes_home>/config.yaml``.

    An existing file is loaded first so other settings are preserved. A
    missing or null ``plugins`` key is replaced with an empty mapping,
    so a bare ``plugins:`` line no longer makes the save fail. A
    top-level document or ``plugins`` value of any other non-mapping
    type is left untouched and reported. Never raises: every failure is
    logged as a warning.
    """
    from pathlib import Path

    config_path = Path(hermes_home) / "config.yaml"
    try:
        import yaml

        existing: Dict[str, Any] = {}
        if config_path.exists():
            # utf-8-sig tolerates a leading byte-order mark.
            with open(config_path, encoding="utf-8-sig") as fh:
                existing = yaml.safe_load(fh) or {}
        if not isinstance(existing, dict):
            raise TypeError("top level of config.yaml is not a mapping")

        plugins = existing.get("plugins")
        if plugins is None:
            plugins = {}
            existing["plugins"] = plugins
        elif not isinstance(plugins, dict):
            raise TypeError("'plugins' in config.yaml is not a mapping")
        plugins["hexus"] = values

        with open(config_path, "w", encoding="utf-8") as fh:
            yaml.dump(existing, fh, default_flow_style=False)
    except Exception as exc:  # noqa: BLE001
        logger.warning("hexus save_config failed: %s", exc)
1828
+
1829
+ # -- Helpers -------------------------------------------------------------
1830
+
1831
+ def _maybe_embed(self, content: str) -> Optional[List[float]]:
1832
+ if not self._config.get("embed_on_write", True):
1833
+ return None
1834
+ try:
1835
+ return embed(
1836
+ content,
1837
+ base_url=self._config["embed_url"],
1838
+ model=self._config["embed_model"],
1839
+ )
1840
+ except EmbeddingError as exc:
1841
+ if not self._embed_warned:
1842
+ logger.warning("hexus embed failed (degrading to text-only): %s", exc)
1843
+ self._embed_warned = True
1844
+ return None
1845
+
1846
+
1847
+ # ---------------------------------------------------------------------------
1848
+ # Plugin entry point
1849
+ # ---------------------------------------------------------------------------
1850
+
1851
+
1852
def register(ctx) -> None:
    """Plugin entry point: build the provider from plugin config and register it on ``ctx``."""
    plugin_config = _load_plugin_config()
    ctx.register_memory_provider(HexusMemoryProvider(config=plugin_config))