nexo-brain 7.31.1 → 7.31.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "nexo-brain",
3
- "version": "7.31.1",
3
+ "version": "7.31.2",
4
4
  "description": "Local cognitive runtime for Claude Code \u2014 persistent memory, overnight learning, doctor diagnostics, personal scripts, recovery-aware jobs, startup preflight, and optional dashboard/power helper.",
5
5
  "author": {
6
6
  "name": "NEXO Brain",
package/README.md CHANGED
@@ -18,9 +18,9 @@
18
18
 
19
19
  [Watch the overview video](https://nexo-brain.com/watch/) · [Watch on YouTube](https://www.youtube.com/watch?v=i2lkGhKyVqI) · [Open the infographic](https://nexo-brain.com/assets/nexo-brain-infographic-v5.png)
20
20
 
21
- Version `7.31.1` is the current packaged-runtime line. Patch release over v7.31.0 - headless automations pause and queue when the selected engine is unavailable (credits, rate limits, expired auth) and resume automatically with one operator notice in their language; protocol nudge shaping ships in shadow mode; and the client config push stops writing an invalid `mcp__*` permission rule to Claude Code settings.
21
+ Version `7.31.2` is the current packaged-runtime line. Patch release over v7.31.1 - the session ID becomes a durable identity (sessions survive quiet work periods; physical cleanup at 24h), runtime residents are isolated per generation so two installs can never kill each other's resident, and obsolete residents retire themselves once clients disconnect.
22
22
 
23
- Previously in `7.31.0`: minor release over v7.30.33 - the recommended Claude Code model moves from Opus 4.8 to Fable 5 with max reasoning (`claude-fable-5`) across all four main resonance tiers (the `muy_bajo` tier keeps Haiku for cheap internal classifiers and Codex stays on GPT-5.5), existing installs riding NEXO defaults auto-migrate on update while customized models are respected, and learning housekeeping no longer aborts when the embedding backend is missing.
23
+ Previously in `7.31.1`: patch release over v7.31.0 - headless automations pause and queue when the selected engine is unavailable (credits, rate limits, expired auth) and resume automatically with one operator notice in their language; protocol nudge shaping ships in shadow mode; and the client config push stops writing an invalid `mcp__*` permission rule to Claude Code settings.
24
24
 
25
25
  Previously in `7.30.33`: patch release over v7.30.32 - personal agent/script status now keeps the newest real run between manual executions and cron history, so a successful manual agent run cannot be hidden behind an older scheduled failure.
26
26
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "nexo-brain",
3
- "version": "7.31.1",
3
+ "version": "7.31.2",
4
4
  "mcpName": "io.github.wazionapps/nexo",
5
5
  "description": "NEXO Brain — Shared brain for AI agents. Persistent memory, semantic RAG, natural forgetting, metacognitive guard, trust scoring, 150+ MCP tools. Works with Claude Code, Codex, Claude Desktop & any MCP client. 100% local, free.",
6
6
  "homepage": "https://nexo-brain.com",
package/src/db/_core.py CHANGED
@@ -36,10 +36,18 @@ _data_dir = os.path.dirname(DB_PATH)
36
36
  os.makedirs(_data_dir, exist_ok=True)
37
37
 
38
38
  # TTLs in seconds (match session-coord.sh behavior)
39
- SESSION_STALE_SECONDS = 900 # 15 min (documented TTL)
39
+ SESSION_STALE_SECONDS = 900 # 15 min (documented TTL) — visibility horizon only
40
40
  MESSAGE_TTL_SECONDS = 3600 # 1 hour
41
41
  QUESTION_TTL_SECONDS = 600 # 10 min
42
42
 
43
+ # Phase 2.1 — purge horizon, deliberately FAR above the visibility TTL.
44
+ # SESSION_STALE_SECONDS only governs what counts as "active" in listings;
45
+ # physically DELETING rows at 15 min destroyed the continuity of any session
46
+ # that spent >15 min in code tools without touching a nexo_* tool (incident
47
+ # 10-jun: two working sessions lost mid-task, orphaning their open protocol
48
+ # tasks). Rows now survive 24h so revival/heartbeat can find them.
49
+ SESSION_PURGE_SECONDS = 24 * 3600
50
+
43
51
  # Single shared connection per process with write serialization.
44
52
  # SQLite allows only one writer at a time. Using a shared connection with
45
53
  # check_same_thread=False and a write lock ensures:
@@ -2,7 +2,7 @@ from __future__ import annotations
2
2
  """NEXO DB — Sessions module."""
3
3
  import time, secrets, string, sqlite3
4
4
  from datetime import datetime
5
- from db._core import get_db, _gen_id, now_epoch, local_time_str, SESSION_STALE_SECONDS, MESSAGE_TTL_SECONDS, QUESTION_TTL_SECONDS
5
+ from db._core import get_db, _gen_id, now_epoch, local_time_str, SESSION_STALE_SECONDS, SESSION_PURGE_SECONDS, MESSAGE_TTL_SECONDS, QUESTION_TTL_SECONDS
6
6
 
7
7
  # ── Session operations ──────────────────────────────────────────────
8
8
 
@@ -166,9 +166,17 @@ def get_active_sessions() -> list[dict]:
166
166
 
167
167
 
168
168
  def clean_stale_sessions() -> int:
169
- """Remove stale sessions. Returns count removed."""
169
+ """Purge sessions older than the PURGE horizon. Returns count removed.
170
+
171
+ Phase 2.1 — this used to delete at SESSION_STALE_SECONDS (15 min), which
172
+ destroyed any session that worked quietly in code tools for a while: the
173
+ next session/cron to start would erase it, its next nexo_track failed
174
+ with "Session not found" and its open protocol tasks were orphaned.
175
+ Deletion now happens at SESSION_PURGE_SECONDS (24h); the 15-min TTL keeps
176
+ governing VISIBILITY (get_active_sessions/search_sessions) unchanged.
177
+ """
170
178
  conn = get_db()
171
- cutoff = now_epoch() - SESSION_STALE_SECONDS
179
+ cutoff = now_epoch() - SESSION_PURGE_SECONDS
172
180
  stale = conn.execute(
173
181
  "SELECT sid FROM sessions WHERE last_update_epoch <= ?", (cutoff,)
174
182
  ).fetchall()
@@ -309,13 +317,42 @@ def search_sessions(keyword: str) -> list[dict]:
309
317
 
310
318
  # ── File tracking ───────────────────────────────────────────────────
311
319
 
320
+ def revive_session(sid: str, task_hint: str = "(revived session)") -> bool:
321
+ """Phase 2.1 — re-create a session row for a valid SID that vanished.
322
+
323
+ A session can disappear legitimately (purge horizon, manual cleanup,
324
+ DB swap) while its client keeps working with the same SID. The durable
325
+ identity is the SID, not the row: revive it instead of erroring, so the
326
+ "Session not found. Register first." class of breakage cannot occur.
327
+ Returns True when a row was actually (re)created.
328
+ """
329
+ sid = _validate_sid(sid)
330
+ conn = get_db()
331
+ now = now_epoch()
332
+ cursor = conn.execute(
333
+ "INSERT OR IGNORE INTO sessions (sid, task, started_epoch, last_update_epoch, local_time) "
334
+ "VALUES (?, ?, ?, ?, ?)",
335
+ (sid, task_hint, now, now, local_time_str()),
336
+ )
337
+ conn.commit()
338
+ return cursor.rowcount > 0
339
+
340
+
312
341
  def track_files(sid: str, paths: list[str]) -> dict:
313
- """Track files for a session. Returns conflicts if any."""
342
+ """Track files for a session. Returns conflicts if any.
343
+
344
+ Phase 2.1 — a valid SID whose row vanished is REVIVED instead of being
345
+ told "Session not found. Register first." (the heartbeat already revived
346
+ missing sessions; this layer was internally inconsistent). The result
347
+ carries ``revived: True`` so callers can log the recovery.
348
+ """
349
+ sid = _validate_sid(sid)
314
350
  conn = get_db()
315
351
  now = now_epoch()
352
+ revived = False
316
353
  session = conn.execute("SELECT sid FROM sessions WHERE sid = ?", (sid,)).fetchone()
317
354
  if not session:
318
- return {"error": f"Session {sid} not found. Register first."}
355
+ revived = revive_session(sid, task_hint="(revived by nexo_track)")
319
356
 
320
357
  for path in paths:
321
358
  conn.execute(
@@ -324,7 +361,10 @@ def track_files(sid: str, paths: list[str]) -> dict:
324
361
  )
325
362
  conn.commit()
326
363
  conflicts = _check_conflicts(conn, sid)
327
- return {"tracked": paths, "conflicts": conflicts}
364
+ result = {"tracked": paths, "conflicts": conflicts}
365
+ if revived:
366
+ result["revived"] = True
367
+ return result
328
368
 
329
369
 
330
370
  def untrack_files(sid: str, paths: list[str] | None = None):
@@ -34,6 +34,16 @@ STATE_FILE = "runtime-service.json"
34
34
  LOCK_FILE = "runtime-service.lock"
35
35
  LOG_FILE = "runtime-service.log"
36
36
 
37
+ # Phase 2.1/2.2 — state isolation per runtime generation. Two different Brain
38
+ # installs (e.g. the managed ~/.nexo/core runtime and an npm-global one)
39
+ # used to share ONE state file: each side saw a "stale_runtime" resident and
40
+ # KILLED the other's in an endless ping-pong (1,314 resident restarts logged
41
+ # on the operator machine; every restart forced the next conversation to pay
42
+ # a 10-48s cold Brain boot and expired client sessions). With the state file
43
+ # keyed by runtime generation, a runtime can only ever see — and manage —
44
+ # its OWN resident. Foreign residents become invisible instead of killable.
45
+
46
+
37
47
 
38
48
  def env_flag(name: str, *, default: bool = False) -> bool:
39
49
  value = os.environ.get(name)
@@ -55,12 +65,29 @@ def service_url(host: str | None = None, port: int | None = None, path: str | No
55
65
  return f"http://{host or service_host()}:{int(port or service_port())}{path or service_path()}"
56
66
 
57
67
 
58
- def service_state_path() -> Path:
68
+ def _generation_state_token(generation: str) -> str:
69
+ """Stable filesystem-safe token for a runtime generation."""
70
+ text = str(generation or "unknown").strip() or "unknown"
71
+ return hashlib.sha256(text.encode("utf-8")).hexdigest()[:12]
72
+
73
+
74
+ def _current_generation_token() -> str:
75
+ return _generation_state_token(current_runtime_identity().get("runtime_generation", "unknown"))
76
+
77
+
78
+ def _legacy_service_state_path() -> Path:
59
79
  root = paths.runtime_state_dir()
60
80
  root.mkdir(parents=True, exist_ok=True)
61
81
  return root / STATE_FILE
62
82
 
63
83
 
84
+ def service_state_path() -> Path:
85
+ root = paths.runtime_state_dir()
86
+ root.mkdir(parents=True, exist_ok=True)
87
+ token = _current_generation_token()
88
+ return root / f"runtime-service-{token}.json"
89
+
90
+
64
91
  def service_log_path() -> Path:
65
92
  root = paths.logs_dir()
66
93
  root.mkdir(parents=True, exist_ok=True)
@@ -70,7 +97,8 @@ def service_log_path() -> Path:
70
97
  def service_lock_path() -> Path:
71
98
  root = paths.runtime_state_dir()
72
99
  root.mkdir(parents=True, exist_ok=True)
73
- return root / LOCK_FILE
100
+ token = _current_generation_token()
101
+ return root / f"runtime-service-{token}.lock"
74
102
 
75
103
 
76
104
  @contextmanager
@@ -128,10 +156,22 @@ def service_start_lock(*, timeout: float = 10.0):
128
156
  def read_service_state() -> dict[str, Any]:
129
157
  try:
130
158
  path = service_state_path()
131
- if not path.is_file():
132
- return {}
133
- data = json.loads(path.read_text(encoding="utf-8"))
134
- return data if isinstance(data, dict) else {}
159
+ if path.is_file():
160
+ data = json.loads(path.read_text(encoding="utf-8"))
161
+ return data if isinstance(data, dict) else {}
162
+ # Phase 2.1 one-time soft migration: adopt a pre-generation legacy
163
+ # state file ONLY if it belongs to this same runtime. A foreign
164
+ # install's legacy state stays invisible (never "stale to kill").
165
+ legacy = _legacy_service_state_path()
166
+ if legacy.is_file():
167
+ data = json.loads(legacy.read_text(encoding="utf-8"))
168
+ if isinstance(data, dict) and state_matches_current_runtime(data):
169
+ try:
170
+ legacy.replace(path)
171
+ except Exception:
172
+ pass
173
+ return data
174
+ return {}
135
175
  except Exception:
136
176
  return {}
137
177
 
@@ -445,6 +485,107 @@ def runtime_service_status() -> dict[str, Any]:
445
485
  }
446
486
 
447
487
 
488
+ # Phase 2.1/2.2 — resident obsolescence watch.
489
+ #
490
+ # With per-generation state files, residents no longer kill each other; the
491
+ # missing piece is cleanup: after a runtime update, the OLD resident must
492
+ # eventually exit, while the CURRENT one must stay warm forever (a hot Brain
493
+ # is what turns 10-48s conversation starts into fast ones). Rules:
494
+ # - a resident whose on-disk runtime generation still matches its own NEVER
495
+ # self-terminates, idle or not;
496
+ # - an OBSOLETE resident (disk generation changed under it) exits cleanly
497
+ # once it has had no established client connections for two consecutive
498
+ # checks (anti-flap), removing its state file on the way out;
499
+ # - if connections cannot be counted (no lsof/netstat), it stays alive —
500
+ # fail-safe towards living.
501
+
502
+ OBSOLESCENCE_CHECK_SECONDS = 300
503
+
504
+
505
+ def _count_established_connections(port: int) -> int | None:
506
+ """Best-effort count of ESTABLISHED TCP connections to ``port``.
507
+
508
+ Returns None when it cannot tell (missing tooling) so callers can fail
509
+ safe. Uses lsof on POSIX and netstat on Windows; a missing tool also yields None.
510
+ """
511
+ try:
512
+ if os.name == "nt":
513
+ out = subprocess.run(
514
+ ["netstat", "-ano", "-p", "tcp"],
515
+ capture_output=True, text=True, timeout=10,
516
+ ).stdout
517
+ needle = f":{port} "
518
+ return sum(
519
+ 1 for line in out.splitlines()
520
+ if "ESTABLISHED" in line and needle in line.split("ESTABLISHED")[0]
521
+ )
522
+ out = subprocess.run(
523
+ ["lsof", "-nP", f"-iTCP:{port}", "-sTCP:ESTABLISHED"],
524
+ capture_output=True, text=True, timeout=10,
525
+ ).stdout
526
+ rows = [line for line in out.splitlines() if "ESTABLISHED" in line]
527
+ # lsof reports both endpoints of a loopback connection, so local clients can
528
+ # be counted twice; the obsolescence watch only tests zero vs. non-zero.
529
+ return len(rows)
530
+ except Exception:
531
+ return None
532
+
533
+
534
+ def _resident_is_obsolete(boot_generation: str) -> bool:
535
+ try:
536
+ from runtime_versioning import compute_mcp_runtime_fingerprint, read_version_for_path, runtime_generation
537
+
538
+ root = current_server_path().parent
539
+ version = read_version_for_path(root) or read_version_for_path(root.parent)
540
+ fingerprint = compute_mcp_runtime_fingerprint(root, use_cache=False)
541
+ current = runtime_generation(version, fingerprint, str(root))
542
+ return bool(boot_generation) and bool(current) and current != boot_generation
543
+ except Exception:
544
+ return False # cannot tell -> assume still current (fail-safe)
545
+
546
+
547
+ def start_resident_obsolescence_watch(*, port: int, on_exit=None) -> None:
548
+ """Spawn the daemon thread that retires obsolete residents gracefully."""
549
+ import threading
550
+
551
+ boot_generation = str(current_runtime_identity().get("runtime_generation") or "")
552
+
553
+ def _watch() -> None:
554
+ strikes = 0
555
+ while True:
556
+ time.sleep(OBSOLESCENCE_CHECK_SECONDS)
557
+ try:
558
+ if not _resident_is_obsolete(boot_generation):
559
+ strikes = 0
560
+ continue
561
+ connections = _count_established_connections(port)
562
+ if connections is None or connections > 0:
563
+ strikes = 0
564
+ continue
565
+ strikes += 1
566
+ if strikes < 2:
567
+ continue
568
+ print(
569
+ f"[runtime-service] obsolete resident (gen {boot_generation[:24]}…) idle for two checks — exiting cleanly",
570
+ file=sys.stderr,
571
+ )
572
+ try:
573
+ service_state_path().unlink(missing_ok=True)
574
+ except Exception:
575
+ pass
576
+ if callable(on_exit):
577
+ try:
578
+ on_exit()
579
+ except Exception:
580
+ pass
581
+ os._exit(0)
582
+ except Exception:
583
+ strikes = 0 # the watch must never kill a healthy resident
584
+
585
+ thread = threading.Thread(target=_watch, name="resident-obsolescence-watch", daemon=True)
586
+ thread.start()
587
+
588
+
448
589
  def run_mcp_proxy_adapter(*, name: str, instructions: str, run_kwargs: dict[str, Any]) -> None:
449
590
  from fastmcp.server import create_proxy
450
591
 
package/src/server.py CHANGED
@@ -3093,4 +3093,14 @@ if __name__ == "__main__":
3093
3093
  "mode": "runtime-service",
3094
3094
  }
3095
3095
  )
3096
+ # Phase 2.1/2.2 — retire this resident gracefully if the on-disk
3097
+ # runtime is updated under it AND no clients remain connected.
3098
+ # The current-generation resident never self-terminates: a warm
3099
+ # Brain is what keeps conversation starts fast.
3100
+ from runtime_service import start_resident_obsolescence_watch
3101
+
3102
+ start_resident_obsolescence_watch(
3103
+ port=port,
3104
+ on_exit=lambda: (close_local_context_db(), close_db()),
3105
+ )
3096
3106
  mcp.run(**run_kwargs)