nexo-brain 5.1.1 → 5.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "nexo-brain",
3
- "version": "5.1.1",
3
+ "version": "5.2.0",
4
4
  "description": "Local cognitive runtime for Claude Code \u2014 persistent memory, overnight learning, doctor diagnostics, personal scripts, recovery-aware jobs, startup preflight, and optional dashboard/power helper.",
5
5
  "author": {
6
6
  "name": "NEXO Brain",
package/README.md CHANGED
@@ -87,6 +87,14 @@ Versions `3.1.7` through `3.2.0` close the recent-memory gap:
87
87
  - when even that misses, NEXO now exposes raw transcript fallback tools for Claude Code and Codex session stores
88
88
  - NEXO can now inspect itself through a live system catalog derived from canonical sources instead of relying only on stale docs or operator memory
89
89
 
90
+ Version `5.2.0` closes two focused gaps in the Cortex layer that were left open by the v5.1 audit — the high-stakes response-contract detector was English-only, and the `nexo-cortex-cycle` cron was writing a quality snapshot that no reader ever consumed:
91
+
92
+ - `HIGH_STAKES_KEYWORDS_ES` adds ~45 Spanish keywords to the high-stakes detector with accented and unaccented variants, so a goal written in Spanish (`migrar la base de datos de producción`) trips the same gate as its English twin.
93
+ - `NEGATION_PATTERNS` suppresses false positives when the user explicitly disclaims touching the sensitive area (`sin afectar producción`, `no tocar prod`, `without touching production`, `don't modify`). The raw keyword being present is no longer enough to flag the task.
94
+ - `evaluate_response_confidence` accepts two new optional kwargs, `pre_action_context_hits` (up to +10) and `area_has_atlas_entry` (+5), so the score can finally reward tasks that loaded real context instead of only punishing unprepared ones. Both signals are capped and cannot override a real risk penalty.
95
+ - A monotonic numeric safeguard layers on top of the boolean decision tree: `answer` downgrades to `verify` when `final_score < 50`, and `verify` downgrades to `defer` when `high_stakes` and `final_score < 30`. The safeguard can only make response discipline stricter, never looser.
96
+ - `handle_cortex_quality` in `src/plugins/cortex.py` now reads `$NEXO_HOME/operations/cortex-quality-latest.json` when the requested window is 7 or 1 days, the snapshot is fresh (less than 6h 30m old), and the schema matches — it falls back silently to the live SQL computation on any failure. The handler's JSON response now includes `"source": "cache" | "live"` for observability.
97
+
90
98
  Version `5.1.0` lands the full NEXO-AUDIT-2026-04-11 roadmap as a single minor bump — every open evolution / adaptive / cognitive / skills loop now closes under itself, the knowledge graph exports cleanly, OpenTelemetry spans can be turned on without a hard dependency, and every PR has to clear lint, security, coverage, and release-readiness gates before it can merge:
91
99
 
92
100
  - Evolution cycle now auto-applies user-approved proposals on the next run (backed by the new idempotent migration `m38`), adaptive learned-weight rollbacks surface as visible followups, outcome patterns auto-promote to draft skills, and a Voyager-style detector exposes co-occurring skill pairs as composite-skill candidates via `nexo_skill_compose_candidates`.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "nexo-brain",
3
- "version": "5.1.1",
3
+ "version": "5.2.0",
4
4
  "mcpName": "io.github.wazionapps/nexo",
5
5
  "description": "NEXO Brain — Shared brain for AI agents. Persistent memory, semantic RAG, natural forgetting, metacognitive guard, trust scoring, 150+ MCP tools. Works with Claude Code, Codex, Claude Desktop & any MCP client. 100% local, free.",
6
6
  "homepage": "https://nexo-brain.com",
@@ -15,9 +15,12 @@ v0.1: Single MCP tool + middleware validation.
15
15
  """
16
16
 
17
17
  import json
18
+ import os
18
19
  import re
19
20
  import secrets
20
21
  import time
22
+ from datetime import datetime
23
+ from pathlib import Path
21
24
 
22
25
 
23
26
  def _get_db():
@@ -1003,12 +1006,88 @@ def handle_cortex_override(evaluation_id: int, chosen: str, reason: str) -> str:
1003
1006
  return json.dumps({"ok": True, "evaluation": updated}, ensure_ascii=False, indent=2)
1004
1007
 
1005
1008
 
1009
+ # v5.2.0: Cortex quality cache reader. The `nexo-cortex-cycle` cron
1010
+ # (src/scripts/nexo-cortex-cycle.py) writes a fresh quality snapshot to
1011
+ # $NEXO_HOME/operations/cortex-quality-latest.json every 6h. Until this
1012
+ # release the reader was missing — the snapshot was write-only and every
1013
+ # call to `nexo_cortex_quality` re-ran the SQL summary. Now the handler
1014
+ # reads the cache first for the 7d / 1d windows and falls back silently
1015
+ # to the live computation on any failure.
1016
+ _CORTEX_QUALITY_CACHE_PATH = (
1017
+ Path(os.environ.get("NEXO_HOME", str(Path.home() / ".nexo")))
1018
+ / "operations"
1019
+ / "cortex-quality-latest.json"
1020
+ )
1021
+ # 6h cron interval + 30 min slack (6.5 h = 23400 s) so a slightly late run still serves the cache.
1022
+ _CORTEX_QUALITY_CACHE_MAX_AGE_SECONDS = 23400
1023
+ _CORTEX_QUALITY_CACHE_WINDOWS = {1: "window_1d", 7: "window_7d"}
1024
+ _CORTEX_QUALITY_CACHE_SCHEMA = 1
1025
+
1026
+
1027
+ def _load_cortex_quality_cache(days: int) -> dict | None:
1028
+ """Return cached summary dict for the requested window, or None if unusable.
1029
+
1030
+ Silent on any failure so the live path always wins on a corrupt cache.
1031
+ Respects the snapshot schema written by `_persist_quality_snapshot`
1032
+ in src/scripts/nexo-cortex-cycle.py — do NOT change the layout here
1033
+ without updating the writer in the same release.
1034
+ """
1035
+ window_key = _CORTEX_QUALITY_CACHE_WINDOWS.get(days)
1036
+ if window_key is None:
1037
+ return None
1038
+ try:
1039
+ if not _CORTEX_QUALITY_CACHE_PATH.is_file():
1040
+ return None
1041
+ payload = json.loads(
1042
+ _CORTEX_QUALITY_CACHE_PATH.read_text(encoding="utf-8")
1043
+ )
1044
+ except Exception:
1045
+ return None
1046
+ if not isinstance(payload, dict):
1047
+ return None
1048
+ if payload.get("schema") != _CORTEX_QUALITY_CACHE_SCHEMA:
1049
+ return None
1050
+ captured_at = payload.get("captured_at") or ""
1051
+ if not isinstance(captured_at, str):
1052
+ return None
1053
+ try:
1054
+ captured = datetime.fromisoformat(captured_at)
1055
+ except Exception:
1056
+ return None
1057
+ age = time.time() - captured.timestamp()
1058
+ if age < 0 or age > _CORTEX_QUALITY_CACHE_MAX_AGE_SECONDS:
1059
+ return None
1060
+ window = payload.get(window_key)
1061
+ if not isinstance(window, dict):
1062
+ return None
1063
+ return window
1064
+
1065
+
1006
1066
  def handle_cortex_quality(days: int = 30) -> str:
1007
- """Summarise recommendation quality, overrides, and linked outcome results."""
1067
+ """Summarise recommendation quality, overrides, and linked outcome results.
1068
+
1069
+ v5.2.0: Serves the snapshot written by `nexo-cortex-cycle` when the
1070
+ requested window is 7 or 1 days and the snapshot is fresh
1071
+ (< 6h30m old, schema == 1). Falls back silently to a live SQL
1072
+ summary on any failure, so the caller always gets a valid response.
1073
+ The returned JSON includes `"source": "cache" | "live"` so the
1074
+ path taken is observable from the outside.
1075
+ """
1008
1076
  from db import cortex_evaluation_summary
1009
1077
 
1078
+ cached = _load_cortex_quality_cache(days)
1079
+ if cached is not None:
1080
+ return json.dumps(
1081
+ {"ok": True, "summary": cached, "source": "cache"},
1082
+ ensure_ascii=False,
1083
+ indent=2,
1084
+ )
1010
1085
  summary = cortex_evaluation_summary(days=days)
1011
- return json.dumps({"ok": True, "summary": summary}, ensure_ascii=False, indent=2)
1086
+ return json.dumps(
1087
+ {"ok": True, "summary": summary, "source": "live"},
1088
+ ensure_ascii=False,
1089
+ indent=2,
1090
+ )
1012
1091
 
1013
1092
 
1014
1093
  TOOLS = [
@@ -64,6 +64,74 @@ HIGH_STAKES_KEYWORDS = {
64
64
  "revenue",
65
65
  "cost",
66
66
  }
67
+ # v5.2.0: Spanish high-stakes keywords. Parity with the English set so a
68
+ # goal written in Spanish ("migrar producción a nuevo servidor") trips
69
+ # the same high-stakes gate as its English twin. Accented and unaccented
70
+ # variants are both listed because user prompts mix both freely.
71
+ HIGH_STAKES_KEYWORDS_ES = {
72
+ "crítico",
73
+ "critico",
74
+ "crítica",
75
+ "critica",
76
+ "producción",
77
+ "produccion",
78
+ "cliente",
79
+ "clientes",
80
+ "despliegue",
81
+ "desplegar",
82
+ "pago",
83
+ "pagos",
84
+ "facturación",
85
+ "facturacion",
86
+ "factura",
87
+ "credencial",
88
+ "credenciales",
89
+ "contraseña",
90
+ "seguridad",
91
+ "legal",
92
+ "médico",
93
+ "medico",
94
+ "financiero",
95
+ "financiera",
96
+ "privacidad",
97
+ "marca",
98
+ "reputación",
99
+ "reputacion",
100
+ "ingresos",
101
+ "borrar",
102
+ "eliminar",
103
+ "migración",
104
+ "migracion",
105
+ "migrar",
106
+ "lanzamiento",
107
+ "lanzar",
108
+ "precio",
109
+ "precios",
110
+ "reembolso",
111
+ "público",
112
+ "publico",
113
+ "riesgo",
114
+ "riesgos",
115
+ "coste",
116
+ "costes",
117
+ "ventas",
118
+ "pedido",
119
+ "pedidos",
120
+ }
121
+ # v5.2.0: Negation patterns that should SUPPRESS the high-stakes flag.
122
+ # Without this, a user message like "sin afectar producción" or
123
+ # "no tocar prod" triggers a false positive just because the keyword
124
+ # is physically present. Bilingual and conservative on purpose.
125
+ NEGATION_PATTERNS = (
126
+ re.compile(r"\bno\s+tocar\s+prod(?:ucci[oó]n|uccion)?\b", re.IGNORECASE),
127
+ re.compile(r"\bsin\s+(?:tocar|afectar|romper|modificar)\b", re.IGNORECASE),
128
+ re.compile(r"\bnunca\s+(?:borrar|eliminar|tocar)\b", re.IGNORECASE),
129
+ re.compile(r"\bno\s+(?:borrar|eliminar|tocar|modificar)\b", re.IGNORECASE),
130
+ re.compile(r"\bevitar\s+(?:borrar|eliminar|tocar|romper)\b", re.IGNORECASE),
131
+ re.compile(r"\bavoid\s+(?:deleting|touching|breaking|modifying)\b", re.IGNORECASE),
132
+ re.compile(r"\bdon'?t\s+(?:touch|break|modify|delete)\b", re.IGNORECASE),
133
+ re.compile(r"\bwithout\s+(?:touching|breaking|affecting)\b", re.IGNORECASE),
134
+ )
67
135
 
68
136
 
69
137
  def _parse_list(value) -> list[str]:
@@ -104,9 +172,32 @@ def _parse_int_list(value) -> list[int]:
104
172
  return parsed
105
173
 
106
174
 
175
+ def _has_negation_context(text: str) -> bool:
176
+ """Return True when the text explicitly disclaims touching the sensitive area.
177
+
178
+ Used to suppress high-stakes false positives where the user is stating
179
+ the *boundary* of safe work ("without touching production") rather than
180
+ the *target* of a risky action ("migrate production").
181
+ """
182
+ if not text:
183
+ return False
184
+ return any(pattern.search(text) for pattern in NEGATION_PATTERNS)
185
+
186
+
107
187
  def _detect_high_stakes(*parts: str) -> bool:
108
188
  combined = " ".join((part or "").strip().lower() for part in parts if part)
109
- return any(keyword in combined for keyword in HIGH_STAKES_KEYWORDS)
189
+ if not combined:
190
+ return False
191
+ # Negation override: "sin afectar producción" / "don't touch prod" / etc.
192
+ # Explicit disclaimers suppress the flag even if a high-stakes keyword
193
+ # is physically present, otherwise boundary statements get miscategorised
194
+ # as action targets.
195
+ if _has_negation_context(combined):
196
+ return False
197
+ return any(
198
+ keyword in combined
199
+ for keyword in HIGH_STAKES_KEYWORDS | HIGH_STAKES_KEYWORDS_ES
200
+ )
110
201
 
111
202
 
112
203
  def _decision_support_required(*, task_type: str, high_stakes: bool) -> bool:
@@ -124,6 +215,8 @@ def evaluate_response_confidence(
124
215
  unknowns=None,
125
216
  verification_step: str = "",
126
217
  stakes: str = "",
218
+ pre_action_context_hits: int = 0,
219
+ area_has_atlas_entry: bool = False,
127
220
  ) -> dict:
128
221
  evidence_refs = _parse_list(evidence_refs)
129
222
  unknowns = _parse_list(unknowns)
@@ -152,6 +245,22 @@ def evaluate_response_confidence(
152
245
  score -= 20
153
246
  reasons.append("high-stakes context detected")
154
247
 
248
+ # v5.2.0: Positive signals. Before this release the score was purely
249
+ # a penalty accumulator — there was no way to reward tasks that had
250
+ # meaningful prior context loaded or that sat inside a known area.
251
+ # Cap at +10 and +5 so these can never override a real risk signal.
252
+ if pre_action_context_hits > 0:
253
+ boost = min(10, pre_action_context_hits * 2)
254
+ score += boost
255
+ reasons.append(
256
+ f"+{boost} from {pre_action_context_hits} pre-action context hit(s)"
257
+ )
258
+ if area_has_atlas_entry:
259
+ score += 5
260
+ reasons.append("+5 from known project-atlas area")
261
+
262
+ final_score = max(0, min(100, score))
263
+
155
264
  mode = "answer"
156
265
  if task_type in RESPONSE_TASKS:
157
266
  if high_stakes and (unknowns or not evidence_refs):
@@ -161,6 +270,23 @@ def evaluate_response_confidence(
161
270
  elif high_stakes or not evidence_refs or not verification_step.strip():
162
271
  mode = "verify"
163
272
 
273
+ # v5.2.0: Numeric safeguard. The boolean decision tree above
274
+ # covers every obvious case, but tasks can accumulate soft
275
+ # penalties without tripping any single rule. When the final
276
+ # score is critically low, downgrade the mode by one step.
277
+ # This catches edge cases and is monotonic — it can only make
278
+ # the response discipline stricter, never looser.
279
+ if mode == "answer" and final_score < 50:
280
+ mode = "verify"
281
+ reasons.append(
282
+ f"numeric safeguard: score {final_score} < 50 forces verify"
283
+ )
284
+ elif mode == "verify" and final_score < 30 and high_stakes:
285
+ mode = "defer"
286
+ reasons.append(
287
+ f"numeric safeguard: high-stakes with score {final_score} forces defer"
288
+ )
289
+
164
290
  next_action = {
165
291
  "answer": "You may answer directly, but stay within the evidence you actually have.",
166
292
  "verify": "Verify the claim with concrete evidence before answering.",
@@ -170,7 +296,7 @@ def evaluate_response_confidence(
170
296
 
171
297
  return {
172
298
  "mode": mode,
173
- "confidence": max(0, min(100, score)),
299
+ "confidence": final_score,
174
300
  "high_stakes": high_stakes,
175
301
  "reasons": reasons,
176
302
  "next_action": next_action,