nexo-brain 5.1.1 → 5.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +1 -1
- package/README.md +8 -0
- package/package.json +1 -1
- package/src/plugins/cortex.py +102 -2
- package/src/plugins/protocol.py +128 -2
- package/src/scripts/deep-sleep/apply_findings.py +2 -1
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "nexo-brain",
|
|
3
|
-
"version": "5.1.1",
|
|
3
|
+
"version": "5.2.1",
|
|
4
4
|
"description": "Local cognitive runtime for Claude Code \u2014 persistent memory, overnight learning, doctor diagnostics, personal scripts, recovery-aware jobs, startup preflight, and optional dashboard/power helper.",
|
|
5
5
|
"author": {
|
|
6
6
|
"name": "NEXO Brain",
|
package/README.md
CHANGED
|
@@ -87,6 +87,14 @@ Versions `3.1.7` through `3.2.0` close the recent-memory gap:
|
|
|
87
87
|
- when even that misses, NEXO now exposes raw transcript fallback tools for Claude Code and Codex session stores
|
|
88
88
|
- NEXO can now inspect itself through a live system catalog derived from canonical sources instead of relying only on stale docs or operator memory
|
|
89
89
|
|
|
90
|
+
Version `5.2.0` closes two focused gaps in the Cortex layer that were left open by the v5.1 audit — the high-stakes response-contract detector was English-only, and the `nexo-cortex-cycle` cron was writing a quality snapshot that no reader ever consumed:
|
|
91
|
+
|
|
92
|
+
- `HIGH_STAKES_KEYWORDS_ES` adds ~45 Spanish keywords to the high-stakes detector with accented and unaccented variants, so a goal written in Spanish (`migrar la base de datos de producción`) trips the same gate as its English twin.
|
|
93
|
+
- `NEGATION_PATTERNS` suppresses false positives when the user explicitly disclaims touching the sensitive area (`sin afectar producción`, `no tocar prod`, `without touching production`, `don't modify`). The raw keyword being present is no longer enough to flag the task.
|
|
94
|
+
- `evaluate_response_confidence` accepts two new optional kwargs, `pre_action_context_hits` (+up to 10) and `area_has_atlas_entry` (+5), so the score can finally reward tasks that loaded real context instead of only punishing unprepared ones. Both signals are capped and cannot override a real risk penalty.
|
|
95
|
+
- A monotonic numeric safeguard layers on top of the boolean decision tree: `answer` downgrades to `verify` when `final_score < 50`, and `verify` downgrades to `defer` when `high_stakes` and `final_score < 30`. The safeguard can only make response discipline stricter, never looser.
|
|
96
|
+
- `handle_cortex_quality` in `src/plugins/cortex.py` now reads `$NEXO_HOME/operations/cortex-quality-latest.json` when the requested window (7 or 1 days) is fresh (<6h 30m) and the schema matches — silent fallback to the live SQL computation on any failure. The handler's JSON response now includes `"source": "cache" | "live"` for observability.
|
|
97
|
+
|
|
90
98
|
Version `5.1.0` lands the full NEXO-AUDIT-2026-04-11 roadmap as a single minor bump — every open evolution / adaptive / cognitive / skills loop now closes under itself, the knowledge graph exports cleanly, OpenTelemetry spans can be turned on without a hard dependency, and every PR has to clear lint, security, coverage, and release-readiness gates before it can merge:
|
|
91
99
|
|
|
92
100
|
- Evolution cycle now auto-applies user-approved proposals on the next run (backed by the new idempotent migration `m38`), adaptive learned-weight rollbacks surface as visible followups, outcome patterns auto-promote to draft skills, and a Voyager-style detector exposes co-occurring skill pairs as composite-skill candidates via `nexo_skill_compose_candidates`.
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "nexo-brain",
|
|
3
|
-
"version": "5.1.1",
|
|
3
|
+
"version": "5.2.1",
|
|
4
4
|
"mcpName": "io.github.wazionapps/nexo",
|
|
5
5
|
"description": "NEXO Brain — Shared brain for AI agents. Persistent memory, semantic RAG, natural forgetting, metacognitive guard, trust scoring, 150+ MCP tools. Works with Claude Code, Codex, Claude Desktop & any MCP client. 100% local, free.",
|
|
6
6
|
"homepage": "https://nexo-brain.com",
|
package/src/plugins/cortex.py
CHANGED
|
@@ -15,9 +15,12 @@ v0.1: Single MCP tool + middleware validation.
|
|
|
15
15
|
"""
|
|
16
16
|
|
|
17
17
|
import json
|
|
18
|
+
import os
|
|
18
19
|
import re
|
|
19
20
|
import secrets
|
|
20
21
|
import time
|
|
22
|
+
from datetime import datetime, timedelta
|
|
23
|
+
from pathlib import Path
|
|
21
24
|
|
|
22
25
|
|
|
23
26
|
def _get_db():
|
|
@@ -894,6 +897,27 @@ def handle_cortex_decide(
|
|
|
894
897
|
task_id=task_id,
|
|
895
898
|
)
|
|
896
899
|
|
|
900
|
+
# Auto-create outcome when none exists, so cortex decisions
|
|
901
|
+
# get verified by outcome-checker and close the feedback loop.
|
|
902
|
+
if resolved_outcome_id is None and clean_goal and task_id:
|
|
903
|
+
try:
|
|
904
|
+
from db import create_outcome
|
|
905
|
+
|
|
906
|
+
_deadline = (datetime.now() + timedelta(days=7)).strftime("%Y-%m-%d")
|
|
907
|
+
_outcome = create_outcome(
|
|
908
|
+
action_type="cortex_decision",
|
|
909
|
+
description=f"Cortex decision: {clean_goal[:120]}",
|
|
910
|
+
expected_result=f"Recommended '{scored[0]['name']}' succeeds",
|
|
911
|
+
metric_source="decision_outcome",
|
|
912
|
+
action_id=task_id,
|
|
913
|
+
session_id=session_id,
|
|
914
|
+
deadline=_deadline,
|
|
915
|
+
)
|
|
916
|
+
if isinstance(_outcome, dict) and _outcome.get("id"):
|
|
917
|
+
resolved_outcome_id = int(_outcome["id"])
|
|
918
|
+
except Exception:
|
|
919
|
+
pass # non-critical: decision still records without outcome
|
|
920
|
+
|
|
897
921
|
try:
|
|
898
922
|
from db import create_cortex_evaluation
|
|
899
923
|
|
|
@@ -1003,12 +1027,88 @@ def handle_cortex_override(evaluation_id: int, chosen: str, reason: str) -> str:
|
|
|
1003
1027
|
return json.dumps({"ok": True, "evaluation": updated}, ensure_ascii=False, indent=2)
|
|
1004
1028
|
|
|
1005
1029
|
|
|
1030
|
+
# v5.2.0: reader side of the Cortex quality cache. The `nexo-cortex-cycle`
# cron (src/scripts/nexo-cortex-cycle.py) refreshes a quality snapshot at
# $NEXO_HOME/operations/cortex-quality-latest.json every 6h. Before this
# release nothing consumed that file, so every `nexo_cortex_quality` call
# re-ran the SQL summary. The handler now tries the snapshot first for the
# 7d / 1d windows and silently falls back to the live computation on any
# failure.
_CORTEX_QUALITY_CACHE_PATH = Path(
    os.environ.get("NEXO_HOME", str(Path.home() / ".nexo")),
    "operations",
    "cortex-quality-latest.json",
)
# Cron period (6h) plus 30 minutes of slack, so a slightly late run still
# serves the cache: (6 * 60 + 30) minutes expressed in seconds.
_CORTEX_QUALITY_CACHE_MAX_AGE_SECONDS = (6 * 60 + 30) * 60
# Requested window length in days -> key of that window inside the snapshot.
_CORTEX_QUALITY_CACHE_WINDOWS = {span: f"window_{span}d" for span in (1, 7)}
# Snapshot layout version this reader understands.
_CORTEX_QUALITY_CACHE_SCHEMA = 1
|
|
1046
|
+
|
|
1047
|
+
|
|
1048
|
+
def _load_cortex_quality_cache(days: int) -> dict | None:
|
|
1049
|
+
"""Return cached summary dict for the requested window, or None if unusable.
|
|
1050
|
+
|
|
1051
|
+
Silent on any failure so the live path always wins on a corrupt cache.
|
|
1052
|
+
Respects the snapshot schema written by `_persist_quality_snapshot`
|
|
1053
|
+
in src/scripts/nexo-cortex-cycle.py — do NOT change the layout here
|
|
1054
|
+
without updating the writer in the same release.
|
|
1055
|
+
"""
|
|
1056
|
+
window_key = _CORTEX_QUALITY_CACHE_WINDOWS.get(days)
|
|
1057
|
+
if window_key is None:
|
|
1058
|
+
return None
|
|
1059
|
+
try:
|
|
1060
|
+
if not _CORTEX_QUALITY_CACHE_PATH.is_file():
|
|
1061
|
+
return None
|
|
1062
|
+
payload = json.loads(
|
|
1063
|
+
_CORTEX_QUALITY_CACHE_PATH.read_text(encoding="utf-8")
|
|
1064
|
+
)
|
|
1065
|
+
except Exception:
|
|
1066
|
+
return None
|
|
1067
|
+
if not isinstance(payload, dict):
|
|
1068
|
+
return None
|
|
1069
|
+
if payload.get("schema") != _CORTEX_QUALITY_CACHE_SCHEMA:
|
|
1070
|
+
return None
|
|
1071
|
+
captured_at = payload.get("captured_at") or ""
|
|
1072
|
+
if not isinstance(captured_at, str):
|
|
1073
|
+
return None
|
|
1074
|
+
try:
|
|
1075
|
+
captured = datetime.fromisoformat(captured_at)
|
|
1076
|
+
except Exception:
|
|
1077
|
+
return None
|
|
1078
|
+
age = time.time() - captured.timestamp()
|
|
1079
|
+
if age < 0 or age > _CORTEX_QUALITY_CACHE_MAX_AGE_SECONDS:
|
|
1080
|
+
return None
|
|
1081
|
+
window = payload.get(window_key)
|
|
1082
|
+
if not isinstance(window, dict):
|
|
1083
|
+
return None
|
|
1084
|
+
return window
|
|
1085
|
+
|
|
1086
|
+
|
|
1006
1087
|
def handle_cortex_quality(days: int = 30) -> str:
    """Summarise recommendation quality, overrides, and linked outcome results.

    v5.2.0: the snapshot written by `nexo-cortex-cycle` is served when
    `_load_cortex_quality_cache` accepts it for the requested window
    (7 or 1 days, fresh, matching schema). Otherwise the summary is
    computed live through `cortex_evaluation_summary`.

    Returns a JSON string with `ok`, `summary` and `source`, where
    `source` is `"cache"` or `"live"` so the path taken is observable
    from the outside.
    """
    from db import cortex_evaluation_summary

    # Try the snapshot first; None means it was unusable for this window.
    summary = _load_cortex_quality_cache(days)
    source = "cache"
    if summary is None:
        summary = cortex_evaluation_summary(days=days)
        source = "live"

    response = {"ok": True, "summary": summary, "source": source}
    return json.dumps(response, ensure_ascii=False, indent=2)
|
|
1012
1112
|
|
|
1013
1113
|
|
|
1014
1114
|
TOOLS = [
|
package/src/plugins/protocol.py
CHANGED
|
@@ -64,6 +64,74 @@ HIGH_STAKES_KEYWORDS = {
|
|
|
64
64
|
"revenue",
|
|
65
65
|
"cost",
|
|
66
66
|
}
|
|
67
|
+
# v5.2.0: Spanish high-stakes keywords. Parity with the English set so a
# goal written in Spanish ("migrar producción a nuevo servidor") trips
# the same high-stakes gate as its English twin. Accented and unaccented
# variants are both listed because user prompts mix both freely; this
# includes the ñ-less spelling "contrasena", which keyboards without ñ
# produce.
HIGH_STAKES_KEYWORDS_ES = {
    "crítico",
    "critico",
    "crítica",
    "critica",
    "producción",
    "produccion",
    "cliente",
    "clientes",
    "despliegue",
    "desplegar",
    "pago",
    "pagos",
    "facturación",
    "facturacion",
    "factura",
    "credencial",
    "credenciales",
    "contraseña",
    "contrasena",
    "seguridad",
    "legal",
    "médico",
    "medico",
    "financiero",
    "financiera",
    "privacidad",
    "marca",
    "reputación",
    "reputacion",
    "ingresos",
    "borrar",
    "eliminar",
    "migración",
    "migracion",
    "migrar",
    "lanzamiento",
    "lanzar",
    "precio",
    "precios",
    "reembolso",
    "público",
    "publico",
    "riesgo",
    "riesgos",
    "coste",
    "costes",
    "ventas",
    "pedido",
    "pedidos",
}
|
|
121
|
+
# v5.2.0: disclaimers that SUPPRESS the high-stakes flag. A message such as
# "sin afectar producción" or "no tocar prod" would otherwise be flagged
# merely because the keyword appears in it. The list is bilingual and kept
# deliberately narrow. Every pattern is compiled case-insensitively.
NEGATION_PATTERNS = tuple(
    re.compile(expression, re.IGNORECASE)
    for expression in (
        # Spanish
        r"\bno\s+tocar\s+prod(?:ucci[oó]n|uccion)?\b",
        r"\bsin\s+(?:tocar|afectar|romper|modificar)\b",
        r"\bnunca\s+(?:borrar|eliminar|tocar)\b",
        r"\bno\s+(?:borrar|eliminar|tocar|modificar)\b",
        r"\bevitar\s+(?:borrar|eliminar|tocar|romper)\b",
        # English
        r"\bavoid\s+(?:deleting|touching|breaking|modifying)\b",
        r"\bdon'?t\s+(?:touch|break|modify|delete)\b",
        r"\bwithout\s+(?:touching|breaking|affecting)\b",
    )
)
|
|
67
135
|
|
|
68
136
|
|
|
69
137
|
def _parse_list(value) -> list[str]:
|
|
@@ -104,9 +172,32 @@ def _parse_int_list(value) -> list[int]:
|
|
|
104
172
|
return parsed
|
|
105
173
|
|
|
106
174
|
|
|
175
|
+
def _has_negation_context(text: str) -> bool:
    """Tell whether *text* contains an explicit "leave it alone" disclaimer.

    A match on any entry of `NEGATION_PATTERNS` counts. Callers use this
    to tell a statement about the *boundary* of safe work ("without
    touching production") from one naming the *target* of a risky action
    ("migrate production"). Empty or missing text never matches.
    """
    if text:
        for disclaimer in NEGATION_PATTERNS:
            if disclaimer.search(text) is not None:
                return True
    return False
|
|
185
|
+
|
|
186
|
+
|
|
107
187
|
# Clause separators that bound how far a disclaimer reaches: punctuation,
# newlines, and a period only when it ends a sentence, so "config.py" or
# "v5.2" do not split a clause.
_CLAUSE_BOUNDARY = re.compile(r"[,;:!?\n]+|\.(?=\s|$)")


def _strip_negated_clauses(text: str) -> str:
    """Return *text* with the disclaimed tail of each clause removed.

    Inside every clause, everything from the earliest `NEGATION_PATTERNS`
    match to the end of that clause is dropped; clauses without a match
    are kept whole. The surviving pieces are joined with single spaces.
    """
    survivors = []
    for clause in _CLAUSE_BOUNDARY.split(text):
        starts = [
            found.start()
            for pattern in NEGATION_PATTERNS
            if (found := pattern.search(clause)) is not None
        ]
        survivors.append(clause[: min(starts)] if starts else clause)
    return " ".join(survivors)


def _detect_high_stakes(*parts: str) -> bool:
    """Return True when any high-stakes keyword names a target of the task.

    Each non-empty part is stripped and lower-cased. Keywords come from
    `HIGH_STAKES_KEYWORDS` and `HIGH_STAKES_KEYWORDS_ES` and are matched
    as plain substrings.

    Negation handling: a disclaimer ("sin afectar producción", "don't
    touch prod") only neutralises keywords that follow it inside the same
    clause of the same part. Keywords elsewhere still count, so
    "migrar pagos sin afectar producción" stays high-stakes while
    "refactor helpers without touching production" does not. Text with no
    disclaimer at all is evaluated exactly as a plain keyword scan over
    the space-joined parts.
    """
    cleaned = [part.strip().lower() for part in parts if part]
    combined = " ".join(cleaned)
    if not combined:
        return False

    keywords = HIGH_STAKES_KEYWORDS | HIGH_STAKES_KEYWORDS_ES

    # Fast path: no disclaimer anywhere, plain keyword scan.
    if not any(pattern.search(combined) for pattern in NEGATION_PATTERNS):
        return any(keyword in combined for keyword in keywords)

    # A disclaimer is present. Scrub each part separately so a disclaimer
    # in one part can never swallow the action target named in another.
    remainder = " ".join(_strip_negated_clauses(part) for part in cleaned)
    return any(keyword in remainder for keyword in keywords)
|
|
110
201
|
|
|
111
202
|
|
|
112
203
|
def _decision_support_required(*, task_type: str, high_stakes: bool) -> bool:
|
|
@@ -124,6 +215,8 @@ def evaluate_response_confidence(
|
|
|
124
215
|
unknowns=None,
|
|
125
216
|
verification_step: str = "",
|
|
126
217
|
stakes: str = "",
|
|
218
|
+
pre_action_context_hits: int = 0,
|
|
219
|
+
area_has_atlas_entry: bool = False,
|
|
127
220
|
) -> dict:
|
|
128
221
|
evidence_refs = _parse_list(evidence_refs)
|
|
129
222
|
unknowns = _parse_list(unknowns)
|
|
@@ -152,6 +245,22 @@ def evaluate_response_confidence(
|
|
|
152
245
|
score -= 20
|
|
153
246
|
reasons.append("high-stakes context detected")
|
|
154
247
|
|
|
248
|
+
# v5.2.0: Positive signals. Before this release the score was purely
|
|
249
|
+
# a penalty accumulator — there was no way to reward tasks that had
|
|
250
|
+
# meaningful prior context loaded or that sat inside a known area.
|
|
251
|
+
# Cap at +10 and +5 so these can never override a real risk signal.
|
|
252
|
+
if pre_action_context_hits > 0:
|
|
253
|
+
boost = min(10, pre_action_context_hits * 2)
|
|
254
|
+
score += boost
|
|
255
|
+
reasons.append(
|
|
256
|
+
f"+{boost} from {pre_action_context_hits} pre-action context hit(s)"
|
|
257
|
+
)
|
|
258
|
+
if area_has_atlas_entry:
|
|
259
|
+
score += 5
|
|
260
|
+
reasons.append("+5 from known project-atlas area")
|
|
261
|
+
|
|
262
|
+
final_score = max(0, min(100, score))
|
|
263
|
+
|
|
155
264
|
mode = "answer"
|
|
156
265
|
if task_type in RESPONSE_TASKS:
|
|
157
266
|
if high_stakes and (unknowns or not evidence_refs):
|
|
@@ -161,6 +270,23 @@ def evaluate_response_confidence(
|
|
|
161
270
|
elif high_stakes or not evidence_refs or not verification_step.strip():
|
|
162
271
|
mode = "verify"
|
|
163
272
|
|
|
273
|
+
# v5.2.0: Numeric safeguard. The boolean decision tree above
|
|
274
|
+
# covers every obvious case, but tasks can accumulate soft
|
|
275
|
+
# penalties without tripping any single rule. When the final
|
|
276
|
+
# score is critically low, downgrade the mode by one step.
|
|
277
|
+
# This catches edge cases and is monotonic — it can only make
|
|
278
|
+
# the response discipline stricter, never looser.
|
|
279
|
+
if mode == "answer" and final_score < 50:
|
|
280
|
+
mode = "verify"
|
|
281
|
+
reasons.append(
|
|
282
|
+
f"numeric safeguard: score {final_score} < 50 forces verify"
|
|
283
|
+
)
|
|
284
|
+
elif mode == "verify" and final_score < 30 and high_stakes:
|
|
285
|
+
mode = "defer"
|
|
286
|
+
reasons.append(
|
|
287
|
+
f"numeric safeguard: high-stakes with score {final_score} forces defer"
|
|
288
|
+
)
|
|
289
|
+
|
|
164
290
|
next_action = {
|
|
165
291
|
"answer": "You may answer directly, but stay within the evidence you actually have.",
|
|
166
292
|
"verify": "Verify the claim with concrete evidence before answering.",
|
|
@@ -170,7 +296,7 @@ def evaluate_response_confidence(
|
|
|
170
296
|
|
|
171
297
|
return {
|
|
172
298
|
"mode": mode,
|
|
173
|
-
"confidence":
|
|
299
|
+
"confidence": final_score,
|
|
174
300
|
"high_stakes": high_stakes,
|
|
175
301
|
"reasons": reasons,
|
|
176
302
|
"next_action": next_action,
|
|
@@ -855,7 +855,8 @@ def _parse_any_datetime(value) -> datetime | None:
|
|
|
855
855
|
except Exception:
|
|
856
856
|
continue
|
|
857
857
|
try:
|
|
858
|
-
|
|
858
|
+
dt = datetime.fromisoformat(raw.replace("Z", "+00:00"))
|
|
859
|
+
return dt.replace(tzinfo=None)
|
|
859
860
|
except Exception:
|
|
860
861
|
return None
|
|
861
862
|
|