nexo-brain 5.1.0 → 5.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +1 -1
- package/README.md +8 -0
- package/package.json +1 -1
- package/src/doctor/providers/runtime.py +132 -0
- package/src/plugins/cortex.py +81 -2
- package/src/plugins/episodic_memory.py +13 -1
- package/src/plugins/protocol.py +128 -2
- package/src/scripts/nexo-daily-self-audit.py +81 -3
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "nexo-brain",
|
|
3
|
-
"version": "5.
|
|
3
|
+
"version": "5.2.0",
|
|
4
4
|
"description": "Local cognitive runtime for Claude Code \u2014 persistent memory, overnight learning, doctor diagnostics, personal scripts, recovery-aware jobs, startup preflight, and optional dashboard/power helper.",
|
|
5
5
|
"author": {
|
|
6
6
|
"name": "NEXO Brain",
|
package/README.md
CHANGED
|
@@ -87,6 +87,14 @@ Versions `3.1.7` through `3.2.0` close the recent-memory gap:
|
|
|
87
87
|
- when even that misses, NEXO now exposes raw transcript fallback tools for Claude Code and Codex session stores
|
|
88
88
|
- NEXO can now inspect itself through a live system catalog derived from canonical sources instead of relying only on stale docs or operator memory
|
|
89
89
|
|
|
90
|
+
Version `5.2.0` closes two focused gaps in the Cortex layer that were left open by the v5.1 audit — the high-stakes response-contract detector was English-only, and the `nexo-cortex-cycle` cron was writing a quality snapshot that no reader ever consumed:
|
|
91
|
+
|
|
92
|
+
- `HIGH_STAKES_KEYWORDS_ES` adds ~45 Spanish keywords to the high-stakes detector with accented and unaccented variants, so a goal written in Spanish (`migrar la base de datos de producción`) trips the same gate as its English twin.
|
|
93
|
+
- `NEGATION_PATTERNS` suppresses false positives when the user explicitly disclaims touching the sensitive area (`sin afectar producción`, `no tocar prod`, `without touching production`, `don't modify`). The raw keyword being present is no longer enough to flag the task.
|
|
94
|
+
- `evaluate_response_confidence` accepts two new optional kwargs, `pre_action_context_hits` (+up to 10) and `area_has_atlas_entry` (+5), so the score can finally reward tasks that loaded real context instead of only punishing unprepared ones. Both signals are capped and cannot override a real risk penalty.
|
|
95
|
+
- A monotonic numeric safeguard layers on top of the boolean decision tree: `answer` downgrades to `verify` when `final_score < 50`, and `verify` downgrades to `defer` when `high_stakes` and `final_score < 30`. The safeguard can only make response discipline stricter, never looser.
|
|
96
|
+
- `handle_cortex_quality` in `src/plugins/cortex.py` now reads `$NEXO_HOME/operations/cortex-quality-latest.json` when the requested window is 7 or 1 days, the snapshot is fresh (less than 6h 30m old), and its schema matches; on any failure it falls back silently to the live SQL computation. The handler's JSON response now includes `"source": "cache" | "live"` for observability.
|
|
97
|
+
|
|
90
98
|
Version `5.1.0` lands the full NEXO-AUDIT-2026-04-11 roadmap as a single minor bump — every open evolution / adaptive / cognitive / skills loop now closes under itself, the knowledge graph exports cleanly, OpenTelemetry spans can be turned on without a hard dependency, and every PR has to clear lint, security, coverage, and release-readiness gates before it can merge:
|
|
91
99
|
|
|
92
100
|
- Evolution cycle now auto-applies user-approved proposals on the next run (backed by the new idempotent migration `m38`), adaptive learned-weight rollbacks surface as visible followups, outcome patterns auto-promote to draft skills, and a Voyager-style detector exposes co-occurring skill pairs as composite-skill candidates via `nexo_skill_compose_candidates`.
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "nexo-brain",
|
|
3
|
-
"version": "5.
|
|
3
|
+
"version": "5.2.0",
|
|
4
4
|
"mcpName": "io.github.wazionapps/nexo",
|
|
5
5
|
"description": "NEXO Brain — Shared brain for AI agents. Persistent memory, semantic RAG, natural forgetting, metacognitive guard, trust scoring, 150+ MCP tools. Works with Claude Code, Codex, Claude Desktop & any MCP client. 100% local, free.",
|
|
6
6
|
"homepage": "https://nexo-brain.com",
|
|
@@ -2710,6 +2710,137 @@ def check_release_artifact_sync() -> DoctorCheck:
|
|
|
2710
2710
|
)
|
|
2711
2711
|
|
|
2712
2712
|
|
|
2713
|
+
def check_release_trace_hygiene() -> DoctorCheck:
    """Report audit-phase workflow runs and audit goals that were left open.

    Reads ``workflow_runs`` and ``workflow_goals`` from
    ``NEXO_HOME/data/nexo.db`` and collects two kinds of stale trace:

    * runs with ``workflow_kind = 'audit-phase'`` whose status is not
      completed/failed/cancelled, and
    * active goals whose id matches ``WG-AUDIT-%`` or whose title matches
      ``NEXO-AUDIT-%`` and that have no open run.

    A row counts as stale when its ``updated_at`` cannot be parsed or is at
    least six hours old. Timestamps without a timezone are treated as UTC.

    Returns:
        A ``DoctorCheck`` with id ``runtime.release_trace_hygiene``:
        healthy/info when the database or the workflow tables are absent or
        nothing is stale, degraded/warn when stale traces exist or when the
        audit itself raised an exception.
    """
    stale_after_hours = 6

    def _result(status, severity, summary, *, evidence=None, repair_plan=None, escalation_prompt=""):
        # Every outcome shares the same id and tier; only the verdict varies.
        return DoctorCheck(
            id="runtime.release_trace_hygiene",
            tier="runtime",
            status=status,
            severity=severity,
            summary=summary,
            evidence=list(evidence or []),
            repair_plan=list(repair_plan or []),
            escalation_prompt=escalation_prompt,
        )

    def _stale_samples(rows, id_key: str, label_key: str, now) -> list[str]:
        # One human-readable line per row that is unreadable or too old.
        samples: list[str] = []
        for row in rows:
            updated_at = _parse_timestamp(row["updated_at"] or "")
            if updated_at is None:
                samples.append(f"{row[id_key]}: unreadable updated_at")
                continue
            if updated_at.tzinfo is None:
                # Naive timestamps are assumed to be UTC so they can be
                # compared against the timezone-aware "now".
                updated_at = updated_at.replace(tzinfo=dt.timezone.utc)
            age_hours = (now - updated_at).total_seconds() / 3600
            if age_hours >= stale_after_hours:
                samples.append(
                    f"{row[id_key]}: {age_hours:.1f}h stale ({str(row[label_key] or '')[:72]})"
                )
        return samples

    db_path = NEXO_HOME / "data" / "nexo.db"
    if not db_path.is_file():
        return _result("healthy", "info", "Release trace hygiene unavailable (no DB)")

    try:
        conn = sqlite3.connect(str(db_path), timeout=2)
        conn.row_factory = sqlite3.Row
        try:
            tables = {
                row[0]
                for row in conn.execute(
                    "SELECT name FROM sqlite_master WHERE type='table' AND name IN ('workflow_goals', 'workflow_runs')"
                ).fetchall()
            }
            if "workflow_goals" not in tables or "workflow_runs" not in tables:
                return _result(
                    "healthy",
                    "info",
                    "Release trace hygiene unavailable (workflow tables absent)",
                )

            now = dt.datetime.now(dt.timezone.utc)

            run_rows = conn.execute(
                """SELECT run_id, goal, updated_at
                   FROM workflow_runs
                   WHERE workflow_kind = 'audit-phase'
                     AND status NOT IN ('completed', 'failed', 'cancelled')
                   ORDER BY updated_at DESC"""
            ).fetchall()
            stale_run_samples = _stale_samples(run_rows, "run_id", "goal", now)

            goal_rows = conn.execute(
                """SELECT g.goal_id, g.title, g.updated_at,
                          COALESCE((SELECT COUNT(*) FROM workflow_runs r WHERE r.goal_id = g.goal_id
                                    AND r.status NOT IN ('completed', 'failed', 'cancelled')), 0) AS open_run_count
                   FROM workflow_goals g
                   WHERE g.status = 'active'
                     AND (g.goal_id LIKE 'WG-AUDIT-%' OR g.title LIKE 'NEXO-AUDIT-%')
                   ORDER BY g.updated_at DESC"""
            ).fetchall()
            # A goal that still has an open run is being worked on, so only
            # goals without open runs are candidates for staleness.
            idle_goal_rows = [
                row for row in goal_rows if int(row["open_run_count"] or 0) <= 0
            ]
            stale_goal_samples = _stale_samples(idle_goal_rows, "goal_id", "title", now)
        finally:
            conn.close()
    except Exception as exc:
        # Best-effort diagnostic: any failure is surfaced as a degraded check
        # rather than propagated to the doctor runner.
        return _result(
            "degraded",
            "warn",
            "Release trace hygiene check failed",
            evidence=[str(exc)],
            repair_plan=["Inspect workflow_goals/workflow_runs state manually"],
            escalation_prompt="Release traces could not be audited, so stale audit artifacts may be hiding in the runtime.",
        )

    # Counts first, then at most three samples of each kind.
    evidence = [
        f"stale audit workflows: {len(stale_run_samples)}",
        f"stale audit goals: {len(stale_goal_samples)}",
    ]
    evidence.extend(stale_run_samples[:3])
    evidence.extend(stale_goal_samples[:3])

    if stale_run_samples or stale_goal_samples:
        return _result(
            "degraded",
            "warn",
            "Release trace hygiene needs cleanup",
            evidence=evidence,
            repair_plan=[
                "Close or complete stale audit-phase workflows and active audit goals",
                "Keep workflow/goal state aligned with the real shipped state after releases",
            ],
            escalation_prompt="Audit/release traces drifted away from reality, which makes shipping state look ambiguous.",
        )
    return _result("healthy", "info", "Release trace hygiene OK", evidence=evidence)
|
|
2842
|
+
|
|
2843
|
+
|
|
2713
2844
|
def check_state_watchers() -> DoctorCheck:
|
|
2714
2845
|
db_path = NEXO_HOME / "data" / "nexo.db"
|
|
2715
2846
|
summary_path = NEXO_HOME / "operations" / "state-watchers-status.json"
|
|
@@ -2988,6 +3119,7 @@ def run_runtime_checks(fix: bool = False) -> list[DoctorCheck]:
|
|
|
2988
3119
|
safe_check(check_automation_telemetry),
|
|
2989
3120
|
safe_check(check_state_watchers),
|
|
2990
3121
|
safe_check(check_release_artifact_sync),
|
|
3122
|
+
safe_check(check_release_trace_hygiene),
|
|
2991
3123
|
safe_check(check_launchagent_inventory),
|
|
2992
3124
|
safe_check(check_launchagent_integrity, fix=fix),
|
|
2993
3125
|
safe_check(check_personal_script_registry, fix=fix),
|
package/src/plugins/cortex.py
CHANGED
|
@@ -15,9 +15,12 @@ v0.1: Single MCP tool + middleware validation.
|
|
|
15
15
|
"""
|
|
16
16
|
|
|
17
17
|
import json
|
|
18
|
+
import os
|
|
18
19
|
import re
|
|
19
20
|
import secrets
|
|
20
21
|
import time
|
|
22
|
+
from datetime import datetime
|
|
23
|
+
from pathlib import Path
|
|
21
24
|
|
|
22
25
|
|
|
23
26
|
def _get_db():
|
|
@@ -1003,12 +1006,88 @@ def handle_cortex_override(evaluation_id: int, chosen: str, reason: str) -> str:
|
|
|
1003
1006
|
return json.dumps({"ok": True, "evaluation": updated}, ensure_ascii=False, indent=2)
|
|
1004
1007
|
|
|
1005
1008
|
|
|
1009
|
+
# v5.2.0: Cortex quality cache reader. The `nexo-cortex-cycle` cron
|
|
1010
|
+
# (src/scripts/nexo-cortex-cycle.py) writes a fresh quality snapshot to
|
|
1011
|
+
# $NEXO_HOME/operations/cortex-quality-latest.json every 6h. Until this
|
|
1012
|
+
# release the reader was missing — the snapshot was write-only and every
|
|
1013
|
+
# call to `nexo_cortex_quality` re-ran the SQL summary. Now the handler
|
|
1014
|
+
# reads the cache first for the 7d / 1d windows and falls back silently
|
|
1015
|
+
# to the live computation on any failure.
|
|
1016
|
+
_CORTEX_QUALITY_CACHE_PATH = (
|
|
1017
|
+
Path(os.environ.get("NEXO_HOME", str(Path.home() / ".nexo")))
|
|
1018
|
+
/ "operations"
|
|
1019
|
+
/ "cortex-quality-latest.json"
|
|
1020
|
+
)
|
|
1021
|
+
# 6h cron + 30 min slack so a slightly-late run still serves cache.
|
|
1022
|
+
_CORTEX_QUALITY_CACHE_MAX_AGE_SECONDS = 23400
|
|
1023
|
+
_CORTEX_QUALITY_CACHE_WINDOWS = {1: "window_1d", 7: "window_7d"}
|
|
1024
|
+
_CORTEX_QUALITY_CACHE_SCHEMA = 1
|
|
1025
|
+
|
|
1026
|
+
|
|
1027
|
+
def _load_cortex_quality_cache(days: int) -> dict | None:
|
|
1028
|
+
"""Return cached summary dict for the requested window, or None if unusable.
|
|
1029
|
+
|
|
1030
|
+
Silent on any failure so the live path always wins on a corrupt cache.
|
|
1031
|
+
Respects the snapshot schema written by `_persist_quality_snapshot`
|
|
1032
|
+
in src/scripts/nexo-cortex-cycle.py — do NOT change the layout here
|
|
1033
|
+
without updating the writer in the same release.
|
|
1034
|
+
"""
|
|
1035
|
+
window_key = _CORTEX_QUALITY_CACHE_WINDOWS.get(days)
|
|
1036
|
+
if window_key is None:
|
|
1037
|
+
return None
|
|
1038
|
+
try:
|
|
1039
|
+
if not _CORTEX_QUALITY_CACHE_PATH.is_file():
|
|
1040
|
+
return None
|
|
1041
|
+
payload = json.loads(
|
|
1042
|
+
_CORTEX_QUALITY_CACHE_PATH.read_text(encoding="utf-8")
|
|
1043
|
+
)
|
|
1044
|
+
except Exception:
|
|
1045
|
+
return None
|
|
1046
|
+
if not isinstance(payload, dict):
|
|
1047
|
+
return None
|
|
1048
|
+
if payload.get("schema") != _CORTEX_QUALITY_CACHE_SCHEMA:
|
|
1049
|
+
return None
|
|
1050
|
+
captured_at = payload.get("captured_at") or ""
|
|
1051
|
+
if not isinstance(captured_at, str):
|
|
1052
|
+
return None
|
|
1053
|
+
try:
|
|
1054
|
+
captured = datetime.fromisoformat(captured_at)
|
|
1055
|
+
except Exception:
|
|
1056
|
+
return None
|
|
1057
|
+
age = time.time() - captured.timestamp()
|
|
1058
|
+
if age < 0 or age > _CORTEX_QUALITY_CACHE_MAX_AGE_SECONDS:
|
|
1059
|
+
return None
|
|
1060
|
+
window = payload.get(window_key)
|
|
1061
|
+
if not isinstance(window, dict):
|
|
1062
|
+
return None
|
|
1063
|
+
return window
|
|
1064
|
+
|
|
1065
|
+
|
|
1006
1066
|
def handle_cortex_quality(days: int = 30) -> str:
    """Summarise recommendation quality, overrides, and linked outcome results.

    v5.2.0: the snapshot loaded by `_load_cortex_quality_cache` is served
    when it is usable for the requested window; otherwise the summary is
    computed live through `cortex_evaluation_summary`.

    Returns:
        A JSON string with the keys ``ok``, ``summary`` and ``source``, where
        ``source`` is ``"cache"`` or ``"live"`` depending on the path taken.
    """
    from db import cortex_evaluation_summary

    snapshot = _load_cortex_quality_cache(days)
    if snapshot is None:
        # No usable snapshot for this window: compute the summary from the DB.
        origin, body = "live", cortex_evaluation_summary(days=days)
    else:
        origin, body = "cache", snapshot
    response = {"ok": True, "summary": body, "source": origin}
    return json.dumps(response, ensure_ascii=False, indent=2)
|
|
1012
1091
|
|
|
1013
1092
|
|
|
1014
1093
|
TOOLS = [
|
|
@@ -229,8 +229,20 @@ def handle_session_diary_write(decisions: str, summary: str,
|
|
|
229
229
|
orphan_changes = conn.execute(
|
|
230
230
|
"SELECT COUNT(*) FROM change_log WHERE (commit_ref IS NULL OR commit_ref = '')"
|
|
231
231
|
).fetchone()[0]
|
|
232
|
+
recent_orphan_changes = conn.execute(
|
|
233
|
+
"""SELECT COUNT(*) FROM change_log
|
|
234
|
+
WHERE (commit_ref IS NULL OR commit_ref = '')
|
|
235
|
+
AND created_at >= datetime('now', '-7 days')"""
|
|
236
|
+
).fetchone()[0]
|
|
232
237
|
if orphan_changes > 0:
|
|
233
|
-
|
|
238
|
+
if recent_orphan_changes > 0 and recent_orphan_changes != orphan_changes:
|
|
239
|
+
warnings.append(
|
|
240
|
+
f"{recent_orphan_changes} changes recientes sin commit_ref ({orphan_changes} históricas total)"
|
|
241
|
+
)
|
|
242
|
+
elif recent_orphan_changes > 0:
|
|
243
|
+
warnings.append(f"{recent_orphan_changes} changes recientes sin commit_ref")
|
|
244
|
+
else:
|
|
245
|
+
warnings.append(f"{orphan_changes} changes históricas sin commit_ref")
|
|
234
246
|
orphan_decisions = conn.execute(
|
|
235
247
|
"SELECT COUNT(*) FROM decisions WHERE (outcome IS NULL OR outcome = '') AND created_at < datetime('now', '-7 days')"
|
|
236
248
|
).fetchone()[0]
|
package/src/plugins/protocol.py
CHANGED
|
@@ -64,6 +64,74 @@ HIGH_STAKES_KEYWORDS = {
|
|
|
64
64
|
"revenue",
|
|
65
65
|
"cost",
|
|
66
66
|
}
|
|
67
|
+
# v5.2.0: Spanish counterpart of the English high-stakes keyword set, so a
# goal written in Spanish ("migrar producción a nuevo servidor") trips the
# same gate. Accented and unaccented spellings are both listed because
# prompts mix them freely. Grouped by theme for readability only.
HIGH_STAKES_KEYWORDS_ES = {
    # severity, risk and compliance
    "crítico", "critico", "crítica", "critica",
    "riesgo", "riesgos",
    "seguridad", "legal", "privacidad",
    # environments, releases and migrations
    "producción", "produccion",
    "despliegue", "desplegar",
    "migración", "migracion", "migrar",
    "lanzamiento", "lanzar",
    # destructive verbs
    "borrar", "eliminar",
    # customers and public image
    "cliente", "clientes",
    "marca",
    "reputación", "reputacion",
    "público", "publico",
    # money
    "pago", "pagos",
    "facturación", "facturacion", "factura",
    "financiero", "financiera",
    "ingresos",
    "precio", "precios",
    "reembolso",
    "coste", "costes",
    "ventas",
    "pedido", "pedidos",
    # secrets
    "credencial", "credenciales", "contraseña",
    # health
    "médico", "medico",
}
# v5.2.0: Phrases that disclaim touching a sensitive area ("sin afectar
# producción", "no tocar prod"). They exist to SUPPRESS the high-stakes
# flag, which would otherwise fire just because the keyword is physically
# present. Bilingual and conservative on purpose.
NEGATION_PATTERNS = tuple(
    re.compile(expression, re.IGNORECASE)
    for expression in (
        r"\bno\s+tocar\s+prod(?:ucci[oó]n|uccion)?\b",
        r"\bsin\s+(?:tocar|afectar|romper|modificar)\b",
        r"\bnunca\s+(?:borrar|eliminar|tocar)\b",
        r"\bno\s+(?:borrar|eliminar|tocar|modificar)\b",
        r"\bevitar\s+(?:borrar|eliminar|tocar|romper)\b",
        r"\bavoid\s+(?:deleting|touching|breaking|modifying)\b",
        r"\bdon'?t\s+(?:touch|break|modify|delete)\b",
        r"\bwithout\s+(?:touching|breaking|affecting)\b",
    )
)
|
|
67
135
|
|
|
68
136
|
|
|
69
137
|
def _parse_list(value) -> list[str]:
|
|
@@ -104,9 +172,32 @@ def _parse_int_list(value) -> list[int]:
|
|
|
104
172
|
return parsed
|
|
105
173
|
|
|
106
174
|
|
|
175
|
+
def _has_negation_context(text: str) -> bool:
    """Tell whether the text contains an explicit "hands off" disclaimer.

    A match on any entry of `NEGATION_PATTERNS` means the user is describing
    the *boundary* of safe work ("without touching production") rather than
    the *target* of a risky action ("migrate production"). Empty text never
    matches.
    """
    if text:
        for negation in NEGATION_PATTERNS:
            if negation.search(text) is not None:
                return True
    return False
|
|
185
|
+
|
|
186
|
+
|
|
107
187
|
# A negation phrase shields at most this many following words (its object).
_NEGATION_SCOPE_MAX_WORDS = 4
# Words that open a new clause and therefore end the shielded object early.
_NEGATION_SCOPE_BREAKERS = frozenset({"and", "but", "then", "y", "e", "pero", "luego"})
# Punctuation that closes the clause a negation phrase belongs to.
_NEGATION_CLAUSE_END = ",.;:!?"
_NEGATION_WORD_RE = re.compile(r"\S+")


def _strip_negated_clauses(text: str) -> str:
    """Blank out each negation phrase together with the short object after it.

    For every `NEGATION_PATTERNS` match, the match itself plus up to
    `_NEGATION_SCOPE_MAX_WORDS` following words are replaced by spaces. The
    shielded span stops early at clause punctuation or at a clause-opening
    word, so a risky action stated in a later clause is left intact.
    """
    chars = list(text)
    for pattern in NEGATION_PATTERNS:
        for match in pattern.finditer(text):
            end = match.end()
            taken = 0
            for word in _NEGATION_WORD_RE.finditer(text, end):
                token = word.group()
                if taken >= _NEGATION_SCOPE_MAX_WORDS:
                    break
                if token.strip(_NEGATION_CLAUSE_END) in _NEGATION_SCOPE_BREAKERS:
                    break
                end = word.end()
                taken += 1
                if token[-1] in _NEGATION_CLAUSE_END:
                    break
            # Same-length replacement keeps the offsets of other matches valid.
            chars[match.start():end] = " " * (end - match.start())
    return "".join(chars)


def _detect_high_stakes(*parts: str) -> bool:
    """Return True when the combined text names a high-stakes target.

    The parts are stripped, lower-cased and joined, then scanned for any
    English or Spanish high-stakes keyword (substring match).

    Negation handling: a disclaimer such as "sin afectar producción" or
    "don't touch prod" shields only its own clause. Keywords outside the
    disclaimed clause still raise the flag, so "borrar la tabla de clientes
    sin romper nada" stays high-stakes while "sin afectar producción" alone
    does not. Compared with suppressing on any negation anywhere, this can
    only flag more inputs, never fewer.

    Args:
        *parts: Free-text fragments (goal, stakes, ...); falsy parts are
            ignored.

    Returns:
        False for empty input or when every keyword sits inside a disclaimed
        clause; True otherwise when at least one keyword is present.
    """
    combined = " ".join((part or "").strip().lower() for part in parts if part)
    if not combined:
        return False
    if _has_negation_context(combined):
        combined = _strip_negated_clauses(combined)
    keywords = HIGH_STAKES_KEYWORDS | HIGH_STAKES_KEYWORDS_ES
    return any(keyword in combined for keyword in keywords)
|
|
110
201
|
|
|
111
202
|
|
|
112
203
|
def _decision_support_required(*, task_type: str, high_stakes: bool) -> bool:
|
|
@@ -124,6 +215,8 @@ def evaluate_response_confidence(
|
|
|
124
215
|
unknowns=None,
|
|
125
216
|
verification_step: str = "",
|
|
126
217
|
stakes: str = "",
|
|
218
|
+
pre_action_context_hits: int = 0,
|
|
219
|
+
area_has_atlas_entry: bool = False,
|
|
127
220
|
) -> dict:
|
|
128
221
|
evidence_refs = _parse_list(evidence_refs)
|
|
129
222
|
unknowns = _parse_list(unknowns)
|
|
@@ -152,6 +245,22 @@ def evaluate_response_confidence(
|
|
|
152
245
|
score -= 20
|
|
153
246
|
reasons.append("high-stakes context detected")
|
|
154
247
|
|
|
248
|
+
# v5.2.0: Positive signals. Before this release the score was purely
|
|
249
|
+
# a penalty accumulator — there was no way to reward tasks that had
|
|
250
|
+
# meaningful prior context loaded or that sat inside a known area.
|
|
251
|
+
# Cap at +10 and +5 so these can never override a real risk signal.
|
|
252
|
+
if pre_action_context_hits > 0:
|
|
253
|
+
boost = min(10, pre_action_context_hits * 2)
|
|
254
|
+
score += boost
|
|
255
|
+
reasons.append(
|
|
256
|
+
f"+{boost} from {pre_action_context_hits} pre-action context hit(s)"
|
|
257
|
+
)
|
|
258
|
+
if area_has_atlas_entry:
|
|
259
|
+
score += 5
|
|
260
|
+
reasons.append("+5 from known project-atlas area")
|
|
261
|
+
|
|
262
|
+
final_score = max(0, min(100, score))
|
|
263
|
+
|
|
155
264
|
mode = "answer"
|
|
156
265
|
if task_type in RESPONSE_TASKS:
|
|
157
266
|
if high_stakes and (unknowns or not evidence_refs):
|
|
@@ -161,6 +270,23 @@ def evaluate_response_confidence(
|
|
|
161
270
|
elif high_stakes or not evidence_refs or not verification_step.strip():
|
|
162
271
|
mode = "verify"
|
|
163
272
|
|
|
273
|
+
# v5.2.0: Numeric safeguard. The boolean decision tree above
|
|
274
|
+
# covers every obvious case, but tasks can accumulate soft
|
|
275
|
+
# penalties without tripping any single rule. When the final
|
|
276
|
+
# score is critically low, downgrade the mode by one step.
|
|
277
|
+
# This catches edge cases and is monotonic — it can only make
|
|
278
|
+
# the response discipline stricter, never looser.
|
|
279
|
+
if mode == "answer" and final_score < 50:
|
|
280
|
+
mode = "verify"
|
|
281
|
+
reasons.append(
|
|
282
|
+
f"numeric safeguard: score {final_score} < 50 forces verify"
|
|
283
|
+
)
|
|
284
|
+
elif mode == "verify" and final_score < 30 and high_stakes:
|
|
285
|
+
mode = "defer"
|
|
286
|
+
reasons.append(
|
|
287
|
+
f"numeric safeguard: high-stakes with score {final_score} forces defer"
|
|
288
|
+
)
|
|
289
|
+
|
|
164
290
|
next_action = {
|
|
165
291
|
"answer": "You may answer directly, but stay within the evidence you actually have.",
|
|
166
292
|
"verify": "Verify the claim with concrete evidence before answering.",
|
|
@@ -170,7 +296,7 @@ def evaluate_response_confidence(
|
|
|
170
296
|
|
|
171
297
|
return {
|
|
172
298
|
"mode": mode,
|
|
173
|
-
"confidence":
|
|
299
|
+
"confidence": final_score,
|
|
174
300
|
"high_stakes": high_stakes,
|
|
175
301
|
"reasons": reasons,
|
|
176
302
|
"next_action": next_action,
|
|
@@ -78,6 +78,10 @@ CLAUDE_CLI = _resolve_claude_cli()
|
|
|
78
78
|
|
|
79
79
|
findings = []
|
|
80
80
|
|
|
81
|
+
# Placeholder next_action written by _upsert_workflow_goal_inline.
# _retire_stale_audit_goals_inline only retires goals whose next_action still
# equals this exact text.
AUDIT_GOAL_NEXT_ACTION = "Convert the recurring theme into an explicit workflow or close it as intentional noise."
# Owner recorded on goals upserted by the self-audit; the retire step skips
# goals that carry any other non-empty owner.
AUDIT_GOAL_OWNER = "system:self-audit"
# Default max_age_hours for _retire_stale_audit_goals_inline.
AUDIT_GOAL_STALE_HOURS = 36
|
|
84
|
+
|
|
81
85
|
|
|
82
86
|
def log(msg):
|
|
83
87
|
ts = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
|
@@ -492,7 +496,7 @@ def _upsert_workflow_goal_inline(conn: sqlite3.Connection, *, area: str, sample_
|
|
|
492
496
|
f"Recurring {area} theme detected by daily self-audit. "
|
|
493
497
|
f"The theme '{sample_goal}' appeared {count} times without a durable goal, learning, or resolved workflow."
|
|
494
498
|
)
|
|
495
|
-
next_action =
|
|
499
|
+
next_action = AUDIT_GOAL_NEXT_ACTION
|
|
496
500
|
success_signal = "The theme stops resurfacing in unresolved protocol tasks."
|
|
497
501
|
now_iso = datetime.now().isoformat(timespec="seconds")
|
|
498
502
|
if existing:
|
|
@@ -504,7 +508,7 @@ def _upsert_workflow_goal_inline(conn: sqlite3.Connection, *, area: str, sample_
|
|
|
504
508
|
if "priority" in columns:
|
|
505
509
|
updates["priority"] = "high"
|
|
506
510
|
if "owner" in columns:
|
|
507
|
-
updates["owner"] =
|
|
511
|
+
updates["owner"] = AUDIT_GOAL_OWNER
|
|
508
512
|
if "next_action" in columns:
|
|
509
513
|
updates["next_action"] = next_action
|
|
510
514
|
if "success_signal" in columns:
|
|
@@ -534,7 +538,7 @@ def _upsert_workflow_goal_inline(conn: sqlite3.Connection, *, area: str, sample_
|
|
|
534
538
|
if "priority" in columns:
|
|
535
539
|
values["priority"] = "high"
|
|
536
540
|
if "owner" in columns:
|
|
537
|
-
values["owner"] =
|
|
541
|
+
values["owner"] = AUDIT_GOAL_OWNER
|
|
538
542
|
if "next_action" in columns:
|
|
539
543
|
values["next_action"] = next_action
|
|
540
544
|
if "success_signal" in columns:
|
|
@@ -553,6 +557,75 @@ def _upsert_workflow_goal_inline(conn: sqlite3.Connection, *, area: str, sample_
|
|
|
553
557
|
return {"ok": True, "action": "created", "goal_id": goal_id}
|
|
554
558
|
|
|
555
559
|
|
|
560
|
+
def _retire_stale_audit_goals_inline(
    conn: sqlite3.Connection, *, max_age_hours: int = AUDIT_GOAL_STALE_HOURS
) -> dict:
    """Mark untouched self-audit placeholder goals as abandoned.

    A goal is retired only when all of the following hold:

    * it is active and its id matches ``WG-AUDIT-%``;
    * its ``next_action`` is still exactly `AUDIT_GOAL_NEXT_ACTION`;
    * its owner is empty or `AUDIT_GOAL_OWNER`;
    * it has no workflow run outside completed/failed/cancelled;
    * its ``updated_at`` (or ``opened_at`` as fallback) is at least
      ``max_age_hours`` old. Goals with no parsable timestamp are skipped.

    Args:
        conn: Open SQLite connection. Rows are read by column name, so the
            connection presumably uses ``sqlite3.Row`` — confirm at the caller.
        max_age_hours: Minimum age in hours before a placeholder is retired.

    Returns:
        ``{"ok": False, "reason": "workflow_goals_missing"}`` when the goals
        table is absent, otherwise ``{"ok": True, "retired": <count>}``.

    Note:
        No commit is issued here; NOTE(review): presumably the caller commits.
    """
    if not _table_exists(conn, "workflow_goals"):
        return {"ok": False, "reason": "workflow_goals_missing"}

    # Without a workflow_runs table no goal can have an open run.
    if _table_exists(conn, "workflow_runs"):
        open_runs_sql = (
            "COALESCE((SELECT COUNT(*) FROM workflow_runs r WHERE r.goal_id = g.goal_id"
            " AND r.status NOT IN ('completed', 'failed', 'cancelled')), 0)"
        )
    else:
        open_runs_sql = "0"
    # Only a fixed internal fragment is interpolated, never user input.
    rows = conn.execute(
        f"""SELECT g.goal_id, g.owner, g.next_action, g.opened_at, g.updated_at,
                   {open_runs_sql} AS open_run_count
            FROM workflow_goals g
            WHERE g.status = 'active'
              AND g.goal_id LIKE 'WG-AUDIT-%'
            ORDER BY g.updated_at DESC, g.opened_at DESC"""
    ).fetchall()
    if not rows:
        return {"ok": True, "retired": 0}

    now = datetime.now()
    now_iso = now.isoformat(timespec="seconds")
    retired = 0
    for row in rows:
        # A changed next_action means the placeholder was edited: leave it.
        if str(row["next_action"] or "").strip() != AUDIT_GOAL_NEXT_ACTION:
            continue
        owner = str(row["owner"] or "").strip()
        if owner and owner != AUDIT_GOAL_OWNER:
            continue
        if int(row["open_run_count"] or 0) > 0:
            continue
        updated_at = _parse_mixed_datetime(row["updated_at"]) or _parse_mixed_datetime(row["opened_at"])
        if not updated_at:
            continue
        if getattr(updated_at, "tzinfo", None) is not None:
            # "now" is naive local time; subtracting an aware datetime from it
            # raises TypeError, so aware values are converted to naive local.
            updated_at = updated_at.astimezone().replace(tzinfo=None)
        age_hours = (now - updated_at).total_seconds() / 3600
        if age_hours < max_age_hours:
            continue
        conn.execute(
            """UPDATE workflow_goals
               SET status = 'abandoned',
                   next_action = ?,
                   blocker_reason = ?,
                   updated_at = ?,
                   closed_at = ?
               WHERE goal_id = ?""",
            (
                "Ninguna. Placeholder stale retirado automáticamente; el self-audit lo recreará si el patrón reaparece.",
                f"Self-audit placeholder stale >{max_age_hours}h sin workflow runs abiertos.",
                now_iso,
                now_iso,
                row["goal_id"],
            ),
        )
        retired += 1
    return {"ok": True, "retired": retired}
|
|
627
|
+
|
|
628
|
+
|
|
556
629
|
def _queue_public_core_handoff(
|
|
557
630
|
conn: sqlite3.Connection,
|
|
558
631
|
*,
|
|
@@ -1174,6 +1247,11 @@ def check_unformalized_mentions():
|
|
|
1174
1247
|
conn.close()
|
|
1175
1248
|
return
|
|
1176
1249
|
|
|
1250
|
+
retired_result = _retire_stale_audit_goals_inline(conn)
|
|
1251
|
+
retired_count = int(retired_result.get("retired") or 0)
|
|
1252
|
+
if retired_count:
|
|
1253
|
+
finding("INFO", "formalization", f"retired {retired_count} stale self-audit workflow goals")
|
|
1254
|
+
|
|
1177
1255
|
rows = conn.execute(
|
|
1178
1256
|
"""SELECT goal, area, learning_id, followup_id
|
|
1179
1257
|
FROM protocol_tasks
|