nexo-brain 7.27.3 → 7.27.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +1 -1
- package/README.md +3 -1
- package/bin/windows-wsl-bridge.js +9 -0
- package/package.json +1 -1
- package/src/classifier_local.py +44 -0
- package/src/db/__init__.py +8 -0
- package/src/db/_commitments.py +344 -0
- package/src/db/_memory_v2.py +52 -2
- package/src/db/_schema.py +37 -0
- package/src/desktop_bridge.py +1 -1
- package/src/doctor/providers/runtime.py +9 -3
- package/src/enforcement_engine.py +128 -2
- package/src/hook_guardrails.py +104 -0
- package/src/local_context/api.py +54 -22
- package/src/plugins/protocol.py +96 -0
- package/src/pre_answer_router.py +298 -6
- package/src/r14_correction_learning.py +3 -3
- package/src/requirements.txt +5 -1
- package/src/runtime_versioning.py +11 -1
- package/src/saved_not_used_audit.py +44 -3
- package/src/scripts/nexo-followup-runner.py +194 -0
- package/src/semantic_reasoner.py +2 -2
- package/src/semantic_router.py +58 -11
- package/src/server.py +37 -1
|
@@ -14,6 +14,7 @@ import queue
|
|
|
14
14
|
import subprocess
|
|
15
15
|
import threading
|
|
16
16
|
import time
|
|
17
|
+
import hashlib
|
|
17
18
|
from pathlib import Path
|
|
18
19
|
import re
|
|
19
20
|
import paths
|
|
@@ -408,10 +409,10 @@ class HeadlessEnforcer:
|
|
|
408
409
|
self._guardian_mode_cache: dict[str, str] = {}
|
|
409
410
|
# R14 state — opened on a detected correction, counts down by one each
|
|
410
411
|
# tool call. When it reaches zero without a nexo_learning_add we
|
|
411
|
-
# enqueue the R14 reminder
|
|
412
|
-
# plan doc 1; make it overridable via the env for field tuning.
|
|
412
|
+
# enqueue the R14 reminder and persist a correction-learning debt.
|
|
413
413
|
self._r14_window_remaining = 0
|
|
414
414
|
self._r14_correction_seen_for_turn = False
|
|
415
|
+
self._r14_correction_text = ""
|
|
415
416
|
# R25 — last user message is inspected for an explicit permit token
|
|
416
417
|
# ("force OK", "si borra", etc). Populated by on_user_message.
|
|
417
418
|
self._r25_last_user_text = ""
|
|
@@ -420,6 +421,7 @@ class HeadlessEnforcer:
|
|
|
420
421
|
self._r17_window_remaining = 0
|
|
421
422
|
self._r17_promise_seen_for_turn = False
|
|
422
423
|
self._r17_first_tool_call_in_window = True
|
|
424
|
+
self._r17_commitment_ids: list[str] = []
|
|
423
425
|
# R24 stale-memory state — incremented externally via notify_
|
|
424
426
|
# stale_memory_cited (e.g. from R07 when age_days >= threshold).
|
|
425
427
|
# Counts down on each tool call; fires when it reaches zero
|
|
@@ -706,6 +708,7 @@ class HeadlessEnforcer:
|
|
|
706
708
|
return
|
|
707
709
|
self._r14_window_remaining = _R14_WINDOW
|
|
708
710
|
self._r14_correction_seen_for_turn = True
|
|
711
|
+
self._r14_correction_text = text or ""
|
|
709
712
|
_logger.info("[R14 %s] correction detected; window opened for %d tool calls",
|
|
710
713
|
mode.upper(), self._r14_window_remaining)
|
|
711
714
|
# v7.7 Gap 7.2 — wire on_event so the map's
|
|
@@ -856,6 +859,7 @@ class HeadlessEnforcer:
|
|
|
856
859
|
_logger.info("[R14] satisfied by learning_add; closing window")
|
|
857
860
|
self._r14_window_remaining = 0
|
|
858
861
|
self._r14_correction_seen_for_turn = False
|
|
862
|
+
self._r14_correction_text = ""
|
|
859
863
|
return
|
|
860
864
|
self._r14_window_remaining -= 1
|
|
861
865
|
if self._r14_window_remaining > 0:
|
|
@@ -867,7 +871,35 @@ class HeadlessEnforcer:
|
|
|
867
871
|
else:
|
|
868
872
|
self._enqueue(_R14_PROMPT, "r14:correction-window-exhausted", rule_id="R14_correction_learning")
|
|
869
873
|
_logger.info("[R14 %s] enqueued correction reminder", mode.upper())
|
|
874
|
+
if self._session_id:
|
|
875
|
+
try:
|
|
876
|
+
from db import create_protocol_debt, list_protocol_debts, record_session_correction_requirement # type: ignore
|
|
877
|
+
|
|
878
|
+
record_session_correction_requirement(
|
|
879
|
+
self._session_id,
|
|
880
|
+
self._r14_correction_text,
|
|
881
|
+
source="r14_window_exhausted",
|
|
882
|
+
)
|
|
883
|
+
existing = list_protocol_debts(
|
|
884
|
+
status="open",
|
|
885
|
+
session_id=self._session_id,
|
|
886
|
+
debt_type="missing_learning_after_correction",
|
|
887
|
+
limit=1,
|
|
888
|
+
)
|
|
889
|
+
if not existing:
|
|
890
|
+
create_protocol_debt(
|
|
891
|
+
self._session_id,
|
|
892
|
+
"missing_learning_after_correction",
|
|
893
|
+
severity="error",
|
|
894
|
+
evidence=(
|
|
895
|
+
"R14 detected a user correction and the 2-tool-call "
|
|
896
|
+
"learning window expired without nexo_learning_add."
|
|
897
|
+
),
|
|
898
|
+
)
|
|
899
|
+
except Exception:
|
|
900
|
+
pass
|
|
870
901
|
self._r14_correction_seen_for_turn = False
|
|
902
|
+
self._r14_correction_text = ""
|
|
871
903
|
|
|
872
904
|
def on_assistant_text(self, text: str, *, declared_detector=None, has_open_task=None):
|
|
873
905
|
"""R16 — scan assistant message for done-claim with open protocol_task.
|
|
@@ -1018,13 +1050,106 @@ class HeadlessEnforcer:
|
|
|
1018
1050
|
self._r17_window_remaining = _R17_WINDOW
|
|
1019
1051
|
self._r17_promise_seen_for_turn = True
|
|
1020
1052
|
self._r17_first_tool_call_in_window = True
|
|
1053
|
+
self._record_r17_commitment(text or "")
|
|
1021
1054
|
_logger.info("[R17 %s] promise detected; window open %d", mode.upper(), _R17_WINDOW)
|
|
1022
1055
|
|
|
1056
|
+
def _record_r17_commitment(self, text: str) -> None:
|
|
1057
|
+
statement = (text or "").strip()
|
|
1058
|
+
if not statement:
|
|
1059
|
+
return
|
|
1060
|
+
try:
|
|
1061
|
+
from db import create_commitment, record_memory_event
|
|
1062
|
+
except Exception:
|
|
1063
|
+
return
|
|
1064
|
+
source_id = hashlib.sha1(
|
|
1065
|
+
f"{self._session_id or ''}|{statement[:800]}".encode("utf-8", errors="ignore"),
|
|
1066
|
+
usedforsecurity=False,
|
|
1067
|
+
).hexdigest()[:24]
|
|
1068
|
+
memory_event_uid = ""
|
|
1069
|
+
try:
|
|
1070
|
+
event = record_memory_event(
|
|
1071
|
+
event_type="assistant_promise_detected",
|
|
1072
|
+
source_type="commitment",
|
|
1073
|
+
source_id=source_id,
|
|
1074
|
+
session_id=self._session_id or "",
|
|
1075
|
+
actor=self._session_id or "nexo",
|
|
1076
|
+
metadata={"statement": statement[:800], "rule_id": "R17_promise_debt"},
|
|
1077
|
+
raw_ref=f"commitment:{source_id}",
|
|
1078
|
+
confidence=0.72,
|
|
1079
|
+
idempotency_key=f"r17-commitment:{source_id}",
|
|
1080
|
+
)
|
|
1081
|
+
memory_event_uid = str(event.get("event_uid") or "") if isinstance(event, dict) else ""
|
|
1082
|
+
except Exception as exc: # noqa: BLE001
|
|
1083
|
+
_logger.debug("R17 commitment memory event skipped: %s", exc)
|
|
1084
|
+
try:
|
|
1085
|
+
result = create_commitment(
|
|
1086
|
+
statement=statement,
|
|
1087
|
+
source_type="assistant_text",
|
|
1088
|
+
source_id=source_id,
|
|
1089
|
+
memory_event_uid=memory_event_uid,
|
|
1090
|
+
session_id=self._session_id or "",
|
|
1091
|
+
owner="agent",
|
|
1092
|
+
status="active",
|
|
1093
|
+
confidence=0.72,
|
|
1094
|
+
evidence_ref=f"memory_event:{memory_event_uid}" if memory_event_uid else "",
|
|
1095
|
+
metadata={"rule_id": "R17_promise_debt"},
|
|
1096
|
+
)
|
|
1097
|
+
commitment_id = str(result.get("id") or "")
|
|
1098
|
+
if commitment_id and commitment_id not in self._r17_commitment_ids:
|
|
1099
|
+
self._r17_commitment_ids.append(commitment_id)
|
|
1100
|
+
except Exception as exc: # noqa: BLE001
|
|
1101
|
+
_logger.debug("R17 commitment create skipped: %s", exc)
|
|
1102
|
+
|
|
1103
|
+
def _mark_r17_commitments_in_progress(self, tool_name: str) -> None:
|
|
1104
|
+
if not self._r17_commitment_ids:
|
|
1105
|
+
return
|
|
1106
|
+
try:
|
|
1107
|
+
from db import update_commitment_status
|
|
1108
|
+
except Exception:
|
|
1109
|
+
return
|
|
1110
|
+
for commitment_id in list(self._r17_commitment_ids)[-5:]:
|
|
1111
|
+
try:
|
|
1112
|
+
update_commitment_status(
|
|
1113
|
+
commitment_id,
|
|
1114
|
+
status="in_progress",
|
|
1115
|
+
evidence_ref=f"tool:{tool_name}",
|
|
1116
|
+
metadata={"last_tool_seen": tool_name},
|
|
1117
|
+
)
|
|
1118
|
+
except Exception as exc: # noqa: BLE001
|
|
1119
|
+
_logger.debug("R17 commitment progress update skipped: %s", exc)
|
|
1120
|
+
|
|
1121
|
+
def _resolve_r17_commitments_from_task_close(self, tool_input) -> None:
|
|
1122
|
+
payload = tool_input if isinstance(tool_input, dict) else {}
|
|
1123
|
+
sid = str(payload.get("sid") or self._session_id or "")
|
|
1124
|
+
task_id = str(payload.get("task_id") or "")
|
|
1125
|
+
evidence_text = " ".join(
|
|
1126
|
+
str(payload.get(field) or "")
|
|
1127
|
+
for field in ("evidence", "summary", "change_summary", "outcome_notes", "result", "verification")
|
|
1128
|
+
).strip()
|
|
1129
|
+
if not sid or not evidence_text:
|
|
1130
|
+
return
|
|
1131
|
+
try:
|
|
1132
|
+
from db import resolve_matching_commitments
|
|
1133
|
+
except Exception:
|
|
1134
|
+
return
|
|
1135
|
+
try:
|
|
1136
|
+
resolve_matching_commitments(
|
|
1137
|
+
session_id=sid,
|
|
1138
|
+
evidence_text=evidence_text,
|
|
1139
|
+
action_ref_type="protocol_task" if task_id else "",
|
|
1140
|
+
action_ref_id=task_id,
|
|
1141
|
+
evidence_ref=f"protocol_task:{task_id}" if task_id else "nexo_task_close",
|
|
1142
|
+
status="fulfilled",
|
|
1143
|
+
)
|
|
1144
|
+
except Exception as exc: # noqa: BLE001
|
|
1145
|
+
_logger.debug("R17 commitment resolution skipped: %s", exc)
|
|
1146
|
+
|
|
1023
1147
|
def _advance_r17_window(self, tool_name: str):
|
|
1024
1148
|
if not self._r17_promise_seen_for_turn:
|
|
1025
1149
|
return
|
|
1026
1150
|
if self._r17_first_tool_call_in_window:
|
|
1027
1151
|
self._r17_first_tool_call_in_window = False
|
|
1152
|
+
self._mark_r17_commitments_in_progress(tool_name)
|
|
1028
1153
|
return
|
|
1029
1154
|
self._r17_window_remaining -= 1
|
|
1030
1155
|
if self._r17_window_remaining > 0:
|
|
@@ -2160,6 +2285,7 @@ class HeadlessEnforcer:
|
|
|
2160
2285
|
if name == "nexo_task_close":
|
|
2161
2286
|
self.reset_task_cycle("nexo_task_open")
|
|
2162
2287
|
self._start_post_close_cooldown()
|
|
2288
|
+
self._resolve_r17_commitments_from_task_close(tool_input)
|
|
2163
2289
|
|
|
2164
2290
|
# v7.7 Gap 1 — autonomous detector for multi_step_task_detected.
|
|
2165
2291
|
# The event was dispatched by the map but nothing ever raised it.
|
package/src/hook_guardrails.py
CHANGED
|
@@ -19,6 +19,11 @@ from plugins.guard import _load_conditioned_learnings, _normalize_path_token
|
|
|
19
19
|
from protocol_settings import get_protocol_strictness
|
|
20
20
|
from product_mode import core_writes_allowed, is_protected_runtime_core_path
|
|
21
21
|
|
|
22
|
+
try:
|
|
23
|
+
from guardrails.minimal_delta import evaluate as _minimal_delta_evaluate
|
|
24
|
+
except Exception: # pragma: no cover - guardrail must never break the hook import
|
|
25
|
+
_minimal_delta_evaluate = None
|
|
26
|
+
|
|
22
27
|
READ_LIKE_TOOLS = {"Read"}
|
|
23
28
|
WRITE_LIKE_TOOLS = {"Edit", "MultiEdit", "Write"}
|
|
24
29
|
DELETE_LIKE_TOOLS = {"Delete"}
|
|
@@ -639,6 +644,72 @@ def _extract_touched_files(tool_input) -> list[str]:
|
|
|
639
644
|
return unique
|
|
640
645
|
|
|
641
646
|
|
|
647
|
+
def _minimal_delta_prompt_text(payload: dict, tool_input: dict) -> str:
|
|
648
|
+
parts: list[str] = []
|
|
649
|
+
for key in ("prompt", "user_prompt", "context_hint", "user_text"):
|
|
650
|
+
value = payload.get(key)
|
|
651
|
+
if isinstance(value, str) and value.strip():
|
|
652
|
+
parts.append(value.strip())
|
|
653
|
+
for key in ("prompt", "user_prompt", "context_hint", "user_text"):
|
|
654
|
+
value = tool_input.get(key)
|
|
655
|
+
if isinstance(value, str) and value.strip():
|
|
656
|
+
parts.append(value.strip())
|
|
657
|
+
tail = payload.get("transcript_tail")
|
|
658
|
+
if isinstance(tail, list):
|
|
659
|
+
for item in tail[-6:]:
|
|
660
|
+
if isinstance(item, str) and item.strip():
|
|
661
|
+
parts.append(item.strip())
|
|
662
|
+
elif isinstance(item, dict):
|
|
663
|
+
text = item.get("text") or item.get("content")
|
|
664
|
+
if isinstance(text, str) and text.strip():
|
|
665
|
+
parts.append(text.strip())
|
|
666
|
+
return "\n".join(parts)
|
|
667
|
+
|
|
668
|
+
|
|
669
|
+
def _read_existing_text(filepath: str) -> str:
|
|
670
|
+
try:
|
|
671
|
+
return Path(filepath).read_text(encoding="utf-8", errors="replace")
|
|
672
|
+
except OSError:
|
|
673
|
+
return ""
|
|
674
|
+
|
|
675
|
+
|
|
676
|
+
def _collect_minimal_delta_warning_or_block(payload: dict, *, tool_name: str, files: list[str]) -> dict | None:
    """Run the minimal-delta evaluator for a write-like tool call.

    Args:
        payload: Hook payload; its ``tool_input`` dict describes the edit.
        tool_name: Tool being invoked; only Edit, MultiEdit and Write are
            evaluated.
        files: Touched files, used as a fallback target when ``tool_input``
            names no path.

    Returns:
        The evaluator's decision dict when its ``decision`` is ``"warn"`` or
        ``"block"``. ``None`` in every other case: evaluator unavailable,
        other tool, no prompt text, no target path, no effective change, an
        evaluator exception, or a result that is not a dict.
    """
    if _minimal_delta_evaluate is None or tool_name not in {"Edit", "MultiEdit", "Write"}:
        return None
    tool_input = payload.get("tool_input")
    if not isinstance(tool_input, dict):
        return None
    prompt_text = _minimal_delta_prompt_text(payload, tool_input)
    if not prompt_text:
        return None
    target_path = str(tool_input.get("file_path") or tool_input.get("path") or (files[0] if files else "")).strip()
    if not target_path:
        return None

    if tool_name == "Write":
        # A Write replaces the whole file, so compare against what is on disk.
        old_text = _read_existing_text(target_path)
        new_text = str(tool_input.get("content") or "")
    elif tool_name == "MultiEdit":
        edits = tool_input.get("edits")
        if not isinstance(edits, list):
            return None
        # Collapse all edits into one before/after pair for the evaluator.
        old_text = "\n".join(str(edit.get("old_string") or "") for edit in edits if isinstance(edit, dict))
        new_text = "\n".join(str(edit.get("new_string") or "") for edit in edits if isinstance(edit, dict))
    else:
        old_text = str(tool_input.get("old_string") or "")
        new_text = str(tool_input.get("new_string") or "")
    if not new_text or old_text == new_text:
        return None

    try:
        decision = _minimal_delta_evaluate(prompt_text, target_path, old_text, new_text)
    except Exception:
        return None
    # The evaluator is an optional plugin: a non-dict result must not raise
    # here, otherwise the guardrail itself would break the hook.
    if not isinstance(decision, dict):
        return None
    if decision.get("decision") not in ("warn", "block"):
        return None
    return decision
|
|
711
|
+
|
|
712
|
+
|
|
642
713
|
def _extract_bash_command(tool_input) -> str:
|
|
643
714
|
if not isinstance(tool_input, dict):
|
|
644
715
|
return ""
|
|
@@ -1575,6 +1646,39 @@ def process_pre_tool_event(payload: dict) -> dict:
|
|
|
1575
1646
|
sid = _resolve_nexo_sid(conn, claude_sid)
|
|
1576
1647
|
open_task = _find_any_open_task(conn, sid) if sid else None
|
|
1577
1648
|
warnings: list[dict] = []
|
|
1649
|
+
minimal_delta = _collect_minimal_delta_warning_or_block(
|
|
1650
|
+
payload,
|
|
1651
|
+
tool_name=tool_name,
|
|
1652
|
+
files=files,
|
|
1653
|
+
)
|
|
1654
|
+
if minimal_delta and minimal_delta.get("decision") == "block":
|
|
1655
|
+
return {
|
|
1656
|
+
"ok": True,
|
|
1657
|
+
"session_id": sid,
|
|
1658
|
+
"tool_name": tool_name,
|
|
1659
|
+
"operation": op,
|
|
1660
|
+
"strictness": strictness,
|
|
1661
|
+
"blocks": [
|
|
1662
|
+
{
|
|
1663
|
+
"file": minimal_delta.get("target_path", ""),
|
|
1664
|
+
"reason_code": "minimal_delta_scope_creep",
|
|
1665
|
+
"severity": "error",
|
|
1666
|
+
"debt_type": "minimal_delta_scope_creep",
|
|
1667
|
+
"minimal_delta": minimal_delta,
|
|
1668
|
+
}
|
|
1669
|
+
],
|
|
1670
|
+
"warnings": warnings,
|
|
1671
|
+
"status": "blocked",
|
|
1672
|
+
}
|
|
1673
|
+
if minimal_delta and minimal_delta.get("decision") == "warn":
|
|
1674
|
+
warnings.append(
|
|
1675
|
+
{
|
|
1676
|
+
"file": minimal_delta.get("target_path", ""),
|
|
1677
|
+
"reason_code": "minimal_delta_soft_envelope",
|
|
1678
|
+
"severity": "warn",
|
|
1679
|
+
"minimal_delta": minimal_delta,
|
|
1680
|
+
}
|
|
1681
|
+
)
|
|
1578
1682
|
legacy_memory_blocks = _collect_legacy_memory_write_blocks(
|
|
1579
1683
|
conn,
|
|
1580
1684
|
sid=sid,
|
package/src/local_context/api.py
CHANGED
|
@@ -42,6 +42,7 @@ DEFAULT_ROUTER_MAX_CHARS = int(os.environ.get("NEXO_LOCAL_CONTEXT_ROUTER_MAX_CHA
|
|
|
42
42
|
DEFAULT_MAX_JOB_ATTEMPTS = int(os.environ.get("NEXO_LOCAL_INDEX_MAX_JOB_ATTEMPTS", "3") or "3")
|
|
43
43
|
DEFAULT_SQLITE_BUSY_RETRY_ATTEMPTS = int(os.environ.get("NEXO_LOCAL_CONTEXT_BUSY_RETRY_ATTEMPTS", "5") or "5")
|
|
44
44
|
DEFAULT_SQLITE_BUSY_RETRY_DELAY_SECONDS = float(os.environ.get("NEXO_LOCAL_CONTEXT_BUSY_RETRY_DELAY_SECONDS", "0.35") or "0.35")
|
|
45
|
+
DEFAULT_HYGIENE_QUICK_SCAN_LIMIT = int(os.environ.get("NEXO_LOCAL_INDEX_HYGIENE_QUICK_SCAN_LIMIT", "5000") or "5000")
|
|
45
46
|
INITIAL_INDEX_COMPLETE_KEY = "initial_index_complete"
|
|
46
47
|
INITIAL_INDEX_STARTED_AT_KEY = "initial_index_started_at"
|
|
47
48
|
PERFORMANCE_PROFILE_KEY = "performance_profile"
|
|
@@ -1329,8 +1330,20 @@ def _purge_asset_ids(conn, asset_ids: list[str]) -> dict:
|
|
|
1329
1330
|
return counts
|
|
1330
1331
|
|
|
1331
1332
|
|
|
1332
|
-
def
|
|
1333
|
-
|
|
1333
|
+
def _bounded_fetchall(conn, sql: str, params: tuple[Any, ...] = (), *, max_rows: int | None = None) -> tuple[list[Any], bool]:
|
|
1334
|
+
if max_rows is None or max_rows <= 0:
|
|
1335
|
+
return conn.execute(sql, params).fetchall(), False
|
|
1336
|
+
rows = conn.execute(f"{sql} LIMIT ?", (*params, max_rows + 1)).fetchall()
|
|
1337
|
+
truncated = len(rows) > max_rows
|
|
1338
|
+
return rows[:max_rows], truncated
|
|
1339
|
+
|
|
1340
|
+
|
|
1341
|
+
def _privacy_unsafe_asset_ids(conn, *, max_rows: int | None = None) -> tuple[list[str], bool]:
|
|
1342
|
+
rows, truncated = _bounded_fetchall(
|
|
1343
|
+
conn,
|
|
1344
|
+
"SELECT asset_id, path, privacy_class FROM local_assets",
|
|
1345
|
+
max_rows=max_rows,
|
|
1346
|
+
)
|
|
1334
1347
|
override_prefixes = _active_user_override_prefixes_conn(conn)
|
|
1335
1348
|
unsafe: list[str] = []
|
|
1336
1349
|
for row in rows:
|
|
@@ -1340,30 +1353,40 @@ def _privacy_unsafe_asset_ids(conn) -> list[str]:
|
|
|
1340
1353
|
continue
|
|
1341
1354
|
if should_skip_file(path) or privacy_class in {"private_profile_blocked", "system_blocked", "sensitive_inventory_only"}:
|
|
1342
1355
|
unsafe.append(str(row["asset_id"]))
|
|
1343
|
-
return unsafe
|
|
1356
|
+
return unsafe, truncated
|
|
1344
1357
|
|
|
1345
1358
|
|
|
1346
|
-
def _privacy_unsafe_dir_ids(conn) -> list[str]:
|
|
1347
|
-
rows
|
|
1359
|
+
def _privacy_unsafe_dir_ids(conn, *, max_rows: int | None = None) -> tuple[list[str], bool]:
    """List indexed directory ids whose paths should no longer be indexed.

    A directory is reported when ``should_skip_tree`` rejects its path and
    the path is not under any active user override prefix.

    Args:
        conn: Database connection to read ``local_index_dirs`` from.
        max_rows: Optional cap on the number of rows scanned.

    Returns:
        ``(dir_ids, truncated)`` where ``truncated`` is the flag reported by
        the bounded scan.
    """
    rows, truncated = _bounded_fetchall(
        conn,
        "SELECT dir_id, path FROM local_index_dirs",
        max_rows=max_rows,
    )
    override_prefixes = _active_user_override_prefixes_conn(conn)
    flagged: list[str] = []
    for row in rows:
        dir_path = str(row["path"] or "")
        if not should_skip_tree(dir_path):
            continue
        if _path_under_any_prefix(dir_path, override_prefixes):
            # An explicit user override keeps this directory indexed.
            continue
        flagged.append(str(row["dir_id"]))
    return flagged, truncated
|
|
1354
1372
|
|
|
1355
1373
|
|
|
1356
|
-
def _content_secret_asset_ids(conn) -> list[str]:
|
|
1357
|
-
|
|
1358
|
-
"""
|
|
1374
|
+
def _content_secret_asset_ids(conn, *, max_rows: int | None = None) -> tuple[list[str], bool]:
|
|
1375
|
+
sql = """
|
|
1359
1376
|
SELECT c.asset_id, c.text
|
|
1360
1377
|
FROM local_chunks c
|
|
1361
1378
|
JOIN local_assets a ON a.asset_id=c.asset_id
|
|
1362
1379
|
WHERE a.status='active'
|
|
1363
1380
|
AND COALESCE(a.privacy_class, 'normal')='normal'
|
|
1364
|
-
|
|
1365
|
-
|
|
1366
|
-
|
|
1381
|
+
"""
|
|
1382
|
+
params: tuple[Any, ...] = ()
|
|
1383
|
+
if max_rows is None or max_rows <= 0:
|
|
1384
|
+
rows = conn.execute(sql + " ORDER BY c.asset_id, c.chunk_index", params).fetchall()
|
|
1385
|
+
truncated = False
|
|
1386
|
+
else:
|
|
1387
|
+
rows = conn.execute(sql + " LIMIT ?", (max_rows + 1,)).fetchall()
|
|
1388
|
+
truncated = len(rows) > max_rows
|
|
1389
|
+
rows = rows[:max_rows]
|
|
1367
1390
|
secret_ids: set[str] = set()
|
|
1368
1391
|
for row in rows:
|
|
1369
1392
|
asset_id = str(row["asset_id"])
|
|
@@ -1371,7 +1394,7 @@ def _content_secret_asset_ids(conn) -> list[str]:
|
|
|
1371
1394
|
continue
|
|
1372
1395
|
if contains_secret(str(row["text"] or "")):
|
|
1373
1396
|
secret_ids.add(asset_id)
|
|
1374
|
-
return sorted(secret_ids)
|
|
1397
|
+
return sorted(secret_ids), truncated
|
|
1375
1398
|
|
|
1376
1399
|
|
|
1377
1400
|
def _mark_content_secret_assets(conn, asset_ids: list[str]) -> int:
|
|
@@ -1419,12 +1442,21 @@ def _mark_content_secret_assets(conn, asset_ids: list[str]) -> int:
|
|
|
1419
1442
|
return len(unique_ids)
|
|
1420
1443
|
|
|
1421
1444
|
|
|
1422
|
-
def local_index_privacy_hygiene(*, fix: bool = False) -> dict:
|
|
1445
|
+
def local_index_privacy_hygiene(*, fix: bool = False, quick: bool = False) -> dict:
|
|
1423
1446
|
conn = _conn()
|
|
1424
|
-
|
|
1425
|
-
|
|
1426
|
-
|
|
1427
|
-
|
|
1447
|
+
max_rows = None if fix or not quick else DEFAULT_HYGIENE_QUICK_SCAN_LIMIT
|
|
1448
|
+
asset_ids, assets_truncated = _privacy_unsafe_asset_ids(conn, max_rows=max_rows)
|
|
1449
|
+
dir_ids, dirs_truncated = _privacy_unsafe_dir_ids(conn, max_rows=max_rows)
|
|
1450
|
+
content_secret_ids, chunks_truncated = _content_secret_asset_ids(conn, max_rows=max_rows)
|
|
1451
|
+
truncated = bool(assets_truncated or dirs_truncated or chunks_truncated)
|
|
1452
|
+
residue = {
|
|
1453
|
+
"assets": len(asset_ids),
|
|
1454
|
+
"dirs": len(dir_ids),
|
|
1455
|
+
"content_secret_assets": len(content_secret_ids),
|
|
1456
|
+
"truncated": truncated,
|
|
1457
|
+
"quick": bool(quick and not fix),
|
|
1458
|
+
"scan_limit": int(max_rows or 0),
|
|
1459
|
+
}
|
|
1428
1460
|
cleanup = {"assets": 0, "jobs": 0, "errors": 0, "chunks": 0, "embeddings": 0, "entities": 0, "relations": 0, "versions": 0, "dirs": 0, "content_secret_assets": 0}
|
|
1429
1461
|
if fix:
|
|
1430
1462
|
cleanup.update(_purge_asset_ids(conn, asset_ids))
|
|
@@ -1437,10 +1469,10 @@ def local_index_privacy_hygiene(*, fix: bool = False) -> dict:
|
|
|
1437
1469
|
conn.commit()
|
|
1438
1470
|
if asset_ids or dir_ids or content_secret_ids:
|
|
1439
1471
|
log_event("warn", "privacy_hygiene_repaired", "Local memory privacy hygiene repaired", cleanup=cleanup)
|
|
1440
|
-
return {"ok": True, "fix": fix, "residue": residue, "cleanup": cleanup}
|
|
1472
|
+
return {"ok": True, "fix": fix, "quick": bool(quick and not fix), "truncated": truncated, "residue": residue, "cleanup": cleanup}
|
|
1441
1473
|
|
|
1442
1474
|
|
|
1443
|
-
def local_index_hygiene(*, fix: bool = False) -> dict:
|
|
1475
|
+
def local_index_hygiene(*, fix: bool = False, quick: bool = False) -> dict:
|
|
1444
1476
|
conn = _conn()
|
|
1445
1477
|
removed_paths: list[str] = []
|
|
1446
1478
|
for row in conn.execute("SELECT id, root_path, source, status FROM local_index_roots").fetchall():
|
|
@@ -1455,10 +1487,10 @@ def local_index_hygiene(*, fix: bool = False) -> dict:
|
|
|
1455
1487
|
if fix:
|
|
1456
1488
|
cleanup = _purge_removed_root_payloads(conn)
|
|
1457
1489
|
conn.commit()
|
|
1458
|
-
privacy = local_index_privacy_hygiene(fix=fix)
|
|
1490
|
+
privacy = local_index_privacy_hygiene(fix=fix, quick=quick and not fix)
|
|
1459
1491
|
if fix and (removed_paths or any(int(cleanup.get(key, 0) or 0) for key in ("assets", "jobs", "errors", "dirs", "checkpoints"))):
|
|
1460
1492
|
log_event("info", "index_hygiene_repaired", "Local memory index hygiene repaired", roots=[redact_path(path) for path in removed_paths], cleanup=cleanup)
|
|
1461
|
-
return {"ok": True, "fix": fix, "removed_roots": removed_paths, "residue": before, "cleanup": cleanup, "privacy": privacy}
|
|
1493
|
+
return {"ok": True, "fix": fix, "quick": bool(quick and not fix), "removed_roots": removed_paths, "residue": before, "cleanup": cleanup, "privacy": privacy}
|
|
1462
1494
|
|
|
1463
1495
|
|
|
1464
1496
|
def repair_index_hygiene() -> dict:
|
package/src/plugins/protocol.py
CHANGED
|
@@ -61,6 +61,13 @@ R03_TRIVIAL_EVIDENCE_PATTERN = re.compile(
|
|
|
61
61
|
r"terminado|arreglado|cerrado|solved|resuelto)\s*[\.!]*\s*$",
|
|
62
62
|
re.IGNORECASE,
|
|
63
63
|
)
|
|
64
|
+
P0_P1_FINDING_PATTERN = re.compile(
|
|
65
|
+
r"^\s*(?:#{1,6}\s+|[-*+]\s+|\d+[.)]\s+)?(?:\*\*)?"
|
|
66
|
+
r"(P[01])(?:\*\*)?\s*(?:[:\-–—\])\)]|\b)",
|
|
67
|
+
re.IGNORECASE,
|
|
68
|
+
)
|
|
69
|
+
FOLLOWUP_REF_PATTERN = re.compile(r"\bNF-[A-Z0-9][A-Z0-9-]*\b", re.IGNORECASE)
|
|
70
|
+
ANALYZE_ARTIFACT_SUFFIXES = {".md", ".markdown", ".txt"}
|
|
64
71
|
|
|
65
72
|
|
|
66
73
|
def _is_trivial_evidence(text: str) -> tuple[bool, str]:
|
|
@@ -85,6 +92,54 @@ def _is_trivial_evidence(text: str) -> tuple[bool, str]:
|
|
|
85
92
|
return False, ""
|
|
86
93
|
|
|
87
94
|
|
|
95
|
+
def _existing_analyze_artifact_paths(refs: list[str]) -> list[Path]:
|
|
96
|
+
paths_found: list[Path] = []
|
|
97
|
+
seen: set[str] = set()
|
|
98
|
+
for ref in refs:
|
|
99
|
+
clean = str(ref or "").strip()
|
|
100
|
+
if not clean or clean.lower().startswith("followup_id"):
|
|
101
|
+
continue
|
|
102
|
+
if ":" in clean and not clean.startswith("/"):
|
|
103
|
+
prefix, value = clean.split(":", 1)
|
|
104
|
+
if prefix.strip().lower() in {"file", "path", "artifact", "report"}:
|
|
105
|
+
clean = value.strip()
|
|
106
|
+
candidate = Path(os.path.expanduser(clean))
|
|
107
|
+
if not candidate.is_file() or candidate.suffix.lower() not in ANALYZE_ARTIFACT_SUFFIXES:
|
|
108
|
+
continue
|
|
109
|
+
resolved = str(candidate.resolve())
|
|
110
|
+
if resolved in seen:
|
|
111
|
+
continue
|
|
112
|
+
seen.add(resolved)
|
|
113
|
+
paths_found.append(candidate)
|
|
114
|
+
return paths_found
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def _count_p0_p1_findings(paths_found: list[Path]) -> tuple[int, list[dict]]:
|
|
118
|
+
total = 0
|
|
119
|
+
artifacts: list[dict] = []
|
|
120
|
+
for path in paths_found:
|
|
121
|
+
findings = 0
|
|
122
|
+
try:
|
|
123
|
+
with path.open("r", encoding="utf-8", errors="replace") as fh:
|
|
124
|
+
for line in fh:
|
|
125
|
+
if P0_P1_FINDING_PATTERN.search(line):
|
|
126
|
+
findings += 1
|
|
127
|
+
except OSError:
|
|
128
|
+
continue
|
|
129
|
+
if findings:
|
|
130
|
+
total += findings
|
|
131
|
+
artifacts.append({"path": str(path), "findings": findings})
|
|
132
|
+
return total, artifacts
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def _count_followup_refs(refs: list[str]) -> int:
|
|
136
|
+
seen: set[str] = set()
|
|
137
|
+
for ref in refs:
|
|
138
|
+
for match in FOLLOWUP_REF_PATTERN.findall(str(ref or "")):
|
|
139
|
+
seen.add(match.upper())
|
|
140
|
+
return len(seen)
|
|
141
|
+
|
|
142
|
+
|
|
88
143
|
def _external_real_world_text(task: dict, *parts: str) -> str:
|
|
89
144
|
fields = [
|
|
90
145
|
task.get("goal", ""),
|
|
@@ -1493,6 +1548,7 @@ def handle_task_close(
|
|
|
1493
1548
|
if extra_refs:
|
|
1494
1549
|
refs_line = "Evidence refs: " + ", ".join(extra_refs)
|
|
1495
1550
|
clean_evidence = f"{clean_evidence}\n{refs_line}".strip() if clean_evidence else refs_line
|
|
1551
|
+
all_evidence_refs = [*_parse_list(task.get("evidence_refs") or "[]"), *extra_refs]
|
|
1496
1552
|
files_changed_list = _parse_list(files_changed)
|
|
1497
1553
|
planned_files = _parse_list(task.get("files") or "[]")
|
|
1498
1554
|
effective_files = files_changed_list or planned_files
|
|
@@ -1508,6 +1564,46 @@ def handle_task_close(
|
|
|
1508
1564
|
high_stakes=bool(task.get("response_high_stakes")),
|
|
1509
1565
|
)
|
|
1510
1566
|
|
|
1567
|
+
if (task.get("task_type") or "").strip() == "analyze" and clean_outcome == "done":
|
|
1568
|
+
artifact_paths = _existing_analyze_artifact_paths(all_evidence_refs)
|
|
1569
|
+
finding_count, finding_artifacts = _count_p0_p1_findings(artifact_paths)
|
|
1570
|
+
followup_ref_count = _count_followup_refs(all_evidence_refs)
|
|
1571
|
+
if finding_count > followup_ref_count:
|
|
1572
|
+
missing = finding_count - followup_ref_count
|
|
1573
|
+
debt = _ensure_open_debt(
|
|
1574
|
+
task["session_id"],
|
|
1575
|
+
task_id,
|
|
1576
|
+
"analyze_p0_p1_followups_missing",
|
|
1577
|
+
severity="error",
|
|
1578
|
+
evidence=(
|
|
1579
|
+
f"Analyze task produced {finding_count} P0/P1 finding(s) in report artifact(s) "
|
|
1580
|
+
f"but evidence_refs only contained {followup_ref_count} followup id(s); "
|
|
1581
|
+
f"{missing} actionable finding(s) would be left without durable followup. "
|
|
1582
|
+
f"Artifacts: {json.dumps(finding_artifacts, ensure_ascii=False)}"
|
|
1583
|
+
),
|
|
1584
|
+
debts=debts_created,
|
|
1585
|
+
)
|
|
1586
|
+
return json.dumps(
|
|
1587
|
+
{
|
|
1588
|
+
"ok": False,
|
|
1589
|
+
"error": "Cannot close analyze task as 'done' while P0/P1 report findings lack followup refs.",
|
|
1590
|
+
"hint": (
|
|
1591
|
+
"Create one followup for each P0/P1 finding and pass those followup IDs in evidence_refs, "
|
|
1592
|
+
"then retry nexo_task_close."
|
|
1593
|
+
),
|
|
1594
|
+
"task_id": task_id,
|
|
1595
|
+
"blocked_by": "analyze_p0_p1_followup_gate",
|
|
1596
|
+
"debt_id": debt.get("id"),
|
|
1597
|
+
"debt_type": "analyze_p0_p1_followups_missing",
|
|
1598
|
+
"findings": finding_count,
|
|
1599
|
+
"followup_refs": followup_ref_count,
|
|
1600
|
+
"missing_followups": missing,
|
|
1601
|
+
"artifacts": finding_artifacts,
|
|
1602
|
+
},
|
|
1603
|
+
ensure_ascii=False,
|
|
1604
|
+
indent=2,
|
|
1605
|
+
)
|
|
1606
|
+
|
|
1511
1607
|
pending_corrections = list_session_correction_requirements(
|
|
1512
1608
|
session_id=task["session_id"],
|
|
1513
1609
|
status="open",
|