nexo-brain 7.27.2 → 7.27.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/db/_schema.py CHANGED
@@ -1139,6 +1139,42 @@ def _m69_provider_runtime_metadata(conn):
1139
1139
  _migrate_add_index(conn, "idx_sessions_provider", "sessions", "session_provider")
1140
1140
 
1141
1141
 
1142
def _m70_commitments(conn):
    """Durable promise/commitment index linked to existing action artifacts.

    Creates the ``commitments`` table when it does not exist yet and then
    registers its lookup indexes through ``_migrate_add_index``.
    """
    conn.execute(
        """
        CREATE TABLE IF NOT EXISTS commitments (
            id TEXT PRIMARY KEY,
            created_at REAL NOT NULL,
            updated_at REAL NOT NULL,
            closed_at REAL DEFAULT NULL,
            source_type TEXT NOT NULL DEFAULT '',
            source_id TEXT DEFAULT '',
            memory_event_uid TEXT DEFAULT '',
            session_id TEXT DEFAULT '',
            conversation_id TEXT DEFAULT '',
            project_key TEXT DEFAULT '',
            statement TEXT NOT NULL,
            owner TEXT DEFAULT 'agent',
            deadline TEXT DEFAULT '',
            status TEXT DEFAULT 'active',
            confidence REAL DEFAULT 0.5,
            action_ref_type TEXT DEFAULT '',
            action_ref_id TEXT DEFAULT '',
            outcome_id INTEGER DEFAULT NULL,
            evidence_ref TEXT DEFAULT '',
            dedupe_key TEXT DEFAULT '',
            metadata_json TEXT DEFAULT '{}'
        )
        """
    )
    # (index name, indexed column list) — applied in this exact order.
    index_specs = (
        ("idx_commitments_status", "status, deadline, updated_at"),
        ("idx_commitments_session", "session_id, status, updated_at"),
        ("idx_commitments_source", "source_type, source_id"),
        ("idx_commitments_action", "action_ref_type, action_ref_id"),
        ("idx_commitments_dedupe", "dedupe_key"),
    )
    for index_name, columns in index_specs:
        _migrate_add_index(conn, index_name, "commitments", columns)
1176
+
1177
+
1142
1178
  def _m42_v6_0_1_hotfix(conn):
1143
1179
  """v6.0.1 hotfix — last_heartbeat_ts on sessions + hook_inbox_reminders.
1144
1180
 
@@ -2270,6 +2306,7 @@ MIGRATIONS = [
2270
2306
  (67, "diary_quality_backfill_repair", _m67_diary_quality_backfill_repair),
2271
2307
  (68, "memory_fabric_index", _m68_memory_fabric_index),
2272
2308
  (69, "provider_runtime_metadata", _m69_provider_runtime_metadata),
2309
+ (70, "commitments", _m70_commitments),
2273
2310
  ]
2274
2311
 
2275
2312
 
@@ -321,7 +321,7 @@ def _onboard_steps() -> list[dict]:
321
321
  return [
322
322
  {
323
323
  "id": "name",
324
- "prompt": {"es": "¿Cómo te llamamos?", "en": "What should we call you?"},
324
+ "prompt": {"es": "¿Cómo te llamas?", "en": "What's your name?"},
325
325
  "hint": {
326
326
  "es": "Tu nombre corto, el que usarás en el día a día.",
327
327
  "en": "Your short name, the one we'll use day to day.",
@@ -2228,6 +2228,11 @@ def check_client_bootstrap_parity(fix: bool = False) -> DoctorCheck:
2228
2228
  severity = "warn"
2229
2229
  evidence.append(f"codex config missing managed `mcp_servers.nexo` at {codex_config.get('path')}")
2230
2230
  repair_plan.append("Re-sync Codex so manual sessions keep the shared brain even if `codex mcp add` state drifts")
2231
+ elif codex_config.get("exists") and codex_config.get("has_initial_messages"):
2232
+ status = "degraded"
2233
+ severity = "warn"
2234
+ evidence.append(f"codex config still has legacy `initial_messages` at {codex_config.get('path')}")
2235
+ repair_plan.append("Run `nexo clients sync --client codex` so Codex uses the AGENTS.md bootstrap without legacy system-role startup messages")
2231
2236
  elif codex_config.get("exists"):
2232
2237
  evidence.append(
2233
2238
  "codex config bootstrap managed"
@@ -3837,12 +3842,16 @@ def check_local_index_hygiene(fix: bool = False) -> DoctorCheck:
3837
3842
  try:
3838
3843
  from local_context import api as local_context_api
3839
3844
 
3840
- result = local_context_api.local_index_hygiene(fix=fix)
3845
+ try:
3846
+ result = local_context_api.local_index_hygiene(fix=fix, quick=not fix)
3847
+ except TypeError:
3848
+ result = local_context_api.local_index_hygiene(fix=fix)
3841
3849
  residue = result.get("residue") or {}
3842
3850
  cleanup = result.get("cleanup") or {}
3843
3851
  privacy = result.get("privacy") or {}
3844
3852
  privacy_residue = privacy.get("residue") or {}
3845
3853
  privacy_cleanup = privacy.get("cleanup") or {}
3854
+ privacy_truncated = bool(privacy.get("truncated") or privacy_residue.get("truncated"))
3846
3855
  suspect_roots = [str(path) for path in result.get("removed_roots") or []]
3847
3856
  residue_total = sum(int(residue.get(key, 0) or 0) for key in ("assets", "jobs", "errors", "dirs", "checkpoints"))
3848
3857
  cleanup_total = sum(int(cleanup.get(key, 0) or 0) for key in ("assets", "jobs", "errors", "dirs", "checkpoints"))
@@ -3854,9 +3863,11 @@ def check_local_index_hygiene(fix: bool = False) -> DoctorCheck:
3854
3863
  "cleanup=" + json.dumps(cleanup, sort_keys=True),
3855
3864
  "privacy_residue=" + json.dumps(privacy_residue, sort_keys=True),
3856
3865
  "privacy_cleanup=" + json.dumps(privacy_cleanup, sort_keys=True),
3866
+ "quick_scan=" + str(bool(result.get("quick") or privacy.get("quick"))),
3867
+ "privacy_truncated=" + str(privacy_truncated),
3857
3868
  ]
3858
3869
  evidence.extend(f"root={path}" for path in suspect_roots[:5])
3859
- if residue_total == 0 and privacy_residue_total == 0 and not suspect_roots:
3870
+ if residue_total == 0 and privacy_residue_total == 0 and not suspect_roots and not privacy_truncated:
3860
3871
  return DoctorCheck(
3861
3872
  id="runtime.local_index_hygiene",
3862
3873
  tier="runtime",
@@ -3884,7 +3895,7 @@ def check_local_index_hygiene(fix: bool = False) -> DoctorCheck:
3884
3895
  severity="warn",
3885
3896
  summary="Local memory index has stale or private residue",
3886
3897
  evidence=evidence,
3887
- repair_plan=["Run `nexo doctor --tier runtime --fix` to purge stale local memory roots and private local-memory residue"],
3898
+ repair_plan=["Run `nexo doctor --tier runtime --fix` to purge stale local memory roots/private residue, or run a full local_index_hygiene scan outside release readiness"],
3888
3899
  escalation_prompt="Local memory may contain stale or private index payloads that should be purged before indexing continues.",
3889
3900
  )
3890
3901
  except Exception as exc:
@@ -14,6 +14,7 @@ import queue
14
14
  import subprocess
15
15
  import threading
16
16
  import time
17
+ import hashlib
17
18
  from pathlib import Path
18
19
  import re
19
20
  import paths
@@ -408,10 +409,10 @@ class HeadlessEnforcer:
408
409
  self._guardian_mode_cache: dict[str, str] = {}
409
410
  # R14 state — opened on a detected correction, counts down by one each
410
411
  # tool call. When it reaches zero without a nexo_learning_add we
411
- # enqueue the R14 reminder. The window guard is "3 tool calls" per
412
- # plan doc 1; make it overridable via the env for field tuning.
412
+ # enqueue the R14 reminder and persist a correction-learning debt.
413
413
  self._r14_window_remaining = 0
414
414
  self._r14_correction_seen_for_turn = False
415
+ self._r14_correction_text = ""
415
416
  # R25 — last user message is inspected for an explicit permit token
416
417
  # ("force OK", "si borra", etc). Populated by on_user_message.
417
418
  self._r25_last_user_text = ""
@@ -420,6 +421,7 @@ class HeadlessEnforcer:
420
421
  self._r17_window_remaining = 0
421
422
  self._r17_promise_seen_for_turn = False
422
423
  self._r17_first_tool_call_in_window = True
424
+ self._r17_commitment_ids: list[str] = []
423
425
  # R24 stale-memory state — incremented externally via notify_
424
426
  # stale_memory_cited (e.g. from R07 when age_days >= threshold).
425
427
  # Counts down on each tool call; fires when it reaches zero
@@ -706,6 +708,7 @@ class HeadlessEnforcer:
706
708
  return
707
709
  self._r14_window_remaining = _R14_WINDOW
708
710
  self._r14_correction_seen_for_turn = True
711
+ self._r14_correction_text = text or ""
709
712
  _logger.info("[R14 %s] correction detected; window opened for %d tool calls",
710
713
  mode.upper(), self._r14_window_remaining)
711
714
  # v7.7 Gap 7.2 — wire on_event so the map's
@@ -856,6 +859,7 @@ class HeadlessEnforcer:
856
859
  _logger.info("[R14] satisfied by learning_add; closing window")
857
860
  self._r14_window_remaining = 0
858
861
  self._r14_correction_seen_for_turn = False
862
+ self._r14_correction_text = ""
859
863
  return
860
864
  self._r14_window_remaining -= 1
861
865
  if self._r14_window_remaining > 0:
@@ -867,7 +871,35 @@ class HeadlessEnforcer:
867
871
  else:
868
872
  self._enqueue(_R14_PROMPT, "r14:correction-window-exhausted", rule_id="R14_correction_learning")
869
873
  _logger.info("[R14 %s] enqueued correction reminder", mode.upper())
874
+ if self._session_id:
875
+ try:
876
+ from db import create_protocol_debt, list_protocol_debts, record_session_correction_requirement # type: ignore
877
+
878
+ record_session_correction_requirement(
879
+ self._session_id,
880
+ self._r14_correction_text,
881
+ source="r14_window_exhausted",
882
+ )
883
+ existing = list_protocol_debts(
884
+ status="open",
885
+ session_id=self._session_id,
886
+ debt_type="missing_learning_after_correction",
887
+ limit=1,
888
+ )
889
+ if not existing:
890
+ create_protocol_debt(
891
+ self._session_id,
892
+ "missing_learning_after_correction",
893
+ severity="error",
894
+ evidence=(
895
+ "R14 detected a user correction and the 2-tool-call "
896
+ "learning window expired without nexo_learning_add."
897
+ ),
898
+ )
899
+ except Exception:
900
+ pass
870
901
  self._r14_correction_seen_for_turn = False
902
+ self._r14_correction_text = ""
871
903
 
872
904
  def on_assistant_text(self, text: str, *, declared_detector=None, has_open_task=None):
873
905
  """R16 — scan assistant message for done-claim with open protocol_task.
@@ -1018,13 +1050,106 @@ class HeadlessEnforcer:
1018
1050
  self._r17_window_remaining = _R17_WINDOW
1019
1051
  self._r17_promise_seen_for_turn = True
1020
1052
  self._r17_first_tool_call_in_window = True
1053
+ self._record_r17_commitment(text or "")
1021
1054
  _logger.info("[R17 %s] promise detected; window open %d", mode.upper(), _R17_WINDOW)
1022
1055
 
1056
+ def _record_r17_commitment(self, text: str) -> None:
1057
+ statement = (text or "").strip()
1058
+ if not statement:
1059
+ return
1060
+ try:
1061
+ from db import create_commitment, record_memory_event
1062
+ except Exception:
1063
+ return
1064
+ source_id = hashlib.sha1(
1065
+ f"{self._session_id or ''}|{statement[:800]}".encode("utf-8", errors="ignore"),
1066
+ usedforsecurity=False,
1067
+ ).hexdigest()[:24]
1068
+ memory_event_uid = ""
1069
+ try:
1070
+ event = record_memory_event(
1071
+ event_type="assistant_promise_detected",
1072
+ source_type="commitment",
1073
+ source_id=source_id,
1074
+ session_id=self._session_id or "",
1075
+ actor=self._session_id or "nexo",
1076
+ metadata={"statement": statement[:800], "rule_id": "R17_promise_debt"},
1077
+ raw_ref=f"commitment:{source_id}",
1078
+ confidence=0.72,
1079
+ idempotency_key=f"r17-commitment:{source_id}",
1080
+ )
1081
+ memory_event_uid = str(event.get("event_uid") or "") if isinstance(event, dict) else ""
1082
+ except Exception as exc: # noqa: BLE001
1083
+ _logger.debug("R17 commitment memory event skipped: %s", exc)
1084
+ try:
1085
+ result = create_commitment(
1086
+ statement=statement,
1087
+ source_type="assistant_text",
1088
+ source_id=source_id,
1089
+ memory_event_uid=memory_event_uid,
1090
+ session_id=self._session_id or "",
1091
+ owner="agent",
1092
+ status="active",
1093
+ confidence=0.72,
1094
+ evidence_ref=f"memory_event:{memory_event_uid}" if memory_event_uid else "",
1095
+ metadata={"rule_id": "R17_promise_debt"},
1096
+ )
1097
+ commitment_id = str(result.get("id") or "")
1098
+ if commitment_id and commitment_id not in self._r17_commitment_ids:
1099
+ self._r17_commitment_ids.append(commitment_id)
1100
+ except Exception as exc: # noqa: BLE001
1101
+ _logger.debug("R17 commitment create skipped: %s", exc)
1102
+
1103
+ def _mark_r17_commitments_in_progress(self, tool_name: str) -> None:
1104
+ if not self._r17_commitment_ids:
1105
+ return
1106
+ try:
1107
+ from db import update_commitment_status
1108
+ except Exception:
1109
+ return
1110
+ for commitment_id in list(self._r17_commitment_ids)[-5:]:
1111
+ try:
1112
+ update_commitment_status(
1113
+ commitment_id,
1114
+ status="in_progress",
1115
+ evidence_ref=f"tool:{tool_name}",
1116
+ metadata={"last_tool_seen": tool_name},
1117
+ )
1118
+ except Exception as exc: # noqa: BLE001
1119
+ _logger.debug("R17 commitment progress update skipped: %s", exc)
1120
+
1121
+ def _resolve_r17_commitments_from_task_close(self, tool_input) -> None:
1122
+ payload = tool_input if isinstance(tool_input, dict) else {}
1123
+ sid = str(payload.get("sid") or self._session_id or "")
1124
+ task_id = str(payload.get("task_id") or "")
1125
+ evidence_text = " ".join(
1126
+ str(payload.get(field) or "")
1127
+ for field in ("evidence", "summary", "change_summary", "outcome_notes", "result", "verification")
1128
+ ).strip()
1129
+ if not sid or not evidence_text:
1130
+ return
1131
+ try:
1132
+ from db import resolve_matching_commitments
1133
+ except Exception:
1134
+ return
1135
+ try:
1136
+ resolve_matching_commitments(
1137
+ session_id=sid,
1138
+ evidence_text=evidence_text,
1139
+ action_ref_type="protocol_task" if task_id else "",
1140
+ action_ref_id=task_id,
1141
+ evidence_ref=f"protocol_task:{task_id}" if task_id else "nexo_task_close",
1142
+ status="fulfilled",
1143
+ )
1144
+ except Exception as exc: # noqa: BLE001
1145
+ _logger.debug("R17 commitment resolution skipped: %s", exc)
1146
+
1023
1147
  def _advance_r17_window(self, tool_name: str):
1024
1148
  if not self._r17_promise_seen_for_turn:
1025
1149
  return
1026
1150
  if self._r17_first_tool_call_in_window:
1027
1151
  self._r17_first_tool_call_in_window = False
1152
+ self._mark_r17_commitments_in_progress(tool_name)
1028
1153
  return
1029
1154
  self._r17_window_remaining -= 1
1030
1155
  if self._r17_window_remaining > 0:
@@ -2160,6 +2285,7 @@ class HeadlessEnforcer:
2160
2285
  if name == "nexo_task_close":
2161
2286
  self.reset_task_cycle("nexo_task_open")
2162
2287
  self._start_post_close_cooldown()
2288
+ self._resolve_r17_commitments_from_task_close(tool_input)
2163
2289
 
2164
2290
  # v7.7 Gap 1 — autonomous detector for multi_step_task_detected.
2165
2291
  # The event was dispatched by the map but nothing ever raised it.
@@ -19,6 +19,11 @@ from plugins.guard import _load_conditioned_learnings, _normalize_path_token
19
19
  from protocol_settings import get_protocol_strictness
20
20
  from product_mode import core_writes_allowed, is_protected_runtime_core_path
21
21
 
22
+ try:
23
+ from guardrails.minimal_delta import evaluate as _minimal_delta_evaluate
24
+ except Exception: # pragma: no cover - guardrail must never break the hook import
25
+ _minimal_delta_evaluate = None
26
+
22
27
  READ_LIKE_TOOLS = {"Read"}
23
28
  WRITE_LIKE_TOOLS = {"Edit", "MultiEdit", "Write"}
24
29
  DELETE_LIKE_TOOLS = {"Delete"}
@@ -639,6 +644,72 @@ def _extract_touched_files(tool_input) -> list[str]:
639
644
  return unique
640
645
 
641
646
 
647
+ def _minimal_delta_prompt_text(payload: dict, tool_input: dict) -> str:
648
+ parts: list[str] = []
649
+ for key in ("prompt", "user_prompt", "context_hint", "user_text"):
650
+ value = payload.get(key)
651
+ if isinstance(value, str) and value.strip():
652
+ parts.append(value.strip())
653
+ for key in ("prompt", "user_prompt", "context_hint", "user_text"):
654
+ value = tool_input.get(key)
655
+ if isinstance(value, str) and value.strip():
656
+ parts.append(value.strip())
657
+ tail = payload.get("transcript_tail")
658
+ if isinstance(tail, list):
659
+ for item in tail[-6:]:
660
+ if isinstance(item, str) and item.strip():
661
+ parts.append(item.strip())
662
+ elif isinstance(item, dict):
663
+ text = item.get("text") or item.get("content")
664
+ if isinstance(text, str) and text.strip():
665
+ parts.append(text.strip())
666
+ return "\n".join(parts)
667
+
668
+
669
+ def _read_existing_text(filepath: str) -> str:
670
+ try:
671
+ return Path(filepath).read_text(encoding="utf-8", errors="replace")
672
+ except OSError:
673
+ return ""
674
+
675
+
676
def _collect_minimal_delta_warning_or_block(payload: dict, *, tool_name: str, files: list[str]) -> dict | None:
    """Run the minimal-delta guardrail for a write-like tool call.

    Returns the evaluator's decision dict when its ``decision`` is ``"warn"``
    or ``"block"``; otherwise ``None``. ``None`` is also returned when the
    evaluator is unavailable, the tool is not Edit/MultiEdit/Write, there is
    no prompt context or target path, the change is empty or a no-op, or the
    evaluator itself raises.
    """
    if _minimal_delta_evaluate is None or tool_name not in {"Edit", "MultiEdit", "Write"}:
        return None

    tool_input = payload.get("tool_input")
    if not isinstance(tool_input, dict):
        return None

    prompt_text = _minimal_delta_prompt_text(payload, tool_input)
    if not prompt_text:
        return None

    # Prefer the explicit path from the tool input, then the first touched file.
    fallback_path = files[0] if files else ""
    raw_path = tool_input.get("file_path") or tool_input.get("path") or fallback_path
    target_path = str(raw_path).strip()
    if not target_path:
        return None

    if tool_name == "Write":
        # A Write replaces the whole file, so compare against what is on disk.
        old_text = _read_existing_text(target_path)
        new_text = str(tool_input.get("content") or "")
    elif tool_name == "MultiEdit":
        edits = tool_input.get("edits")
        if not isinstance(edits, list):
            return None
        dict_edits = [edit for edit in edits if isinstance(edit, dict)]
        old_text = "\n".join(str(edit.get("old_string") or "") for edit in dict_edits)
        new_text = "\n".join(str(edit.get("new_string") or "") for edit in dict_edits)
    else:
        old_text = str(tool_input.get("old_string") or "")
        new_text = str(tool_input.get("new_string") or "")

    if not new_text or old_text == new_text:
        return None

    try:
        decision = _minimal_delta_evaluate(prompt_text, target_path, old_text, new_text)
    except Exception:
        return None

    if decision.get("decision") in {"warn", "block"}:
        return decision
    return None
711
+
712
+
642
713
  def _extract_bash_command(tool_input) -> str:
643
714
  if not isinstance(tool_input, dict):
644
715
  return ""
@@ -1575,6 +1646,39 @@ def process_pre_tool_event(payload: dict) -> dict:
1575
1646
  sid = _resolve_nexo_sid(conn, claude_sid)
1576
1647
  open_task = _find_any_open_task(conn, sid) if sid else None
1577
1648
  warnings: list[dict] = []
1649
+ minimal_delta = _collect_minimal_delta_warning_or_block(
1650
+ payload,
1651
+ tool_name=tool_name,
1652
+ files=files,
1653
+ )
1654
+ if minimal_delta and minimal_delta.get("decision") == "block":
1655
+ return {
1656
+ "ok": True,
1657
+ "session_id": sid,
1658
+ "tool_name": tool_name,
1659
+ "operation": op,
1660
+ "strictness": strictness,
1661
+ "blocks": [
1662
+ {
1663
+ "file": minimal_delta.get("target_path", ""),
1664
+ "reason_code": "minimal_delta_scope_creep",
1665
+ "severity": "error",
1666
+ "debt_type": "minimal_delta_scope_creep",
1667
+ "minimal_delta": minimal_delta,
1668
+ }
1669
+ ],
1670
+ "warnings": warnings,
1671
+ "status": "blocked",
1672
+ }
1673
+ if minimal_delta and minimal_delta.get("decision") == "warn":
1674
+ warnings.append(
1675
+ {
1676
+ "file": minimal_delta.get("target_path", ""),
1677
+ "reason_code": "minimal_delta_soft_envelope",
1678
+ "severity": "warn",
1679
+ "minimal_delta": minimal_delta,
1680
+ }
1681
+ )
1578
1682
  legacy_memory_blocks = _collect_legacy_memory_write_blocks(
1579
1683
  conn,
1580
1684
  sid=sid,
@@ -42,6 +42,7 @@ DEFAULT_ROUTER_MAX_CHARS = int(os.environ.get("NEXO_LOCAL_CONTEXT_ROUTER_MAX_CHA
42
42
  DEFAULT_MAX_JOB_ATTEMPTS = int(os.environ.get("NEXO_LOCAL_INDEX_MAX_JOB_ATTEMPTS", "3") or "3")
43
43
  DEFAULT_SQLITE_BUSY_RETRY_ATTEMPTS = int(os.environ.get("NEXO_LOCAL_CONTEXT_BUSY_RETRY_ATTEMPTS", "5") or "5")
44
44
  DEFAULT_SQLITE_BUSY_RETRY_DELAY_SECONDS = float(os.environ.get("NEXO_LOCAL_CONTEXT_BUSY_RETRY_DELAY_SECONDS", "0.35") or "0.35")
45
+ DEFAULT_HYGIENE_QUICK_SCAN_LIMIT = int(os.environ.get("NEXO_LOCAL_INDEX_HYGIENE_QUICK_SCAN_LIMIT", "5000") or "5000")
45
46
  INITIAL_INDEX_COMPLETE_KEY = "initial_index_complete"
46
47
  INITIAL_INDEX_STARTED_AT_KEY = "initial_index_started_at"
47
48
  PERFORMANCE_PROFILE_KEY = "performance_profile"
@@ -1329,8 +1330,20 @@ def _purge_asset_ids(conn, asset_ids: list[str]) -> dict:
1329
1330
  return counts
1330
1331
 
1331
1332
 
1332
- def _privacy_unsafe_asset_ids(conn) -> list[str]:
1333
- rows = conn.execute("SELECT asset_id, path, privacy_class FROM local_assets").fetchall()
1333
+ def _bounded_fetchall(conn, sql: str, params: tuple[Any, ...] = (), *, max_rows: int | None = None) -> tuple[list[Any], bool]:
1334
+ if max_rows is None or max_rows <= 0:
1335
+ return conn.execute(sql, params).fetchall(), False
1336
+ rows = conn.execute(f"{sql} LIMIT ?", (*params, max_rows + 1)).fetchall()
1337
+ truncated = len(rows) > max_rows
1338
+ return rows[:max_rows], truncated
1339
+
1340
+
1341
+ def _privacy_unsafe_asset_ids(conn, *, max_rows: int | None = None) -> tuple[list[str], bool]:
1342
+ rows, truncated = _bounded_fetchall(
1343
+ conn,
1344
+ "SELECT asset_id, path, privacy_class FROM local_assets",
1345
+ max_rows=max_rows,
1346
+ )
1334
1347
  override_prefixes = _active_user_override_prefixes_conn(conn)
1335
1348
  unsafe: list[str] = []
1336
1349
  for row in rows:
@@ -1340,30 +1353,40 @@ def _privacy_unsafe_asset_ids(conn) -> list[str]:
1340
1353
  continue
1341
1354
  if should_skip_file(path) or privacy_class in {"private_profile_blocked", "system_blocked", "sensitive_inventory_only"}:
1342
1355
  unsafe.append(str(row["asset_id"]))
1343
- return unsafe
1356
+ return unsafe, truncated
1344
1357
 
1345
1358
 
1346
- def _privacy_unsafe_dir_ids(conn) -> list[str]:
1347
- rows = conn.execute("SELECT dir_id, path FROM local_index_dirs").fetchall()
1359
+ def _privacy_unsafe_dir_ids(conn, *, max_rows: int | None = None) -> tuple[list[str], bool]:
1360
+ rows, truncated = _bounded_fetchall(
1361
+ conn,
1362
+ "SELECT dir_id, path FROM local_index_dirs",
1363
+ max_rows=max_rows,
1364
+ )
1348
1365
  override_prefixes = _active_user_override_prefixes_conn(conn)
1349
- return [
1366
+ unsafe = [
1350
1367
  str(row["dir_id"])
1351
1368
  for row in rows
1352
1369
  if should_skip_tree(str(row["path"] or "")) and not _path_under_any_prefix(str(row["path"] or ""), override_prefixes)
1353
1370
  ]
1371
+ return unsafe, truncated
1354
1372
 
1355
1373
 
1356
- def _content_secret_asset_ids(conn) -> list[str]:
1357
- rows = conn.execute(
1358
- """
1374
+ def _content_secret_asset_ids(conn, *, max_rows: int | None = None) -> tuple[list[str], bool]:
1375
+ sql = """
1359
1376
  SELECT c.asset_id, c.text
1360
1377
  FROM local_chunks c
1361
1378
  JOIN local_assets a ON a.asset_id=c.asset_id
1362
1379
  WHERE a.status='active'
1363
1380
  AND COALESCE(a.privacy_class, 'normal')='normal'
1364
- ORDER BY c.asset_id, c.chunk_index
1365
- """
1366
- ).fetchall()
1381
+ """
1382
+ params: tuple[Any, ...] = ()
1383
+ if max_rows is None or max_rows <= 0:
1384
+ rows = conn.execute(sql + " ORDER BY c.asset_id, c.chunk_index", params).fetchall()
1385
+ truncated = False
1386
+ else:
1387
+ rows = conn.execute(sql + " LIMIT ?", (max_rows + 1,)).fetchall()
1388
+ truncated = len(rows) > max_rows
1389
+ rows = rows[:max_rows]
1367
1390
  secret_ids: set[str] = set()
1368
1391
  for row in rows:
1369
1392
  asset_id = str(row["asset_id"])
@@ -1371,7 +1394,7 @@ def _content_secret_asset_ids(conn) -> list[str]:
1371
1394
  continue
1372
1395
  if contains_secret(str(row["text"] or "")):
1373
1396
  secret_ids.add(asset_id)
1374
- return sorted(secret_ids)
1397
+ return sorted(secret_ids), truncated
1375
1398
 
1376
1399
 
1377
1400
  def _mark_content_secret_assets(conn, asset_ids: list[str]) -> int:
@@ -1419,12 +1442,21 @@ def _mark_content_secret_assets(conn, asset_ids: list[str]) -> int:
1419
1442
  return len(unique_ids)
1420
1443
 
1421
1444
 
1422
- def local_index_privacy_hygiene(*, fix: bool = False) -> dict:
1445
+ def local_index_privacy_hygiene(*, fix: bool = False, quick: bool = False) -> dict:
1423
1446
  conn = _conn()
1424
- asset_ids = _privacy_unsafe_asset_ids(conn)
1425
- dir_ids = _privacy_unsafe_dir_ids(conn)
1426
- content_secret_ids = _content_secret_asset_ids(conn)
1427
- residue = {"assets": len(asset_ids), "dirs": len(dir_ids), "content_secret_assets": len(content_secret_ids)}
1447
+ max_rows = None if fix or not quick else DEFAULT_HYGIENE_QUICK_SCAN_LIMIT
1448
+ asset_ids, assets_truncated = _privacy_unsafe_asset_ids(conn, max_rows=max_rows)
1449
+ dir_ids, dirs_truncated = _privacy_unsafe_dir_ids(conn, max_rows=max_rows)
1450
+ content_secret_ids, chunks_truncated = _content_secret_asset_ids(conn, max_rows=max_rows)
1451
+ truncated = bool(assets_truncated or dirs_truncated or chunks_truncated)
1452
+ residue = {
1453
+ "assets": len(asset_ids),
1454
+ "dirs": len(dir_ids),
1455
+ "content_secret_assets": len(content_secret_ids),
1456
+ "truncated": truncated,
1457
+ "quick": bool(quick and not fix),
1458
+ "scan_limit": int(max_rows or 0),
1459
+ }
1428
1460
  cleanup = {"assets": 0, "jobs": 0, "errors": 0, "chunks": 0, "embeddings": 0, "entities": 0, "relations": 0, "versions": 0, "dirs": 0, "content_secret_assets": 0}
1429
1461
  if fix:
1430
1462
  cleanup.update(_purge_asset_ids(conn, asset_ids))
@@ -1437,10 +1469,10 @@ def local_index_privacy_hygiene(*, fix: bool = False) -> dict:
1437
1469
  conn.commit()
1438
1470
  if asset_ids or dir_ids or content_secret_ids:
1439
1471
  log_event("warn", "privacy_hygiene_repaired", "Local memory privacy hygiene repaired", cleanup=cleanup)
1440
- return {"ok": True, "fix": fix, "residue": residue, "cleanup": cleanup}
1472
+ return {"ok": True, "fix": fix, "quick": bool(quick and not fix), "truncated": truncated, "residue": residue, "cleanup": cleanup}
1441
1473
 
1442
1474
 
1443
- def local_index_hygiene(*, fix: bool = False) -> dict:
1475
+ def local_index_hygiene(*, fix: bool = False, quick: bool = False) -> dict:
1444
1476
  conn = _conn()
1445
1477
  removed_paths: list[str] = []
1446
1478
  for row in conn.execute("SELECT id, root_path, source, status FROM local_index_roots").fetchall():
@@ -1455,10 +1487,10 @@ def local_index_hygiene(*, fix: bool = False) -> dict:
1455
1487
  if fix:
1456
1488
  cleanup = _purge_removed_root_payloads(conn)
1457
1489
  conn.commit()
1458
- privacy = local_index_privacy_hygiene(fix=fix)
1490
+ privacy = local_index_privacy_hygiene(fix=fix, quick=quick and not fix)
1459
1491
  if fix and (removed_paths or any(int(cleanup.get(key, 0) or 0) for key in ("assets", "jobs", "errors", "dirs", "checkpoints"))):
1460
1492
  log_event("info", "index_hygiene_repaired", "Local memory index hygiene repaired", roots=[redact_path(path) for path in removed_paths], cleanup=cleanup)
1461
- return {"ok": True, "fix": fix, "removed_roots": removed_paths, "residue": before, "cleanup": cleanup, "privacy": privacy}
1493
+ return {"ok": True, "fix": fix, "quick": bool(quick and not fix), "removed_roots": removed_paths, "residue": before, "cleanup": cleanup, "privacy": privacy}
1462
1494
 
1463
1495
 
1464
1496
  def repair_index_hygiene() -> dict: