nexo-brain 7.25.4 → 7.25.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "nexo-brain",
3
- "version": "7.25.4",
3
+ "version": "7.25.6",
4
4
  "description": "Local cognitive runtime for Claude Code \u2014 persistent memory, overnight learning, doctor diagnostics, personal scripts, recovery-aware jobs, startup preflight, and optional dashboard/power helper.",
5
5
  "author": {
6
6
  "name": "NEXO Brain",
package/README.md CHANGED
@@ -18,7 +18,9 @@
18
18
 
19
19
  [Watch the overview video](https://nexo-brain.com/watch/) · [Watch on YouTube](https://www.youtube.com/watch?v=i2lkGhKyVqI) · [Open the infographic](https://nexo-brain.com/assets/nexo-brain-infographic-v5.png)
20
20
 
21
- Version `7.25.4` is the current packaged-runtime line. Patch release over v7.25.3 - Local Memory starts from safe user-content and email roots, adds configurable included/excluded file types, and cleans legacy whole-disk index state with backup or archive-rebuild safety.
21
+ Version `7.25.6` is the current packaged-runtime line. Patch release over v7.25.5 - existing Local Memory sidecar databases repair legacy root/exclusion columns before source-dependent indexes are created, and core background crons prefer the NEXO-managed Python runtime.
22
+
23
+ Previously in `7.25.4`: patch release over v7.25.3 - Local Memory starts from safe user-content and email roots, adds configurable included/excluded file types, and cleans legacy whole-disk index state with backup or archive-rebuild safety.
22
24
 
23
25
  Previously in `7.25.3`: patch release over v7.25.2 - Desktop-managed Brain installs require the same Python ABI as the bundled wheels and repair incompatible managed virtual environments before reuse.
24
26
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "nexo-brain",
3
- "version": "7.25.4",
3
+ "version": "7.25.6",
4
4
  "mcpName": "io.github.wazionapps/nexo",
5
5
  "description": "NEXO Brain — Shared brain for AI agents. Persistent memory, semantic RAG, natural forgetting, metacognitive guard, trust scoring, 150+ MCP tools. Works with Claude Code, Codex, Claude Desktop & any MCP client. 100% local, free.",
6
6
  "homepage": "https://nexo-brain.com",
package/src/crons/sync.py CHANGED
@@ -150,6 +150,41 @@ RETIRED_CORE_FILES = (
150
150
  )
151
151
 
152
152
 
153
+ def _resolve_core_python_bin() -> str:
154
+ """Prefer the NEXO-managed Python for core cron execution."""
155
+ candidates = [
156
+ os.environ.get("NEXO_RUNTIME_PYTHON", ""),
157
+ os.environ.get("NEXO_PYTHON", ""),
158
+ str(RUNTIME_ROOT / ".venv" / "bin" / "python3"),
159
+ str(RUNTIME_ROOT / ".venv" / "bin" / "python"),
160
+ str(_runtime_code_dir() / ".venv" / "bin" / "python3"),
161
+ str(_runtime_code_dir() / ".venv" / "bin" / "python"),
162
+ ]
163
+ if platform.system() == "Darwin":
164
+ candidates.extend(
165
+ [
166
+ "/Library/Frameworks/Python.framework/Versions/3.12/bin/python3",
167
+ "/opt/homebrew/bin/python3.12",
168
+ "/usr/local/bin/python3.12",
169
+ "/opt/homebrew/bin/python3",
170
+ "/usr/local/bin/python3",
171
+ "/usr/bin/python3",
172
+ ]
173
+ )
174
+ else:
175
+ candidates.extend(["/usr/bin/python3", "/usr/local/bin/python3", "python3"])
176
+
177
+ for candidate in candidates:
178
+ if not candidate:
179
+ continue
180
+ expanded = Path(str(candidate)).expanduser()
181
+ if expanded.exists():
182
+ return str(expanded)
183
+ if os.sep not in str(candidate) and shutil.which(str(candidate)):
184
+ return str(candidate)
185
+ return "python3"
186
+
187
+
153
188
  def _runtime_scripts_dir() -> Path:
154
189
  new = RUNTIME_ROOT / "core" / "scripts"
155
190
  legacy = RUNTIME_ROOT / "scripts"
@@ -407,21 +442,10 @@ def build_plist(cron: dict) -> dict:
407
442
  if subdir_src.is_dir():
408
443
  _copy_into_runtime(subdir_src)
409
444
 
445
+ python_bin = _resolve_core_python_bin()
410
446
  if script_type == "shell":
411
447
  program_args = ["/bin/bash", wrapper_path, cron_id, "/bin/bash", script_path]
412
448
  else:
413
- # Find python3
414
- python_candidates = [
415
- "/opt/homebrew/bin/python3",
416
- "/usr/local/bin/python3",
417
- "/Library/Frameworks/Python.framework/Versions/3.12/bin/python3",
418
- "/usr/bin/python3",
419
- ]
420
- python_bin = "python3"
421
- for p in python_candidates:
422
- if Path(p).exists():
423
- python_bin = p
424
- break
425
449
  program_args = ["/bin/bash", wrapper_path, cron_id, python_bin, script_path]
426
450
 
427
451
  plist = {
@@ -436,6 +460,7 @@ def build_plist(cron: dict) -> dict:
436
460
  "NEXO_CODE": str(_runtime_code_dir()),
437
461
  "NEXO_SOURCE_CODE": str(SOURCE_ROOT),
438
462
  "NEXO_MANAGED_CORE_CRON": "1",
463
+ "NEXO_RUNTIME_PYTHON": python_bin,
439
464
  "PYTHONUNBUFFERED": "1",
440
465
  },
441
466
  }
@@ -505,6 +530,7 @@ def _linux_crontab_entry(cron: dict, exec_cmd: str, stdout_log: Path, stderr_log
505
530
  "HOME": Path.home(),
506
531
  "NEXO_HOME": NEXO_HOME,
507
532
  "NEXO_CODE": _runtime_code_dir(),
533
+ "NEXO_RUNTIME_PYTHON": _resolve_core_python_bin(),
508
534
  "PYTHONUNBUFFERED": "1",
509
535
  }.items()
510
536
  )
@@ -578,12 +604,13 @@ def _sync_wsl_windows_host_local_index_task(dry_run: bool = False) -> dict:
578
604
  log("WARNING: WSL_DISTRO_NAME missing; local-index host task not installed.")
579
605
  return {"ok": False, "skipped": True, "reason": "wsl_distro_missing"}
580
606
 
581
- python_bin = "/usr/bin/python3" if Path("/usr/bin/python3").exists() else "python3"
607
+ python_bin = _resolve_core_python_bin()
582
608
  script_path = _runtime_code_dir() / "scripts" / "nexo-local-index.py"
583
609
  command = (
584
610
  f"cd {shlex.quote(str(Path.home()))} && "
585
611
  f"NEXO_HOME={shlex.quote(str(NEXO_HOME))} "
586
612
  f"NEXO_CODE={shlex.quote(str(_runtime_code_dir()))} "
613
+ f"NEXO_RUNTIME_PYTHON={shlex.quote(python_bin)} "
587
614
  f"{shlex.quote(python_bin)} {shlex.quote(str(script_path))}"
588
615
  )
589
616
  wsl_args = " ".join(
@@ -835,11 +862,7 @@ def sync_linux(dry_run: bool = False):
835
862
 
836
863
  log(f"Manifest: {len(manifest_crons)} core crons")
837
864
 
838
- python_bin = "/usr/bin/python3"
839
- for p in ["/usr/bin/python3", "/usr/local/bin/python3"]:
840
- if Path(p).exists():
841
- python_bin = p
842
- break
865
+ python_bin = _resolve_core_python_bin()
843
866
 
844
867
  enable_units: list[str] = []
845
868
  crontab_entries: list[str] = []
@@ -878,6 +901,7 @@ Type={service_type}
878
901
  ExecStart={exec_cmd}
879
902
  Environment=NEXO_HOME={NEXO_HOME}
880
903
  Environment=NEXO_CODE={_runtime_code_dir()}
904
+ Environment=NEXO_RUNTIME_PYTHON={python_bin}
881
905
  Environment=HOME={Path.home()}
882
906
  StandardOutput=append:{stdout_log}
883
907
  StandardError=append:{stderr_log}
package/src/db/_schema.py CHANGED
@@ -1767,6 +1767,7 @@ def _m62_memory_observations_fts_trigger_fix(conn):
1767
1767
 
1768
1768
  def _m63_local_context_layer(conn):
1769
1769
  """Local Context Layer storage for on-device memory indexing."""
1770
+ _m63_repair_legacy_local_context_columns(conn)
1770
1771
  conn.executescript(
1771
1772
  """
1772
1773
  CREATE TABLE IF NOT EXISTS local_index_roots (
@@ -1995,6 +1996,35 @@ def _m63_local_context_layer(conn):
1995
1996
  )
1996
1997
 
1997
1998
 
1999
+ def _table_exists(conn, table: str) -> bool:
2000
+ row = conn.execute(
2001
+ "SELECT 1 FROM sqlite_master WHERE type='table' AND name=? LIMIT 1",
2002
+ (table,),
2003
+ ).fetchone()
2004
+ return bool(row)
2005
+
2006
+
2007
+ def _m63_repair_legacy_local_context_columns(conn):
2008
+ """Add v2 columns before m63 creates indexes that reference them.
2009
+
2010
+ Existing sidecar DBs can already have m63-era tables without the v2
2011
+ columns. CREATE TABLE IF NOT EXISTS will not alter those tables, so index
2012
+ creation must be preceded by additive repairs.
2013
+ """
2014
+ if _table_exists(conn, "local_index_roots"):
2015
+ _migrate_add_column(conn, "local_index_roots", "source", "TEXT NOT NULL DEFAULT 'legacy'")
2016
+ _migrate_add_column(conn, "local_index_roots", "remote", "INTEGER NOT NULL DEFAULT 0")
2017
+ _migrate_add_column(conn, "local_index_roots", "seed_version", "INTEGER NOT NULL DEFAULT 1")
2018
+ if _table_exists(conn, "local_index_exclusions"):
2019
+ _migrate_add_column(conn, "local_index_exclusions", "source", "TEXT NOT NULL DEFAULT 'legacy'")
2020
+ _migrate_add_column(conn, "local_index_exclusions", "kind", "TEXT NOT NULL DEFAULT 'folder'")
2021
+ if _table_exists(conn, "local_index_file_type_rules"):
2022
+ _migrate_add_column(conn, "local_index_file_type_rules", "source", "TEXT NOT NULL DEFAULT 'legacy'")
2023
+ _migrate_add_column(conn, "local_index_file_type_rules", "priority", "INTEGER NOT NULL DEFAULT 0")
2024
+ _migrate_add_column(conn, "local_index_file_type_rules", "reason", "TEXT NOT NULL DEFAULT ''")
2025
+ _migrate_add_column(conn, "local_index_file_type_rules", "updated_at", "REAL NOT NULL DEFAULT 0")
2026
+
2027
+
1998
2028
  def _m64_local_context_live_dirs(conn):
1999
2029
  """Track known folders so local context can detect new/deleted/changed files quickly."""
2000
2030
  conn.executescript(
@@ -458,14 +458,14 @@ def _file_type_action(conn, path: str | Path) -> str:
458
458
  return str(_effective_file_type_rule(conn, p.suffix.lower()).get("action") or "ignore")
459
459
 
460
460
 
461
- def _should_index_file(conn, path: str | Path) -> bool:
462
- if should_skip_file(str(path)):
461
+ def _should_index_file(conn, path: str | Path, *, allow_default_skip_override: bool = False) -> bool:
462
+ if not allow_default_skip_override and should_skip_file(str(path)):
463
463
  return False
464
464
  return _file_type_action(conn, path) != "ignore"
465
465
 
466
466
 
467
- def _should_extract_file(conn, path: str | Path, depth: int) -> bool:
468
- if depth < 2 or should_skip_file(str(path)):
467
+ def _should_extract_file(conn, path: str | Path, depth: int, *, allow_default_skip_override: bool = False) -> bool:
468
+ if depth < 2 or (not allow_default_skip_override and should_skip_file(str(path))):
469
469
  return False
470
470
  return _file_type_action(conn, path) == "extract"
471
471
 
@@ -473,14 +473,15 @@ def _should_extract_file(conn, path: str | Path, depth: int) -> bool:
473
473
  def add_root(path: str, *, mode: str = "normal", depth: int | None = None, source: str = "user", remote: bool = False, seed_version: int | None = None) -> dict:
474
474
  conn = _conn()
475
475
  root_path = norm_path(path)
476
- if should_skip_tree(root_path) and not _allow_explicit_blocked_root(root_path):
476
+ source_value = _normalize_source(source)
477
+ explicit_user_override = source_value == "user" and (_is_disk_root_path(root_path) or should_skip_tree(root_path))
478
+ if should_skip_tree(root_path) and source_value != "user" and not _allow_explicit_blocked_root(root_path):
477
479
  log_event("warn", "root_rejected_private", "Root rejected by local memory privacy rules", path=redact_path(root_path))
478
480
  return {"ok": False, "error": "root_blocked_by_privacy", "root_path": root_path}
479
481
  depth_value = 2 if depth is None else int(depth)
480
- source_value = _normalize_source(source)
481
482
  seed_value = int(seed_version if seed_version is not None else (DEFAULT_ROOT_SEED_VERSION if source_value == "core_default" else 0))
482
483
  existing = conn.execute("SELECT id, status, source, depth FROM local_index_roots WHERE root_path=?", (root_path,)).fetchone()
483
- if existing and str(existing["status"] or "") == "active" and source_value == "user" and str(existing["source"] or "") == "core_default":
484
+ if existing and str(existing["status"] or "") == "active" and source_value == "user" and str(existing["source"] or "") == "core_default" and not explicit_user_override:
484
485
  return {"ok": True, "root_path": root_path, "mode": mode, "depth": int(existing["depth"] or depth_value), "already_included": True, "included_by": "core_default"}
485
486
  if source_value == "user":
486
487
  parent = conn.execute(
@@ -493,7 +494,7 @@ def add_root(path: str, *, mode: str = "normal", depth: int | None = None, sourc
493
494
  ).fetchall()
494
495
  for row in parent:
495
496
  parent_path = str(row["root_path"] or "")
496
- if _is_nested_path(root_path, parent_path):
497
+ if _is_nested_path(root_path, parent_path) and not explicit_user_override:
497
498
  return {
498
499
  "ok": True,
499
500
  "root_path": root_path,
@@ -525,8 +526,8 @@ def add_root(path: str, *, mode: str = "normal", depth: int | None = None, sourc
525
526
  _set_initial_index_complete(conn, False)
526
527
  _set_initial_index_started_at(conn, now())
527
528
  conn.commit()
528
- log_event("info", "root_added", "Root added", path=redact_path(root_path), mode=mode, depth=depth_value, source=source_value)
529
- return {"ok": True, "root_path": root_path, "mode": mode, "depth": depth_value, "source": source_value, "remote": bool(remote)}
529
+ log_event("info", "root_added", "Root added", path=redact_path(root_path), mode=mode, depth=depth_value, source=source_value, explicit_override=explicit_user_override)
530
+ return {"ok": True, "root_path": root_path, "mode": mode, "depth": depth_value, "source": source_value, "remote": bool(remote), "explicit_override": explicit_user_override}
530
531
 
531
532
 
532
533
  def remove_root(path: str) -> dict:
@@ -1084,6 +1085,7 @@ def migrate_roots_seed_v2(*, dry_run: bool = True, _already_seeded: bool = False
1084
1085
  keep_prefixes = [str(row.get("root_path") or "") for row in keep_roots if row.get("root_path")]
1085
1086
  legacy_ids = {int(row.get("id") or 0) for row in legacy_disk_roots}
1086
1087
  legacy_prefixes = [str(row.get("root_path") or "") for row in legacy_disk_roots if row.get("root_path")]
1088
+ override_prefixes = [str(row.get("root_path") or "") for row in keep_roots if _root_allows_default_skip_override(row)]
1087
1089
 
1088
1090
  asset_ids_to_purge: list[str] = []
1089
1091
  asset_remaps: dict[int, list[str]] = {}
@@ -1092,7 +1094,11 @@ def migrate_roots_seed_v2(*, dry_run: bool = True, _already_seeded: bool = False
1092
1094
  path = str(row["path"] or "")
1093
1095
  under_legacy = int(row["root_id"] or 0) in legacy_ids or _path_is_under_any(path, legacy_prefixes)
1094
1096
  action = _file_type_action(conn, path)
1095
- unsafe = should_skip_file(path) or str(row["privacy_class"] or "") in {"private_profile_blocked", "system_blocked", "sensitive_inventory_only"}
1097
+ explicit_override = _path_under_any_prefix(path, override_prefixes)
1098
+ unsafe = not explicit_override and (
1099
+ should_skip_file(path)
1100
+ or str(row["privacy_class"] or "") in {"private_profile_blocked", "system_blocked", "sensitive_inventory_only"}
1101
+ )
1096
1102
  if action == "ignore" or unsafe or (under_legacy and not _path_is_under_any(path, keep_prefixes)):
1097
1103
  asset_ids_to_purge.append(str(row["asset_id"]))
1098
1104
  continue
@@ -1107,7 +1113,8 @@ def migrate_roots_seed_v2(*, dry_run: bool = True, _already_seeded: bool = False
1107
1113
  for row in dir_rows:
1108
1114
  path = str(row["path"] or "")
1109
1115
  under_legacy = int(row["root_id"] or 0) in legacy_ids or _path_is_under_any(path, legacy_prefixes)
1110
- if should_skip_tree(path) or (under_legacy and not _path_is_under_any(path, keep_prefixes)):
1116
+ explicit_override = _path_under_any_prefix(path, override_prefixes)
1117
+ if (should_skip_tree(path) and not explicit_override) or (under_legacy and not _path_is_under_any(path, keep_prefixes)):
1111
1118
  dir_ids_to_purge.append(str(row["dir_id"]))
1112
1119
  continue
1113
1120
  if under_legacy:
@@ -1311,17 +1318,26 @@ def _purge_asset_ids(conn, asset_ids: list[str]) -> dict:
1311
1318
 
1312
1319
  def _privacy_unsafe_asset_ids(conn) -> list[str]:
1313
1320
  rows = conn.execute("SELECT asset_id, path, privacy_class FROM local_assets").fetchall()
1321
+ override_prefixes = _active_user_override_prefixes_conn(conn)
1314
1322
  unsafe: list[str] = []
1315
1323
  for row in rows:
1316
1324
  privacy_class = str(row["privacy_class"] or "")
1317
- if should_skip_file(str(row["path"] or "")) or privacy_class in {"private_profile_blocked", "system_blocked", "sensitive_inventory_only"}:
1325
+ path = str(row["path"] or "")
1326
+ if _path_under_any_prefix(path, override_prefixes):
1327
+ continue
1328
+ if should_skip_file(path) or privacy_class in {"private_profile_blocked", "system_blocked", "sensitive_inventory_only"}:
1318
1329
  unsafe.append(str(row["asset_id"]))
1319
1330
  return unsafe
1320
1331
 
1321
1332
 
1322
1333
  def _privacy_unsafe_dir_ids(conn) -> list[str]:
1323
1334
  rows = conn.execute("SELECT dir_id, path FROM local_index_dirs").fetchall()
1324
- return [str(row["dir_id"]) for row in rows if should_skip_tree(str(row["path"] or ""))]
1335
+ override_prefixes = _active_user_override_prefixes_conn(conn)
1336
+ return [
1337
+ str(row["dir_id"])
1338
+ for row in rows
1339
+ if should_skip_tree(str(row["path"] or "")) and not _path_under_any_prefix(str(row["path"] or ""), override_prefixes)
1340
+ ]
1325
1341
 
1326
1342
 
1327
1343
  def _content_secret_asset_ids(conn) -> list[str]:
@@ -1414,9 +1430,10 @@ def local_index_privacy_hygiene(*, fix: bool = False) -> dict:
1414
1430
  def local_index_hygiene(*, fix: bool = False) -> dict:
1415
1431
  conn = _conn()
1416
1432
  removed_paths: list[str] = []
1417
- for row in conn.execute("SELECT id, root_path FROM local_index_roots").fetchall():
1433
+ for row in conn.execute("SELECT id, root_path, source, status FROM local_index_roots").fetchall():
1418
1434
  path = str(row["root_path"] or "")
1419
- if _should_skip_mounted_root(Path(path)) or should_skip_tree(path):
1435
+ root = dict(row)
1436
+ if _should_skip_mounted_root(Path(path)) or (should_skip_tree(path) and not _root_allows_default_skip_override(root)):
1420
1437
  removed_paths.append(path)
1421
1438
  if fix:
1422
1439
  conn.execute("UPDATE local_index_roots SET status='removed', updated_at=? WHERE id=?", (now(), row["id"]))
@@ -1824,6 +1841,39 @@ def _is_nested_path(path: str, parent: str) -> bool:
1824
1841
  return value_cmp.startswith(prefix)
1825
1842
 
1826
1843
 
1844
+ def _root_allows_default_skip_override(root: dict | None) -> bool:
1845
+ if not root:
1846
+ return False
1847
+ root_path = str(root.get("root_path") or "")
1848
+ return str(root.get("source") or "") == "user" and bool(root_path) and (
1849
+ _is_disk_root_path(root_path) or should_skip_tree(root_path)
1850
+ )
1851
+
1852
+
1853
+ def _active_user_override_prefixes_conn(conn) -> list[str]:
1854
+ rows = conn.execute(
1855
+ """
1856
+ SELECT root_path
1857
+ FROM local_index_roots
1858
+ WHERE status='active' AND source='user'
1859
+ """
1860
+ ).fetchall()
1861
+ return [
1862
+ str(row["root_path"] or "")
1863
+ for row in rows
1864
+ if row["root_path"] and (_is_disk_root_path(str(row["root_path"] or "")) or should_skip_tree(str(row["root_path"] or "")))
1865
+ ]
1866
+
1867
+
1868
+ def _path_under_any_prefix(path: str, prefixes: list[str]) -> bool:
1869
+ for prefix in prefixes:
1870
+ if not prefix:
1871
+ continue
1872
+ if norm_path(path) == norm_path(prefix) or _is_nested_path(path, prefix):
1873
+ return True
1874
+ return False
1875
+
1876
+
1827
1877
  def _is_discovered_mount_path(path: str) -> bool:
1828
1878
  value = norm_path(path).replace("\\", "/").lower()
1829
1879
  if not value:
@@ -1844,6 +1894,9 @@ def _effective_scan_roots(roots: list[dict]) -> list[dict]:
1844
1894
  effective: list[dict] = []
1845
1895
  for root in active_roots:
1846
1896
  root_path = str(root.get("root_path") or "")
1897
+ if _root_allows_default_skip_override(root):
1898
+ effective.append(root)
1899
+ continue
1847
1900
  if _is_discovered_mount_path(root_path):
1848
1901
  effective.append(root)
1849
1902
  continue
@@ -1954,14 +2007,16 @@ def _upsert_dir(
1954
2007
  return changed, fingerprint
1955
2008
 
1956
2009
 
1957
- def _upsert_asset(conn, root_id: int, path: Path, seen_at: float, root_depth: int) -> tuple[str, bool, str]:
2010
+ def _upsert_asset(conn, root_id: int, path: Path, seen_at: float, root_depth: int, *, allow_default_skip_override: bool = False) -> tuple[str, bool, str]:
1958
2011
  raw_path = str(path)
1959
2012
  normalized = norm_path(raw_path)
1960
2013
  asset_id = stable_id("asset", normalized)
1961
- if not _should_index_file(conn, normalized):
2014
+ if not _should_index_file(conn, normalized, allow_default_skip_override=allow_default_skip_override):
1962
2015
  return asset_id, False, "skipped"
1963
2016
  perm = _permission_state(path)
1964
2017
  depth, privacy_class, depth_reason = classify_path(normalized)
2018
+ if allow_default_skip_override and privacy_class in {"private_profile_blocked", "system_blocked", "sensitive_inventory_only", "inventory_only"}:
2019
+ depth, privacy_class, depth_reason = 2, "normal", "explicit_user_include"
1965
2020
  depth = min(depth, root_depth)
1966
2021
  try:
1967
2022
  st = path.stat()
@@ -1971,7 +2026,7 @@ def _upsert_asset(conn, root_id: int, path: Path, seen_at: float, root_depth: in
1971
2026
  INSERT INTO local_index_errors(asset_id, path, phase, error_code, user_message, technical_detail, retryable, created_at)
1972
2027
  VALUES (?, ?, 'quick_index', ?, ?, ?, 1, ?)
1973
2028
  """,
1974
- (asset_id, normalized, type(exc).__name__, "Algunos archivos no se pudieron leer", str(exc), now()),
2029
+ (asset_id, normalized, type(exc).__name__, "Some files could not be read", str(exc), now()),
1975
2030
  )
1976
2031
  return asset_id, False, "error"
1977
2032
  fingerprint = quick_fingerprint(path, st)
@@ -2039,7 +2094,7 @@ def _upsert_asset(conn, root_id: int, path: Path, seen_at: float, root_depth: in
2039
2094
  """,
2040
2095
  (version_id, asset_id, fingerprint, int(st.st_size), float(st.st_mtime), now()),
2041
2096
  )
2042
- if _should_extract_file(conn, normalized, depth):
2097
+ if _should_extract_file(conn, normalized, depth, allow_default_skip_override=allow_default_skip_override):
2043
2098
  enqueue_job(conn, asset_id, "light_extraction", priority=_extraction_priority(path, conn=conn))
2044
2099
  enqueue_job(conn, asset_id, "graph", priority=40)
2045
2100
  return asset_id, changed, "ok"
@@ -2128,7 +2183,7 @@ def _record_scan_error(conn, stats: dict | None, path: str, phase: str, exc: Exc
2128
2183
  path=path,
2129
2184
  phase=phase,
2130
2185
  error_code=type(exc).__name__,
2131
- user_message="Algunas carpetas o archivos no se pudieron leer",
2186
+ user_message="Some folders or files could not be read",
2132
2187
  technical_detail=str(exc),
2133
2188
  retryable=True,
2134
2189
  )
@@ -2207,6 +2262,7 @@ def _iter_files(
2207
2262
  start_after: str = "",
2208
2263
  seen_at: float | None = None,
2209
2264
  stats: dict | None = None,
2265
+ allow_default_skip_override: bool = False,
2210
2266
  ):
2211
2267
  seen_at = seen_at or now()
2212
2268
  seen_dirs: set[tuple[int, int]] = set()
@@ -2217,7 +2273,7 @@ def _iter_files(
2217
2273
  current = stack.pop()
2218
2274
  if _is_excluded(str(current), exclusions):
2219
2275
  continue
2220
- if current != root and should_skip_tree(str(current)):
2276
+ if current != root and should_skip_tree(str(current)) and not allow_default_skip_override:
2221
2277
  continue
2222
2278
  try:
2223
2279
  st = current.stat()
@@ -2241,13 +2297,13 @@ def _iter_files(
2241
2297
  if entry.is_symlink():
2242
2298
  continue
2243
2299
  if entry.is_dir():
2244
- if should_skip_tree(str(entry)):
2300
+ if should_skip_tree(str(entry)) and not allow_default_skip_override:
2245
2301
  continue
2246
2302
  dirs.append(entry)
2247
2303
  continue
2248
2304
  if entry.is_file():
2249
2305
  normalized = norm_path(entry)
2250
- if not _should_index_file(conn, normalized):
2306
+ if not _should_index_file(conn, normalized, allow_default_skip_override=allow_default_skip_override):
2251
2307
  continue
2252
2308
  if start_after_norm and normalized <= start_after_norm:
2253
2309
  continue
@@ -2312,7 +2368,7 @@ def _reconcile_known_assets(conn, exclusions: list[str], *, limit: int) -> dict:
2312
2368
  return stats
2313
2369
  rows = conn.execute(
2314
2370
  """
2315
- SELECT a.asset_id, a.path, a.root_id, a.quick_fingerprint, a.depth, r.root_path
2371
+ SELECT a.asset_id, a.path, a.root_id, a.quick_fingerprint, a.depth, r.root_path, r.source
2316
2372
  FROM local_assets a
2317
2373
  LEFT JOIN local_index_roots r ON r.id = a.root_id
2318
2374
  WHERE a.status='active'
@@ -2326,11 +2382,12 @@ def _reconcile_known_assets(conn, exclusions: list[str], *, limit: int) -> dict:
2326
2382
  stats["checked"] += 1
2327
2383
  path = str(row["path"])
2328
2384
  root_path = Path(row["root_path"]).expanduser() if row["root_path"] else None
2385
+ allow_default_skip_override = _root_allows_default_skip_override(dict(row))
2329
2386
  if _is_excluded(path, exclusions):
2330
2387
  _purge_asset_ids(conn, [row["asset_id"]])
2331
2388
  stats["excluded"] += 1
2332
2389
  continue
2333
- if not _should_index_file(conn, path):
2390
+ if not _should_index_file(conn, path, allow_default_skip_override=allow_default_skip_override):
2334
2391
  _purge_asset_ids(conn, [row["asset_id"]])
2335
2392
  stats["excluded"] += 1
2336
2393
  continue
@@ -2349,7 +2406,7 @@ def _reconcile_known_assets(conn, exclusions: list[str], *, limit: int) -> dict:
2349
2406
  _record_scan_error(conn, stats, path, "live_reconcile", exc)
2350
2407
  continue
2351
2408
  if fingerprint != row["quick_fingerprint"]:
2352
- _, changed, state = _upsert_asset(conn, int(row["root_id"] or 0), file_path, seen_at, int(row["depth"] or 2))
2409
+ _, changed, state = _upsert_asset(conn, int(row["root_id"] or 0), file_path, seen_at, int(row["depth"] or 2), allow_default_skip_override=allow_default_skip_override)
2353
2410
  if changed:
2354
2411
  stats["modified"] += 1
2355
2412
  if state != "ok":
@@ -2398,6 +2455,7 @@ def _scan_known_directory(
2398
2455
  *,
2399
2456
  file_limit: int,
2400
2457
  dir_limit: int,
2458
+ allow_default_skip_override: bool = False,
2401
2459
  ) -> None:
2402
2460
  stack = [directory]
2403
2461
  seen_at = now()
@@ -2408,7 +2466,7 @@ def _scan_known_directory(
2408
2466
  _mark_dir_subtree_deleted(conn, str(current), seen_at)
2409
2467
  stats["excluded_dirs"] += 1
2410
2468
  continue
2411
- if current != directory and should_skip_tree(str(current)):
2469
+ if current != directory and should_skip_tree(str(current)) and not allow_default_skip_override:
2412
2470
  continue
2413
2471
  try:
2414
2472
  st = current.stat()
@@ -2430,7 +2488,7 @@ def _scan_known_directory(
2430
2488
  if entry.is_symlink():
2431
2489
  continue
2432
2490
  if entry.is_dir():
2433
- if should_skip_tree(str(entry)):
2491
+ if should_skip_tree(str(entry)) and not allow_default_skip_override:
2434
2492
  continue
2435
2493
  changed, _ = _upsert_dir(conn, root_id, entry, seen_at)
2436
2494
  seen_dirs.add(norm_path(entry))
@@ -2438,12 +2496,12 @@ def _scan_known_directory(
2438
2496
  stack.append(entry)
2439
2497
  continue
2440
2498
  if entry.is_file():
2441
- if not _should_index_file(conn, entry):
2499
+ if not _should_index_file(conn, entry, allow_default_skip_override=allow_default_skip_override):
2442
2500
  continue
2443
2501
  seen_files.add(norm_path(entry))
2444
2502
  if stats["files_scanned"] >= file_limit:
2445
2503
  continue
2446
- _, changed, state = _upsert_asset(conn, root_id, entry, seen_at, root_depth)
2504
+ _, changed, state = _upsert_asset(conn, root_id, entry, seen_at, root_depth, allow_default_skip_override=allow_default_skip_override)
2447
2505
  stats["files_scanned"] += 1
2448
2506
  if changed:
2449
2507
  stats["files_changed"] += 1
@@ -2473,7 +2531,7 @@ def _reconcile_known_dirs(conn, exclusions: list[str], *, dir_limit: int, file_l
2473
2531
  return stats
2474
2532
  rows = conn.execute(
2475
2533
  """
2476
- SELECT d.dir_id, d.path, d.quick_fingerprint, d.root_id, r.root_path, r.depth
2534
+ SELECT d.dir_id, d.path, d.quick_fingerprint, d.root_id, r.root_path, r.depth, r.source
2477
2535
  FROM local_index_dirs d
2478
2536
  LEFT JOIN local_index_roots r ON r.id = d.root_id
2479
2537
  WHERE d.status='active'
@@ -2487,11 +2545,12 @@ def _reconcile_known_dirs(conn, exclusions: list[str], *, dir_limit: int, file_l
2487
2545
  stats["checked"] += 1
2488
2546
  dir_path = Path(row["path"])
2489
2547
  root_path = Path(row["root_path"]).expanduser() if row["root_path"] else None
2548
+ allow_default_skip_override = _root_allows_default_skip_override(dict(row))
2490
2549
  if _is_excluded(str(dir_path), exclusions):
2491
2550
  stats["files_deleted"] += _mark_dir_subtree_deleted(conn, str(dir_path), seen_at)
2492
2551
  stats["excluded_dirs"] += 1
2493
2552
  continue
2494
- if should_skip_tree(str(dir_path)):
2553
+ if should_skip_tree(str(dir_path)) and not allow_default_skip_override:
2495
2554
  stats["files_deleted"] += _purge_dir_subtree(conn, str(dir_path))
2496
2555
  stats["excluded_dirs"] += 1
2497
2556
  continue
@@ -2519,6 +2578,7 @@ def _reconcile_known_dirs(conn, exclusions: list[str], *, dir_limit: int, file_l
2519
2578
  stats,
2520
2579
  file_limit=file_limit,
2521
2580
  dir_limit=dir_limit,
2581
+ allow_default_skip_override=allow_default_skip_override,
2522
2582
  )
2523
2583
  else:
2524
2584
  conn.execute("UPDATE local_index_dirs SET updated_at=? WHERE dir_id=?", (seen_at, row["dir_id"]))
@@ -2576,8 +2636,9 @@ def scan_once(*, limit: int | None = None) -> dict:
2576
2636
  for root in roots:
2577
2637
  root_path = Path(root["root_path"]).expanduser()
2578
2638
  root_id = int(root["id"])
2639
+ allow_default_skip_override = _root_allows_default_skip_override(dict(root))
2579
2640
  root_initial_complete = _root_initial_scan_complete(conn, dict(root))
2580
- if should_skip_tree(str(root_path)) and not _allow_explicit_blocked_root(str(root_path)):
2641
+ if should_skip_tree(str(root_path)) and not allow_default_skip_override and not _allow_explicit_blocked_root(str(root_path)):
2581
2642
  conn.execute(
2582
2643
  "UPDATE local_index_roots SET status='removed', last_scan_at=?, updated_at=? WHERE id=?",
2583
2644
  (now(), now(), root_id),
@@ -2607,8 +2668,16 @@ def scan_once(*, limit: int | None = None) -> dict:
2607
2668
  start_after=str(checkpoint["current_path"] or ""),
2608
2669
  seen_at=cycle_started_at,
2609
2670
  stats=totals,
2671
+ allow_default_skip_override=allow_default_skip_override,
2610
2672
  ):
2611
- asset_id, changed, state = _upsert_asset(conn, root_id, file_path, cycle_started_at, int(root["depth"] or 2))
2673
+ asset_id, changed, state = _upsert_asset(
2674
+ conn,
2675
+ root_id,
2676
+ file_path,
2677
+ cycle_started_at,
2678
+ int(root["depth"] or 2),
2679
+ allow_default_skip_override=allow_default_skip_override,
2680
+ )
2612
2681
  last_seen_path = norm_path(file_path)
2613
2682
  totals["seen"] += 1
2614
2683
  seen_for_root += 1
@@ -3180,7 +3249,7 @@ def process_jobs(*, limit: int = 100) -> dict:
3180
3249
  path=row["path"],
3181
3250
  phase=job_type,
3182
3251
  error_code=type(exc).__name__,
3183
- user_message="Algunos archivos no se pudieron leer",
3252
+ user_message="Some files could not be read",
3184
3253
  technical_detail=str(exc),
3185
3254
  retryable=not terminal,
3186
3255
  )