nexo-brain 7.25.4 → 7.25.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +1 -1
- package/README.md +3 -1
- package/package.json +1 -1
- package/src/crons/sync.py +42 -18
- package/src/db/_schema.py +30 -0
- package/src/local_context/api.py +105 -36
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "nexo-brain",
|
|
3
|
-
"version": "7.25.4",
|
|
3
|
+
"version": "7.25.6",
|
|
4
4
|
"description": "Local cognitive runtime for Claude Code \u2014 persistent memory, overnight learning, doctor diagnostics, personal scripts, recovery-aware jobs, startup preflight, and optional dashboard/power helper.",
|
|
5
5
|
"author": {
|
|
6
6
|
"name": "NEXO Brain",
|
package/README.md
CHANGED
|
@@ -18,7 +18,9 @@
|
|
|
18
18
|
|
|
19
19
|
[Watch the overview video](https://nexo-brain.com/watch/) · [Watch on YouTube](https://www.youtube.com/watch?v=i2lkGhKyVqI) · [Open the infographic](https://nexo-brain.com/assets/nexo-brain-infographic-v5.png)
|
|
20
20
|
|
|
21
|
-
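Version `7.25.4` is the current packaged-runtime line. Patch release over v7.25.3 - Local Memory starts from safe user-content and email roots, adds configurable included/excluded file types, and cleans legacy whole-disk index state with backup or archive-rebuild safety.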
|
|
21
|
+
Version `7.25.6` is the current packaged-runtime line. Patch release over v7.25.5 - existing Local Memory sidecar databases repair legacy root/exclusion columns before source-dependent indexes are created, and core background crons prefer the NEXO-managed Python runtime.
|
|
22
|
+
|
|
23
|
+
Previously in `7.25.4`: patch release over v7.25.3 - Local Memory starts from safe user-content and email roots, adds configurable included/excluded file types, and cleans legacy whole-disk index state with backup or archive-rebuild safety.
|
|
22
24
|
|
|
23
25
|
Previously in `7.25.3`: patch release over v7.25.2 - Desktop-managed Brain installs require the same Python ABI as the bundled wheels and repair incompatible managed virtual environments before reuse.
|
|
24
26
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "nexo-brain",
|
|
3
|
-
"version": "7.25.4",
|
|
3
|
+
"version": "7.25.6",
|
|
4
4
|
"mcpName": "io.github.wazionapps/nexo",
|
|
5
5
|
"description": "NEXO Brain — Shared brain for AI agents. Persistent memory, semantic RAG, natural forgetting, metacognitive guard, trust scoring, 150+ MCP tools. Works with Claude Code, Codex, Claude Desktop & any MCP client. 100% local, free.",
|
|
6
6
|
"homepage": "https://nexo-brain.com",
|
package/src/crons/sync.py
CHANGED
|
@@ -150,6 +150,41 @@ RETIRED_CORE_FILES = (
|
|
|
150
150
|
)
|
|
151
151
|
|
|
152
152
|
|
|
153
|
+
def _resolve_core_python_bin() -> str:
|
|
154
|
+
"""Prefer the NEXO-managed Python for core cron execution."""
|
|
155
|
+
candidates = [
|
|
156
|
+
os.environ.get("NEXO_RUNTIME_PYTHON", ""),
|
|
157
|
+
os.environ.get("NEXO_PYTHON", ""),
|
|
158
|
+
str(RUNTIME_ROOT / ".venv" / "bin" / "python3"),
|
|
159
|
+
str(RUNTIME_ROOT / ".venv" / "bin" / "python"),
|
|
160
|
+
str(_runtime_code_dir() / ".venv" / "bin" / "python3"),
|
|
161
|
+
str(_runtime_code_dir() / ".venv" / "bin" / "python"),
|
|
162
|
+
]
|
|
163
|
+
if platform.system() == "Darwin":
|
|
164
|
+
candidates.extend(
|
|
165
|
+
[
|
|
166
|
+
"/Library/Frameworks/Python.framework/Versions/3.12/bin/python3",
|
|
167
|
+
"/opt/homebrew/bin/python3.12",
|
|
168
|
+
"/usr/local/bin/python3.12",
|
|
169
|
+
"/opt/homebrew/bin/python3",
|
|
170
|
+
"/usr/local/bin/python3",
|
|
171
|
+
"/usr/bin/python3",
|
|
172
|
+
]
|
|
173
|
+
)
|
|
174
|
+
else:
|
|
175
|
+
candidates.extend(["/usr/bin/python3", "/usr/local/bin/python3", "python3"])
|
|
176
|
+
|
|
177
|
+
for candidate in candidates:
|
|
178
|
+
if not candidate:
|
|
179
|
+
continue
|
|
180
|
+
expanded = Path(str(candidate)).expanduser()
|
|
181
|
+
if expanded.exists():
|
|
182
|
+
return str(expanded)
|
|
183
|
+
if os.sep not in str(candidate) and shutil.which(str(candidate)):
|
|
184
|
+
return str(candidate)
|
|
185
|
+
return "python3"
|
|
186
|
+
|
|
187
|
+
|
|
153
188
|
def _runtime_scripts_dir() -> Path:
|
|
154
189
|
new = RUNTIME_ROOT / "core" / "scripts"
|
|
155
190
|
legacy = RUNTIME_ROOT / "scripts"
|
|
@@ -407,21 +442,10 @@ def build_plist(cron: dict) -> dict:
|
|
|
407
442
|
if subdir_src.is_dir():
|
|
408
443
|
_copy_into_runtime(subdir_src)
|
|
409
444
|
|
|
445
|
+
python_bin = _resolve_core_python_bin()
|
|
410
446
|
if script_type == "shell":
|
|
411
447
|
program_args = ["/bin/bash", wrapper_path, cron_id, "/bin/bash", script_path]
|
|
412
448
|
else:
|
|
413
|
-
# Find python3
|
|
414
|
-
python_candidates = [
|
|
415
|
-
"/opt/homebrew/bin/python3",
|
|
416
|
-
"/usr/local/bin/python3",
|
|
417
|
-
"/Library/Frameworks/Python.framework/Versions/3.12/bin/python3",
|
|
418
|
-
"/usr/bin/python3",
|
|
419
|
-
]
|
|
420
|
-
python_bin = "python3"
|
|
421
|
-
for p in python_candidates:
|
|
422
|
-
if Path(p).exists():
|
|
423
|
-
python_bin = p
|
|
424
|
-
break
|
|
425
449
|
program_args = ["/bin/bash", wrapper_path, cron_id, python_bin, script_path]
|
|
426
450
|
|
|
427
451
|
plist = {
|
|
@@ -436,6 +460,7 @@ def build_plist(cron: dict) -> dict:
|
|
|
436
460
|
"NEXO_CODE": str(_runtime_code_dir()),
|
|
437
461
|
"NEXO_SOURCE_CODE": str(SOURCE_ROOT),
|
|
438
462
|
"NEXO_MANAGED_CORE_CRON": "1",
|
|
463
|
+
"NEXO_RUNTIME_PYTHON": python_bin,
|
|
439
464
|
"PYTHONUNBUFFERED": "1",
|
|
440
465
|
},
|
|
441
466
|
}
|
|
@@ -505,6 +530,7 @@ def _linux_crontab_entry(cron: dict, exec_cmd: str, stdout_log: Path, stderr_log
|
|
|
505
530
|
"HOME": Path.home(),
|
|
506
531
|
"NEXO_HOME": NEXO_HOME,
|
|
507
532
|
"NEXO_CODE": _runtime_code_dir(),
|
|
533
|
+
"NEXO_RUNTIME_PYTHON": _resolve_core_python_bin(),
|
|
508
534
|
"PYTHONUNBUFFERED": "1",
|
|
509
535
|
}.items()
|
|
510
536
|
)
|
|
@@ -578,12 +604,13 @@ def _sync_wsl_windows_host_local_index_task(dry_run: bool = False) -> dict:
|
|
|
578
604
|
log("WARNING: WSL_DISTRO_NAME missing; local-index host task not installed.")
|
|
579
605
|
return {"ok": False, "skipped": True, "reason": "wsl_distro_missing"}
|
|
580
606
|
|
|
581
|
-
python_bin =
|
|
607
|
+
python_bin = _resolve_core_python_bin()
|
|
582
608
|
script_path = _runtime_code_dir() / "scripts" / "nexo-local-index.py"
|
|
583
609
|
command = (
|
|
584
610
|
f"cd {shlex.quote(str(Path.home()))} && "
|
|
585
611
|
f"NEXO_HOME={shlex.quote(str(NEXO_HOME))} "
|
|
586
612
|
f"NEXO_CODE={shlex.quote(str(_runtime_code_dir()))} "
|
|
613
|
+
f"NEXO_RUNTIME_PYTHON={shlex.quote(python_bin)} "
|
|
587
614
|
f"{shlex.quote(python_bin)} {shlex.quote(str(script_path))}"
|
|
588
615
|
)
|
|
589
616
|
wsl_args = " ".join(
|
|
@@ -835,11 +862,7 @@ def sync_linux(dry_run: bool = False):
|
|
|
835
862
|
|
|
836
863
|
log(f"Manifest: {len(manifest_crons)} core crons")
|
|
837
864
|
|
|
838
|
-
python_bin = "python3"
|
|
839
|
-
for p in ["/usr/bin/python3", "/usr/local/bin/python3"]:
|
|
840
|
-
if Path(p).exists():
|
|
841
|
-
python_bin = p
|
|
842
|
-
break
|
|
865
|
+
python_bin = _resolve_core_python_bin()
|
|
843
866
|
|
|
844
867
|
enable_units: list[str] = []
|
|
845
868
|
crontab_entries: list[str] = []
|
|
@@ -878,6 +901,7 @@ Type={service_type}
|
|
|
878
901
|
ExecStart={exec_cmd}
|
|
879
902
|
Environment=NEXO_HOME={NEXO_HOME}
|
|
880
903
|
Environment=NEXO_CODE={_runtime_code_dir()}
|
|
904
|
+
Environment=NEXO_RUNTIME_PYTHON={python_bin}
|
|
881
905
|
Environment=HOME={Path.home()}
|
|
882
906
|
StandardOutput=append:{stdout_log}
|
|
883
907
|
StandardError=append:{stderr_log}
|
package/src/db/_schema.py
CHANGED
|
@@ -1767,6 +1767,7 @@ def _m62_memory_observations_fts_trigger_fix(conn):
|
|
|
1767
1767
|
|
|
1768
1768
|
def _m63_local_context_layer(conn):
|
|
1769
1769
|
"""Local Context Layer storage for on-device memory indexing."""
|
|
1770
|
+
_m63_repair_legacy_local_context_columns(conn)
|
|
1770
1771
|
conn.executescript(
|
|
1771
1772
|
"""
|
|
1772
1773
|
CREATE TABLE IF NOT EXISTS local_index_roots (
|
|
@@ -1995,6 +1996,35 @@ def _m63_local_context_layer(conn):
|
|
|
1995
1996
|
)
|
|
1996
1997
|
|
|
1997
1998
|
|
|
1999
|
+
def _table_exists(conn, table: str) -> bool:
|
|
2000
|
+
row = conn.execute(
|
|
2001
|
+
"SELECT 1 FROM sqlite_master WHERE type='table' AND name=? LIMIT 1",
|
|
2002
|
+
(table,),
|
|
2003
|
+
).fetchone()
|
|
2004
|
+
return bool(row)
|
|
2005
|
+
|
|
2006
|
+
|
|
2007
|
+
def _m63_repair_legacy_local_context_columns(conn):
|
|
2008
|
+
"""Add v2 columns before m63 creates indexes that reference them.
|
|
2009
|
+
|
|
2010
|
+
Existing sidecar DBs can already have m63-era tables without the v2
|
|
2011
|
+
columns. CREATE TABLE IF NOT EXISTS will not alter those tables, so index
|
|
2012
|
+
creation must be preceded by additive repairs.
|
|
2013
|
+
"""
|
|
2014
|
+
if _table_exists(conn, "local_index_roots"):
|
|
2015
|
+
_migrate_add_column(conn, "local_index_roots", "source", "TEXT NOT NULL DEFAULT 'legacy'")
|
|
2016
|
+
_migrate_add_column(conn, "local_index_roots", "remote", "INTEGER NOT NULL DEFAULT 0")
|
|
2017
|
+
_migrate_add_column(conn, "local_index_roots", "seed_version", "INTEGER NOT NULL DEFAULT 1")
|
|
2018
|
+
if _table_exists(conn, "local_index_exclusions"):
|
|
2019
|
+
_migrate_add_column(conn, "local_index_exclusions", "source", "TEXT NOT NULL DEFAULT 'legacy'")
|
|
2020
|
+
_migrate_add_column(conn, "local_index_exclusions", "kind", "TEXT NOT NULL DEFAULT 'folder'")
|
|
2021
|
+
if _table_exists(conn, "local_index_file_type_rules"):
|
|
2022
|
+
_migrate_add_column(conn, "local_index_file_type_rules", "source", "TEXT NOT NULL DEFAULT 'legacy'")
|
|
2023
|
+
_migrate_add_column(conn, "local_index_file_type_rules", "priority", "INTEGER NOT NULL DEFAULT 0")
|
|
2024
|
+
_migrate_add_column(conn, "local_index_file_type_rules", "reason", "TEXT NOT NULL DEFAULT ''")
|
|
2025
|
+
_migrate_add_column(conn, "local_index_file_type_rules", "updated_at", "REAL NOT NULL DEFAULT 0")
|
|
2026
|
+
|
|
2027
|
+
|
|
1998
2028
|
def _m64_local_context_live_dirs(conn):
|
|
1999
2029
|
"""Track known folders so local context can detect new/deleted/changed files quickly."""
|
|
2000
2030
|
conn.executescript(
|
package/src/local_context/api.py
CHANGED
|
@@ -458,14 +458,14 @@ def _file_type_action(conn, path: str | Path) -> str:
|
|
|
458
458
|
return str(_effective_file_type_rule(conn, p.suffix.lower()).get("action") or "ignore")
|
|
459
459
|
|
|
460
460
|
|
|
461
|
-
def _should_index_file(conn, path: str | Path) -> bool:
|
|
462
|
-
if should_skip_file(str(path)):
|
|
461
|
+
def _should_index_file(conn, path: str | Path, *, allow_default_skip_override: bool = False) -> bool:
|
|
462
|
+
if not allow_default_skip_override and should_skip_file(str(path)):
|
|
463
463
|
return False
|
|
464
464
|
return _file_type_action(conn, path) != "ignore"
|
|
465
465
|
|
|
466
466
|
|
|
467
|
-
def _should_extract_file(conn, path: str | Path, depth: int) -> bool:
|
|
468
|
-
if depth < 2 or should_skip_file(str(path)):
|
|
467
|
+
def _should_extract_file(conn, path: str | Path, depth: int, *, allow_default_skip_override: bool = False) -> bool:
|
|
468
|
+
if depth < 2 or (not allow_default_skip_override and should_skip_file(str(path))):
|
|
469
469
|
return False
|
|
470
470
|
return _file_type_action(conn, path) == "extract"
|
|
471
471
|
|
|
@@ -473,14 +473,15 @@ def _should_extract_file(conn, path: str | Path, depth: int) -> bool:
|
|
|
473
473
|
def add_root(path: str, *, mode: str = "normal", depth: int | None = None, source: str = "user", remote: bool = False, seed_version: int | None = None) -> dict:
|
|
474
474
|
conn = _conn()
|
|
475
475
|
root_path = norm_path(path)
|
|
476
|
-
|
|
476
|
+
source_value = _normalize_source(source)
|
|
477
|
+
explicit_user_override = source_value == "user" and (_is_disk_root_path(root_path) or should_skip_tree(root_path))
|
|
478
|
+
if should_skip_tree(root_path) and source_value != "user" and not _allow_explicit_blocked_root(root_path):
|
|
477
479
|
log_event("warn", "root_rejected_private", "Root rejected by local memory privacy rules", path=redact_path(root_path))
|
|
478
480
|
return {"ok": False, "error": "root_blocked_by_privacy", "root_path": root_path}
|
|
479
481
|
depth_value = 2 if depth is None else int(depth)
|
|
480
|
-
source_value = _normalize_source(source)
|
|
481
482
|
seed_value = int(seed_version if seed_version is not None else (DEFAULT_ROOT_SEED_VERSION if source_value == "core_default" else 0))
|
|
482
483
|
existing = conn.execute("SELECT id, status, source, depth FROM local_index_roots WHERE root_path=?", (root_path,)).fetchone()
|
|
483
|
-
if existing and str(existing["status"] or "") == "active" and source_value == "user" and str(existing["source"] or "") == "core_default":
|
|
484
|
+
if existing and str(existing["status"] or "") == "active" and source_value == "user" and str(existing["source"] or "") == "core_default" and not explicit_user_override:
|
|
484
485
|
return {"ok": True, "root_path": root_path, "mode": mode, "depth": int(existing["depth"] or depth_value), "already_included": True, "included_by": "core_default"}
|
|
485
486
|
if source_value == "user":
|
|
486
487
|
parent = conn.execute(
|
|
@@ -493,7 +494,7 @@ def add_root(path: str, *, mode: str = "normal", depth: int | None = None, sourc
|
|
|
493
494
|
).fetchall()
|
|
494
495
|
for row in parent:
|
|
495
496
|
parent_path = str(row["root_path"] or "")
|
|
496
|
-
if _is_nested_path(root_path, parent_path):
|
|
497
|
+
if _is_nested_path(root_path, parent_path) and not explicit_user_override:
|
|
497
498
|
return {
|
|
498
499
|
"ok": True,
|
|
499
500
|
"root_path": root_path,
|
|
@@ -525,8 +526,8 @@ def add_root(path: str, *, mode: str = "normal", depth: int | None = None, sourc
|
|
|
525
526
|
_set_initial_index_complete(conn, False)
|
|
526
527
|
_set_initial_index_started_at(conn, now())
|
|
527
528
|
conn.commit()
|
|
528
|
-
log_event("info", "root_added", "Root added", path=redact_path(root_path), mode=mode, depth=depth_value, source=source_value)
|
|
529
|
-
return {"ok": True, "root_path": root_path, "mode": mode, "depth": depth_value, "source": source_value, "remote": bool(remote)}
|
|
529
|
+
log_event("info", "root_added", "Root added", path=redact_path(root_path), mode=mode, depth=depth_value, source=source_value, explicit_override=explicit_user_override)
|
|
530
|
+
return {"ok": True, "root_path": root_path, "mode": mode, "depth": depth_value, "source": source_value, "remote": bool(remote), "explicit_override": explicit_user_override}
|
|
530
531
|
|
|
531
532
|
|
|
532
533
|
def remove_root(path: str) -> dict:
|
|
@@ -1084,6 +1085,7 @@ def migrate_roots_seed_v2(*, dry_run: bool = True, _already_seeded: bool = False
|
|
|
1084
1085
|
keep_prefixes = [str(row.get("root_path") or "") for row in keep_roots if row.get("root_path")]
|
|
1085
1086
|
legacy_ids = {int(row.get("id") or 0) for row in legacy_disk_roots}
|
|
1086
1087
|
legacy_prefixes = [str(row.get("root_path") or "") for row in legacy_disk_roots if row.get("root_path")]
|
|
1088
|
+
override_prefixes = [str(row.get("root_path") or "") for row in keep_roots if _root_allows_default_skip_override(row)]
|
|
1087
1089
|
|
|
1088
1090
|
asset_ids_to_purge: list[str] = []
|
|
1089
1091
|
asset_remaps: dict[int, list[str]] = {}
|
|
@@ -1092,7 +1094,11 @@ def migrate_roots_seed_v2(*, dry_run: bool = True, _already_seeded: bool = False
|
|
|
1092
1094
|
path = str(row["path"] or "")
|
|
1093
1095
|
under_legacy = int(row["root_id"] or 0) in legacy_ids or _path_is_under_any(path, legacy_prefixes)
|
|
1094
1096
|
action = _file_type_action(conn, path)
|
|
1095
|
-
|
|
1097
|
+
explicit_override = _path_under_any_prefix(path, override_prefixes)
|
|
1098
|
+
unsafe = not explicit_override and (
|
|
1099
|
+
should_skip_file(path)
|
|
1100
|
+
or str(row["privacy_class"] or "") in {"private_profile_blocked", "system_blocked", "sensitive_inventory_only"}
|
|
1101
|
+
)
|
|
1096
1102
|
if action == "ignore" or unsafe or (under_legacy and not _path_is_under_any(path, keep_prefixes)):
|
|
1097
1103
|
asset_ids_to_purge.append(str(row["asset_id"]))
|
|
1098
1104
|
continue
|
|
@@ -1107,7 +1113,8 @@ def migrate_roots_seed_v2(*, dry_run: bool = True, _already_seeded: bool = False
|
|
|
1107
1113
|
for row in dir_rows:
|
|
1108
1114
|
path = str(row["path"] or "")
|
|
1109
1115
|
under_legacy = int(row["root_id"] or 0) in legacy_ids or _path_is_under_any(path, legacy_prefixes)
|
|
1110
|
-
|
|
1116
|
+
explicit_override = _path_under_any_prefix(path, override_prefixes)
|
|
1117
|
+
if (should_skip_tree(path) and not explicit_override) or (under_legacy and not _path_is_under_any(path, keep_prefixes)):
|
|
1111
1118
|
dir_ids_to_purge.append(str(row["dir_id"]))
|
|
1112
1119
|
continue
|
|
1113
1120
|
if under_legacy:
|
|
@@ -1311,17 +1318,26 @@ def _purge_asset_ids(conn, asset_ids: list[str]) -> dict:
|
|
|
1311
1318
|
|
|
1312
1319
|
def _privacy_unsafe_asset_ids(conn) -> list[str]:
|
|
1313
1320
|
rows = conn.execute("SELECT asset_id, path, privacy_class FROM local_assets").fetchall()
|
|
1321
|
+
override_prefixes = _active_user_override_prefixes_conn(conn)
|
|
1314
1322
|
unsafe: list[str] = []
|
|
1315
1323
|
for row in rows:
|
|
1316
1324
|
privacy_class = str(row["privacy_class"] or "")
|
|
1317
|
-
|
|
1325
|
+
path = str(row["path"] or "")
|
|
1326
|
+
if _path_under_any_prefix(path, override_prefixes):
|
|
1327
|
+
continue
|
|
1328
|
+
if should_skip_file(path) or privacy_class in {"private_profile_blocked", "system_blocked", "sensitive_inventory_only"}:
|
|
1318
1329
|
unsafe.append(str(row["asset_id"]))
|
|
1319
1330
|
return unsafe
|
|
1320
1331
|
|
|
1321
1332
|
|
|
1322
1333
|
def _privacy_unsafe_dir_ids(conn) -> list[str]:
|
|
1323
1334
|
rows = conn.execute("SELECT dir_id, path FROM local_index_dirs").fetchall()
|
|
1324
|
-
|
|
1335
|
+
override_prefixes = _active_user_override_prefixes_conn(conn)
|
|
1336
|
+
return [
|
|
1337
|
+
str(row["dir_id"])
|
|
1338
|
+
for row in rows
|
|
1339
|
+
if should_skip_tree(str(row["path"] or "")) and not _path_under_any_prefix(str(row["path"] or ""), override_prefixes)
|
|
1340
|
+
]
|
|
1325
1341
|
|
|
1326
1342
|
|
|
1327
1343
|
def _content_secret_asset_ids(conn) -> list[str]:
|
|
@@ -1414,9 +1430,10 @@ def local_index_privacy_hygiene(*, fix: bool = False) -> dict:
|
|
|
1414
1430
|
def local_index_hygiene(*, fix: bool = False) -> dict:
|
|
1415
1431
|
conn = _conn()
|
|
1416
1432
|
removed_paths: list[str] = []
|
|
1417
|
-
for row in conn.execute("SELECT id, root_path FROM local_index_roots").fetchall():
|
|
1433
|
+
for row in conn.execute("SELECT id, root_path, source, status FROM local_index_roots").fetchall():
|
|
1418
1434
|
path = str(row["root_path"] or "")
|
|
1419
|
-
|
|
1435
|
+
root = dict(row)
|
|
1436
|
+
if _should_skip_mounted_root(Path(path)) or (should_skip_tree(path) and not _root_allows_default_skip_override(root)):
|
|
1420
1437
|
removed_paths.append(path)
|
|
1421
1438
|
if fix:
|
|
1422
1439
|
conn.execute("UPDATE local_index_roots SET status='removed', updated_at=? WHERE id=?", (now(), row["id"]))
|
|
@@ -1824,6 +1841,39 @@ def _is_nested_path(path: str, parent: str) -> bool:
|
|
|
1824
1841
|
return value_cmp.startswith(prefix)
|
|
1825
1842
|
|
|
1826
1843
|
|
|
1844
|
+
def _root_allows_default_skip_override(root: dict | None) -> bool:
|
|
1845
|
+
if not root:
|
|
1846
|
+
return False
|
|
1847
|
+
root_path = str(root.get("root_path") or "")
|
|
1848
|
+
return str(root.get("source") or "") == "user" and bool(root_path) and (
|
|
1849
|
+
_is_disk_root_path(root_path) or should_skip_tree(root_path)
|
|
1850
|
+
)
|
|
1851
|
+
|
|
1852
|
+
|
|
1853
|
+
def _active_user_override_prefixes_conn(conn) -> list[str]:
|
|
1854
|
+
rows = conn.execute(
|
|
1855
|
+
"""
|
|
1856
|
+
SELECT root_path
|
|
1857
|
+
FROM local_index_roots
|
|
1858
|
+
WHERE status='active' AND source='user'
|
|
1859
|
+
"""
|
|
1860
|
+
).fetchall()
|
|
1861
|
+
return [
|
|
1862
|
+
str(row["root_path"] or "")
|
|
1863
|
+
for row in rows
|
|
1864
|
+
if row["root_path"] and (_is_disk_root_path(str(row["root_path"] or "")) or should_skip_tree(str(row["root_path"] or "")))
|
|
1865
|
+
]
|
|
1866
|
+
|
|
1867
|
+
|
|
1868
|
+
def _path_under_any_prefix(path: str, prefixes: list[str]) -> bool:
|
|
1869
|
+
for prefix in prefixes:
|
|
1870
|
+
if not prefix:
|
|
1871
|
+
continue
|
|
1872
|
+
if norm_path(path) == norm_path(prefix) or _is_nested_path(path, prefix):
|
|
1873
|
+
return True
|
|
1874
|
+
return False
|
|
1875
|
+
|
|
1876
|
+
|
|
1827
1877
|
def _is_discovered_mount_path(path: str) -> bool:
|
|
1828
1878
|
value = norm_path(path).replace("\\", "/").lower()
|
|
1829
1879
|
if not value:
|
|
@@ -1844,6 +1894,9 @@ def _effective_scan_roots(roots: list[dict]) -> list[dict]:
|
|
|
1844
1894
|
effective: list[dict] = []
|
|
1845
1895
|
for root in active_roots:
|
|
1846
1896
|
root_path = str(root.get("root_path") or "")
|
|
1897
|
+
if _root_allows_default_skip_override(root):
|
|
1898
|
+
effective.append(root)
|
|
1899
|
+
continue
|
|
1847
1900
|
if _is_discovered_mount_path(root_path):
|
|
1848
1901
|
effective.append(root)
|
|
1849
1902
|
continue
|
|
@@ -1954,14 +2007,16 @@ def _upsert_dir(
|
|
|
1954
2007
|
return changed, fingerprint
|
|
1955
2008
|
|
|
1956
2009
|
|
|
1957
|
-
def _upsert_asset(conn, root_id: int, path: Path, seen_at: float, root_depth: int) -> tuple[str, bool, str]:
|
|
2010
|
+
def _upsert_asset(conn, root_id: int, path: Path, seen_at: float, root_depth: int, *, allow_default_skip_override: bool = False) -> tuple[str, bool, str]:
|
|
1958
2011
|
raw_path = str(path)
|
|
1959
2012
|
normalized = norm_path(raw_path)
|
|
1960
2013
|
asset_id = stable_id("asset", normalized)
|
|
1961
|
-
if not _should_index_file(conn, normalized):
|
|
2014
|
+
if not _should_index_file(conn, normalized, allow_default_skip_override=allow_default_skip_override):
|
|
1962
2015
|
return asset_id, False, "skipped"
|
|
1963
2016
|
perm = _permission_state(path)
|
|
1964
2017
|
depth, privacy_class, depth_reason = classify_path(normalized)
|
|
2018
|
+
if allow_default_skip_override and privacy_class in {"private_profile_blocked", "system_blocked", "sensitive_inventory_only", "inventory_only"}:
|
|
2019
|
+
depth, privacy_class, depth_reason = 2, "normal", "explicit_user_include"
|
|
1965
2020
|
depth = min(depth, root_depth)
|
|
1966
2021
|
try:
|
|
1967
2022
|
st = path.stat()
|
|
@@ -1971,7 +2026,7 @@ def _upsert_asset(conn, root_id: int, path: Path, seen_at: float, root_depth: in
|
|
|
1971
2026
|
INSERT INTO local_index_errors(asset_id, path, phase, error_code, user_message, technical_detail, retryable, created_at)
|
|
1972
2027
|
VALUES (?, ?, 'quick_index', ?, ?, ?, 1, ?)
|
|
1973
2028
|
""",
|
|
1974
|
-
(asset_id, normalized, type(exc).__name__, "
|
|
2029
|
+
(asset_id, normalized, type(exc).__name__, "Some files could not be read", str(exc), now()),
|
|
1975
2030
|
)
|
|
1976
2031
|
return asset_id, False, "error"
|
|
1977
2032
|
fingerprint = quick_fingerprint(path, st)
|
|
@@ -2039,7 +2094,7 @@ def _upsert_asset(conn, root_id: int, path: Path, seen_at: float, root_depth: in
|
|
|
2039
2094
|
""",
|
|
2040
2095
|
(version_id, asset_id, fingerprint, int(st.st_size), float(st.st_mtime), now()),
|
|
2041
2096
|
)
|
|
2042
|
-
if _should_extract_file(conn, normalized, depth):
|
|
2097
|
+
if _should_extract_file(conn, normalized, depth, allow_default_skip_override=allow_default_skip_override):
|
|
2043
2098
|
enqueue_job(conn, asset_id, "light_extraction", priority=_extraction_priority(path, conn=conn))
|
|
2044
2099
|
enqueue_job(conn, asset_id, "graph", priority=40)
|
|
2045
2100
|
return asset_id, changed, "ok"
|
|
@@ -2128,7 +2183,7 @@ def _record_scan_error(conn, stats: dict | None, path: str, phase: str, exc: Exc
|
|
|
2128
2183
|
path=path,
|
|
2129
2184
|
phase=phase,
|
|
2130
2185
|
error_code=type(exc).__name__,
|
|
2131
|
-
user_message="
|
|
2186
|
+
user_message="Some folders or files could not be read",
|
|
2132
2187
|
technical_detail=str(exc),
|
|
2133
2188
|
retryable=True,
|
|
2134
2189
|
)
|
|
@@ -2207,6 +2262,7 @@ def _iter_files(
|
|
|
2207
2262
|
start_after: str = "",
|
|
2208
2263
|
seen_at: float | None = None,
|
|
2209
2264
|
stats: dict | None = None,
|
|
2265
|
+
allow_default_skip_override: bool = False,
|
|
2210
2266
|
):
|
|
2211
2267
|
seen_at = seen_at or now()
|
|
2212
2268
|
seen_dirs: set[tuple[int, int]] = set()
|
|
@@ -2217,7 +2273,7 @@ def _iter_files(
|
|
|
2217
2273
|
current = stack.pop()
|
|
2218
2274
|
if _is_excluded(str(current), exclusions):
|
|
2219
2275
|
continue
|
|
2220
|
-
if current != root and should_skip_tree(str(current)):
|
|
2276
|
+
if current != root and should_skip_tree(str(current)) and not allow_default_skip_override:
|
|
2221
2277
|
continue
|
|
2222
2278
|
try:
|
|
2223
2279
|
st = current.stat()
|
|
@@ -2241,13 +2297,13 @@ def _iter_files(
|
|
|
2241
2297
|
if entry.is_symlink():
|
|
2242
2298
|
continue
|
|
2243
2299
|
if entry.is_dir():
|
|
2244
|
-
if should_skip_tree(str(entry)):
|
|
2300
|
+
if should_skip_tree(str(entry)) and not allow_default_skip_override:
|
|
2245
2301
|
continue
|
|
2246
2302
|
dirs.append(entry)
|
|
2247
2303
|
continue
|
|
2248
2304
|
if entry.is_file():
|
|
2249
2305
|
normalized = norm_path(entry)
|
|
2250
|
-
if not _should_index_file(conn, normalized):
|
|
2306
|
+
if not _should_index_file(conn, normalized, allow_default_skip_override=allow_default_skip_override):
|
|
2251
2307
|
continue
|
|
2252
2308
|
if start_after_norm and normalized <= start_after_norm:
|
|
2253
2309
|
continue
|
|
@@ -2312,7 +2368,7 @@ def _reconcile_known_assets(conn, exclusions: list[str], *, limit: int) -> dict:
|
|
|
2312
2368
|
return stats
|
|
2313
2369
|
rows = conn.execute(
|
|
2314
2370
|
"""
|
|
2315
|
-
SELECT a.asset_id, a.path, a.root_id, a.quick_fingerprint, a.depth, r.root_path
|
|
2371
|
+
SELECT a.asset_id, a.path, a.root_id, a.quick_fingerprint, a.depth, r.root_path, r.source
|
|
2316
2372
|
FROM local_assets a
|
|
2317
2373
|
LEFT JOIN local_index_roots r ON r.id = a.root_id
|
|
2318
2374
|
WHERE a.status='active'
|
|
@@ -2326,11 +2382,12 @@ def _reconcile_known_assets(conn, exclusions: list[str], *, limit: int) -> dict:
|
|
|
2326
2382
|
stats["checked"] += 1
|
|
2327
2383
|
path = str(row["path"])
|
|
2328
2384
|
root_path = Path(row["root_path"]).expanduser() if row["root_path"] else None
|
|
2385
|
+
allow_default_skip_override = _root_allows_default_skip_override(dict(row))
|
|
2329
2386
|
if _is_excluded(path, exclusions):
|
|
2330
2387
|
_purge_asset_ids(conn, [row["asset_id"]])
|
|
2331
2388
|
stats["excluded"] += 1
|
|
2332
2389
|
continue
|
|
2333
|
-
if not _should_index_file(conn, path):
|
|
2390
|
+
if not _should_index_file(conn, path, allow_default_skip_override=allow_default_skip_override):
|
|
2334
2391
|
_purge_asset_ids(conn, [row["asset_id"]])
|
|
2335
2392
|
stats["excluded"] += 1
|
|
2336
2393
|
continue
|
|
@@ -2349,7 +2406,7 @@ def _reconcile_known_assets(conn, exclusions: list[str], *, limit: int) -> dict:
|
|
|
2349
2406
|
_record_scan_error(conn, stats, path, "live_reconcile", exc)
|
|
2350
2407
|
continue
|
|
2351
2408
|
if fingerprint != row["quick_fingerprint"]:
|
|
2352
|
-
_, changed, state = _upsert_asset(conn, int(row["root_id"] or 0), file_path, seen_at, int(row["depth"] or 2))
|
|
2409
|
+
_, changed, state = _upsert_asset(conn, int(row["root_id"] or 0), file_path, seen_at, int(row["depth"] or 2), allow_default_skip_override=allow_default_skip_override)
|
|
2353
2410
|
if changed:
|
|
2354
2411
|
stats["modified"] += 1
|
|
2355
2412
|
if state != "ok":
|
|
@@ -2398,6 +2455,7 @@ def _scan_known_directory(
|
|
|
2398
2455
|
*,
|
|
2399
2456
|
file_limit: int,
|
|
2400
2457
|
dir_limit: int,
|
|
2458
|
+
allow_default_skip_override: bool = False,
|
|
2401
2459
|
) -> None:
|
|
2402
2460
|
stack = [directory]
|
|
2403
2461
|
seen_at = now()
|
|
@@ -2408,7 +2466,7 @@ def _scan_known_directory(
|
|
|
2408
2466
|
_mark_dir_subtree_deleted(conn, str(current), seen_at)
|
|
2409
2467
|
stats["excluded_dirs"] += 1
|
|
2410
2468
|
continue
|
|
2411
|
-
if current != directory and should_skip_tree(str(current)):
|
|
2469
|
+
if current != directory and should_skip_tree(str(current)) and not allow_default_skip_override:
|
|
2412
2470
|
continue
|
|
2413
2471
|
try:
|
|
2414
2472
|
st = current.stat()
|
|
@@ -2430,7 +2488,7 @@ def _scan_known_directory(
|
|
|
2430
2488
|
if entry.is_symlink():
|
|
2431
2489
|
continue
|
|
2432
2490
|
if entry.is_dir():
|
|
2433
|
-
if should_skip_tree(str(entry)):
|
|
2491
|
+
if should_skip_tree(str(entry)) and not allow_default_skip_override:
|
|
2434
2492
|
continue
|
|
2435
2493
|
changed, _ = _upsert_dir(conn, root_id, entry, seen_at)
|
|
2436
2494
|
seen_dirs.add(norm_path(entry))
|
|
@@ -2438,12 +2496,12 @@ def _scan_known_directory(
|
|
|
2438
2496
|
stack.append(entry)
|
|
2439
2497
|
continue
|
|
2440
2498
|
if entry.is_file():
|
|
2441
|
-
if not _should_index_file(conn, entry):
|
|
2499
|
+
if not _should_index_file(conn, entry, allow_default_skip_override=allow_default_skip_override):
|
|
2442
2500
|
continue
|
|
2443
2501
|
seen_files.add(norm_path(entry))
|
|
2444
2502
|
if stats["files_scanned"] >= file_limit:
|
|
2445
2503
|
continue
|
|
2446
|
-
_, changed, state = _upsert_asset(conn, root_id, entry, seen_at, root_depth)
|
|
2504
|
+
_, changed, state = _upsert_asset(conn, root_id, entry, seen_at, root_depth, allow_default_skip_override=allow_default_skip_override)
|
|
2447
2505
|
stats["files_scanned"] += 1
|
|
2448
2506
|
if changed:
|
|
2449
2507
|
stats["files_changed"] += 1
|
|
@@ -2473,7 +2531,7 @@ def _reconcile_known_dirs(conn, exclusions: list[str], *, dir_limit: int, file_l
|
|
|
2473
2531
|
return stats
|
|
2474
2532
|
rows = conn.execute(
|
|
2475
2533
|
"""
|
|
2476
|
-
SELECT d.dir_id, d.path, d.quick_fingerprint, d.root_id, r.root_path, r.depth
|
|
2534
|
+
SELECT d.dir_id, d.path, d.quick_fingerprint, d.root_id, r.root_path, r.depth, r.source
|
|
2477
2535
|
FROM local_index_dirs d
|
|
2478
2536
|
LEFT JOIN local_index_roots r ON r.id = d.root_id
|
|
2479
2537
|
WHERE d.status='active'
|
|
@@ -2487,11 +2545,12 @@ def _reconcile_known_dirs(conn, exclusions: list[str], *, dir_limit: int, file_l
|
|
|
2487
2545
|
stats["checked"] += 1
|
|
2488
2546
|
dir_path = Path(row["path"])
|
|
2489
2547
|
root_path = Path(row["root_path"]).expanduser() if row["root_path"] else None
|
|
2548
|
+
allow_default_skip_override = _root_allows_default_skip_override(dict(row))
|
|
2490
2549
|
if _is_excluded(str(dir_path), exclusions):
|
|
2491
2550
|
stats["files_deleted"] += _mark_dir_subtree_deleted(conn, str(dir_path), seen_at)
|
|
2492
2551
|
stats["excluded_dirs"] += 1
|
|
2493
2552
|
continue
|
|
2494
|
-
if should_skip_tree(str(dir_path)):
|
|
2553
|
+
if should_skip_tree(str(dir_path)) and not allow_default_skip_override:
|
|
2495
2554
|
stats["files_deleted"] += _purge_dir_subtree(conn, str(dir_path))
|
|
2496
2555
|
stats["excluded_dirs"] += 1
|
|
2497
2556
|
continue
|
|
@@ -2519,6 +2578,7 @@ def _reconcile_known_dirs(conn, exclusions: list[str], *, dir_limit: int, file_l
|
|
|
2519
2578
|
stats,
|
|
2520
2579
|
file_limit=file_limit,
|
|
2521
2580
|
dir_limit=dir_limit,
|
|
2581
|
+
allow_default_skip_override=allow_default_skip_override,
|
|
2522
2582
|
)
|
|
2523
2583
|
else:
|
|
2524
2584
|
conn.execute("UPDATE local_index_dirs SET updated_at=? WHERE dir_id=?", (seen_at, row["dir_id"]))
|
|
@@ -2576,8 +2636,9 @@ def scan_once(*, limit: int | None = None) -> dict:
|
|
|
2576
2636
|
for root in roots:
|
|
2577
2637
|
root_path = Path(root["root_path"]).expanduser()
|
|
2578
2638
|
root_id = int(root["id"])
|
|
2639
|
+
allow_default_skip_override = _root_allows_default_skip_override(dict(root))
|
|
2579
2640
|
root_initial_complete = _root_initial_scan_complete(conn, dict(root))
|
|
2580
|
-
if should_skip_tree(str(root_path)) and not _allow_explicit_blocked_root(str(root_path)):
|
|
2641
|
+
if should_skip_tree(str(root_path)) and not allow_default_skip_override and not _allow_explicit_blocked_root(str(root_path)):
|
|
2581
2642
|
conn.execute(
|
|
2582
2643
|
"UPDATE local_index_roots SET status='removed', last_scan_at=?, updated_at=? WHERE id=?",
|
|
2583
2644
|
(now(), now(), root_id),
|
|
@@ -2607,8 +2668,16 @@ def scan_once(*, limit: int | None = None) -> dict:
|
|
|
2607
2668
|
start_after=str(checkpoint["current_path"] or ""),
|
|
2608
2669
|
seen_at=cycle_started_at,
|
|
2609
2670
|
stats=totals,
|
|
2671
|
+
allow_default_skip_override=allow_default_skip_override,
|
|
2610
2672
|
):
|
|
2611
|
-
asset_id, changed, state = _upsert_asset(
|
|
2673
|
+
asset_id, changed, state = _upsert_asset(
|
|
2674
|
+
conn,
|
|
2675
|
+
root_id,
|
|
2676
|
+
file_path,
|
|
2677
|
+
cycle_started_at,
|
|
2678
|
+
int(root["depth"] or 2),
|
|
2679
|
+
allow_default_skip_override=allow_default_skip_override,
|
|
2680
|
+
)
|
|
2612
2681
|
last_seen_path = norm_path(file_path)
|
|
2613
2682
|
totals["seen"] += 1
|
|
2614
2683
|
seen_for_root += 1
|
|
@@ -3180,7 +3249,7 @@ def process_jobs(*, limit: int = 100) -> dict:
|
|
|
3180
3249
|
path=row["path"],
|
|
3181
3250
|
phase=job_type,
|
|
3182
3251
|
error_code=type(exc).__name__,
|
|
3183
|
-
user_message="
|
|
3252
|
+
user_message="Some files could not be read",
|
|
3184
3253
|
technical_detail=str(exc),
|
|
3185
3254
|
retryable=not terminal,
|
|
3186
3255
|
)
|