nexo-brain 7.20.11 → 7.20.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +1 -1
- package/README.md +5 -1
- package/package.json +1 -1
- package/src/auto_update.py +36 -7
- package/src/cli.py +8 -0
- package/src/crons/sync.py +68 -0
- package/src/db_guard.py +272 -0
- package/src/doctor/providers/boot.py +148 -0
- package/src/local_context/__init__.py +2 -0
- package/src/local_context/api.py +574 -56
- package/src/plugins/recover.py +22 -9
- package/src/server.py +90 -2
- package/src/tools_api_call.py +196 -0
- package/src/tools_credentials.py +180 -8
- package/src/tools_hot_context.py +4 -32
- package/tool-enforcement-map.json +392 -316
package/src/local_context/api.py
CHANGED
|
@@ -2,6 +2,7 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import json
|
|
4
4
|
import os
|
|
5
|
+
import re
|
|
5
6
|
import shutil
|
|
6
7
|
import stat
|
|
7
8
|
import hashlib
|
|
@@ -30,6 +31,12 @@ DEFAULT_ROOT_DEPTH = int(os.environ.get("NEXO_LOCAL_INDEX_DEFAULT_DEPTH", "24")
|
|
|
30
31
|
DEFAULT_EMAIL_ROOT_DEPTH = int(os.environ.get("NEXO_LOCAL_INDEX_EMAIL_ROOT_DEPTH", "24") or "24")
|
|
31
32
|
DEFAULT_MOUNTED_ROOT_DEPTH = int(os.environ.get("NEXO_LOCAL_INDEX_MOUNTED_ROOT_DEPTH", "24") or "24")
|
|
32
33
|
DEFAULT_SYSTEM_ROOT_DEPTH = int(os.environ.get("NEXO_LOCAL_INDEX_SYSTEM_ROOT_DEPTH", "24") or "24")
|
|
34
|
+
DEFAULT_CONTEXT_MAX_CHARS = int(os.environ.get("NEXO_LOCAL_CONTEXT_MAX_CHARS", "20000") or "20000")
|
|
35
|
+
DEFAULT_ROUTER_MAX_CHARS = int(os.environ.get("NEXO_LOCAL_CONTEXT_ROUTER_MAX_CHARS", "6000") or "6000")
|
|
36
|
+
DEFAULT_MAX_JOB_ATTEMPTS = int(os.environ.get("NEXO_LOCAL_INDEX_MAX_JOB_ATTEMPTS", "3") or "3")
|
|
37
|
+
INITIAL_INDEX_COMPLETE_KEY = "initial_index_complete"
|
|
38
|
+
INITIAL_INDEX_STARTED_AT_KEY = "initial_index_started_at"
|
|
39
|
+
VALID_CONTEXT_MODES = {"compact", "full"}
|
|
33
40
|
|
|
34
41
|
|
|
35
42
|
def ensure_ready() -> None:
|
|
@@ -49,6 +56,7 @@ def add_root(path: str, *, mode: str = "normal", depth: int | None = None) -> di
|
|
|
49
56
|
log_event("warn", "root_rejected_private", "Root rejected by local memory privacy rules", path=redact_path(root_path))
|
|
50
57
|
return {"ok": False, "error": "root_blocked_by_privacy", "root_path": root_path}
|
|
51
58
|
depth_value = 2 if depth is None else int(depth)
|
|
59
|
+
existing = conn.execute("SELECT id, status FROM local_index_roots WHERE root_path=?", (root_path,)).fetchone()
|
|
52
60
|
conn.execute(
|
|
53
61
|
"""
|
|
54
62
|
INSERT INTO local_index_roots(root_path, display_path, mode, depth, status, created_at, updated_at)
|
|
@@ -62,6 +70,12 @@ def add_root(path: str, *, mode: str = "normal", depth: int | None = None) -> di
|
|
|
62
70
|
""",
|
|
63
71
|
(root_path, path, mode, depth_value, now(), now()),
|
|
64
72
|
)
|
|
73
|
+
row = conn.execute("SELECT id FROM local_index_roots WHERE root_path=?", (root_path,)).fetchone()
|
|
74
|
+
existing_status = str(existing["status"] or "") if existing else ""
|
|
75
|
+
if row and (not existing or existing_status in {"removed", "offline"}):
|
|
76
|
+
_set_state_conn(conn, _root_initial_scan_key(int(row["id"])), "0")
|
|
77
|
+
_set_initial_index_complete(conn, False)
|
|
78
|
+
_set_initial_index_started_at(conn, now())
|
|
65
79
|
conn.commit()
|
|
66
80
|
log_event("info", "root_added", "Root added", path=redact_path(root_path), mode=mode, depth=depth_value)
|
|
67
81
|
return {"ok": True, "root_path": root_path, "mode": mode, "depth": depth_value}
|
|
@@ -504,8 +518,7 @@ def list_exclusions() -> list[dict]:
|
|
|
504
518
|
return [dict(row) for row in rows]
|
|
505
519
|
|
|
506
520
|
|
|
507
|
-
def
|
|
508
|
-
conn = _conn()
|
|
521
|
+
def _set_state_conn(conn, key: str, value: str) -> None:
|
|
509
522
|
conn.execute(
|
|
510
523
|
"""
|
|
511
524
|
INSERT INTO local_index_state(key, value, updated_at)
|
|
@@ -514,15 +527,127 @@ def _set_state(key: str, value: str) -> None:
|
|
|
514
527
|
""",
|
|
515
528
|
(key, value, now()),
|
|
516
529
|
)
|
|
517
|
-
conn.commit()
|
|
518
530
|
|
|
519
531
|
|
|
520
|
-
def
|
|
532
|
+
def _set_state(key: str, value: str) -> None:
    """Store ``value`` under ``key`` in the index state table and commit.

    Obtains a connection from ``_conn()``, performs the write through
    ``_set_state_conn`` and then commits on that same connection.
    """
    connection = _conn()
    _set_state_conn(connection, key, value)
    connection.commit()
|
|
536
|
+
|
|
537
|
+
|
|
538
|
+
def _get_state_conn(conn, key: str, default: str = "") -> str:
    """Look up ``key`` in ``local_index_state`` using an existing connection.

    Args:
        conn: Open database connection whose rows support access by column
            name.
        key: State key to read.
        default: Value returned when no row exists for ``key``.

    Returns:
        The stored ``value`` column, or ``default`` when the key is absent.
    """
    cursor = conn.execute("SELECT value FROM local_index_state WHERE key=?", (key,))
    found = cursor.fetchone()
    if not found:
        return default
    return found["value"]
|
|
524
541
|
|
|
525
542
|
|
|
543
|
+
def _get_state(key: str, default: str = "") -> str:
    """Return the state value stored under ``key``, or ``default`` if absent.

    Convenience wrapper that obtains a connection from ``_conn()`` and
    delegates the lookup to ``_get_state_conn``.
    """
    connection = _conn()
    stored = _get_state_conn(connection, key, default)
    return stored
|
|
546
|
+
|
|
547
|
+
|
|
548
|
+
def _root_initial_scan_key(root_id: int) -> str:
    """Build the state key that tracks a root's initial-scan completion.

    ``root_id`` is coerced with ``int()`` so numeric strings and floats
    produce the same key as the equivalent integer.
    """
    numeric_id = int(root_id)
    return "root:" + str(numeric_id) + ":initial_scan_complete"
|
|
550
|
+
|
|
551
|
+
|
|
552
|
+
def _root_initial_scan_complete(conn, root: dict) -> bool:
    """Decide whether the initial scan of ``root`` has finished.

    An explicit per-root flag in ``local_index_state`` wins when present:
    the scan is complete only if that flag is ``"1"``. Without a flag row the
    answer is derived from existing data: the root must have a
    ``last_scan_at`` value and no ``quick_index`` checkpoint row.

    Args:
        conn: Open database connection with name-addressable rows.
        root: Root record; must contain ``id`` and may contain
            ``last_scan_at``.
    """
    identifier = int(root["id"])
    state_key = _root_initial_scan_key(identifier)
    flag = conn.execute(
        "SELECT value FROM local_index_state WHERE key=?",
        (state_key,),
    ).fetchone()
    if flag:
        return str(flag["value"]) == "1"

    # No explicit flag: a leftover quick_index checkpoint means a scan of this
    # root is still resumable, so it cannot be considered complete.
    pending_checkpoint = conn.execute(
        "SELECT 1 FROM local_index_checkpoints WHERE root_id=? AND phase='quick_index' LIMIT 1",
        (identifier,),
    ).fetchone()
    if pending_checkpoint:
        return False
    return bool(root.get("last_scan_at"))
|
|
562
|
+
|
|
563
|
+
|
|
564
|
+
def _set_root_initial_scan_complete(conn, root_id: int, complete: bool) -> None:
    """Record the per-root initial-scan flag as ``"1"`` or ``"0"``.

    The write goes through ``_set_state_conn`` on the given connection; this
    function does not commit.
    """
    flag = "1" if complete else "0"
    _set_state_conn(conn, _root_initial_scan_key(root_id), flag)
|
|
566
|
+
|
|
567
|
+
|
|
568
|
+
def _initial_index_complete(conn) -> bool:
    """Report whether the state flag ``INITIAL_INDEX_COMPLETE_KEY`` is ``"1"``.

    A missing flag is read as ``"0"``, i.e. not complete.
    """
    flag = _get_state_conn(conn, INITIAL_INDEX_COMPLETE_KEY, "0")
    return flag == "1"
|
|
570
|
+
|
|
571
|
+
|
|
572
|
+
def _set_initial_index_complete(conn, complete: bool) -> None:
    """Write the state flag ``INITIAL_INDEX_COMPLETE_KEY`` as ``"1"``/``"0"``.

    Uses the supplied connection and leaves committing to the caller.
    """
    encoded = "1" if complete else "0"
    _set_state_conn(conn, INITIAL_INDEX_COMPLETE_KEY, encoded)
|
|
574
|
+
|
|
575
|
+
|
|
576
|
+
def _set_initial_index_started_at(conn, started_at: float) -> None:
    """Store the initial-index start timestamp as a float string.

    A falsy ``started_at`` (``0``, ``None``) is replaced by ``now()``. The
    write is not committed here.
    """
    timestamp = started_at if started_at else now()
    _set_state_conn(conn, INITIAL_INDEX_STARTED_AT_KEY, str(float(timestamp)))
|
|
578
|
+
|
|
579
|
+
|
|
580
|
+
def _earliest_index_activity(conn) -> float:
    """Return the earliest timestamp found across the index tables.

    Four ``MIN(...)`` queries are tried in turn (roots, assets, jobs, logs).
    A query that raises is skipped, so a missing table does not abort the
    lookup. Empty or zero results are ignored as well.

    Returns:
        The smallest timestamp found, or ``0.0`` when no query produced one.
    """
    queries = (
        "SELECT MIN(created_at) AS value FROM local_index_roots WHERE status!='removed'",
        "SELECT MIN(first_seen_at) AS value FROM local_assets WHERE status!='deleted'",
        "SELECT MIN(created_at) AS value FROM local_index_jobs",
        "SELECT MIN(created_at) AS value FROM local_index_logs WHERE event IN ('root_added', 'scan_started', 'scan_finished', 'jobs_processed', 'service_cycle_finished')",
    )
    timestamps = []
    for statement in queries:
        try:
            found = conn.execute(statement).fetchone()["value"] or 0
        except Exception:
            # Best effort: treat a failing query as "no data".
            continue
        if found:
            timestamps.append(float(found))
    if not timestamps:
        return 0.0
    return min(timestamps)
|
|
595
|
+
|
|
596
|
+
|
|
597
|
+
def _ensure_initial_index_started_at(conn) -> float:
    """Return the initial-index start timestamp, backfilling it when missing.

    If a positive number is already stored under
    ``INITIAL_INDEX_STARTED_AT_KEY`` it is returned untouched. Otherwise the
    earliest recorded index activity (or the current time when there is none)
    is stored, committed, and returned.
    """
    stored = _get_state_conn(conn, INITIAL_INDEX_STARTED_AT_KEY, "")
    try:
        started_at = float(stored or 0)
    except Exception:
        # An unparsable stored value is treated the same as a missing one.
        started_at = 0.0
    if started_at > 0:
        return started_at

    backfilled = _earliest_index_activity(conn) or now()
    _set_initial_index_started_at(conn, backfilled)
    conn.commit()
    return backfilled
|
|
609
|
+
|
|
610
|
+
|
|
611
|
+
def _active_job_count(conn) -> int:
    """Count jobs whose status is ``pending``, ``running`` or ``failed``.

    Note that ``failed`` jobs are counted as active by this query.
    """
    query = """
        SELECT COUNT(*) AS total
        FROM local_index_jobs
        WHERE status IN ('pending', 'running', 'failed')
        """
    total = conn.execute(query).fetchone()["total"]
    return int(total or 0)
|
|
620
|
+
|
|
621
|
+
|
|
622
|
+
def _refresh_initial_index_complete(conn, initial_scan: dict | None = None, active_jobs: int | None = None) -> bool:
    """Re-evaluate the global "initial index complete" flag and return it.

    The flag is one-way inside this function: once stored as complete it is
    returned immediately. Otherwise it becomes complete when the initial scan
    reports ``complete`` and no active jobs remain, at which point the flag
    is written and committed.

    Args:
        conn: Open database connection.
        initial_scan: Precomputed result of ``_initial_scan_status``; computed
            here when ``None``.
        active_jobs: Precomputed active job count; queried here when ``None``.
    """
    if _initial_index_complete(conn):
        return True

    if initial_scan is None:
        scan_state = _initial_scan_status(conn)
    else:
        scan_state = initial_scan

    if active_jobs is None:
        outstanding = _active_job_count(conn)
    else:
        outstanding = int(active_jobs or 0)

    if not (bool(scan_state.get("complete")) and outstanding == 0):
        return False

    _set_initial_index_complete(conn, True)
    conn.commit()
    return True
|
|
632
|
+
|
|
633
|
+
|
|
634
|
+
def _initial_scan_status(conn, roots: list[dict] | None = None) -> dict:
    """Summarise how far the initial discovery scan has progressed.

    Roots whose status is ``removed`` or ``offline`` are ignored (a missing
    status counts as ``active``); the rest are passed through
    ``_effective_scan_roots``. The scan is complete when at least one root is
    tracked and every tracked root reports its initial scan as complete.

    Args:
        conn: Open database connection.
        roots: Root records to evaluate; ``list_roots()`` is used when
            ``None``.

    Returns:
        Dict with ``complete``, ``mode``, ``pending_roots``, ``total_roots``
        and ``checkpoint_count``.
    """
    candidates = list_roots() if roots is None else roots
    inactive = {"removed", "offline"}
    live_roots = []
    for entry in candidates:
        if str(entry.get("status") or "active") in inactive:
            continue
        live_roots.append(dict(entry))
    tracked = _effective_scan_roots(live_roots)

    pending = [root for root in tracked if not _root_initial_scan_complete(conn, root)]
    checkpoint_row = conn.execute(
        "SELECT COUNT(*) AS total FROM local_index_checkpoints WHERE phase='quick_index'"
    ).fetchone()
    checkpoint_total = checkpoint_row["total"] or 0

    # An empty root set is never "complete": there is nothing indexed yet.
    complete = bool(tracked) and not pending
    mode = "watching_changes" if complete else "initial_indexing"
    return {
        "complete": complete,
        "mode": mode,
        "pending_roots": len(pending),
        "total_roots": len(tracked),
        "checkpoint_count": int(checkpoint_total or 0),
    }
|
|
649
|
+
|
|
650
|
+
|
|
526
651
|
def pause() -> dict:
|
|
527
652
|
_set_state("paused", "1")
|
|
528
653
|
log_event("info", "index_paused", "Local memory indexing paused")
|
|
@@ -555,7 +680,12 @@ def _is_excluded(path: str, exclusions: list[str]) -> bool:
|
|
|
555
680
|
|
|
556
681
|
def _path_prefix(path: str) -> str:
    """Return the normalized ``path`` with exactly one trailing separator.

    An empty normalized path yields ``os.sep``; a bare ``/`` or ``\\`` is
    returned unchanged. Paths that contain a backslash (including
    drive-letter paths such as ``C:\\``) get a backslash separator, all
    others get ``os.sep``.
    """
    normalized = norm_path(path)
    if not normalized:
        return os.sep
    if normalized in {"/", "\\"}:
        return normalized

    looks_windows = bool(re.match(r"^[A-Za-z]:\\", normalized)) or "\\" in normalized
    separator = "\\" if looks_windows else os.sep
    if normalized.endswith(separator):
        return normalized
    return normalized + separator
|
|
559
689
|
|
|
560
690
|
|
|
561
691
|
def _is_nested_path(path: str, parent: str) -> bool:
|
|
@@ -563,9 +693,20 @@ def _is_nested_path(path: str, parent: str) -> bool:
|
|
|
563
693
|
base = norm_path(parent)
|
|
564
694
|
if not value or not base or value == base:
|
|
565
695
|
return False
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
696
|
+
value_cmp = value.replace("\\", "/")
|
|
697
|
+
base_cmp = base.replace("\\", "/")
|
|
698
|
+
if re.match(r"^[A-Za-z]:/?$", base_cmp):
|
|
699
|
+
base_cmp = f"{base_cmp[0].upper()}:/"
|
|
700
|
+
if re.match(r"^[A-Za-z]:/?$", value_cmp):
|
|
701
|
+
value_cmp = f"{value_cmp[0].upper()}:/"
|
|
702
|
+
if base_cmp != "/":
|
|
703
|
+
base_cmp = base_cmp.rstrip("/")
|
|
704
|
+
if value_cmp != "/":
|
|
705
|
+
value_cmp = value_cmp.rstrip("/")
|
|
706
|
+
if base_cmp == "/":
|
|
707
|
+
return value_cmp.startswith("/")
|
|
708
|
+
prefix = base_cmp if base_cmp.endswith("/") else f"{base_cmp}/"
|
|
709
|
+
return value_cmp.startswith(prefix)
|
|
569
710
|
|
|
570
711
|
|
|
571
712
|
def _is_discovered_mount_path(path: str) -> bool:
|
|
@@ -614,6 +755,19 @@ def _file_type(path: Path) -> str:
|
|
|
614
755
|
return "file"
|
|
615
756
|
|
|
616
757
|
|
|
758
|
+
def _volume_id_for_path(path: Path) -> str:
    """Derive a volume identifier from the location of ``path``.

    Resolution order:
      1. Drive-letter paths -> upper-cased drive root, e.g. ``C:\\``.
      2. ``/Volumes/<x>``, ``/mnt/<x>``, ``/media/<x>`` -> that two-level
         prefix.
      3. ``/run/media/<x>`` -> that three-level prefix.
      4. Anything else -> ``path.anchor``, or ``/`` when the anchor is empty.
    """
    posix_form = norm_path(path).replace("\\", "/")
    drive = re.match(r"^([A-Za-z]):/", posix_form)
    if drive:
        return drive.group(1).upper() + ":\\"

    segments = [segment for segment in posix_form.split("/") if segment]
    depth = len(segments)
    if depth >= 2 and segments[0] in {"Volumes", "mnt", "media"}:
        return "/" + segments[0] + "/" + segments[1]
    # NOTE(review): many Linux desktops mount removable media at
    # /run/media/<user>/<label>; if that layout applies here, this groups all
    # of a user's media under one id. Confirm that is intended.
    if depth >= 3 and segments[:2] == ["run", "media"]:
        return "/run/media/" + segments[2]
    return path.anchor or "/"
|
|
769
|
+
|
|
770
|
+
|
|
617
771
|
def _permission_state(path: Path) -> str:
|
|
618
772
|
try:
|
|
619
773
|
path.stat()
|
|
@@ -744,7 +898,7 @@ def _upsert_asset(conn, root_id: int, path: Path, seen_at: float, root_depth: in
|
|
|
744
898
|
normalized,
|
|
745
899
|
raw_path,
|
|
746
900
|
parent,
|
|
747
|
-
path
|
|
901
|
+
_volume_id_for_path(path),
|
|
748
902
|
_file_type(path),
|
|
749
903
|
path.suffix.lower(),
|
|
750
904
|
int(st.st_size),
|
|
@@ -1259,6 +1413,7 @@ def scan_once(*, limit: int | None = None) -> dict:
|
|
|
1259
1413
|
for root in roots:
|
|
1260
1414
|
root_path = Path(root["root_path"]).expanduser()
|
|
1261
1415
|
root_id = int(root["id"])
|
|
1416
|
+
root_initial_complete = _root_initial_scan_complete(conn, dict(root))
|
|
1262
1417
|
if should_skip_tree(str(root_path)) and not _allow_explicit_blocked_root(str(root_path)):
|
|
1263
1418
|
conn.execute(
|
|
1264
1419
|
"UPDATE local_index_roots SET status='removed', last_scan_at=?, updated_at=? WHERE id=?",
|
|
@@ -1301,6 +1456,8 @@ def scan_once(*, limit: int | None = None) -> dict:
|
|
|
1301
1456
|
partial_root = bool(limit and seen_for_root >= limit)
|
|
1302
1457
|
totals["partial"] = bool(totals["partial"] or partial_root)
|
|
1303
1458
|
if partial_root:
|
|
1459
|
+
if not root_initial_complete:
|
|
1460
|
+
_set_root_initial_scan_complete(conn, root_id, False)
|
|
1304
1461
|
log_event(
|
|
1305
1462
|
"info",
|
|
1306
1463
|
"scan_partial",
|
|
@@ -1315,11 +1472,10 @@ def scan_once(*, limit: int | None = None) -> dict:
|
|
|
1315
1472
|
(root_id, cycle_started_at),
|
|
1316
1473
|
).fetchall()
|
|
1317
1474
|
for row in rows:
|
|
1318
|
-
conn
|
|
1319
|
-
"UPDATE local_assets SET status='deleted', deleted_at=?, updated_at=? WHERE asset_id=?",
|
|
1320
|
-
(now(), now(), row["asset_id"]),
|
|
1321
|
-
)
|
|
1475
|
+
_mark_asset_deleted(conn, row["asset_id"])
|
|
1322
1476
|
_clear_checkpoint(conn, root_id)
|
|
1477
|
+
if not root_initial_complete:
|
|
1478
|
+
_set_root_initial_scan_complete(conn, root_id, True)
|
|
1323
1479
|
conn.execute(
|
|
1324
1480
|
"UPDATE local_index_roots SET status='active', last_scan_at=?, updated_at=? WHERE id=?",
|
|
1325
1481
|
(now(), now(), root_id),
|
|
@@ -1391,13 +1547,23 @@ def _replace_entities(conn, asset_id: str, version_id: str, values: list[str]) -
|
|
|
1391
1547
|
|
|
1392
1548
|
def _requeue_due_jobs(conn) -> dict:
|
|
1393
1549
|
current = now()
|
|
1550
|
+
exhausted = conn.execute(
|
|
1551
|
+
"""
|
|
1552
|
+
UPDATE local_index_jobs
|
|
1553
|
+
SET status='done', next_attempt_at=NULL, claimed_by='', lease_expires_at=NULL, updated_at=?
|
|
1554
|
+
WHERE status='failed' AND attempt_count >= ?
|
|
1555
|
+
""",
|
|
1556
|
+
(current, DEFAULT_MAX_JOB_ATTEMPTS),
|
|
1557
|
+
).rowcount
|
|
1394
1558
|
failed = conn.execute(
|
|
1395
1559
|
"""
|
|
1396
1560
|
UPDATE local_index_jobs
|
|
1397
1561
|
SET status='pending', claimed_by='', lease_expires_at=NULL, updated_at=?
|
|
1398
|
-
WHERE status='failed'
|
|
1562
|
+
WHERE status='failed'
|
|
1563
|
+
AND attempt_count < ?
|
|
1564
|
+
AND (next_attempt_at IS NULL OR next_attempt_at <= ?)
|
|
1399
1565
|
""",
|
|
1400
|
-
(current, current),
|
|
1566
|
+
(current, DEFAULT_MAX_JOB_ATTEMPTS, current),
|
|
1401
1567
|
).rowcount
|
|
1402
1568
|
expired = conn.execute(
|
|
1403
1569
|
"""
|
|
@@ -1407,9 +1573,9 @@ def _requeue_due_jobs(conn) -> dict:
|
|
|
1407
1573
|
""",
|
|
1408
1574
|
(current, current),
|
|
1409
1575
|
).rowcount
|
|
1410
|
-
if failed or expired:
|
|
1411
|
-
log_event("warn", "jobs_requeued", "Local memory recovered stalled jobs", failed=failed, expired=expired)
|
|
1412
|
-
return {"failed": int(failed or 0), "expired": int(expired or 0)}
|
|
1576
|
+
if failed or expired or exhausted:
|
|
1577
|
+
log_event("warn", "jobs_requeued", "Local memory recovered stalled jobs", failed=failed, expired=expired, exhausted=exhausted)
|
|
1578
|
+
return {"failed": int(failed or 0), "expired": int(expired or 0), "exhausted": int(exhausted or 0)}
|
|
1413
1579
|
|
|
1414
1580
|
|
|
1415
1581
|
def process_jobs(*, limit: int = 100) -> dict:
|
|
@@ -1483,13 +1649,15 @@ def process_jobs(*, limit: int = 100) -> dict:
|
|
|
1483
1649
|
processed += 1
|
|
1484
1650
|
except Exception as exc:
|
|
1485
1651
|
failed += 1
|
|
1652
|
+
attempts = int(row["attempt_count"] or 0) + 1
|
|
1653
|
+
terminal = attempts >= DEFAULT_MAX_JOB_ATTEMPTS
|
|
1486
1654
|
conn.execute(
|
|
1487
1655
|
"""
|
|
1488
1656
|
UPDATE local_index_jobs
|
|
1489
|
-
SET status
|
|
1657
|
+
SET status=?, attempt_count=attempt_count+1, next_attempt_at=?, claimed_by='', lease_expires_at=NULL, last_error_code=?, updated_at=?
|
|
1490
1658
|
WHERE job_id=?
|
|
1491
1659
|
""",
|
|
1492
|
-
(now() + 3600, type(exc).__name__, now(), job_id),
|
|
1660
|
+
("done" if terminal else "failed", None if terminal else now() + 3600, type(exc).__name__, now(), job_id),
|
|
1493
1661
|
)
|
|
1494
1662
|
_record_index_error(
|
|
1495
1663
|
conn,
|
|
@@ -1499,7 +1667,7 @@ def process_jobs(*, limit: int = 100) -> dict:
|
|
|
1499
1667
|
error_code=type(exc).__name__,
|
|
1500
1668
|
user_message="Algunos archivos no se pudieron leer",
|
|
1501
1669
|
technical_detail=str(exc),
|
|
1502
|
-
retryable=
|
|
1670
|
+
retryable=not terminal,
|
|
1503
1671
|
)
|
|
1504
1672
|
conn.commit()
|
|
1505
1673
|
if processed or failed:
|
|
@@ -1526,14 +1694,37 @@ def run_once(
|
|
|
1526
1694
|
ensure_default_roots()
|
|
1527
1695
|
if root:
|
|
1528
1696
|
add_root(root)
|
|
1529
|
-
|
|
1530
|
-
|
|
1531
|
-
|
|
1532
|
-
|
|
1533
|
-
|
|
1697
|
+
conn = _conn()
|
|
1698
|
+
initial_before = _initial_scan_status(conn, list_roots())
|
|
1699
|
+
initial_index_before = _refresh_initial_index_complete(conn, initial_before)
|
|
1700
|
+
if initial_index_before:
|
|
1701
|
+
live_result = reconcile_live_changes(
|
|
1702
|
+
asset_limit=live_asset_limit,
|
|
1703
|
+
dir_limit=live_dir_limit,
|
|
1704
|
+
file_limit=live_file_limit,
|
|
1705
|
+
)
|
|
1706
|
+
else:
|
|
1707
|
+
live_result = {
|
|
1708
|
+
"ok": True,
|
|
1709
|
+
"skipped": True,
|
|
1710
|
+
"reason": "initial_scan_in_progress",
|
|
1711
|
+
"assets": {},
|
|
1712
|
+
"dirs": {},
|
|
1713
|
+
}
|
|
1534
1714
|
scan_result = scan_once(limit=limit)
|
|
1535
1715
|
job_result = process_jobs(limit=process_limit)
|
|
1536
|
-
|
|
1716
|
+
conn_after = _conn()
|
|
1717
|
+
initial_after = _initial_scan_status(conn_after, list_roots())
|
|
1718
|
+
active_after = _active_job_count(conn_after)
|
|
1719
|
+
initial_index_after = _refresh_initial_index_complete(conn_after, initial_after, active_after)
|
|
1720
|
+
return {
|
|
1721
|
+
"ok": True,
|
|
1722
|
+
"initial_scan": initial_after,
|
|
1723
|
+
"initial_index_complete": initial_index_after,
|
|
1724
|
+
"live": live_result,
|
|
1725
|
+
"scan": scan_result,
|
|
1726
|
+
"jobs": job_result,
|
|
1727
|
+
}
|
|
1537
1728
|
|
|
1538
1729
|
|
|
1539
1730
|
def _problem_rows(conn) -> list[dict]:
|
|
@@ -1788,21 +1979,7 @@ def _service_cycle_observation(conn) -> dict:
|
|
|
1788
1979
|
|
|
1789
1980
|
|
|
1790
1981
|
def _index_timing(conn, *, done: int, active_jobs: int, percent: int) -> dict:
|
|
1791
|
-
first_seen = conn
|
|
1792
|
-
"""
|
|
1793
|
-
SELECT MIN(created_at) AS created_at
|
|
1794
|
-
FROM local_index_logs
|
|
1795
|
-
WHERE event IN ('root_added', 'scan_started', 'scan_finished', 'jobs_processed', 'service_cycle_finished')
|
|
1796
|
-
"""
|
|
1797
|
-
).fetchone()["created_at"] or 0
|
|
1798
|
-
if not first_seen:
|
|
1799
|
-
first_seen = conn.execute(
|
|
1800
|
-
"""
|
|
1801
|
-
SELECT MIN(first_seen_at) AS first_seen_at
|
|
1802
|
-
FROM local_assets
|
|
1803
|
-
WHERE status!='deleted'
|
|
1804
|
-
"""
|
|
1805
|
-
).fetchone()["first_seen_at"] or 0
|
|
1982
|
+
first_seen = _ensure_initial_index_started_at(conn)
|
|
1806
1983
|
elapsed_seconds = max(0, int(now() - float(first_seen))) if first_seen else 0
|
|
1807
1984
|
eta_seconds = None
|
|
1808
1985
|
if elapsed_seconds > 0 and done > 0 and active_jobs > 0 and 0 < percent < 100:
|
|
@@ -1885,6 +2062,8 @@ def status() -> dict:
|
|
|
1885
2062
|
percent = 100 if total_jobs == 0 else int((done / max(total_jobs, 1)) * 100)
|
|
1886
2063
|
timing = _index_timing(conn, done=done, active_jobs=active_jobs, percent=percent)
|
|
1887
2064
|
roots = list_roots()
|
|
2065
|
+
initial_scan = _initial_scan_status(conn, roots)
|
|
2066
|
+
initial_index_complete = _refresh_initial_index_complete(conn, initial_scan, active_jobs)
|
|
1888
2067
|
volumes = []
|
|
1889
2068
|
by_volume = conn.execute(
|
|
1890
2069
|
"""
|
|
@@ -1903,7 +2082,7 @@ def status() -> dict:
|
|
|
1903
2082
|
service.update(_service_cycle_observation(conn))
|
|
1904
2083
|
problem = _service_problem(service)
|
|
1905
2084
|
service["healthy"] = problem is None
|
|
1906
|
-
service["state"] = "paused" if paused else ("attention" if problem else ("idle" if active_jobs == 0 else "indexing"))
|
|
2085
|
+
service["state"] = "paused" if paused else ("attention" if problem else ("idle" if active_jobs == 0 and initial_index_complete else "indexing"))
|
|
1907
2086
|
problems = _problem_rows(conn)
|
|
1908
2087
|
if problem:
|
|
1909
2088
|
problems.insert(0, {
|
|
@@ -1917,11 +2096,21 @@ def status() -> dict:
|
|
|
1917
2096
|
"phase": "service",
|
|
1918
2097
|
"created_at": now(),
|
|
1919
2098
|
})
|
|
2099
|
+
if paused:
|
|
2100
|
+
phase = "paused"
|
|
2101
|
+
elif problem:
|
|
2102
|
+
phase = "service_attention"
|
|
2103
|
+
elif not initial_index_complete:
|
|
2104
|
+
phase = "initial_indexing"
|
|
2105
|
+
elif active_jobs == 0:
|
|
2106
|
+
phase = "idle"
|
|
2107
|
+
else:
|
|
2108
|
+
phase = "updating_changes"
|
|
1920
2109
|
return {
|
|
1921
2110
|
"ok": True,
|
|
1922
2111
|
"service": service,
|
|
1923
2112
|
"global": {
|
|
1924
|
-
"phase":
|
|
2113
|
+
"phase": phase,
|
|
1925
2114
|
"percent": percent,
|
|
1926
2115
|
"files_found": int(assets["total"] or 0),
|
|
1927
2116
|
"files_processed": int(done or 0),
|
|
@@ -1931,7 +2120,14 @@ def status() -> dict:
|
|
|
1931
2120
|
"jobs_failed": failed_jobs,
|
|
1932
2121
|
"elapsed_seconds": timing["elapsed_seconds"],
|
|
1933
2122
|
"eta_seconds": timing["eta_seconds"],
|
|
2123
|
+
"index_started_at": _get_state_conn(conn, INITIAL_INDEX_STARTED_AT_KEY, ""),
|
|
2124
|
+
"initial_scan_complete": bool(initial_index_complete),
|
|
2125
|
+
"initial_discovery_complete": bool(initial_scan["complete"]),
|
|
2126
|
+
"initial_index_complete": bool(initial_index_complete),
|
|
2127
|
+
"index_mode": "watching_changes" if initial_index_complete else "initial_indexing",
|
|
1934
2128
|
},
|
|
2129
|
+
"initial_scan": initial_scan,
|
|
2130
|
+
"initial_index_complete": bool(initial_index_complete),
|
|
1935
2131
|
"volumes": volumes,
|
|
1936
2132
|
"roots": roots,
|
|
1937
2133
|
"exclusions": list_exclusions(),
|
|
@@ -2165,19 +2361,321 @@ def _context_candidate_rows(conn, entity_asset_ids: list[str], *, base_limit: in
|
|
|
2165
2361
|
return rows
|
|
2166
2362
|
|
|
2167
2363
|
|
|
2168
|
-
def
|
|
2364
|
+
def _compact_text(value: str, *, max_chars: int) -> str:
    """Collapse whitespace in ``value`` and cap the result at ``max_chars``.

    Every run of whitespace becomes a single space and leading/trailing
    whitespace is dropped. When the collapsed text is longer than
    ``max_chars`` it is cut to ``max_chars - 1`` characters, right-stripped,
    and suffixed with an ellipsis. A non-positive ``max_chars`` disables the
    cap; a falsy ``value`` yields an empty string.
    """
    collapsed = " ".join(str(value or "").split())
    fits = max_chars <= 0 or len(collapsed) <= max_chars
    if fits:
        return collapsed
    keep = max(0, max_chars - 1)
    head = collapsed[:keep].rstrip()
    return f"{head}…"
|
|
2369
|
+
|
|
2370
|
+
|
|
2371
|
+
def _payload_size(payload: dict) -> int:
    """Return the character length of ``payload`` as compact JSON.

    Serialization uses no spaces after separators and keeps non-ASCII
    characters unescaped, so each such character counts once.
    """
    encoded = json.dumps(payload, ensure_ascii=False, separators=(",", ":"))
    return len(encoded)
|
|
2373
|
+
|
|
2374
|
+
|
|
2375
|
+
def _normalize_context_mode(mode: str) -> tuple[str, list[str]]:
    """Validate a requested context mode.

    The input is stringified, stripped and lower-cased; a falsy input means
    ``"compact"``.

    Returns:
        ``(mode, warnings)``. For a member of ``VALID_CONTEXT_MODES`` the
        warnings list is empty. Anything else falls back to ``"compact"``
        with a single warning naming the rejected value.
    """
    requested = str(mode or "compact").strip().lower()
    if requested not in VALID_CONTEXT_MODES:
        warning = f"Unsupported local context mode '{requested}'. Falling back to compact mode."
        return "compact", [warning]
    return requested, []
|
|
2380
|
+
|
|
2381
|
+
|
|
2382
|
+
def _context_usage_hint(payload: dict) -> dict:
    """Build the structured usage hint attached to truncated results.

    The hint echoes the parameters found in ``payload`` (``mode`` defaults to
    ``"compact"``, the two ``include_*`` flags are coerced to bool) next to a
    fixed recommended call and short advice on expanding or refining the
    query.
    """
    current_params = {
        "mode": payload.get("mode", "compact"),
        "limit": payload.get("limit"),
        "max_chars": payload.get("max_chars"),
    }
    for flag in ("include_entities", "include_relations"):
        current_params[flag] = bool(payload.get(flag))

    recommended_params = {
        "mode": "compact",
        "limit": 4,
        "max_chars": 12000,
        "include_entities": False,
        "include_relations": False,
    }

    hint = {"tool": "nexo_local_context", "current_params": current_params}
    hint["recommended_call"] = "nexo_local_context(query='...', mode='compact', limit=4, max_chars=12000, include_entities=false, include_relations=false)"
    hint["recommended_params"] = recommended_params
    hint["expand"] = "Use mode='full' only for debugging, with a specific query and explicit max_chars."
    hint["refine"] = "Add names, dates, project names, file types, paths, or email subjects to reduce noise."
    return hint
|
|
2404
|
+
|
|
2405
|
+
|
|
2406
|
+
def _minimal_truncated_context_payload(payload: dict, *, max_chars: int) -> dict:
    """Build the last-resort response used when truncation was not enough.

    The result keeps only ``ok`` and ``mode`` from ``payload`` and carries
    empty result lists plus a one-line usage hint. If even that skeleton is
    larger than ``max_chars``, a four-key variant with a shorter hint is
    returned instead. That variant is returned without a further size check.
    """
    mode = str(payload.get("mode") or "compact")
    ok = bool(payload.get("ok", True))
    minimal = {
        "ok": ok,
        "mode": mode,
        "truncated": True,
        "warnings": ["truncated"],
        "usage_hint": "nexo_local_context(query='...', mode='compact', limit=4, max_chars=12000)",
        "assets": [],
        "chunks": [],
        "entities": [],
        "relations": [],
        "evidence_refs": [],
    }
    if not max_chars or _payload_size(minimal) <= max_chars:
        return minimal
    return {
        "ok": ok,
        "mode": mode,
        "truncated": True,
        "usage_hint": "nexo_local_context(mode='compact',limit=4,max_chars=12000)",
    }
|
|
2429
|
+
|
|
2430
|
+
|
|
2431
|
+
def _sync_context_payload_refs(payload: dict) -> None:
    """Prune ``evidence_refs`` and ``assets`` to match the remaining chunks.

    Mutates ``payload`` in place:

    * If any chunk has a ``chunk_id``, only refs containing
      ``#chunk:<chunk_id>`` for one of those ids are kept, and only assets
      whose ``asset_id`` is referenced by a chunk are kept.
    * If there are no chunks at all, ``evidence_refs`` is emptied and
      ``assets`` is left alone.
    * If chunks exist but none has a ``chunk_id``, nothing is changed.
    """
    remaining = payload.get("chunks") or []
    chunk_ids = set()
    asset_ids = set()
    for chunk in remaining:
        chunk_id = chunk.get("chunk_id")
        if chunk_id:
            chunk_ids.add(str(chunk_id))
        asset_id = chunk.get("asset_id")
        if asset_id:
            asset_ids.add(str(asset_id))

    if not chunk_ids:
        if not remaining:
            payload["evidence_refs"] = []
        return

    markers = tuple(f"#chunk:{chunk_id}" for chunk_id in chunk_ids)
    kept_refs = []
    for ref in payload.get("evidence_refs") or []:
        ref_text = str(ref)
        if any(marker in ref_text for marker in markers):
            kept_refs.append(ref)
    payload["evidence_refs"] = kept_refs

    kept_assets = []
    for asset in payload.get("assets") or []:
        if str(asset.get("asset_id") or "") in asset_ids:
            kept_assets.append(asset)
    payload["assets"] = kept_assets
|
|
2446
|
+
|
|
2447
|
+
|
|
2448
|
+
def _truncate_context_payload(payload: dict, *, max_chars: int) -> dict:
    """Shrink ``payload`` step by step until it fits into ``max_chars``.

    Size is measured with ``_payload_size``. A falsy or non-positive
    ``max_chars``, or a payload that already fits, returns ``payload``
    untouched. Otherwise ``payload`` is mutated in place through increasingly
    aggressive steps:

    1. Add a truncation warning, set ``truncated`` and attach a usage hint.
    2. Compact ``query``, ``summary``, chunk texts and asset
       ``display_path``/``summary``; drop entities/relations that were not
       requested.
    3. Drop trailing chunks (keeping at least one), then trailing assets
       (keeping at least one) together with their chunks.
    4. Drop entities and relations unconditionally.
    5. Reduce to a single stripped-down chunk, asset and evidence ref.
    6. Fall back to ``_minimal_truncated_context_payload``.

    Args:
        payload: Shaped context payload; modified in place.
        max_chars: Size budget in characters.

    Returns:
        ``payload`` itself, or a new minimal dict from the final fallback.
    """
    if not max_chars or max_chars <= 0 or _payload_size(payload) <= max_chars:
        return payload

    warnings = list(payload.get("warnings") or [])
    warnings.append(
        "Local context result was truncated. Use mode='compact', lower limit, raise max_chars, or refine the query with more specific names, dates, paths, projects, or file types."
    )
    payload["warnings"] = warnings
    payload["truncated"] = True
    payload["usage_hint"] = _context_usage_hint(payload)

    payload["query"] = _compact_text(payload.get("query") or "", max_chars=240)
    payload["summary"] = _compact_text(payload.get("summary") or "", max_chars=240)
    for chunk in payload.get("chunks") or []:
        chunk["text"] = _compact_text(chunk.get("text") or "", max_chars=220)
    for asset in payload.get("assets") or []:
        asset["display_path"] = _compact_text(asset.get("display_path") or "", max_chars=240)
        asset["summary"] = _compact_text(asset.get("summary") or "", max_chars=160)
    if not payload.get("include_entities"):
        payload["entities"] = []
    if not payload.get("include_relations"):
        payload["relations"] = []

    # Results are dropped from the tail of each list.
    while _payload_size(payload) > max_chars and len(payload.get("chunks") or []) > 1:
        payload["chunks"].pop()
    while _payload_size(payload) > max_chars and len(payload.get("assets") or []) > 1:
        removed = payload["assets"].pop()
        removed_asset_id = removed.get("asset_id")
        payload["chunks"] = [
            chunk for chunk in payload.get("chunks") or []
            if chunk.get("asset_id") != removed_asset_id
        ]
        # ``or []`` (not a ``.get`` default) so an explicit None value for
        # "evidence_refs" cannot raise TypeError on slicing.
        payload["evidence_refs"] = (payload.get("evidence_refs") or [])[: len(payload.get("assets") or [])]

    if _payload_size(payload) > max_chars:
        payload["entities"] = []
        payload["relations"] = []

    if _payload_size(payload) > max_chars:
        payload["chunks"] = [
            {
                "chunk_id": chunk.get("chunk_id", ""),
                "asset_id": chunk.get("asset_id", ""),
                "text": _compact_text(chunk.get("text") or "", max_chars=120),
                "score": chunk.get("score", 0),
            }
            for chunk in (payload.get("chunks") or [])[:1]
        ]
        payload["assets"] = [
            {
                "asset_id": asset.get("asset_id", ""),
                "display_path": asset.get("display_path", ""),
                "file_type": asset.get("file_type", "file"),
                "score": asset.get("score", 0),
            }
            for asset in (payload.get("assets") or [])[:1]
        ]
        payload["evidence_refs"] = (payload.get("evidence_refs") or [])[:1]

    # NOTE(review): nesting was reconstructed from a flattened diff; this
    # sync is assumed to run unconditionally after the reduction steps.
    _sync_context_payload_refs(payload)
    if _payload_size(payload) > max_chars:
        return _minimal_truncated_context_payload(payload, max_chars=max_chars)
    return payload
|
|
2503
|
+
|
|
2504
|
+
|
|
2505
|
+
def _shape_context_payload(
    payload: dict,
    *,
    mode: str,
    max_chars: int,
    include_entities: bool,
    include_relations: bool,
    snippet_chars: int,
) -> dict:
    """Turn a raw context result into the response shape for ``mode``.

    Works on a shallow copy of ``payload`` and records the effective
    parameters (``mode``, ``limit``, ``include_*``, ``max_chars``) on it.

    * ``compact`` mode keeps one chunk per asset (the first encountered) and
      reduces chunks and assets to a small fixed set of fields.
    * Any other accepted mode keeps all chunk fields and only shortens each
      chunk's text.

    Entities and relations are emptied unless requested. The references are
    then synced to the remaining chunks and the result is passed through
    ``_truncate_context_payload``.

    Args:
        payload: Raw context result; its nested dicts are not mutated.
        mode: Requested mode; invalid values fall back to ``compact`` with a
            warning.
        max_chars: Size budget handed to the truncation step.
        include_entities: Keep ``entities`` in the result.
        include_relations: Keep ``relations`` in the result.
        snippet_chars: Per-chunk text cap. The effective floor is 80 in
            compact mode and 200 otherwise; a falsy value means 360 / 1200.

    Returns:
        The shaped (and possibly truncated) payload dict.
    """
    normalized_mode, mode_warnings = _normalize_context_mode(mode)
    shaped = dict(payload)
    shaped["warnings"] = [*(shaped.get("warnings") or []), *mode_warnings]
    shaped["mode"] = normalized_mode
    shaped["limit"] = len(shaped.get("assets") or [])
    shaped["include_entities"] = bool(include_entities)
    shaped["include_relations"] = bool(include_relations)
    shaped["truncated"] = False
    shaped["max_chars"] = int(max_chars or 0)

    if normalized_mode == "compact":
        seen_chunk_assets: set[str] = set()
        compact_chunks = []
        for chunk in shaped.get("chunks") or []:
            asset_id = str(chunk.get("asset_id") or "")
            if asset_id in seen_chunk_assets:
                continue
            seen_chunk_assets.add(asset_id)
            compact_chunks.append({
                "chunk_id": chunk.get("chunk_id", ""),
                "asset_id": asset_id,
                "text": _compact_text(chunk.get("text") or "", max_chars=max(80, int(snippet_chars or 360))),
                "score": chunk.get("score", 0),
            })
        shaped["chunks"] = compact_chunks
        shaped["assets"] = [
            {
                "asset_id": asset.get("asset_id", ""),
                "display_path": asset.get("display_path", ""),
                "file_type": asset.get("file_type", "file"),
                "score": asset.get("score", 0),
                "summary": _compact_text(asset.get("summary") or "", max_chars=180),
            }
            for asset in shaped.get("assets") or []
        ]
    else:
        shaped["chunks"] = [
            {
                **chunk,
                "text": _compact_text(chunk.get("text") or "", max_chars=max(200, int(snippet_chars or 1200))),
            }
            for chunk in shaped.get("chunks") or []
        ]
        # The truncation step rewrites asset fields and pops from the asset
        # list in place. Copy them here (as the chunks are copied above) so
        # the caller's ``payload`` is not modified through the shallow copy.
        if shaped.get("assets"):
            shaped["assets"] = [dict(asset) for asset in shaped["assets"]]

    # NOTE(review): nesting was reconstructed from a flattened diff; the
    # entity/relation filtering is assumed to apply to both modes.
    if not include_entities:
        shaped["entities"] = []
    if not include_relations:
        shaped["relations"] = []
    _sync_context_payload_refs(shaped)
    return _truncate_context_payload(shaped, max_chars=int(max_chars or 0))
|
|
2562
|
+
|
|
2563
|
+
|
|
2564
|
+
def render_context_evidence(result: dict, *, limit: int = 4, max_chars: int = DEFAULT_ROUTER_MAX_CHARS) -> str:
    """Render a context-query result as a plain-text evidence block.

    Args:
        result: Mapping read for the keys ``assets``, ``chunks``,
            ``evidence_refs`` and ``truncated``.
        limit: Upper bound on both the listed assets and the listed evidence
            refs; a falsy value falls back to 4 and the bound is never below 1.
        max_chars: Maximum length of the returned text. A falsy value disables
            the cap.

    Returns:
        An empty string when ``result`` carries no assets. Otherwise a
        newline-joined block that starts with an empty line and a header,
        followed by one bullet per asset (plus an optional excerpt line), an
        optional evidence-ref line and an optional compaction notice. Text
        longer than ``max_chars`` is cut and terminated with an ellipsis.
    """
    asset_rows = result.get("assets") or []
    if not asset_rows:
        return ""

    # Only the first chunk encountered for each asset id is used as excerpt.
    first_chunk: dict = {}
    for entry in result.get("chunks") or []:
        key = entry.get("asset_id")
        if key not in first_chunk:
            first_chunk[key] = entry

    cap = max(1, int(limit or 4))
    out = [
        "",
        "LOCAL CONTEXT EVIDENCE:",
        "Use this local evidence if it is relevant to the user's request. Do not mention files that are not supported by the evidence.",
    ]

    for asset in asset_rows[:cap]:
        path_label = str(asset.get("display_path") or "")
        score_value = asset.get("score")
        brief = _compact_text(asset.get("summary") or "", max_chars=160)
        bullet = f"- {path_label} ({asset.get('file_type', 'file')}, score={score_value})"
        if brief:
            bullet += f" — {brief}"
        out.append(bullet)

        matched = first_chunk.get(asset.get("asset_id"))
        if matched and matched.get("text"):
            out.append(f" excerpt: {_compact_text(matched.get('text') or '', max_chars=320)}")

    ref_list = result.get("evidence_refs") or []
    if ref_list:
        joined_refs = ", ".join(str(ref) for ref in ref_list[:cap])
        out.append(f"Evidence refs: {joined_refs}")

    if result.get("truncated"):
        out.append("Result was compacted. Refine the query or call nexo_local_context(mode='full', max_chars=...) if deeper inspection is needed.")

    text = "\n".join(out)
    if not max_chars or len(text) <= max_chars:
        return text
    # Reserve one character for the ellipsis so the result fits the cap.
    return text[: max(0, max_chars - 1)].rstrip() + "…"
|
|
2591
|
+
|
|
2592
|
+
|
|
2593
|
+
def _router_payload_size(payload: dict) -> int:
|
|
2594
|
+
return len(json.dumps(payload, ensure_ascii=False, separators=(",", ":")))
|
|
2595
|
+
|
|
2596
|
+
|
|
2597
|
+
def context_router(
    query: str,
    *,
    intent: str = "answer",
    limit: int = 4,
    current_context: str = "",
    max_chars: int = DEFAULT_ROUTER_MAX_CHARS,
) -> dict:
    """Run a compact context query and package it as a size-bounded payload.

    Args:
        query: Text forwarded to ``context_query``.
        intent: Intent label forwarded to ``context_query`` and echoed back.
        limit: Requested result count; the value sent to ``context_query`` is
            clamped to the range 1..8, while the renderer receives the value
            as given.
        current_context: Extra context text forwarded to ``context_query``.
        max_chars: Budget for the rendered text and for the serialized
            payload. A falsy value disables all size checks.

    Returns:
        A dict with the keys ``ok``, ``query``, ``intent``, ``should_inject``,
        ``rendered``, ``evidence_refs``, ``truncated`` and ``usage_hint``.
        When the serialized payload exceeds the budget it is shrunk in up to
        three steps: compact the rendered text, then keep one evidence ref and
        a fixed usage hint, then fall back to a minimal payload with a
        shortened query.
    """
    budget = int(max_chars or 0)
    # The underlying query gets a larger budget than the final output.
    query_budget = 0
    if budget > 0:
        query_budget = max(budget * 3, 4000)

    result = context_query(
        query,
        intent=intent,
        limit=max(1, min(int(limit or 4), 8)),
        evidence_required=False,
        current_context=current_context,
        mode="compact",
        max_chars=query_budget,
        include_entities=False,
        include_relations=False,
        snippet_chars=360,
    )
    rendered = render_context_evidence(result, limit=limit, max_chars=budget)
    refs = result.get("evidence_refs") or []

    def over_budget(candidate: dict) -> bool:
        # A budget of zero means "no limit", so nothing is ever over budget.
        return bool(budget) and _router_payload_size(candidate) > budget

    payload = {
        "ok": True,
        "query": query,
        "intent": intent,
        "should_inject": bool(refs),
        "rendered": rendered,
        "evidence_refs": refs,
        "truncated": bool(result.get("truncated")) or (budget != 0 and len(rendered) >= budget),
        "usage_hint": result.get("usage_hint"),
    }
    if not over_budget(payload):
        return payload

    # Step 1: shrink the rendered text to roughly half the budget.
    payload["rendered"] = _compact_text(rendered, max_chars=max(80, budget // 2))
    payload["truncated"] = True
    if not over_budget(payload):
        return payload

    # Step 2: keep a single evidence ref and swap in a fixed usage hint.
    payload["evidence_refs"] = (payload.get("evidence_refs") or [])[:1]
    payload["usage_hint"] = "nexo_local_context(query='...', mode='compact', limit=4, max_chars=12000)"
    if not over_budget(payload):
        return payload

    # Step 3: minimal payload with a shortened query and a terser hint.
    kept_refs = (payload.get("evidence_refs") or [])[:1]
    return {
        "ok": True,
        "query": _compact_text(query, max_chars=120),
        "intent": intent,
        "should_inject": bool(payload.get("evidence_refs")),
        "truncated": True,
        "rendered": _compact_text(rendered, max_chars=max(40, budget // 2)),
        "evidence_refs": kept_refs,
        "usage_hint": "nexo_local_context(mode='compact',limit=4,max_chars=12000)",
    }
|
|
2648
|
+
|
|
2649
|
+
|
|
2650
|
+
def context_query(
|
|
2651
|
+
query: str,
|
|
2652
|
+
*,
|
|
2653
|
+
intent: str = "answer",
|
|
2654
|
+
limit: int = 12,
|
|
2655
|
+
evidence_required: bool = True,
|
|
2656
|
+
current_context: str = "",
|
|
2657
|
+
mode: str = "full",
|
|
2658
|
+
max_chars: int = DEFAULT_CONTEXT_MAX_CHARS,
|
|
2659
|
+
include_entities: bool = True,
|
|
2660
|
+
include_relations: bool = True,
|
|
2661
|
+
snippet_chars: int = 1200,
|
|
2662
|
+
) -> dict:
|
|
2169
2663
|
conn = _conn()
|
|
2170
|
-
|
|
2171
|
-
|
|
2664
|
+
clean_query = str(query or "").strip()
|
|
2665
|
+
normalized_mode, mode_warnings = _normalize_context_mode(mode)
|
|
2666
|
+
context_tail = _compact_text(current_context or "", max_chars=1000)
|
|
2667
|
+
search_query = clean_query if not context_tail else f"{clean_query}\n{context_tail}"
|
|
2668
|
+
qvec = embeddings.embed_text(search_query)
|
|
2669
|
+
entities_payload, entity_boosts = _entity_matches_for_query(conn, search_query, limit=max(int(limit), 1))
|
|
2172
2670
|
rows = _context_candidate_rows(conn, list(entity_boosts.keys()), base_limit=5000)
|
|
2173
2671
|
scored = []
|
|
2174
2672
|
for row in rows:
|
|
2175
2673
|
if not is_queryable_path(str(row["path"] or ""), str(row["privacy_class"] or "")):
|
|
2176
2674
|
continue
|
|
2177
2675
|
vector = json_loads(row["vector_json"], [])
|
|
2178
|
-
text_score = _search_text_score(
|
|
2179
|
-
path_score = _search_text_score(
|
|
2180
|
-
summary_score = _search_text_score(
|
|
2676
|
+
text_score = _search_text_score(search_query, row["text"])
|
|
2677
|
+
path_score = _search_text_score(search_query, row["path"] or "")
|
|
2678
|
+
summary_score = _search_text_score(search_query, row["summary"] or "")
|
|
2181
2679
|
entity_score = entity_boosts.get(row["asset_id"], 0.0)
|
|
2182
2680
|
vector_score = embeddings.cosine(qvec, vector)
|
|
2183
2681
|
score = max(text_score, path_score, summary_score, vector_score)
|
|
@@ -2216,7 +2714,7 @@ def context_query(query: str, *, intent: str = "answer", limit: int = 12, eviden
|
|
|
2216
2714
|
evidence_refs.append(f"local_asset:{row['asset_id']}#chunk:{row['chunk_id']}")
|
|
2217
2715
|
relations_payload: list[dict] = []
|
|
2218
2716
|
relation_asset_ids = list(dict.fromkeys([*seen_assets, *entity_boosts.keys()]))[: int(limit)]
|
|
2219
|
-
if relation_asset_ids:
|
|
2717
|
+
if include_relations and relation_asset_ids:
|
|
2220
2718
|
asset_ids = relation_asset_ids
|
|
2221
2719
|
placeholders = ",".join("?" for _ in asset_ids)
|
|
2222
2720
|
relation_rows = conn.execute(
|
|
@@ -2230,19 +2728,19 @@ def context_query(query: str, *, intent: str = "answer", limit: int = 12, eviden
|
|
|
2230
2728
|
[*asset_ids, int(limit) * 3],
|
|
2231
2729
|
).fetchall()
|
|
2232
2730
|
relations_payload = [dict(row) for row in relation_rows]
|
|
2233
|
-
warnings =
|
|
2731
|
+
warnings = list(mode_warnings)
|
|
2234
2732
|
if evidence_required and not evidence_refs:
|
|
2235
2733
|
warnings.append("No local evidence found for this query.")
|
|
2236
2734
|
summary = ""
|
|
2237
2735
|
if assets:
|
|
2238
|
-
summary = f"Found {len(assets)} local asset(s) related to '{
|
|
2736
|
+
summary = f"Found {len(assets)} local asset(s) related to '{clean_query}'."
|
|
2239
2737
|
conn.execute(
|
|
2240
2738
|
"""
|
|
2241
2739
|
INSERT INTO local_context_queries(query_hash, intent, result_count, confidence, warnings_json, created_at)
|
|
2242
2740
|
VALUES (?, ?, ?, ?, ?, ?)
|
|
2243
2741
|
""",
|
|
2244
2742
|
(
|
|
2245
|
-
hashlib.sha256(
|
|
2743
|
+
hashlib.sha256(clean_query.encode("utf-8", errors="ignore")).hexdigest(),
|
|
2246
2744
|
intent,
|
|
2247
2745
|
len(assets),
|
|
2248
2746
|
0.75 if evidence_refs else 0.0,
|
|
@@ -2251,9 +2749,9 @@ def context_query(query: str, *, intent: str = "answer", limit: int = 12, eviden
|
|
|
2251
2749
|
),
|
|
2252
2750
|
)
|
|
2253
2751
|
conn.commit()
|
|
2254
|
-
|
|
2752
|
+
payload = {
|
|
2255
2753
|
"ok": True,
|
|
2256
|
-
"query":
|
|
2754
|
+
"query": clean_query,
|
|
2257
2755
|
"intent": intent,
|
|
2258
2756
|
"confidence": 0.75 if evidence_refs else 0.0,
|
|
2259
2757
|
"summary": summary,
|
|
@@ -2264,6 +2762,14 @@ def context_query(query: str, *, intent: str = "answer", limit: int = 12, eviden
|
|
|
2264
2762
|
"warnings": warnings,
|
|
2265
2763
|
"evidence_refs": evidence_refs,
|
|
2266
2764
|
}
|
|
2765
|
+
return _shape_context_payload(
|
|
2766
|
+
payload,
|
|
2767
|
+
mode=normalized_mode,
|
|
2768
|
+
max_chars=int(max_chars or 0),
|
|
2769
|
+
include_entities=bool(include_entities),
|
|
2770
|
+
include_relations=bool(include_relations),
|
|
2771
|
+
snippet_chars=int(snippet_chars or 1200),
|
|
2772
|
+
)
|
|
2267
2773
|
|
|
2268
2774
|
|
|
2269
2775
|
def get_asset(asset_id: str) -> dict:
|
|
@@ -2306,11 +2812,23 @@ def clear_index() -> dict:
|
|
|
2306
2812
|
"local_index_dirs",
|
|
2307
2813
|
"local_index_errors",
|
|
2308
2814
|
"local_index_jobs",
|
|
2815
|
+
"local_index_checkpoints",
|
|
2309
2816
|
"local_asset_versions",
|
|
2310
2817
|
"local_assets",
|
|
2311
2818
|
"local_context_queries",
|
|
2312
2819
|
):
|
|
2313
2820
|
conn.execute(f"DELETE FROM {table}")
|
|
2821
|
+
conn.execute("DELETE FROM local_index_state WHERE key LIKE 'root:%:initial_scan_complete'")
|
|
2822
|
+
conn.execute("DELETE FROM local_index_state WHERE key=?", (INITIAL_INDEX_COMPLETE_KEY,))
|
|
2823
|
+
conn.execute("DELETE FROM local_index_state WHERE key=?", (INITIAL_INDEX_STARTED_AT_KEY,))
|
|
2824
|
+
rows = conn.execute("SELECT id FROM local_index_roots WHERE status!='removed'").fetchall()
|
|
2825
|
+
for row in rows:
|
|
2826
|
+
_set_root_initial_scan_complete(conn, int(row["id"]), False)
|
|
2827
|
+
conn.execute(
|
|
2828
|
+
"UPDATE local_index_roots SET last_scan_at=NULL, status='active', updated_at=? WHERE status!='removed'",
|
|
2829
|
+
(now(),),
|
|
2830
|
+
)
|
|
2831
|
+
_set_initial_index_complete(conn, False)
|
|
2314
2832
|
conn.commit()
|
|
2315
2833
|
log_event("warn", "index_cleared", "Local memory index cleared")
|
|
2316
2834
|
return {"ok": True}
|