npm - nexo-brain - Versions diffs - 7.31.13 → 7.33.0 - Mend

nexo-brain 7.31.13 → 7.33.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21)

package/.claude-plugin/plugin.json +1 -1
package/README.md +1 -1
package/package.json +1 -1
package/src/auto_close_sessions.py +38 -0
package/src/cognitive/_search.py +13 -2
package/src/consolidation_prep.py +380 -0
package/src/crons/sync.py +14 -7
package/src/db/__init__.py +3 -0
package/src/db/_memory_v2.py +276 -0
package/src/db/_schema.py +134 -0
package/src/hooks/auto_capture.py +60 -18
package/src/learning_resolver.py +42 -0
package/src/local_context/api.py +237 -33
package/src/local_context/db.py +3 -2
package/src/memory_retrieval.py +96 -7
package/src/plugins/protocol.py +71 -24
package/src/pre_answer_router.py +116 -6
package/src/scripts/nexo-followup-runner.py +110 -8
package/src/scripts/nexo-postmortem-consolidator.py +44 -1
package/src/tools_sessions.py +1 -1
package/templates/core-prompts/postmortem-consolidator.md +29 -2

package/src/pre_answer_router.py CHANGED Viewed

@@ -565,6 +565,7 @@ _SOURCE_PLANS: dict[str, SourcePlan] = {
             SourceStep("workflows", timeout_ms=260),
             SourceStep("change_log", timeout_ms=260),
             SourceStep("causal_graph", timeout_ms=120, max_chars=900),
+            SourceStep("kg_neighbors", timeout_ms=120, max_chars=900),
             SourceStep("diary", timeout_ms=260),
         ),
         fallback=(
@@ -592,6 +593,7 @@ _SOURCE_PLANS: dict[str, SourcePlan] = {
             SourceStep("guard_context", timeout_ms=160),
             SourceStep("change_log", timeout_ms=300),
             SourceStep("workflows", timeout_ms=260),
+            SourceStep("kg_neighbors", timeout_ms=120, max_chars=900),
         ),
         fallback=(
             SourceStep("transcripts", phase="fallback", timeout_ms=650),
@@ -625,6 +627,7 @@ _SOURCE_PLANS: dict[str, SourcePlan] = {
             SourceStep("diary", timeout_ms=280),
             SourceStep("change_log", timeout_ms=300),
             SourceStep("transcripts", timeout_ms=700),
+            SourceStep("kg_neighbors", timeout_ms=120, max_chars=900),
         ),
         fallback=(SourceStep("continuity", phase="fallback", timeout_ms=400),),
     ),
@@ -654,6 +657,7 @@ _SOURCE_PLANS: dict[str, SourcePlan] = {
             SourceStep("project_atlas", timeout_ms=160),
             SourceStep("system_catalog", timeout_ms=420),
             SourceStep("diary", timeout_ms=280),
+            SourceStep("kg_neighbors", timeout_ms=120, max_chars=900),
         ),
         fallback=(
             SourceStep("transcripts", phase="fallback", timeout_ms=700),
@@ -667,6 +671,7 @@ _SOURCE_PLANS: dict[str, SourcePlan] = {
             SourceStep("system_catalog", timeout_ms=420),
             SourceStep("project_atlas", timeout_ms=160),
             SourceStep("runtime_docs", timeout_ms=300),
+            SourceStep("kg_neighbors", timeout_ms=120, max_chars=900),
         ),
         fallback=(
             SourceStep("source_grep", phase="fallback", timeout_ms=600),
@@ -1218,6 +1223,7 @@ def default_source_adapters() -> dict[str, SourceAdapter]:
         "workflows": _source_workflows,
         "change_log": _source_change_log,
         "causal_graph": _source_causal_graph,
+        "kg_neighbors": _source_kg_neighbors,
         "diary": _source_diary,
         "transcripts": _source_transcripts,
         "memory": _source_memory,
@@ -1710,6 +1716,77 @@ def _source_causal_graph(request: SourceRequest) -> SourceResult:
     )
+def _source_kg_neighbors(request: SourceRequest) -> SourceResult:
+    """KG neighbors + verified causal/ops edges for entities/files in the query.
+    task_close (7.32.0) writes causal/provenance edges but nothing READ the KG at
+    answer time, so the richer non-causal structure (touched/applies_to/belongs_to/
+    mentions/...) never reached an answer. This bounded, fail-open, 1-hop source
+    reads it. Hard-limited (<=3 refs, <=6 neighbors), index-backed, respects the
+    per-source timeout — it can never block the answer.
+    """
+    try:
+        import knowledge_graph as kg
+        import causal_graph
+    except Exception as exc:
+        return SourceResult(source="kg_neighbors", ok=False, skipped=True, aborted_reason="source_error", error=str(exc))
+    refs: list[str] = []
+    for raw in (request.files or "").split(","):
+        clean = raw.strip()
+        if clean:
+            refs.append(clean)
+    if not refs:
+        for match in _PATHISH_RE.findall(request.query or ""):
+            refs.append(match)
+        for match in re.findall(r"\b[\w.-]+(?:/[\w.@+-]+)+\b", request.query or ""):
+            refs.append(match)
+    refs = list(dict.fromkeys(refs))
+    if not refs:
+        return SourceResult(source="kg_neighbors")
+    rendered_parts: list[str] = []
+    evidence_refs: list[str] = []
+    result_count = 0
+    for ref in refs[:3]:
+        try:
+            node = None
+            for ntype, nref in (("file", ref), ("file", f"file:{ref}"), ("entity", ref), ("entity", f"entity:{ref}")):
+                node = kg.get_node(ntype, nref)
+                if node:
+                    break
+            if node:
+                for nb in kg.get_neighbors(int(node["id"]), active_only=True)[:6]:
+                    relation = str(nb.get("relation") or "")
+                    if relation.startswith("causal:") or relation.startswith("ops:"):
+                        continue  # surfaced via query_edges below (avoid duplicate)
+                    line = f"- {relation} ({nb.get('direction')}) {nb.get('node_type')}:{nb.get('node_ref')}"
+                    if nb.get("label"):
+                        line += f" ({nb.get('label')})"
+                    rendered_parts.append(line)
+                    evidence_refs.append(f"kg:node:{node['id']}:{nb.get('id')}")
+                    result_count += 1
+            cg = causal_graph.query_edges(
+                ref_type="file", ref=ref, project_key=request.area, include_historical=False, limit=4,
+            )
+            if cg.get("has_evidence"):
+                rendered_parts.append(causal_graph.render_query_result(cg, max_chars=request.max_chars))
+                result_count += len(cg.get("edges") or [])
+                for edge in cg.get("edges") or []:
+                    props = edge.get("properties_dict") or {}
+                    evidence_refs.extend(str(i) for i in props.get("evidence_refs") or [] if str(i).strip())
+        except Exception:
+            continue
+    if not rendered_parts:
+        return SourceResult(source="kg_neighbors")
+    return SourceResult(
+        source="kg_neighbors",
+        rendered=_clip("\n".join(rendered_parts), request.max_chars),
+        evidence_refs=list(dict.fromkeys(evidence_refs)),
+        result_count=result_count,
+    )
 def _source_diary(request: SourceRequest) -> SourceResult:
     from db import read_session_diary
@@ -1987,15 +2064,48 @@ def _source_filesystem(request: SourceRequest) -> SourceResult:
 def _source_guard_context(request: SourceRequest) -> SourceResult:
-    # G01 cannot call the MCP guard from this pure core. Return the file scope
-    # so G15 can wire real guard context without changing the source plan.
-    if not request.files:
+    # Real guard verification: surface the file-conditioned blocking learnings
+    # for the requested files. Previously this returned fake evidence
+    # (evidence_refs=["guard_context:requested"], result_count=1) WITHOUT any
+    # check, which silently satisfied the critical-tier required-source / gap
+    # gate for release/server/billing/legal areas. Never fake evidence again.
+    files = [f.strip() for f in (request.files or "").split(",") if f.strip()]
+    if not files:
         return SourceResult(source="guard_context")
+    try:
+        from db import get_db
+        from plugins.guard import _load_conditioned_learnings
+        conn = get_db()
+        conditioned = _load_conditioned_learnings(conn, files)
+    except Exception:
+        # Fail-closed: do NOT fake evidence; report that verification could not run.
+        return SourceResult(
+            source="guard_context",
+            rendered="Guard verification could not run for: " + ", ".join(files),
+            result_count=0,
+        )
+    refs: list[str] = []
+    lines: list[str] = []
+    for filepath, entries in conditioned.items():
+        for entry in entries:
+            refs.append(f"learning:{entry.get('id')}")
+            lines.append(
+                f"- [{entry.get('priority', 'medium')}] {entry.get('title', '')} (applies_to {filepath})"
+            )
+    if lines:
+        return SourceResult(
+            source="guard_context",
+            rendered="Blocking/file-conditioned learnings:\n" + "\n".join(lines),
+            evidence_refs=refs,
+            result_count=len(refs),
+        )
+    # Guard ran and found nothing blocking — a real verified-clean result.
     return SourceResult(
         source="guard_context",
-        rendered=f"Guard context requested for files: {request.files}",
-        evidence_refs=["guard_context:requested"],
-        result_count=1,
+        rendered="Guard verified: no blocking file-conditioned learnings for "
+        + ", ".join(files),
+        evidence_refs=["guard_context:verified_clean"],
+        result_count=0,
     )

package/src/scripts/nexo-followup-runner.py CHANGED Viewed

@@ -27,12 +27,15 @@ From the operator's point of view, these are all "pending items". Internally,
 followups and reminders remain distinct, but the runner focuses on executable work.
 """
+import atexit
 import json
 import os
 import re
+import signal
 import sqlite3
 import subprocess
 import sys
+import time
 from difflib import SequenceMatcher
 from email.utils import parsedate_to_datetime
 from datetime import datetime, date, timedelta
@@ -74,6 +77,7 @@ RESULTS_FILE = data_dir() / "followup-runner-results.json"
 CLI_TIMEOUT = AUTOMATION_SUBPROCESS_TIMEOUT
 LOCK_FILE = LOG_DIR / "followup-runner.lock"
+FOLLOWUP_LOCK_STALE_SECONDS = 7200  # reclaim a leftover lock FILE from a hard-killed prior run
 MAX_FOLLOWUPS_PER_RUN = 5  # Focus: Opus can actually execute 5, not 30
 COOLDOWN_DAYS = 3  # Don't retry waiting_user/stale_review/blocked for 3 days
 STALE_FOLLOWUP_TRIAGE_DAYS = 14
@@ -802,25 +806,123 @@ def render_history_preview(events) -> list[str]:
 # ── Lock ────────────────────────────────────────────────────────────────
+_LOCK_FH = None
+_LOCK_RELEASED = False
+def _register_lock_cleanup() -> None:
+    """Release the flock on normal exit and on SIGTERM/SIGINT (cron supervisor)."""
+    atexit.register(release_lock)
+    def _handler(signum, _frame):
+        release_lock()
+        raise SystemExit(128 + signum)
+    for _sig in (signal.SIGTERM, signal.SIGINT):
+        try:
+            signal.signal(_sig, _handler)
+        except Exception:
+            pass
 def acquire_lock() -> bool:
-    if LOCK_FILE.exists():
+    """Atomically acquire the single-runner lock via fcntl.flock.
+    Replaces the previous PID-file check-then-write, which had a TOCTOU race
+    that let two concurrent runners both acquire and both spend LLM budget.
+    flock is kernel-enforced and auto-released when the holder process dies; a
+    leftover lock FILE from a hard-killed prior holder is reclaimed via a
+    dead-PID / stale-mtime check before re-attempting the flock.
+    """
+    global _LOCK_FH, _LOCK_RELEASED
+    try:
+        LOCK_FILE.parent.mkdir(parents=True, exist_ok=True)
+    except Exception:
+        pass
+    # Pre-steal a stale lock FILE only if its owner is dead or the file is old.
+    try:
+        if LOCK_FILE.exists():
+            stale = False
+            try:
+                raw = LOCK_FILE.read_text().strip()
+                pid = int(raw.split(":", 1)[0])  # tolerate legacy bare-int format
+                try:
+                    os.kill(pid, 0)
+                except ProcessLookupError:
+                    stale = True
+                except PermissionError:
+                    stale = False
+            except (ValueError, OSError):
+                stale = True
+            try:
+                if time.time() - LOCK_FILE.stat().st_mtime > FOLLOWUP_LOCK_STALE_SECONDS:
+                    stale = True
+            except OSError:
+                pass
+            if stale:
+                try:
+                    LOCK_FILE.unlink()
+                except Exception:
+                    pass
+    except Exception:
+        pass
+    try:
+        fh = open(LOCK_FILE, "a+")
+    except Exception:
+        return False
+    try:
+        import fcntl
+        fcntl.flock(fh.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
+    except ImportError:
+        # Non-POSIX platform (Windows). Best-effort PID stamp and proceed.
         try:
-            pid = int(LOCK_FILE.read_text().strip())
-            os.kill(pid, 0)
-            return False
-        except (ProcessLookupError, ValueError):
+            fh.seek(0); fh.truncate(); fh.write(f"{os.getpid()}:{time.time()}\n"); fh.flush()
+        except Exception:
             pass
-        except PermissionError:
-            return False
-    LOCK_FILE.write_text(str(os.getpid()))
+        _LOCK_FH = fh
+        _LOCK_RELEASED = False
+        _register_lock_cleanup()
+        return True
+    except (OSError, BlockingIOError):
+        try:
+            fh.close()
+        except Exception:
+            pass
+        return False
+    # We hold the flock — stamp pid:timestamp so observers can see who.
+    try:
+        fh.seek(0); fh.truncate(); fh.write(f"{os.getpid()}:{time.time()}\n"); fh.flush()
+    except Exception:
+        pass
+    _LOCK_FH = fh
+    _LOCK_RELEASED = False
+    _register_lock_cleanup()
     return True
 def release_lock():
+    """Idempotent, ownership-aware release. Only acts if we actually hold the lock."""
+    global _LOCK_FH, _LOCK_RELEASED
+    if _LOCK_RELEASED or _LOCK_FH is None:
+        return
+    try:
+        import fcntl
+        fcntl.flock(_LOCK_FH.fileno(), fcntl.LOCK_UN)
+    except Exception:
+        pass
+    try:
+        _LOCK_FH.close()
+    except Exception:
+        pass
     try:
         LOCK_FILE.unlink(missing_ok=True)
     except Exception:
         pass
+    _LOCK_FH = None
+    _LOCK_RELEASED = True
 # ── Recent activity context ────────────────────────────────────────────

package/src/scripts/nexo-postmortem-consolidator.py CHANGED Viewed

@@ -39,6 +39,7 @@ sys.path.insert(0, str(NEXO_CODE))
 from agent_runner import AutomationBackendUnavailableError, run_automation_prompt
 from constants import AUTOMATION_SUBPROCESS_TIMEOUT
 from core_prompts import render_core_prompt
+import consolidation_prep
 import paths
 try:
@@ -186,12 +187,41 @@ def consolidate_with_cli(data: dict) -> bool:
     if len(diaries_json) > 12000:
         diaries_json = diaries_json[:12000] + "\n... (truncated)"
+    # Precompute ALL corpus-wide mechanical work here (read-only) so the LLM gets
+    # a tiny, bounded brief and never lists the full learnings corpus — which is
+    # what blew up the headless context and caused the exit-124 timeout. Guarded:
+    # any failure degrades to a safe empty brief; the prompt still forbids the LLM
+    # from scanning the corpus, and Stage 3 runs regardless.
+    try:
+        brief = consolidation_prep.build_consolidation_brief(diaries_with_critique)
+        log(
+            f"Stage 2: brief built — corpus_size={brief.get('corpus_size')}, "
+            f"shortlist={len(brief.get('shortlist', []))}, "
+            f"contradictions={len(brief.get('contradiction_pairs', []))}, "
+            f"truncated={brief.get('truncated')}"
+        )
+    except Exception as e:
+        log(f"Stage 2: brief builder failed ({e}); degrading to empty brief")
+        brief = {
+            "corpus_size": None,
+            "today_topics": [],
+            "shortlist": [],
+            "contradiction_pairs": [],
+            "supersession_stubs": [],
+            "stale_candidates": [],
+            "preference_key_dupes": [],
+            "truncated": False,
+            "_helper_error": str(e),
+        }
+    brief_json = json.dumps(brief, ensure_ascii=False)
     prompt = render_core_prompt(
         "postmortem-consolidator",
         date=data["date"],
         session_total=len(data["diaries"]),
         sessions_with_critique=len(diaries_with_critique),
         diaries_json=diaries_json,
+        brief_json=brief_json,
         existing_feedback_count=len(data["existing_feedbacks"]),
         existing_feedbacks_json=json.dumps(data["existing_feedbacks"][:30], ensure_ascii=False),
         recent_rules_json=json.dumps(data["history_summary"].get("recent_rules", []), ensure_ascii=False),
@@ -206,7 +236,20 @@ def consolidate_with_cli(data: dict) -> bool:
             caller="postmortem_consolidator",
             timeout=AUTOMATION_SUBPROCESS_TIMEOUT,
             output_format="text",
-            allowed_tools="Read,Write,Edit,Glob,Grep,Bash,mcp__nexo__*",
+            # Defense in depth: REMOVE the blanket mcp__nexo__* grant so the model
+            # structurally CANNOT call nexo_learning_list / nexo_learning_search and
+            # re-pull the whole corpus into context (the exit-124 root cause). It
+            # keeps only the tools the consolidation actually needs to write its
+            # decisions; all corpus analysis is already precomputed in brief_json.
+            allowed_tools=(
+                "Read,Write,Edit,Glob,Grep,Bash,"
+                "mcp__nexo__nexo_startup,"
+                "mcp__nexo__nexo_learning_add,"
+                "mcp__nexo__nexo_followup_create,"
+                "mcp__nexo__nexo_task_open,"
+                "mcp__nexo__nexo_task_close,"
+                "mcp__nexo__nexo_heartbeat"
+            ),
         )
         if result.returncode != 0:

package/src/tools_sessions.py CHANGED Viewed

@@ -317,7 +317,7 @@ def _session_portability_bundle(sid: str = "") -> dict:
         dict(row) for row in conn.execute(
             """SELECT run_id, goal_id, goal, workflow_kind, status, priority, next_action, current_step_key, updated_at
                FROM workflow_runs
-               WHERE session_id = ? AND status IN ('open', 'running', 'blocked', 'needs_approval')
+               WHERE session_id = ? AND status IN ('open', 'running', 'blocked', 'waiting_approval')
                ORDER BY updated_at DESC
                LIMIT 10""",
             (session_id,),

package/templates/core-prompts/postmortem-consolidator.md CHANGED Viewed

@@ -9,6 +9,25 @@ SESSIONS TODAY: [[session_total]] total, [[sessions_with_critique]] with self-cr
 DIARIES WITH SELF-CRITIQUE:
 [[diaries_json]]
+PRECOMPUTED CORPUS ANALYSIS (authoritative — do NOT re-scan):
+[[brief_json]]
+This brief was computed deterministically against the FULL learnings corpus
+before you started. It is the authoritative, already-finished mechanical pass:
+- `today_topics[*].has_existing_coverage` / `covering_ids` — which of today's
+  critiques are ALREADY covered by an active learning (so you don't duplicate).
+- `shortlist` — the ONLY existing learnings relevant to today's topics.
+- `contradiction_pairs` — every contradiction already detected (corpus-wide and
+  vs today's topics).
+- `supersession_stubs`, `stale_candidates`, `preference_key_dupes` — candidates
+  for replacement/cleanup.
+HARD RULE — DO NOT exhaust your context:
+You ALREADY have the relevant existing learnings in `shortlist` and all
+contradictions in `contradiction_pairs`. Do NOT call nexo_learning_list,
+nexo_learning_search, or read MEMORY.md — the corpus is large and that will
+exhaust your context and time out the run. Judge ONLY against this brief.
 EXISTING POSTMORTEM FEEDBACKS ([[existing_feedback_count]]):
 [[existing_feedbacks_json]]
@@ -23,12 +42,20 @@ INSTRUCTIONS:
    - A pattern appears in 2+ different sessions of the day (by meaning, not literal text)
    - Or the user explicitly corrected (user_signals contains correction)
    - And the self-critique contains a CONCRETE ACTION that prevents a future error
-   - And a similar feedback does NOT already exist in the existing ones
+   - And the matching today_topic has `has_existing_coverage` == false in the brief
+     (i.e. no learning in `shortlist`/`covering_ids` already covers it)
+2b. CONTRADICTIONS: for each entry in `contradiction_pairs` that you confirm is a
+   REAL contradiction, author the single canonical rule and call
+   nexo_learning_add(..., supersedes_id=existing_id) using that pair's
+   `existing_id`. The resolver finalizes the merge/supersede server-side. You
+   still decide whether the contradiction is real and how to phrase the rule.
 3. DO NOT promote if:
    - It's a negative response ("Nothing happened", "clean session")
    - It's generic without concrete action
-   - A feedback covering the same topic already exists
+   - The brief already shows coverage for that topic
+     (`has_existing_coverage` == true or it appears in `shortlist`)
 4. For each rule to promote, create the file with Write in [[memory_dir]]/:
    Name: feedback_postmortem_[descriptive_slug].md