nexo-brain 7.31.13 → 7.33.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -565,6 +565,7 @@ _SOURCE_PLANS: dict[str, SourcePlan] = {
565
565
  SourceStep("workflows", timeout_ms=260),
566
566
  SourceStep("change_log", timeout_ms=260),
567
567
  SourceStep("causal_graph", timeout_ms=120, max_chars=900),
568
+ SourceStep("kg_neighbors", timeout_ms=120, max_chars=900),
568
569
  SourceStep("diary", timeout_ms=260),
569
570
  ),
570
571
  fallback=(
@@ -592,6 +593,7 @@ _SOURCE_PLANS: dict[str, SourcePlan] = {
592
593
  SourceStep("guard_context", timeout_ms=160),
593
594
  SourceStep("change_log", timeout_ms=300),
594
595
  SourceStep("workflows", timeout_ms=260),
596
+ SourceStep("kg_neighbors", timeout_ms=120, max_chars=900),
595
597
  ),
596
598
  fallback=(
597
599
  SourceStep("transcripts", phase="fallback", timeout_ms=650),
@@ -625,6 +627,7 @@ _SOURCE_PLANS: dict[str, SourcePlan] = {
625
627
  SourceStep("diary", timeout_ms=280),
626
628
  SourceStep("change_log", timeout_ms=300),
627
629
  SourceStep("transcripts", timeout_ms=700),
630
+ SourceStep("kg_neighbors", timeout_ms=120, max_chars=900),
628
631
  ),
629
632
  fallback=(SourceStep("continuity", phase="fallback", timeout_ms=400),),
630
633
  ),
@@ -654,6 +657,7 @@ _SOURCE_PLANS: dict[str, SourcePlan] = {
654
657
  SourceStep("project_atlas", timeout_ms=160),
655
658
  SourceStep("system_catalog", timeout_ms=420),
656
659
  SourceStep("diary", timeout_ms=280),
660
+ SourceStep("kg_neighbors", timeout_ms=120, max_chars=900),
657
661
  ),
658
662
  fallback=(
659
663
  SourceStep("transcripts", phase="fallback", timeout_ms=700),
@@ -667,6 +671,7 @@ _SOURCE_PLANS: dict[str, SourcePlan] = {
667
671
  SourceStep("system_catalog", timeout_ms=420),
668
672
  SourceStep("project_atlas", timeout_ms=160),
669
673
  SourceStep("runtime_docs", timeout_ms=300),
674
+ SourceStep("kg_neighbors", timeout_ms=120, max_chars=900),
670
675
  ),
671
676
  fallback=(
672
677
  SourceStep("source_grep", phase="fallback", timeout_ms=600),
@@ -1218,6 +1223,7 @@ def default_source_adapters() -> dict[str, SourceAdapter]:
1218
1223
  "workflows": _source_workflows,
1219
1224
  "change_log": _source_change_log,
1220
1225
  "causal_graph": _source_causal_graph,
1226
+ "kg_neighbors": _source_kg_neighbors,
1221
1227
  "diary": _source_diary,
1222
1228
  "transcripts": _source_transcripts,
1223
1229
  "memory": _source_memory,
@@ -1710,6 +1716,77 @@ def _source_causal_graph(request: SourceRequest) -> SourceResult:
1710
1716
  )
1711
1717
 
1712
1718
 
1719
def _source_kg_neighbors(request: SourceRequest) -> SourceResult:
    """Surface 1-hop knowledge-graph neighbors and causal edges for referenced files/entities.

    task_close writes causal/provenance edges, but the non-causal structure
    (touched/applies_to/belongs_to/mentions/...) was never read at answer
    time. This source reads it.

    References come from the comma-separated ``request.files``; when that is
    empty, path-like tokens are extracted from ``request.query``. At most 3
    distinct references are looked up and at most 6 non-causal neighbors are
    rendered per reference. ``causal:``/``ops:`` relations are left out of the
    neighbor listing because ``causal_graph.query_edges`` reports them.

    Fail-open: an import failure yields a skipped ``source_error`` result, and
    a failing lookup never discards what the other lookups produced.

    Args:
        request: Source request; reads ``files``, ``query``, ``area`` and
            ``max_chars``.

    Returns:
        A ``SourceResult`` for ``kg_neighbors``: empty when nothing was
        referenced or found, otherwise the clipped rendering, de-duplicated
        evidence refs and the number of neighbors/edges found.
    """
    try:
        import knowledge_graph as kg
        import causal_graph
    except Exception as exc:
        return SourceResult(source="kg_neighbors", ok=False, skipped=True, aborted_reason="source_error", error=str(exc))

    max_refs = 3
    max_neighbors = 6

    refs: list[str] = [part.strip() for part in (request.files or "").split(",") if part.strip()]
    if not refs:
        # No explicit file scope: fall back to path-like tokens in the query.
        query = request.query or ""
        refs.extend(_PATHISH_RE.findall(query))
        refs.extend(re.findall(r"\b[\w.-]+(?:/[\w.@+-]+)+\b", query))
    refs = list(dict.fromkeys(refs))  # de-duplicate, keep first-seen order
    if not refs:
        return SourceResult(source="kg_neighbors")

    rendered_parts: list[str] = []
    evidence_refs: list[str] = []
    result_count = 0
    for ref in refs[:max_refs]:
        # Knowledge-graph neighbors. Kept in its own try so a KG failure does
        # not also suppress the causal edges for the same reference.
        try:
            node = None
            for ntype, nref in (("file", ref), ("file", f"file:{ref}"), ("entity", ref), ("entity", f"entity:{ref}")):
                node = kg.get_node(ntype, nref)
                if node:
                    break
            if node:
                shown = 0
                for nb in kg.get_neighbors(int(node["id"]), active_only=True):
                    relation = str(nb.get("relation") or "")
                    if relation.startswith(("causal:", "ops:")):
                        continue  # surfaced via query_edges below (avoid duplicate)
                    line = f"- {relation} ({nb.get('direction')}) {nb.get('node_type')}:{nb.get('node_ref')}"
                    if nb.get("label"):
                        line += f" ({nb.get('label')})"
                    rendered_parts.append(line)
                    evidence_refs.append(f"kg:node:{node['id']}:{nb.get('id')}")
                    result_count += 1
                    shown += 1
                    # Cap AFTER filtering so causal/ops rows cannot crowd out
                    # the non-causal neighbors this source exists to surface.
                    if shown >= max_neighbors:
                        break
        except Exception:
            pass  # fail-open: keep whatever was rendered so far

        # Verified causal/ops edges for the same reference.
        try:
            cg = causal_graph.query_edges(
                ref_type="file", ref=ref, project_key=request.area, include_historical=False, limit=4,
            )
            if cg.get("has_evidence"):
                edges = cg.get("edges") or []
                rendered_parts.append(causal_graph.render_query_result(cg, max_chars=request.max_chars))
                result_count += len(edges)
                for edge in edges:
                    props = edge.get("properties_dict") or {}
                    evidence_refs.extend(str(i) for i in props.get("evidence_refs") or [] if str(i).strip())
        except Exception:
            continue  # fail-open: move on to the next reference

    if not rendered_parts:
        return SourceResult(source="kg_neighbors")
    return SourceResult(
        source="kg_neighbors",
        rendered=_clip("\n".join(rendered_parts), request.max_chars),
        evidence_refs=list(dict.fromkeys(evidence_refs)),
        result_count=result_count,
    )
1788
+
1789
+
1713
1790
  def _source_diary(request: SourceRequest) -> SourceResult:
1714
1791
  from db import read_session_diary
1715
1792
 
@@ -1987,15 +2064,48 @@ def _source_filesystem(request: SourceRequest) -> SourceResult:
1987
2064
 
1988
2065
 
1989
2066
def _source_guard_context(request: SourceRequest) -> SourceResult:
    """Report the file-conditioned guard learnings for the requested files.

    Real guard verification: the learnings are loaded through
    ``plugins.guard._load_conditioned_learnings`` for the comma-separated
    ``request.files``. This source must never fabricate evidence, because its
    output feeds the required-source / gap gate for critical areas.

    Args:
        request: Source request; only ``files`` is read.

    Returns:
        - an empty result when no files were requested;
        - a result with ``result_count=0`` and no evidence refs when the
          check could not run (fail-closed);
        - the matching learnings, with one ``learning:<id>`` evidence ref per
          distinct learning, when any apply;
        - otherwise a verified-clean result with ``result_count=0``.
    """
    files = [name.strip() for name in (request.files or "").split(",") if name.strip()]
    if not files:
        return SourceResult(source="guard_context")
    try:
        from db import get_db
        from plugins.guard import _load_conditioned_learnings
        conn = get_db()
        conditioned = _load_conditioned_learnings(conn, files)
    except Exception:
        # Fail-closed: do NOT fake evidence; report that verification could not run.
        return SourceResult(
            source="guard_context",
            rendered="Guard verification could not run for: " + ", ".join(files),
            result_count=0,
        )
    refs: list[str] = []
    lines: list[str] = []
    for filepath, entries in conditioned.items():
        for entry in entries:
            refs.append(f"learning:{entry.get('id')}")
            lines.append(
                f"- [{entry.get('priority', 'medium')}] {entry.get('title', '')} (applies_to {filepath})"
            )
    if lines:
        return SourceResult(
            source="guard_context",
            rendered="Blocking/file-conditioned learnings:\n" + "\n".join(lines),
            # A learning that applies to several requested files is listed once
            # per file above, but is cited only once as evidence.
            evidence_refs=list(dict.fromkeys(refs)),
            result_count=len(lines),
        )
    # Guard ran and found nothing blocking — a real verified-clean result.
    return SourceResult(
        source="guard_context",
        rendered="Guard verified: no blocking file-conditioned learnings for "
        + ", ".join(files),
        evidence_refs=["guard_context:verified_clean"],
        result_count=0,
    )
2000
2110
 
2001
2111
 
@@ -27,12 +27,15 @@ From the operator's point of view, these are all "pending items". Internally,
27
27
  followups and reminders remain distinct, but the runner focuses on executable work.
28
28
  """
29
29
 
30
+ import atexit
30
31
  import json
31
32
  import os
32
33
  import re
34
+ import signal
33
35
  import sqlite3
34
36
  import subprocess
35
37
  import sys
38
+ import time
36
39
  from difflib import SequenceMatcher
37
40
  from email.utils import parsedate_to_datetime
38
41
  from datetime import datetime, date, timedelta
@@ -74,6 +77,7 @@ RESULTS_FILE = data_dir() / "followup-runner-results.json"
74
77
 
75
78
  CLI_TIMEOUT = AUTOMATION_SUBPROCESS_TIMEOUT
76
79
  LOCK_FILE = LOG_DIR / "followup-runner.lock"
80
+ FOLLOWUP_LOCK_STALE_SECONDS = 7200 # reclaim a leftover lock FILE from a hard-killed prior run
77
81
  MAX_FOLLOWUPS_PER_RUN = 5 # Focus: Opus can actually execute 5, not 30
78
82
  COOLDOWN_DAYS = 3 # Don't retry waiting_user/stale_review/blocked for 3 days
79
83
  STALE_FOLLOWUP_TRIAGE_DAYS = 14
@@ -802,25 +806,123 @@ def render_history_preview(events) -> list[str]:
802
806
 
803
807
 
804
808
  # ── Lock ────────────────────────────────────────────────────────────────
809
+ _LOCK_FH = None
810
+ _LOCK_RELEASED = False
811
+
812
+
813
def _register_lock_cleanup() -> None:
    """Arrange for ``release_lock`` to run at interpreter exit and on SIGTERM/SIGINT."""

    def _on_signal(signum, _frame):
        # Drop the lock first, then exit with the conventional 128+signal status.
        release_lock()
        raise SystemExit(128 + signum)

    atexit.register(release_lock)

    for signo in (signal.SIGTERM, signal.SIGINT):
        try:
            signal.signal(signo, _on_signal)
        except Exception:
            # Best-effort: handler installation can fail (e.g. when not called
            # from the main thread); the atexit hook still applies.
            pass
826
+
827
+
805
828
def acquire_lock() -> bool:
    """Atomically acquire the single-runner lock via ``fcntl.flock``.

    Replaces the old PID-file check-then-write, whose TOCTOU race let two
    concurrent runners both acquire and both spend LLM budget. flock is
    kernel-enforced and released automatically when the holder dies, so a
    leftover lock FILE from a killed run is reclaimed simply by locking it and
    overwriting its stamp.

    The lock file is never unlinked before locking. Removing a path whose
    inode is still flocked by a live runner (for example a run longer than
    the stale threshold) would let this process lock a fresh inode and run
    concurrently with it.

    Returns:
        True when this process now holds the lock (on platforms without
        ``fcntl`` this is a best-effort PID stamp with no exclusion); False
        when another runner holds it or the lock file cannot be opened.
    """
    global _LOCK_FH, _LOCK_RELEASED
    try:
        LOCK_FILE.parent.mkdir(parents=True, exist_ok=True)
    except OSError:
        pass  # open() below decides whether the lock is usable

    try:
        import fcntl
    except ImportError:
        fcntl = None  # Non-POSIX platform (Windows): stamp only, no flock.

    fh = None
    # A few attempts: the path may be unlinked by a releasing runner between
    # our open() and flock(), leaving us with a lock on an orphaned inode.
    for _attempt in range(3):
        try:
            candidate = open(LOCK_FILE, "a+")
        except OSError:
            return False
        if fcntl is None:
            fh = candidate
            break
        try:
            fcntl.flock(candidate.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
        except OSError:
            # Includes BlockingIOError: another runner holds the lock.
            try:
                candidate.close()
            except OSError:
                pass
            return False
        try:
            path_is_ours = os.path.samestat(os.fstat(candidate.fileno()), os.stat(LOCK_FILE))
        except OSError:
            path_is_ours = False
        if path_is_ours:
            fh = candidate
            break
        # We locked an inode the path no longer names; drop it and retry.
        try:
            candidate.close()
        except OSError:
            pass
    if fh is None:
        return False

    # We hold the lock — stamp pid:timestamp so observers can see who.
    try:
        fh.seek(0)
        fh.truncate()
        fh.write(f"{os.getpid()}:{time.time()}\n")
        fh.flush()
    except OSError:
        pass
    _LOCK_FH = fh
    _LOCK_RELEASED = False
    _register_lock_cleanup()
    return True
817
904
 
818
905
 
819
906
def release_lock():
    """Idempotent, ownership-aware release. Only acts if we actually hold the lock.

    On POSIX the lock file is removed only when the path still names the
    inode we locked, and it is removed BEFORE the flock is dropped. Unlinking
    after unlocking could delete a file that another runner has just locked,
    which would let a third runner lock a fresh file and run concurrently.
    On platforms without ``fcntl`` the file is closed first and then removed
    best-effort.
    """
    global _LOCK_FH, _LOCK_RELEASED
    if _LOCK_RELEASED or _LOCK_FH is None:
        return
    fh = _LOCK_FH

    try:
        import fcntl
    except ImportError:
        fcntl = None

    if fcntl is not None:
        try:
            path_is_ours = os.path.samestat(os.fstat(fh.fileno()), os.stat(LOCK_FILE))
        except (OSError, ValueError):
            # Path is gone/unreadable, or the handle is already closed.
            path_is_ours = False
        if path_is_ours:
            try:
                LOCK_FILE.unlink(missing_ok=True)
            except Exception:
                pass
        try:
            fcntl.flock(fh.fileno(), fcntl.LOCK_UN)
        except Exception:
            pass
        try:
            fh.close()
        except Exception:
            pass
    else:
        # No flock here; an open file cannot be removed on Windows, so close first.
        try:
            fh.close()
        except Exception:
            pass
        try:
            LOCK_FILE.unlink(missing_ok=True)
        except Exception:
            pass

    _LOCK_FH = None
    _LOCK_RELEASED = True
824
926
 
825
927
 
826
928
  # ── Recent activity context ────────────────────────────────────────────
@@ -39,6 +39,7 @@ sys.path.insert(0, str(NEXO_CODE))
39
39
  from agent_runner import AutomationBackendUnavailableError, run_automation_prompt
40
40
  from constants import AUTOMATION_SUBPROCESS_TIMEOUT
41
41
  from core_prompts import render_core_prompt
42
+ import consolidation_prep
42
43
  import paths
43
44
 
44
45
  try:
@@ -186,12 +187,41 @@ def consolidate_with_cli(data: dict) -> bool:
186
187
  if len(diaries_json) > 12000:
187
188
  diaries_json = diaries_json[:12000] + "\n... (truncated)"
188
189
 
190
+ # Precompute ALL corpus-wide mechanical work here (read-only) so the LLM gets
191
+ # a tiny, bounded brief and never lists the full learnings corpus — which is
192
+ # what blew up the headless context and caused the exit-124 timeout. Guarded:
193
+ # any failure degrades to a safe empty brief; the prompt still forbids the LLM
194
+ # from scanning the corpus, and Stage 3 runs regardless.
195
+ try:
196
+ brief = consolidation_prep.build_consolidation_brief(diaries_with_critique)
197
+ log(
198
+ f"Stage 2: brief built — corpus_size={brief.get('corpus_size')}, "
199
+ f"shortlist={len(brief.get('shortlist', []))}, "
200
+ f"contradictions={len(brief.get('contradiction_pairs', []))}, "
201
+ f"truncated={brief.get('truncated')}"
202
+ )
203
+ except Exception as e:
204
+ log(f"Stage 2: brief builder failed ({e}); degrading to empty brief")
205
+ brief = {
206
+ "corpus_size": None,
207
+ "today_topics": [],
208
+ "shortlist": [],
209
+ "contradiction_pairs": [],
210
+ "supersession_stubs": [],
211
+ "stale_candidates": [],
212
+ "preference_key_dupes": [],
213
+ "truncated": False,
214
+ "_helper_error": str(e),
215
+ }
216
+ brief_json = json.dumps(brief, ensure_ascii=False)
217
+
189
218
  prompt = render_core_prompt(
190
219
  "postmortem-consolidator",
191
220
  date=data["date"],
192
221
  session_total=len(data["diaries"]),
193
222
  sessions_with_critique=len(diaries_with_critique),
194
223
  diaries_json=diaries_json,
224
+ brief_json=brief_json,
195
225
  existing_feedback_count=len(data["existing_feedbacks"]),
196
226
  existing_feedbacks_json=json.dumps(data["existing_feedbacks"][:30], ensure_ascii=False),
197
227
  recent_rules_json=json.dumps(data["history_summary"].get("recent_rules", []), ensure_ascii=False),
@@ -206,7 +236,20 @@ def consolidate_with_cli(data: dict) -> bool:
206
236
  caller="postmortem_consolidator",
207
237
  timeout=AUTOMATION_SUBPROCESS_TIMEOUT,
208
238
  output_format="text",
209
- allowed_tools="Read,Write,Edit,Glob,Grep,Bash,mcp__nexo__*",
239
+ # Defense in depth: REMOVE the blanket mcp__nexo__* grant so the model
240
+ # structurally CANNOT call nexo_learning_list / nexo_learning_search and
241
+ # re-pull the whole corpus into context (the exit-124 root cause). It
242
+ # keeps only the tools the consolidation actually needs to write its
243
+ # decisions; all corpus analysis is already precomputed in brief_json.
244
+ allowed_tools=(
245
+ "Read,Write,Edit,Glob,Grep,Bash,"
246
+ "mcp__nexo__nexo_startup,"
247
+ "mcp__nexo__nexo_learning_add,"
248
+ "mcp__nexo__nexo_followup_create,"
249
+ "mcp__nexo__nexo_task_open,"
250
+ "mcp__nexo__nexo_task_close,"
251
+ "mcp__nexo__nexo_heartbeat"
252
+ ),
210
253
  )
211
254
 
212
255
  if result.returncode != 0:
@@ -317,7 +317,7 @@ def _session_portability_bundle(sid: str = "") -> dict:
317
317
  dict(row) for row in conn.execute(
318
318
  """SELECT run_id, goal_id, goal, workflow_kind, status, priority, next_action, current_step_key, updated_at
319
319
  FROM workflow_runs
320
- WHERE session_id = ? AND status IN ('open', 'running', 'blocked', 'needs_approval')
320
+ WHERE session_id = ? AND status IN ('open', 'running', 'blocked', 'waiting_approval')
321
321
  ORDER BY updated_at DESC
322
322
  LIMIT 10""",
323
323
  (session_id,),
@@ -9,6 +9,25 @@ SESSIONS TODAY: [[session_total]] total, [[sessions_with_critique]] with self-cr
9
9
  DIARIES WITH SELF-CRITIQUE:
10
10
  [[diaries_json]]
11
11
 
12
+ PRECOMPUTED CORPUS ANALYSIS (authoritative — do NOT re-scan):
13
+ [[brief_json]]
14
+
15
+ This brief was computed deterministically against the FULL learnings corpus
16
+ before you started. It is the authoritative, already-finished mechanical pass:
17
+ - `today_topics[*].has_existing_coverage` / `covering_ids` — which of today's
18
+ critiques are ALREADY covered by an active learning (so you don't duplicate).
19
+ - `shortlist` — the ONLY existing learnings relevant to today's topics.
20
+ - `contradiction_pairs` — every contradiction already detected (corpus-wide and
21
+ vs today's topics).
22
+ - `supersession_stubs`, `stale_candidates`, `preference_key_dupes` — candidates
23
+ for replacement/cleanup.
24
+
25
+ HARD RULE — DO NOT exhaust your context:
26
+ You ALREADY have the relevant existing learnings in `shortlist` and all
27
+ contradictions in `contradiction_pairs`. Do NOT call nexo_learning_list,
28
+ nexo_learning_search, or read MEMORY.md — the corpus is large and that will
29
+ exhaust your context and time out the run. Judge ONLY against this brief.
30
+
12
31
  EXISTING POSTMORTEM FEEDBACKS ([[existing_feedback_count]]):
13
32
  [[existing_feedbacks_json]]
14
33
 
@@ -23,12 +42,20 @@ INSTRUCTIONS:
23
42
  - A pattern appears in 2+ different sessions of the day (by meaning, not literal text)
24
43
  - Or the user explicitly corrected (user_signals contains correction)
25
44
  - And the self-critique contains a CONCRETE ACTION that prevents a future error
26
- - And a similar feedback does NOT already exist in the existing ones
45
+ - And the matching today_topic has `has_existing_coverage` == false in the brief
46
+ (i.e. no learning in `shortlist`/`covering_ids` already covers it)
47
+
48
+ 2b. CONTRADICTIONS: for each entry in `contradiction_pairs` that you confirm is a
49
+ REAL contradiction, author the single canonical rule and call
50
+ nexo_learning_add(..., supersedes_id=existing_id) using that pair's
51
+ `existing_id`. The resolver finalizes the merge/supersede server-side. You
52
+ still decide whether the contradiction is real and how to phrase the rule.
27
53
 
28
54
  3. DO NOT promote if:
29
55
  - It's a negative response ("Nothing happened", "clean session")
30
56
  - It's generic without concrete action
31
- - A feedback covering the same topic already exists
57
+ - The brief already shows coverage for that topic
58
+ (`has_existing_coverage` == true or it appears in `shortlist`)
32
59
 
33
60
  4. For each rule to promote, create the file with Write in [[memory_dir]]/:
34
61
  Name: feedback_postmortem_[descriptive_slug].md