nexo-brain 5.3.11 → 5.3.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -11,13 +11,14 @@ if str(SRC_DIR) not in sys.path:
11
11
  sys.path.insert(0, str(SRC_DIR))
12
12
 
13
13
 
14
- def handle_doctor(tier: str = "boot", fix: bool = False, output: str = "text") -> str:
14
+ def handle_doctor(tier: str = "boot", fix: bool = False, output: str = "text", plane: str = "") -> str:
15
15
  """Unified diagnostic report for boot/runtime/deep health.
16
16
 
17
17
  Args:
18
18
  tier: Diagnostic tier — boot, runtime, deep, or all (default: boot)
19
19
  fix: Apply deterministic fixes (default: False)
20
20
  output: Output format — text or json (default: text)
21
+ plane: Diagnostic plane — runtime_personal, installation_live, or database_real
21
22
  """
22
23
  from doctor.orchestrator import run_doctor
23
24
  from doctor.formatters import format_report
@@ -27,7 +28,7 @@ def handle_doctor(tier: str = "boot", fix: bool = False, output: str = "text") -
27
28
  if output not in ("text", "json"):
28
29
  return f"Invalid output '{output}'. Use: text, json"
29
30
 
30
- report = run_doctor(tier=tier, fix=fix)
31
+ report = run_doctor(tier=tier, fix=fix, plane=plane)
31
32
  return format_report(report, fmt=output)
32
33
 
33
34
 
@@ -20,6 +20,9 @@ from script_registry import (
20
20
  get_declared_schedule,
21
21
  )
22
22
 
23
+ LEGACY_BACKUP_CRON_ID = "backup"
24
+ LEGACY_BACKUP_SUMMARY = "legacy backup file evidence"
25
+
23
26
 
24
27
  def handle_schedule_status(hours: int = 24, cron_id: str = '') -> str:
25
28
  """Show cron execution status — what ran, what failed, durations.
@@ -30,6 +33,8 @@ def handle_schedule_status(hours: int = 24, cron_id: str = '') -> str:
30
33
  """
31
34
  if cron_id:
32
35
  runs = cron_runs_recent(hours, cron_id)
36
+ if cron_id == LEGACY_BACKUP_CRON_ID:
37
+ runs = _select_backup_runs(runs, hours)
33
38
  if not runs:
34
39
  return f"No runs for '{cron_id}' in the last {hours}h."
35
40
  schedule_meta = get_personal_script_schedule(cron_id) or {}
@@ -53,7 +58,7 @@ def handle_schedule_status(hours: int = 24, cron_id: str = '') -> str:
53
58
  return "\n".join(lines)
54
59
 
55
60
  # Summary view — one line per cron
56
- summary = cron_runs_summary(hours)
61
+ summary = _merge_legacy_summaries(cron_runs_summary(hours), hours)
57
62
  if not summary:
58
63
  return f"No cron executions recorded in the last {hours}h."
59
64
 
@@ -82,6 +87,119 @@ def handle_schedule_status(hours: int = 24, cron_id: str = '') -> str:
82
87
  return "\n".join(lines)
83
88
 
84
89
 
90
def _select_backup_runs(db_runs: list[dict], hours: int) -> list[dict]:
    """Choose which run list to display for the legacy backup cron.

    Args:
        db_runs: Runs already fetched from the database for the backup cron.
        hours: Look-back window, forwarded to the file-based run scan.

    Returns:
        The file-derived runs when ``_prefer_legacy_over_db`` says they should
        win; otherwise ``db_runs`` exactly as it was passed in.
    """
    from_files = _legacy_backup_runs(hours)
    return from_files if _prefer_legacy_over_db(db_runs, from_files) else db_runs
95
+
96
+
97
def _merge_legacy_summaries(summary_rows: list[dict], hours: int) -> list[dict]:
    """Fold the file-derived backup summary into the per-cron summary rows.

    Args:
        summary_rows: Per-cron summary rows (may be empty or ``None``).
        hours: Look-back window used to build the file-derived summary.

    Returns:
        Shallow copies of the input rows. When a file-derived backup summary
        exists, the result is keyed by ``cron_id`` (later duplicates win),
        optionally has its backup entry replaced, and is sorted by
        ``last_run`` descending with missing values treated as ``""``.
    """
    copies = [dict(entry) for entry in summary_rows] if summary_rows else []
    file_summary = _legacy_backup_summary(hours)
    if not file_summary:
        # Nothing to merge: hand back the copies in their incoming order.
        return copies

    indexed: dict = {}
    for entry in copies:
        indexed[entry["cron_id"]] = entry

    if _prefer_legacy_summary(indexed.get(LEGACY_BACKUP_CRON_ID), file_summary):
        indexed[LEGACY_BACKUP_CRON_ID] = file_summary

    merged = list(indexed.values())
    merged.sort(key=lambda entry: entry.get("last_run") or "", reverse=True)
    return merged
111
+
112
+
113
def _prefer_legacy_over_db(db_runs: list[dict], legacy_runs: list[dict]) -> bool:
    """Decide whether file-derived runs should replace the database runs.

    Only the first element of each list is inspected, so both lists are
    expected to be ordered newest-first.

    Returns:
        ``False`` when there are no file-derived runs; ``True`` when there are
        no database runs. Otherwise the newest ``started_at`` of each side is
        parsed with ``_parse_db_timestamp``: an unparseable file-side value
        yields ``False``, an unparseable database-side value yields ``True``,
        and in the remaining case the file side wins only if strictly newer.
    """
    if not legacy_runs:
        return False
    if not db_runs:
        return True

    db_newest = _parse_db_timestamp(db_runs[0].get("started_at"))
    file_newest = _parse_db_timestamp(legacy_runs[0].get("started_at"))
    if file_newest is None:
        return False
    return db_newest is None or file_newest > db_newest
125
+
126
+
127
def _prefer_legacy_summary(existing: dict | None, legacy: dict) -> bool:
    """Decide whether the file-derived summary should replace the stored one.

    Returns:
        ``False`` when ``legacy`` is empty; ``True`` when there is no existing
        summary. Otherwise both ``last_run`` values are parsed with
        ``_parse_db_timestamp``: an unparseable file-side value yields
        ``False``, an unparseable existing value yields ``True``, and in the
        remaining case the file side wins only if strictly newer.
    """
    if not legacy:
        return False
    if not existing:
        return True

    stored_newest = _parse_db_timestamp(existing.get("last_run"))
    file_newest = _parse_db_timestamp(legacy.get("last_run"))
    if file_newest is None:
        return False
    return stored_newest is None or file_newest > stored_newest
139
+
140
+
141
def _legacy_backup_summary(hours: int) -> dict | None:
    """Summarize file-derived backup runs from the last ``hours`` hours.

    Returns:
        The summary dict built by ``_build_summary_from_runs`` for the legacy
        backup cron id, or ``None`` when no backup files fall in the window.
    """
    file_runs = _legacy_backup_runs(hours)
    if file_runs:
        return _build_summary_from_runs(LEGACY_BACKUP_CRON_ID, file_runs)
    return None
146
+
147
+
148
+ def _legacy_backup_runs(hours: int) -> list[dict]:
149
+ nexo_home = Path(os.environ.get("NEXO_HOME", str(Path.home() / ".nexo")))
150
+ backup_dir = nexo_home / "backups"
151
+ if not backup_dir.exists():
152
+ return []
153
+ cutoff = _now_utc().timestamp() - (hours * 3600)
154
+ runs: list[dict] = []
155
+ for backup_file in backup_dir.glob("nexo-*.db"):
156
+ try:
157
+ stat = backup_file.stat()
158
+ except OSError:
159
+ continue
160
+ if stat.st_mtime < cutoff:
161
+ continue
162
+ started = datetime.fromtimestamp(stat.st_mtime, tz=timezone.utc).replace(microsecond=0)
163
+ started_at = started.strftime("%Y-%m-%d %H:%M:%S")
164
+ runs.append(
165
+ {
166
+ "cron_id": LEGACY_BACKUP_CRON_ID,
167
+ "started_at": started_at,
168
+ "ended_at": started_at,
169
+ "exit_code": 0,
170
+ "summary": LEGACY_BACKUP_SUMMARY,
171
+ "error": "",
172
+ "duration_secs": 1.0,
173
+ }
174
+ )
175
+ runs.sort(key=lambda row: row["started_at"], reverse=True)
176
+ return runs
177
+
178
+
179
+ def _build_summary_from_runs(cron_id: str, runs: list[dict]) -> dict:
180
+ completed_runs = [
181
+ row for row in runs if row.get("exit_code") is not None and row.get("ended_at")
182
+ ]
183
+ duration_values = [
184
+ float(row["duration_secs"])
185
+ for row in completed_runs
186
+ if row.get("duration_secs") is not None
187
+ ]
188
+ return {
189
+ "cron_id": cron_id,
190
+ "total_runs": len(runs),
191
+ "succeeded": sum(1 for row in completed_runs if row.get("exit_code") == 0),
192
+ "completed_runs": len(completed_runs),
193
+ "failed": sum(1 for row in completed_runs if row.get("exit_code") not in (None, 0)),
194
+ "open_runs": len(runs) - len(completed_runs),
195
+ "avg_duration": round(sum(duration_values) / len(duration_values), 1) if duration_values else None,
196
+ "last_run": runs[0].get("started_at"),
197
+ "last_exit_code": runs[0].get("exit_code"),
198
+ "last_ended_at": runs[0].get("ended_at"),
199
+ "last_summary": next((row.get("summary") for row in runs if row.get("summary")), ""),
200
+ }
201
+
202
+
85
203
  def _summary_has_warning(summary: str = "") -> bool:
86
204
  lowered = str(summary or "").strip().lower()
87
205
  if not lowered:
@@ -67,8 +67,9 @@ MACOS_FDA_PROBE_PATHS = (
67
67
  )
68
68
  DEFAULT_CLAUDE_CODE_MODEL = "claude-opus-4-6[1m]"
69
69
  DEFAULT_CLAUDE_CODE_REASONING_EFFORT = ""
70
- DEFAULT_CODEX_MODEL = "gpt-5.4"
71
- DEFAULT_CODEX_REASONING_EFFORT = "xhigh"
70
+ # Codex defaults mirror the user's primary model — no hardcoded third-party models.
71
+ DEFAULT_CODEX_MODEL = DEFAULT_CLAUDE_CODE_MODEL
72
+ DEFAULT_CODEX_REASONING_EFFORT = ""
72
73
 
73
74
 
74
75
  def _schedule_defaults() -> dict:
@@ -18,9 +18,9 @@ What this script does (idempotent and best-effort):
18
18
  3. Detects degradation signals on the 7-day window. The criteria are
19
19
  intentionally conservative to avoid false alarms on small samples:
20
20
  a. recommendation_accept_rate < 50% AND total_evaluations >= 10
21
- b. linked_outcome_success_rate < 50% AND linked_outcomes_total >= 5
21
+ b. linked_outcome_success_rate < 50% AND linked_outcomes_resolved >= 5
22
22
  c. override_success_rate > recommended_success_rate by >= 20pp
23
- AND linked_outcomes_total >= 5
23
+ AND linked_outcomes_resolved >= 5
24
24
  4. Opens (or refreshes) NF-CORTEX-QUALITY-DROP followup with the offending
25
25
  metrics when degradation is detected. Idempotent: if a non-PENDING /
26
26
  resolved followup of the same id already exists, it is updated in
@@ -96,6 +96,13 @@ def detect_quality_signals(summary: dict) -> list[dict]:
96
96
  total = int(summary.get("total_evaluations") or 0)
97
97
  accept_rate = float(summary.get("recommendation_accept_rate") or 0.0)
98
98
  linked_total = int(summary.get("linked_outcomes_total") or 0)
99
+ linked_met = int(summary.get("linked_outcomes_met") or 0)
100
+ linked_missed = int(summary.get("linked_outcomes_missed") or 0)
101
+ linked_pending = int(summary.get("linked_outcomes_pending") or 0)
102
+ linked_resolved = linked_met + linked_missed
103
+ if linked_resolved <= 0 and linked_total > 0:
104
+ # Older callers may omit the met/missed counters; fall back to total minus pending.
105
+ linked_resolved = max(0, linked_total - linked_pending)
99
106
  linked_success = float(summary.get("linked_outcome_success_rate") or 0.0)
100
107
  recommended_success = float(summary.get("recommended_success_rate") or 0.0)
101
108
  override_success = float(summary.get("override_success_rate") or 0.0)
@@ -114,21 +121,25 @@ def detect_quality_signals(summary: dict) -> list[dict]:
114
121
  ),
115
122
  })
116
123
 
117
- if linked_total >= LINKED_MIN_SAMPLE and linked_success < LINKED_SUCCESS_FLOOR:
124
+ linked_scope = f"{linked_resolved} resolved linked outcomes"
125
+ if linked_pending > 0:
126
+ linked_scope += f" ({linked_total} total, {linked_pending} pending)"
127
+
128
+ if linked_resolved >= LINKED_MIN_SAMPLE and linked_success < LINKED_SUCCESS_FLOOR:
118
129
  signals.append({
119
130
  "kind": "linked_success",
120
131
  "severity": "warn",
121
132
  "metric_value": linked_success,
122
133
  "threshold": LINKED_SUCCESS_FLOOR,
123
- "sample_size": linked_total,
134
+ "sample_size": linked_resolved,
124
135
  "message": (
125
136
  f"Cortex linked-outcome success rate {linked_success:.1f}% on "
126
- f"{linked_total} linked outcomes is below the "
137
+ f"{linked_scope} is below the "
127
138
  f"{LINKED_SUCCESS_FLOOR:.0f}% floor."
128
139
  ),
129
140
  })
130
141
 
131
- if linked_total >= LINKED_MIN_SAMPLE:
142
+ if linked_resolved >= LINKED_MIN_SAMPLE:
132
143
  gap = override_success - recommended_success
133
144
  if gap >= OVERRIDE_GAP_THRESHOLD:
134
145
  signals.append({
@@ -136,12 +147,12 @@ def detect_quality_signals(summary: dict) -> list[dict]:
136
147
  "severity": "error",
137
148
  "metric_value": gap,
138
149
  "threshold": OVERRIDE_GAP_THRESHOLD,
139
- "sample_size": linked_total,
150
+ "sample_size": linked_resolved,
140
151
  "message": (
141
152
  f"Cortex overrides outperform recommendations by {gap:.1f}pp "
142
153
  f"(override {override_success:.1f}% vs recommended "
143
- f"{recommended_success:.1f}% on {linked_total} linked "
144
- "outcomes). The recommender is mis-ranking choices."
154
+ f"{recommended_success:.1f}% on {linked_scope}). The "
155
+ "recommender is mis-ranking choices."
145
156
  ),
146
157
  })
147
158
 
@@ -171,15 +182,37 @@ def _upsert_quality_followup(signals: list[dict]) -> str:
171
182
  resolved, a fresh row is inserted with the same id (REPLACE) so the
172
183
  new degradation pattern is visible.
173
184
  """
174
- if not signals:
175
- return "no_signal"
176
-
177
185
  try:
178
- from db import get_followup, get_db
186
+ from db import complete_followup, get_followup, get_db
179
187
  except Exception as e:
180
188
  _log(f"WARN: cannot import db helpers: {e}")
181
189
  return "skipped_no_db"
182
190
 
191
+ try:
192
+ existing = get_followup(FOLLOWUP_ID)
193
+ except Exception as e:
194
+ _log(f"WARN: get_followup raised: {e}")
195
+ existing = None
196
+
197
+ if not signals:
198
+ if not existing:
199
+ return "no_signal"
200
+ status = str(existing.get("status") or "").upper()
201
+ if status.startswith("COMPLETED") or status in {"DELETED", "ARCHIVED", "BLOCKED", "WAITING", "CANCELLED"}:
202
+ return "no_signal"
203
+ try:
204
+ complete_followup(
205
+ FOLLOWUP_ID,
206
+ result=(
207
+ "Auto-resolved by cortex-cycle: no active degradation signals in the "
208
+ "current 7d window."
209
+ ),
210
+ )
211
+ except Exception as e:
212
+ _log(f"WARN: failed to close followup: {e}")
213
+ return "failed_close"
214
+ return "closed"
215
+
183
216
  summary_lines = ["Cortex continuous validation found quality degradation:"]
184
217
  for sig in signals:
185
218
  summary_lines.append(
@@ -197,12 +230,6 @@ def _upsert_quality_followup(signals: list[dict]) -> str:
197
230
  )
198
231
  now_epoch = datetime.now().timestamp()
199
232
 
200
- try:
201
- existing = get_followup(FOLLOWUP_ID)
202
- except Exception as e:
203
- _log(f"WARN: get_followup raised: {e}")
204
- existing = None
205
-
206
233
  try:
207
234
  conn = get_db()
208
235
  conn.execute(
@@ -255,8 +282,8 @@ def run() -> int:
255
282
  f"signals={len(signals)}"
256
283
  )
257
284
 
258
- if signals:
259
- action = _upsert_quality_followup(signals)
285
+ action = _upsert_quality_followup(signals)
286
+ if signals or action not in {"no_signal"}:
260
287
  _log(f"Cortex cycle: followup {FOLLOWUP_ID} {action} ({len(signals)} signal(s))")
261
288
 
262
289
  return 0
@@ -479,6 +479,43 @@ def _upsert_workflow_goal_inline(conn: sqlite3.Connection, *, area: str, sample_
479
479
 
480
480
  columns = _table_columns(conn, "workflow_goals")
481
481
  signature = _topic_signature(sample_goal)
482
+ goal_id = f"WG-AUDIT-{hashlib.sha1(f'{area}:{signature or sample_goal}'.encode('utf-8'), usedforsecurity=False).hexdigest()[:8].upper()}"
483
+
484
+ def _write_goal(existing_row: sqlite3.Row, *, reactivated: bool) -> dict:
485
+ updates: dict[str, object] = {}
486
+ if "title" in columns:
487
+ updates["title"] = sample_goal[:140]
488
+ if "objective" in columns:
489
+ updates["objective"] = objective
490
+ if "priority" in columns:
491
+ updates["priority"] = "high"
492
+ if "owner" in columns:
493
+ updates["owner"] = AUDIT_GOAL_OWNER
494
+ if "next_action" in columns:
495
+ updates["next_action"] = next_action
496
+ if "success_signal" in columns:
497
+ updates["success_signal"] = success_signal
498
+ if "shared_state" in columns:
499
+ updates["shared_state"] = json.dumps({"area": area, "signature": signature, "source": "self-audit"})
500
+ if reactivated and "status" in columns:
501
+ updates["status"] = "active"
502
+ if reactivated and "blocker_reason" in columns:
503
+ updates["blocker_reason"] = ""
504
+ if reactivated and "closed_at" in columns:
505
+ updates["closed_at"] = None
506
+ if "updated_at" in columns:
507
+ updates["updated_at"] = now_iso
508
+ assignments = ", ".join(f"{column} = ?" for column in updates)
509
+ conn.execute(
510
+ f"UPDATE workflow_goals SET {assignments} WHERE goal_id = ?",
511
+ [updates[column] for column in updates] + [existing_row["goal_id"]],
512
+ )
513
+ return {
514
+ "ok": True,
515
+ "action": "reactivated" if reactivated else "updated",
516
+ "goal_id": str(existing_row["goal_id"]),
517
+ }
518
+
482
519
  rows = conn.execute(
483
520
  """SELECT * FROM workflow_goals
484
521
  WHERE status NOT IN ('completed', 'cancelled', 'abandoned')
@@ -499,31 +536,21 @@ def _upsert_workflow_goal_inline(conn: sqlite3.Connection, *, area: str, sample_
499
536
  next_action = AUDIT_GOAL_NEXT_ACTION
500
537
  success_signal = "The theme stops resurfacing in unresolved protocol tasks."
501
538
  now_iso = datetime.now().isoformat(timespec="seconds")
502
- if existing:
503
- updates: dict[str, object] = {}
504
- if "title" in columns:
505
- updates["title"] = sample_goal[:140]
506
- if "objective" in columns:
507
- updates["objective"] = objective
508
- if "priority" in columns:
509
- updates["priority"] = "high"
510
- if "owner" in columns:
511
- updates["owner"] = AUDIT_GOAL_OWNER
512
- if "next_action" in columns:
513
- updates["next_action"] = next_action
514
- if "success_signal" in columns:
515
- updates["success_signal"] = success_signal
516
- if "updated_at" in columns:
517
- updates["updated_at"] = now_iso
518
- assignments = ", ".join(f"{column} = ?" for column in updates)
519
- conn.execute(
520
- f"UPDATE workflow_goals SET {assignments} WHERE goal_id = ?",
521
- [updates[column] for column in updates] + [existing["goal_id"]],
539
+ exact = conn.execute(
540
+ "SELECT * FROM workflow_goals WHERE goal_id = ? LIMIT 1",
541
+ (goal_id,),
542
+ ).fetchone()
543
+ if exact is not None:
544
+ exact_status = str(exact["status"] or "").lower()
545
+ return _write_goal(
546
+ exact,
547
+ reactivated=exact_status in {"completed", "cancelled", "abandoned"},
522
548
  )
523
- return {"ok": True, "action": "updated", "goal_id": str(existing["goal_id"])}
549
+
550
+ if existing:
551
+ return _write_goal(existing, reactivated=False)
524
552
 
525
553
  # Content fingerprint, not security-sensitive.
526
- goal_id = f"WG-AUDIT-{hashlib.sha1(f'{area}:{signature or sample_goal}'.encode('utf-8'), usedforsecurity=False).hexdigest()[:8].upper()}"
527
554
  values: dict[str, object] = {"goal_id": goal_id}
528
555
  if "session_id" in columns:
529
556
  values["session_id"] = ""
@@ -0,0 +1,245 @@
1
+ #!/usr/bin/env python3
2
+ from __future__ import annotations
3
+
4
+ """Rehydrate archived markdown learnings back into the NEXO learnings table.
5
+
6
+ The original Evolution #5 incident found an empty learnings table while the
7
+ historical archive still existed as markdown grouped by domain. This helper
8
+ parses the archive format used in those files:
9
+
10
+ - markdown tables with `Error | Solucion`
11
+ - dated sections with bullet/numbered operational learnings
12
+
13
+ Dry-run is the default. Pass `--apply` to insert missing learnings.
14
+ """
15
+
16
+ import argparse
17
+ import re
18
+ import sys
19
+ from dataclasses import dataclass
20
+ from pathlib import Path
21
+
22
+ REPO_SRC = Path(__file__).resolve().parents[1]
23
+ if str(REPO_SRC) not in sys.path:
24
+ sys.path.insert(0, str(REPO_SRC))
25
+
26
+ from db import create_learning, get_db, init_db # noqa: E402
27
+ from runtime_home import export_resolved_nexo_home # noqa: E402
28
+
29
+ NEXO_HOME = export_resolved_nexo_home()
30
+ TABLE_HEADER_TITLES = {"error", "problema", "issue"}
31
+ DEFAULT_ARCHIVE_DIRS = (
32
+ NEXO_HOME / "claude" / "operations" / "archive" / "learnings",
33
+ Path.home() / "claude" / "operations" / "archive" / "learnings",
34
+ Path.home() / ".claude" / "operations" / "archive" / "learnings",
35
+ )
36
+
37
+
38
@dataclass(frozen=True)
class LearningCandidate:
    """One learning parsed from the markdown archive, ready for insertion.

    Instances are immutable. The fields line up with the arguments this
    script passes to ``create_learning``.
    """

    # Set by the parsers to the archive file's stem.
    category: str
    # Short headline; combined with ``category`` as the dedupe key.
    title: str
    # Full learning text.
    content: str
    # Provenance note naming the archive file (and section) it came from.
    reasoning: str
    # Rule or remedy text associated with the learning.
    prevention: str
    # "active" unless the parser flagged the entry as "superseded".
    status: str = "active"
46
+
47
+
48
+ def _strip_markdown(text: str) -> str:
49
+ text = text.replace("**", "").replace("__", "")
50
+ text = text.replace("~~", "")
51
+ text = re.sub(r"`([^`]*)`", r"\1", text)
52
+ text = re.sub(r"\[([^\]]+)\]\([^)]+\)", r"\1", text)
53
+ text = re.sub(r"\s+", " ", text).strip(" |")
54
+ return text.strip()
55
+
56
+
57
+ def _derive_title(text: str) -> str:
58
+ first_sentence = re.split(r"(?<=[.!?])\s+", text, maxsplit=1)[0].strip()
59
+ if not first_sentence:
60
+ first_sentence = text.strip()
61
+ return first_sentence[:180].rstrip(" .")
62
+
63
+
64
+ def _derive_prevention(text: str) -> str:
65
+ match = re.search(r"(Regla:\s*.*|SIEMPRE\s+.*|NUNCA\s+.*)", text, flags=re.IGNORECASE)
66
+ if match:
67
+ return match.group(1).strip()
68
+ return text[:500].strip()
69
+
70
+
71
+ def _candidate_reasoning(path: Path, section: str) -> str:
72
+ section_note = f" [{section}]" if section and section != path.stem else ""
73
+ return f"Rehydrated from markdown archive {path.name}{section_note}"
74
+
75
+
76
def _parse_table_row(path: Path, section: str, line: str) -> LearningCandidate | None:
    """Turn one markdown table row into a learning candidate.

    The first cell becomes the title and the second the prevention text;
    any further cells are ignored.

    Args:
        path: Archive file the row came from; its stem is the category.
        section: Current section heading, used in the provenance note.
        line: The raw table row, starting with ``|``.

    Returns:
        A candidate, or ``None`` when the row has fewer than two cells, has
        an empty title or prevention cell, is a header row (first cell in
        ``TABLE_HEADER_TITLES``), or is a delimiter row. Delimiter rows are
        recognized with or without alignment colons (``---``, ``:---``,
        ``:---:``), so aligned tables do not produce a bogus learning.
    """
    cells = [_strip_markdown(cell) for cell in line.strip().strip("|").split("|")]
    if len(cells) < 2:
        return None
    title, prevention = cells[0], cells[1]
    if not title or not prevention:
        return None
    if title.lower() in TABLE_HEADER_TITLES:
        return None
    # A delimiter cell consists only of dashes, optional alignment colons
    # and padding spaces.
    if set(title) <= {"-", ":", " "}:
        return None

    # Entries marked "obsoleto" in either cell are kept but flagged.
    obsolete = "obsoleto" in prevention.lower() or "obsoleto" in title.lower()
    return LearningCandidate(
        category=path.stem,
        title=title,
        content=f"{title}. {prevention}",
        reasoning=_candidate_reasoning(path, section),
        prevention=prevention,
        status="superseded" if obsolete else "active",
    )
95
+
96
+
97
def _consume_bullet_block(lines: list[str], start: int) -> tuple[str, int]:
    """Collect one bullet or numbered item together with its continuation lines.

    Args:
        lines: All lines of the archive file.
        start: Index of the line carrying the bullet marker.

    Returns:
        ``(text, next_index)``. ``text`` is the item with its marker removed,
        continuation lines joined by spaces, and markdown stripped.
        ``next_index`` is the first line not consumed: a blank line, a
        ``## `` heading, a table row, another bullet, or the end of the file.
    """
    marker = re.compile(r"^([-*]|\d+\.)\s+")
    collected = [marker.sub("", lines[start].strip())]
    stop = len(lines)
    for position in range(start + 1, len(lines)):
        current = lines[position].strip()
        ends_block = (
            not current
            or current.startswith("## ")
            or current.startswith("|")
            or marker.match(current) is not None
        )
        if ends_block:
            stop = position
            break
        collected.append(current)
    return _strip_markdown(" ".join(collected)), stop
111
+
112
+
113
def _parse_bullet(path: Path, section: str, text: str) -> LearningCandidate | None:
    """Turn the text of one bullet item into a learning candidate.

    Items shorter than 12 characters are rejected. The title and prevention
    are derived from the text, and the item is flagged ``superseded`` when
    it mentions "obsoleto" (case-insensitive).
    """
    if len(text) < 12:
        return None
    obsolete = "obsoleto" in text.lower()
    return LearningCandidate(
        category=path.stem,
        title=_derive_title(text),
        content=text,
        reasoning=_candidate_reasoning(path, section),
        prevention=_derive_prevention(text),
        status="superseded" if obsolete else "active",
    )
127
+
128
+
129
def parse_archive_file(path: Path) -> list[LearningCandidate]:
    """Parse one markdown archive file into learning candidates.

    The file is read as UTF-8 with undecodable bytes dropped. ``## ``
    headings update the current section (initially the file stem), table
    rows go through ``_parse_table_row``, and bullet or numbered items go
    through ``_consume_bullet_block`` and ``_parse_bullet``. Rows starting
    with ``|`` followed by optional whitespace and a dash are skipped, as is
    every other kind of line.

    Returns:
        Candidates in the order they appear in the file.
    """
    lines = path.read_text(encoding="utf-8", errors="ignore").splitlines()
    current_section = path.stem
    found: list[LearningCandidate] = []
    position = 0
    total = len(lines)
    while position < total:
        text = lines[position].strip()
        following = position + 1
        candidate = None
        if text.startswith("## "):
            current_section = _strip_markdown(text[3:])
        elif text.startswith("|") and not re.match(r"^\|\s*-", text):
            candidate = _parse_table_row(path, current_section, text)
        elif re.match(r"^([-*]|\d+\.)\s+", text):
            # A bullet may span several lines; jump past all of them.
            block, following = _consume_bullet_block(lines, position)
            candidate = _parse_bullet(path, current_section, block)
        if candidate is not None:
            found.append(candidate)
        position = following
    return found
155
+
156
+
157
def parse_archive_dir(archive_dir: Path) -> list[LearningCandidate]:
    """Parse every ``*.md`` file in ``archive_dir`` into unique candidates.

    Files are processed in sorted path order. Candidates are de-duplicated
    on the lower-cased ``(category, title)`` pair, and the first occurrence
    wins.
    """
    parsed: list[LearningCandidate] = []
    for markdown_file in sorted(archive_dir.glob("*.md")):
        parsed += parse_archive_file(markdown_file)

    # Dicts keep insertion order, so setdefault retains the first candidate
    # seen for each key.
    unique: dict[tuple[str, str], LearningCandidate] = {}
    for candidate in parsed:
        unique.setdefault((candidate.category.lower(), candidate.title.lower()), candidate)
    return list(unique.values())
171
+
172
+
173
def resolve_archive_dir(explicit: str = "") -> Path:
    """Locate the learnings archive directory.

    Args:
        explicit: Optional directory override; ``~`` is expanded.

    Returns:
        The explicit directory when one is given, otherwise the first entry
        of ``DEFAULT_ARCHIVE_DIRS`` that is an existing directory.

    Raises:
        FileNotFoundError: The explicit path is not a directory, or none of
            the default locations exists.
    """
    if explicit:
        chosen = Path(explicit).expanduser()
        if chosen.is_dir():
            return chosen
        raise FileNotFoundError(f"archive dir not found: {chosen}")

    first_hit = next((option for option in DEFAULT_ARCHIVE_DIRS if option.is_dir()), None)
    if first_hit is not None:
        return first_hit
    tried = ", ".join(str(path) for path in DEFAULT_ARCHIVE_DIRS)
    raise FileNotFoundError("No learnings archive found. Tried: " + tried)
186
+
187
+
188
def apply_candidates(candidates: list[LearningCandidate], *, apply: bool) -> dict:
    """Insert the candidates that are missing from the learnings table.

    A candidate is treated as already present when its lower-cased
    ``(category, title)`` pair matches an existing row, or matches a
    candidate handled earlier in this call.

    Args:
        candidates: Parsed learnings to reconcile with the database.
        apply: When true, missing learnings are written with
            ``create_learning``; when false nothing is written.

    Returns:
        A dict with ``parsed``, ``inserted``, ``skipped_existing`` and
        ``mode`` (``"apply"`` or ``"dry-run"``). In dry-run mode ``inserted``
        counts the learnings that would have been inserted.
    """
    init_db()
    conn = get_db()
    # Existing rows may hold NULL (None) in either column; coerce to text so
    # one odd row cannot abort the whole run with an AttributeError.
    existing = {
        (str(row[0] or "").lower(), str(row[1] or "").lower())
        for row in conn.execute("SELECT category, title FROM learnings").fetchall()
    }
    inserted = 0
    skipped = 0
    for item in candidates:
        key = (item.category.lower(), item.title.lower())
        if key in existing:
            skipped += 1
            continue
        if apply:
            create_learning(
                item.category,
                item.title,
                item.content,
                reasoning=item.reasoning,
                prevention=item.prevention,
                status=item.status,
            )
        # Record the key in both modes so later duplicates count as skipped.
        existing.add(key)
        inserted += 1
    return {
        "parsed": len(candidates),
        "inserted": inserted,
        "skipped_existing": skipped,
        "mode": "apply" if apply else "dry-run",
    }
219
+
220
+
221
def build_arg_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for this script.

    The module docstring is used as the description. Two options are
    defined: ``--archive-dir`` (string, default ``""``) and ``--apply``
    (flag, default ``False``).
    """
    cli = argparse.ArgumentParser(description=__doc__)
    option_table = (
        ("--archive-dir", {"default": "", "help": "Override archive directory"}),
        ("--apply", {"action": "store_true", "help": "Insert parsed learnings into the DB"}),
    )
    for flag, settings in option_table:
        cli.add_argument(flag, **settings)
    return cli
226
+
227
+
228
def main(argv: list[str] | None = None) -> int:
    """Run the rehydration from the command line.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read
            ``sys.argv``.

    Returns:
        ``1`` when no archive directory can be resolved (the error is printed
        to stderr), otherwise ``0`` after printing a one-line summary.
    """
    options = build_arg_parser().parse_args(argv)
    try:
        archive_dir = resolve_archive_dir(options.archive_dir)
    except FileNotFoundError as exc:
        print(f"ERROR: {exc}", file=sys.stderr)
        return 1

    summary = apply_candidates(parse_archive_dir(archive_dir), apply=options.apply)
    report = (
        f"{summary['mode']}: archive={archive_dir} parsed={summary['parsed']} "
        f"inserted={summary['inserted']} skipped_existing={summary['skipped_existing']}"
    )
    print(report)
    return 0
242
+
243
+
244
+ if __name__ == "__main__":
245
+ raise SystemExit(main())
package/src/server.py CHANGED
@@ -202,6 +202,8 @@ mcp = FastMCP(
202
202
  "- **Workflow runtime (MANDATORY for long multi-step or cross-session work):** open `nexo_goal_open(...)` when the objective must survive sessions, then `nexo_workflow_open(...)`, "
203
203
  "update meaningful checkpoints with `nexo_workflow_update(...)`, then use `nexo_workflow_resume(...)` / "
204
204
  "`nexo_workflow_replay(...)` instead of restarting blindly.\n"
205
+ "- **Diagnostic plane (MANDATORY before diagnosing NEXO):** fix the plane explicitly first — `product_public`, `runtime_personal`, `installation_live`, `database_real`, or `cooperator`. "
206
+ "Do not mix product, runtime, install, DB, and agent-behavior explanations in the same diagnosis.\n"
205
207
  "- **Guard (MANDATORY before ANY code edit):** `nexo_guard_check(files='...', area='...')` BEFORE editing code. "
206
208
  "No exceptions. Blocking rules→resolve first. `nexo_track(sid=SID, paths=[...])` before shared files\n"
207
209
  "- **Skills (MANDATORY before multi-step tasks):** `nexo_skill_match(task)` to find reusable procedures. "