nexo-brain 5.3.19 → 5.3.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +1 -1
- package/bin/nexo-brain.js +52 -10
- package/package.json +1 -1
- package/src/auto_update.py +11 -8
- package/src/dashboard/static/favicon 2.svg +32 -0
- package/src/dashboard/static/nexo-logo 2.png +0 -0
- package/src/dashboard/static/nexo-logo 2.svg +40 -0
- package/src/dashboard/static/style 2.css +2458 -0
- package/src/dashboard/templates/adaptive 2.html +118 -0
- package/src/dashboard/templates/artifacts 2.html +133 -0
- package/src/dashboard/templates/backups 2.html +136 -0
- package/src/dashboard/templates/base 2.html +417 -0
- package/src/dashboard/templates/calendar 2.html +591 -0
- package/src/dashboard/templates/chat 2.html +356 -0
- package/src/dashboard/templates/claims 2.html +259 -0
- package/src/dashboard/templates/cortex 2.html +321 -0
- package/src/dashboard/templates/credentials 2.html +128 -0
- package/src/dashboard/templates/crons 2.html +370 -0
- package/src/dashboard/templates/dashboard 2.html +494 -0
- package/src/dashboard/templates/dreams 2.html +252 -0
- package/src/dashboard/templates/email 2.html +160 -0
- package/src/dashboard/templates/evolution 2.html +189 -0
- package/src/dashboard/templates/feed 2.html +249 -0
- package/src/dashboard/templates/followup_health 2.html +170 -0
- package/src/dashboard/templates/graph 2.html +201 -0
- package/src/dashboard/templates/guard 2.html +259 -0
- package/src/dashboard/templates/inbox 2.html +251 -0
- package/src/dashboard/templates/memory 2.html +420 -0
- package/src/dashboard/templates/operations 2.html +608 -0
- package/src/dashboard/templates/plugins 2.html +185 -0
- package/src/dashboard/templates/protocol 2.html +199 -0
- package/src/dashboard/templates/rules 2.html +246 -0
- package/src/dashboard/templates/sentiment 2.html +247 -0
- package/src/dashboard/templates/sessions 2.html +218 -0
- package/src/dashboard/templates/skills 2.html +329 -0
- package/src/dashboard/templates/somatic 2.html +73 -0
- package/src/dashboard/templates/triggers 2.html +133 -0
- package/src/dashboard/templates/trust 2.html +360 -0
- package/src/db/__init__ 2.py +259 -0
- package/src/db/_core 2.py +437 -0
- package/src/db/_credentials 2.py +124 -0
- package/src/db/_episodic 2.py +762 -0
- package/src/db/_evolution 2.py +54 -0
- package/src/db/_fts 2.py +406 -0
- package/src/db/_goal_profiles 2.py +376 -0
- package/src/db/_hot_context 2.py +660 -0
- package/src/db/_outcomes 2.py +800 -0
- package/src/db/_personal_scripts 2.py +582 -0
- package/src/db/_sessions 2.py +330 -0
- package/src/db/_tasks 2.py +91 -0
- package/src/db/_watchers 2.py +173 -0
- package/src/doctor/formatters 2.py +52 -0
- package/src/doctor/models 2.py +69 -0
- package/src/doctor/planes 2.py +87 -0
- package/src/doctor/providers/__init__ 2.py +1 -0
- package/src/doctor/providers/deep 2.py +367 -0
- package/src/evolution_cycle 2.py +519 -0
- package/src/hooks/auto_capture 2.py +208 -0
- package/src/hooks/caffeinate-guard 2.sh +8 -0
- package/src/hooks/capture-session 2.sh +21 -0
- package/src/hooks/capture-tool-logs 2.sh +158 -0
- package/src/hooks/daily-briefing-check 2.sh +33 -0
- package/src/hooks/heartbeat-enforcement 2.py +90 -0
- package/src/hooks/heartbeat-posttool 2.sh +18 -0
- package/src/hooks/inbox-hook 2.sh +76 -0
- package/src/hooks/post-compact 2.sh +152 -0
- package/src/hooks/pre-compact 2.sh +169 -0
- package/src/hooks/protocol-guardrail 2.sh +10 -0
- package/src/hooks/protocol-pretool-guardrail 2.sh +9 -0
- package/src/hooks/session-stop 2.sh +52 -0
- package/src/kg_populate 2.py +292 -0
- package/src/maintenance 2.py +53 -0
- package/src/memory_backends 2.py +71 -0
- package/src/migrate_embeddings 2.py +124 -0
- package/src/nexo_sdk 2.py +103 -0
- package/src/observability 2.py +199 -0
- package/src/plugin_loader 2.py +217 -0
- package/src/plugins/__init__ 2.py +0 -0
- package/src/plugins/artifact_registry 2.py +450 -0
- package/src/plugins/backup 2.py +127 -0
- package/src/plugins/claims_tools 2.py +119 -0
- package/src/plugins/cognitive_memory 2.py +609 -0
- package/src/plugins/core_rules 2.py +252 -0
- package/src/plugins/cortex 2.py +1155 -0
- package/src/plugins/entities 2.py +67 -0
- package/src/plugins/episodic_memory 2.py +560 -0
- package/src/plugins/evolution 2.py +167 -0
- package/src/plugins/goal_engine 2.py +142 -0
- package/src/plugins/guard 2.py +862 -0
- package/src/plugins/impact 2.py +29 -0
- package/src/plugins/knowledge_graph_tools 2.py +137 -0
- package/src/plugins/media_memory_tools 2.py +98 -0
- package/src/plugins/memory_export 2.py +196 -0
- package/src/plugins/outcomes 2.py +130 -0
- package/src/plugins/personal_scripts 2.py +117 -0
- package/src/plugins/preferences 2.py +47 -0
- package/src/plugins/protocol 2.py +1449 -0
- package/src/plugins/simple_api 2.py +106 -0
- package/src/plugins/skills 2.py +341 -0
- package/src/plugins/state_watchers 2.py +79 -0
- package/src/plugins/update 2.py +986 -0
- package/src/plugins/user_state_tools 2.py +43 -0
- package/src/plugins/workflow 2.py +588 -0
- package/src/protocol_settings 2.py +59 -0
- package/src/public_contribution 2.py +466 -0
- package/src/public_evolution_queue 2.py +241 -0
- package/src/requirements 2.txt +14 -0
- package/src/retroactive_learnings 2.py +373 -0
- package/src/rules/__init__ 2.py +0 -0
- package/src/rules/core-rules 2.json +331 -0
- package/src/rules/migrate 2.py +207 -0
- package/src/runtime_power 2.py +874 -0
- package/src/script_registry 2.py +1559 -0
- package/src/scripts/check-context 2.py +272 -0
- package/src/scripts/deep-sleep/apply_findings 2.py +2327 -0
- package/src/scripts/deep-sleep/collect 2.py +928 -0
- package/src/scripts/deep-sleep/extract 2.py +330 -0
- package/src/scripts/deep-sleep/extract-prompt 2.md +285 -0
- package/src/scripts/deep-sleep/synthesize 2.py +312 -0
- package/src/scripts/deep-sleep/synthesize-prompt 2.md +336 -0
- package/src/scripts/nexo-agent-run 2.py +75 -0
- package/src/scripts/nexo-auto-update 2.py +6 -0
- package/src/scripts/nexo-backup 2.sh +25 -0
- package/src/scripts/nexo-brain-activation 2.sh +140 -0
- package/src/scripts/nexo-catchup 2.py +300 -0
- package/src/scripts/nexo-cognitive-decay 2.py +257 -0
- package/src/scripts/nexo-cortex-cycle 2.py +293 -0
- package/src/scripts/nexo-cron-wrapper 2.sh +53 -0
- package/src/scripts/nexo-daily-self-audit 2.py +2161 -0
- package/src/scripts/nexo-dashboard 2.sh +29 -0
- package/src/scripts/nexo-deep-sleep 2.sh +86 -0
- package/src/scripts/nexo-evolution-run 2.py +1664 -0
- package/src/scripts/nexo-followup-hygiene 2.py +139 -0
- package/src/scripts/nexo-hook-record 2.py +42 -0
- package/src/scripts/nexo-immune 2.py +936 -0
- package/src/scripts/nexo-impact-scorer 2.py +117 -0
- package/src/scripts/nexo-inbox-hook 2.sh +74 -0
- package/src/scripts/nexo-install 2.py +6 -0
- package/src/scripts/nexo-learning-housekeep 2.py +401 -0
- package/src/scripts/nexo-learning-validator 2.py +266 -0
- package/src/scripts/nexo-migrate 2.py +260 -0
- package/src/scripts/nexo-outcome-checker 2.py +127 -0
- package/src/scripts/nexo-postmortem-consolidator 2.py +456 -0
- package/src/scripts/nexo-pre-commit 2.py +120 -0
- package/src/scripts/nexo-prevent-sleep 2.sh +35 -0
- package/src/scripts/nexo-proactive-dashboard 2.py +354 -0
- package/src/scripts/nexo-reflection 2.py +256 -0
- package/src/scripts/nexo-runtime-preflight 2.py +274 -0
- package/src/scripts/nexo-sleep 2.py +631 -0
- package/src/scripts/nexo-snapshot-restore 2.sh +35 -0
- package/src/scripts/nexo-sync-clients 2.py +16 -0
- package/src/scripts/nexo-synthesis 2.py +475 -0
- package/src/scripts/nexo-tcc-approve 2.sh +79 -0
- package/src/scripts/nexo-update 2.sh +306 -0
- package/src/scripts/nexo-watchdog 2.sh +1207 -0
- package/src/scripts/nexo-watchdog-smoke 2.py +119 -0
- package/src/scripts/rehydrate_learnings_from_archive 2.py +245 -0
- package/src/server 2.py +1296 -0
- package/src/skills/run-nexo-audit-phase/guide 2.md +43 -0
- package/src/skills/run-nexo-audit-phase/skill 2.json +59 -0
- package/src/skills/run-nexo-core-fix-cycle/guide 2.md +17 -0
- package/src/skills/run-nexo-core-fix-cycle/script 2.py +276 -0
- package/src/skills/run-nexo-core-fix-cycle/skill 2.json +58 -0
- package/src/skills/run-release-final-audit/guide 2.md +16 -0
- package/src/skills/run-release-final-audit/script 2.py +259 -0
- package/src/skills/run-release-final-audit/skill 2.json +77 -0
- package/src/skills/run-runtime-doctor/guide 2.md +12 -0
- package/src/skills/run-runtime-doctor/script 2.py +21 -0
- package/src/skills/run-runtime-doctor/skill 2.json +25 -0
- package/src/skills_runtime 2.py +932 -0
- package/src/state_watchers_runtime 2.py +475 -0
- package/src/storage_router 2.py +32 -0
- package/src/system_catalog 2.py +786 -0
- package/src/tools_coordination 2.py +103 -0
- package/src/tools_credentials 2.py +68 -0
- package/src/tools_drive 2.py +487 -0
- package/src/tools_hot_context 2.py +163 -0
- package/src/tools_learnings 2.py +612 -0
- package/src/tools_menu 2.py +229 -0
- package/src/tools_reminders 2.py +88 -0
- package/src/tools_reminders_crud 2.py +363 -0
- package/src/tools_sessions 2.py +1054 -0
- package/src/tools_system_catalog 2.py +19 -0
- package/src/tools_task_history 2.py +57 -0
- package/src/tools_transcripts 2.py +98 -0
- package/src/transcript_utils 2.py +412 -0
- package/src/user_context 2.py +46 -0
- package/src/user_data_portability 2.py +328 -0
- package/src/user_state_model 2.py +170 -0
- package/templates/CLAUDE.md 2.template +108 -0
- package/templates/CODEX.AGENTS.md 2.template +66 -0
- package/templates/launchagents/README 2.md +132 -0
- package/templates/launchagents/com.nexo.auto-close-sessions 2.plist +39 -0
- package/templates/launchagents/com.nexo.catchup 2.plist +39 -0
- package/templates/launchagents/com.nexo.cognitive-decay 2.plist +40 -0
- package/templates/launchagents/com.nexo.dashboard 2.plist +43 -0
- package/templates/launchagents/com.nexo.deep-sleep 2.plist +43 -0
- package/templates/launchagents/com.nexo.evolution 2.plist +44 -0
- package/templates/launchagents/com.nexo.followup-hygiene 2.plist +45 -0
- package/templates/launchagents/com.nexo.immune 2.plist +41 -0
- package/templates/launchagents/com.nexo.postmortem 2.plist +45 -0
- package/templates/launchagents/com.nexo.self-audit 2.plist +47 -0
- package/templates/launchagents/com.nexo.synthesis 2.plist +45 -0
- package/templates/launchagents/com.nexo.watchdog 2.plist +37 -0
- package/templates/nexo_helper 2.py +301 -0
- package/templates/openclaw 2.json +13 -0
- package/templates/plugin-template 2.py +40 -0
- package/templates/script-template 2.py +59 -0
- package/templates/script-template 2.sh +13 -0
- package/templates/skill-script-template 2.py +48 -0
- package/templates/skill-template 2.md +33 -0
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
NEXO Watchdog Smoke
|
|
4
|
+
|
|
5
|
+
Runs the same health checks as the shell watchdog, but never restores files,
|
|
6
|
+
restarts services, disables evolution or notifies the user.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import hashlib
|
|
12
|
+
import json
|
|
13
|
+
import os
|
|
14
|
+
import sqlite3
|
|
15
|
+
import subprocess
|
|
16
|
+
from datetime import datetime
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
|
|
19
|
+
HOME = Path.home()
|
|
20
|
+
NEXO_HOME = Path(os.environ.get("NEXO_HOME", str(HOME / ".nexo")))
|
|
21
|
+
NEXO_CODE = Path(os.environ.get("NEXO_CODE", str(NEXO_HOME)))
|
|
22
|
+
BRAIN_DIR = NEXO_HOME / "brain"
|
|
23
|
+
LOG_DIR = NEXO_HOME / "logs"
|
|
24
|
+
SUMMARY_FILE = LOG_DIR / "watchdog-smoke-summary.json"
|
|
25
|
+
HASH_REGISTRY = NEXO_HOME / "scripts" / ".watchdog-hashes"
|
|
26
|
+
RESTORE_LOG = LOG_DIR / "snapshot-restores.log"
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _read_restore_count_for_current_hour() -> int:
    """Count restore-log entries written during the current local hour.

    A line counts when it contains the current ``YYYY-MM-DD HH`` stamp and
    does not mention a ``/.codex/memories/nexo-`` path. A missing log file
    yields 0.
    """
    if not RESTORE_LOG.exists():
        return 0
    hour_stamp = datetime.now().strftime("%Y-%m-%d %H")
    count = 0
    for entry in RESTORE_LOG.read_text(errors="ignore").splitlines():
        if hour_stamp not in entry:
            continue
        if "/.codex/memories/nexo-" in entry:
            continue
        count += 1
    return count
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _sha256(path: Path) -> str:
|
|
38
|
+
return hashlib.sha256(path.read_bytes()).hexdigest()
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def main() -> int:
    """Run the read-only watchdog health checks and publish a JSON summary.

    Checks performed, in order: SQLite integrity of ``data/nexo.db``, whether
    a ``nexo-brain`` process is alive, age of the newest ``backups/nexo-*.db``
    file, hashes listed in the watchdog hash registry, restore activity in the
    current hour, and the evolution objective flag.

    The summary is written to ``SUMMARY_FILE`` and printed to stdout.

    Returns:
        1 when at least one finding has severity ``ERROR``, otherwise 0.
    """
    LOG_DIR.mkdir(parents=True, exist_ok=True)
    findings = []

    # --- SQLite integrity --------------------------------------------------
    db_path = NEXO_HOME / "data" / "nexo.db"
    integrity = "missing"
    if db_path.exists():
        try:
            conn = sqlite3.connect(str(db_path), timeout=10)
            try:
                integrity = conn.execute("PRAGMA integrity_check").fetchone()[0]
            finally:
                # Always release the handle, even when the PRAGMA itself fails.
                conn.close()
        except Exception as exc:
            integrity = f"error:{exc}"
    if integrity != "ok":
        findings.append({"severity": "ERROR", "area": "sqlite", "msg": f"integrity={integrity}"})

    # --- Server process ----------------------------------------------------
    # Check if the NEXO MCP server process is alive (replaces legacy cortex process check)
    try:
        nexo_server_running = subprocess.run(
            ["pgrep", "-f", "nexo-brain"],
            capture_output=True,
            text=True,
        ).returncode == 0
    except OSError:
        # pgrep is unavailable on this host; a smoke check must not crash on that.
        nexo_server_running = False
    if not nexo_server_running:
        findings.append({"severity": "INFO", "area": "server", "msg": "nexo-brain not running (normal if no active session)"})

    # --- Backups -----------------------------------------------------------
    # Stat each file once and tolerate files that disappear between glob and stat.
    backup_mtimes = []
    for backup in (NEXO_HOME / "backups").glob("nexo-*.db"):
        try:
            backup_mtimes.append(backup.stat().st_mtime)
        except OSError:
            continue
    if backup_mtimes:
        age_seconds = int(datetime.now().timestamp() - max(backup_mtimes))
        if age_seconds > 7200:
            findings.append({"severity": "WARN", "area": "backup", "msg": f"latest backup age={age_seconds}s"})
    else:
        findings.append({"severity": "WARN", "area": "backup", "msg": "no backups found"})

    # --- Immutable file hashes --------------------------------------------
    if HASH_REGISTRY.exists():
        for line in HASH_REGISTRY.read_text().splitlines():
            if not line.strip():
                continue
            filepath, separator, expected_hash = line.partition("|")
            if not separator:
                # A registry line without "|" used to raise ValueError and abort the run.
                findings.append({"severity": "WARN", "area": "immutable", "msg": f"malformed hash registry line: {line.strip()}"})
                continue
            path = Path(filepath)
            if not path.exists():
                continue
            try:
                actual_hash = _sha256(path)
            except OSError as exc:
                findings.append({"severity": "WARN", "area": "immutable", "msg": f"unreadable: {filepath} ({exc})"})
                continue
            if actual_hash != expected_hash.strip():
                findings.append({"severity": "ERROR", "area": "immutable", "msg": f"hash mismatch: {filepath}"})

    # --- Restore activity --------------------------------------------------
    restore_count = _read_restore_count_for_current_hour()
    if restore_count > 2:
        findings.append({"severity": "ERROR", "area": "restore_loop", "msg": f"{restore_count} restores this hour"})
    elif restore_count > 0:
        findings.append({"severity": "INFO", "area": "restore_activity", "msg": f"{restore_count} restores this hour"})

    # --- Evolution objective ----------------------------------------------
    # Check brain/ (canonical) first, fall back to cortex/ (legacy)
    objective = BRAIN_DIR / "evolution-objective.json"
    if not objective.exists():
        objective = NEXO_HOME / "cortex" / "evolution-objective.json"
    evolution_enabled = None
    if objective.exists():
        try:
            obj = json.loads(objective.read_text())
            if not isinstance(obj, dict):
                raise ValueError("top-level JSON value is not an object")
        except (OSError, ValueError) as exc:
            # Corrupt or unreadable objective: report it instead of crashing.
            findings.append({"severity": "WARN", "area": "evolution", "msg": f"unreadable objective: {exc}"})
        else:
            evolution_enabled = obj.get("evolution_enabled", True)
            if not evolution_enabled:
                findings.append({
                    "severity": "WARN",
                    "area": "evolution",
                    "msg": f"disabled: {obj.get('disabled_reason', 'unknown')}",
                })

    has_error = any(f["severity"] == "ERROR" for f in findings)
    summary = {
        "timestamp": datetime.now().isoformat(),
        "ok": not has_error,
        "integrity": integrity,
        "server_running": nexo_server_running,
        "evolution_enabled": evolution_enabled,
        "restore_count_current_hour": restore_count,
        "findings": findings,
    }
    SUMMARY_FILE.write_text(json.dumps(summary, indent=2, ensure_ascii=False))
    print(json.dumps(summary, ensure_ascii=False))
    return 1 if has_error else 0
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
if __name__ == "__main__":
|
|
119
|
+
raise SystemExit(main())
|
|
@@ -0,0 +1,245 @@
|
|
|
1
|
+
#!/usr/bin/env python3
"""Rehydrate archived markdown learnings back into the NEXO learnings table.

The original Evolution #5 incident found an empty learnings table while the
historical archive still existed as markdown grouped by domain. This helper
parses the archive format used in those files:

- markdown tables with `Error | Solucion`
- dated sections with bullet/numbered operational learnings

Dry-run is the default. Pass `--apply` to insert missing learnings.
"""

# The docstring must be the first statement for ``__doc__`` (used as the
# argparse description) to be populated; the future import follows it.
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import argparse
|
|
17
|
+
import re
|
|
18
|
+
import sys
|
|
19
|
+
from dataclasses import dataclass
|
|
20
|
+
from pathlib import Path
|
|
21
|
+
|
|
22
|
+
REPO_SRC = Path(__file__).resolve().parents[1]
|
|
23
|
+
if str(REPO_SRC) not in sys.path:
|
|
24
|
+
sys.path.insert(0, str(REPO_SRC))
|
|
25
|
+
|
|
26
|
+
from db import create_learning, get_db, init_db # noqa: E402
|
|
27
|
+
from runtime_home import export_resolved_nexo_home # noqa: E402
|
|
28
|
+
|
|
29
|
+
NEXO_HOME = export_resolved_nexo_home()
|
|
30
|
+
TABLE_HEADER_TITLES = {"error", "problema", "issue"}
|
|
31
|
+
DEFAULT_ARCHIVE_DIRS = (
|
|
32
|
+
NEXO_HOME / "claude" / "operations" / "archive" / "learnings",
|
|
33
|
+
Path.home() / "claude" / "operations" / "archive" / "learnings",
|
|
34
|
+
Path.home() / ".claude" / "operations" / "archive" / "learnings",
|
|
35
|
+
)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@dataclass(frozen=True)
class LearningCandidate:
    """Immutable record for one learning parsed out of a markdown archive."""

    # Taken from the archive file's stem by the parsers in this module.
    category: str
    # Short headline for the learning.
    title: str
    # Full text of the learning.
    content: str
    # Provenance note describing where the learning was rehydrated from.
    reasoning: str
    # Rule or remedy associated with the learning.
    prevention: str
    # "active" by default; the parsers set "superseded" for obsolete entries.
    status: str = "active"
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def _strip_markdown(text: str) -> str:
|
|
49
|
+
text = text.replace("**", "").replace("__", "")
|
|
50
|
+
text = text.replace("~~", "")
|
|
51
|
+
text = re.sub(r"`([^`]*)`", r"\1", text)
|
|
52
|
+
text = re.sub(r"\[([^\]]+)\]\([^)]+\)", r"\1", text)
|
|
53
|
+
text = re.sub(r"\s+", " ", text).strip(" |")
|
|
54
|
+
return text.strip()
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def _derive_title(text: str) -> str:
|
|
58
|
+
first_sentence = re.split(r"(?<=[.!?])\s+", text, maxsplit=1)[0].strip()
|
|
59
|
+
if not first_sentence:
|
|
60
|
+
first_sentence = text.strip()
|
|
61
|
+
return first_sentence[:180].rstrip(" .")
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def _derive_prevention(text: str) -> str:
|
|
65
|
+
match = re.search(r"(Regla:\s*.*|SIEMPRE\s+.*|NUNCA\s+.*)", text, flags=re.IGNORECASE)
|
|
66
|
+
if match:
|
|
67
|
+
return match.group(1).strip()
|
|
68
|
+
return text[:500].strip()
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def _candidate_reasoning(path: Path, section: str) -> str:
|
|
72
|
+
section_note = f" [{section}]" if section and section != path.stem else ""
|
|
73
|
+
return f"Rehydrated from markdown archive {path.name}{section_note}"
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def _parse_table_row(path: Path, section: str, line: str) -> LearningCandidate | None:
    """Turn one markdown table row into a learning candidate.

    The first cell becomes the title and the second the prevention text.

    Args:
        path: Archive file the row came from; its stem is used as category.
        section: Heading the row appeared under, used for the provenance note.
        line: Raw table line, including its pipe delimiters.

    Returns:
        A ``LearningCandidate``, or ``None`` for rows with fewer than two
        cells, header rows, delimiter rows, or rows with an empty title or
        prevention cell.
    """
    parts = [_strip_markdown(cell) for cell in line.strip().strip("|").split("|")]
    if len(parts) < 2:
        return None
    title, prevention = parts[0], parts[1]
    if not title or not prevention:
        return None
    if title.lower() in TABLE_HEADER_TITLES:
        return None
    # Delimiter rows may carry alignment colons (":---", "---:", ":---:");
    # matching only pure dashes let those through as bogus learnings.
    if "-" in title and set(title) <= {"-", ":"}:
        return None
    is_obsolete = "obsoleto" in prevention.lower() or "obsoleto" in title.lower()
    return LearningCandidate(
        category=path.stem,
        title=title,
        content=f"{title}. {prevention}",
        reasoning=_candidate_reasoning(path, section),
        prevention=prevention,
        status="superseded" if is_obsolete else "active",
    )
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def _consume_bullet_block(lines: list[str], start: int) -> tuple[str, int]:
    """Gather a bullet item and its continuation lines.

    Starting at ``lines[start]`` (a bullet or numbered item), the marker is
    removed and following lines are appended until a blank line, a ``## ``
    heading, a table row or another bullet is reached.

    Returns:
        The joined, markdown-stripped text and the index of the first line
        that was not consumed.
    """
    marker = r"^([-*]|\d+\.)\s+"
    collected = [re.sub(marker, "", lines[start].strip())]
    cursor = start + 1
    limit = len(lines)
    while cursor < limit:
        current = lines[cursor].strip()
        ends_block = (
            not current
            or current.startswith("## ")
            or current.startswith("|")
            or re.match(marker, current) is not None
        )
        if ends_block:
            break
        collected.append(current)
        cursor += 1
    return _strip_markdown(" ".join(collected)), cursor
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def _parse_bullet(path: Path, section: str, text: str) -> LearningCandidate | None:
    """Convert a flattened bullet block into a learning candidate.

    Blocks shorter than 12 characters are ignored. The title and prevention
    are derived from the text, and entries mentioning "obsoleto" are marked
    as superseded.
    """
    if len(text) < 12:
        return None
    is_obsolete = "obsoleto" in text.lower()
    return LearningCandidate(
        category=path.stem,
        title=_derive_title(text),
        content=text,
        reasoning=_candidate_reasoning(path, section),
        prevention=_derive_prevention(text),
        status="superseded" if is_obsolete else "active",
    )
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def parse_archive_file(path: Path) -> list[LearningCandidate]:
    """Parse one markdown archive file into learning candidates.

    ``## `` headings update the current section, table rows (other than
    lines starting with ``|`` followed by a dash) are parsed as
    title/prevention pairs, and bullet or numbered items are parsed together
    with their continuation lines. All other lines are skipped.
    """
    source_lines = path.read_text(encoding="utf-8", errors="ignore").splitlines()
    current_section = path.stem
    found: list[LearningCandidate] = []
    position = 0
    total = len(source_lines)
    while position < total:
        text = source_lines[position].strip()
        following = position + 1
        if text.startswith("## "):
            current_section = _strip_markdown(text[3:])
        elif text.startswith("|") and re.match(r"^\|\s*-", text) is None:
            candidate = _parse_table_row(path, current_section, text)
            if candidate is not None:
                found.append(candidate)
        elif re.match(r"^([-*]|\d+\.)\s+", text):
            # The helper reports where the bullet block ended.
            block, following = _consume_bullet_block(source_lines, position)
            candidate = _parse_bullet(path, current_section, block)
            if candidate is not None:
                found.append(candidate)
        position = following
    return found
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def parse_archive_dir(archive_dir: Path) -> list[LearningCandidate]:
    """Parse every ``*.md`` file in *archive_dir*, in sorted order.

    Candidates are de-duplicated case-insensitively on (category, title);
    the first occurrence wins and the original order is preserved.
    """
    parsed: list[LearningCandidate] = []
    for markdown_file in sorted(archive_dir.glob("*.md")):
        parsed += parse_archive_file(markdown_file)

    # Dicts keep insertion order, so setdefault keeps the first occurrence.
    unique: dict[tuple[str, str], LearningCandidate] = {}
    for candidate in parsed:
        unique.setdefault((candidate.category.lower(), candidate.title.lower()), candidate)
    return list(unique.values())
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
def resolve_archive_dir(explicit: str = "") -> Path:
    """Locate the learnings archive directory.

    Args:
        explicit: Optional user-supplied directory; ``~`` is expanded.

    Returns:
        The explicit directory when given, otherwise the first existing
        entry of ``DEFAULT_ARCHIVE_DIRS``.

    Raises:
        FileNotFoundError: If the explicit path is not a directory, or no
            default location exists.
    """
    if not explicit:
        fallback = next((option for option in DEFAULT_ARCHIVE_DIRS if option.is_dir()), None)
        if fallback is not None:
            return fallback
        tried = ", ".join(str(path) for path in DEFAULT_ARCHIVE_DIRS)
        raise FileNotFoundError("No learnings archive found. Tried: " + tried)

    requested = Path(explicit).expanduser()
    if requested.is_dir():
        return requested
    raise FileNotFoundError(f"archive dir not found: {requested}")
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def apply_candidates(candidates: list[LearningCandidate], *, apply: bool) -> dict:
    """Insert candidates that are not yet present in the learnings table.

    Existing rows are matched case-insensitively on (category, title). In
    dry-run mode (``apply=False``) nothing is written, and ``inserted``
    counts the candidates that would have been created.

    Returns:
        A dict with ``parsed``, ``inserted``, ``skipped_existing`` and
        ``mode`` (``"apply"`` or ``"dry-run"``).
    """
    init_db()
    conn = get_db()
    known = set()
    for row in conn.execute("SELECT category, title FROM learnings").fetchall():
        known.add((row[0].lower(), row[1].lower()))

    inserted = 0
    skipped = 0
    for candidate in candidates:
        identity = (candidate.category.lower(), candidate.title.lower())
        if identity in known:
            skipped += 1
        else:
            if apply:
                create_learning(
                    candidate.category,
                    candidate.title,
                    candidate.content,
                    reasoning=candidate.reasoning,
                    prevention=candidate.prevention,
                    status=candidate.status,
                )
            # Record the key so later duplicates in this run count as existing.
            known.add(identity)
            inserted += 1

    mode = "apply" if apply else "dry-run"
    return {
        "parsed": len(candidates),
        "inserted": inserted,
        "skipped_existing": skipped,
        "mode": mode,
    }
|
|
219
|
+
|
|
220
|
+
|
|
221
|
+
def build_arg_parser() -> argparse.ArgumentParser:
    """Create the command-line parser (``--archive-dir`` and ``--apply``)."""
    cli = argparse.ArgumentParser(description=__doc__)
    option_specs = (
        ("--archive-dir", {"default": "", "help": "Override archive directory"}),
        ("--apply", {"action": "store_true", "help": "Insert parsed learnings into the DB"}),
    )
    for flag, settings in option_specs:
        cli.add_argument(flag, **settings)
    return cli
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
def main(argv: list[str] | None = None) -> int:
    """Command-line entry point: parse the archive and report the outcome.

    Returns:
        1 when no archive directory can be resolved (the error is printed
        to stderr), otherwise 0 after printing a one-line summary.
    """
    options = build_arg_parser().parse_args(argv)
    try:
        archive_dir = resolve_archive_dir(options.archive_dir)
    except FileNotFoundError as exc:
        print(f"ERROR: {exc}", file=sys.stderr)
        return 1

    report = apply_candidates(parse_archive_dir(archive_dir), apply=options.apply)
    message = (
        f"{report['mode']}: archive={archive_dir} parsed={report['parsed']} "
        f"inserted={report['inserted']} skipped_existing={report['skipped_existing']}"
    )
    print(message)
    return 0
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
if __name__ == "__main__":
|
|
245
|
+
raise SystemExit(main())
|