nexo-brain 1.2.3 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29) hide show
  1. package/README.md +10 -5
  2. package/package.json +1 -1
  3. package/src/__pycache__/evolution_cycle.cpython-314.pyc +0 -0
  4. package/src/cognitive.py +45 -0
  5. package/src/evolution_cycle.py +266 -0
  6. package/src/plugins/guard.py +235 -1
  7. package/src/scripts/__pycache__/check-context.cpython-314.pyc +0 -0
  8. package/src/scripts/__pycache__/nexo-auto-update.cpython-314.pyc +0 -0
  9. package/src/scripts/__pycache__/nexo-catchup.cpython-314.pyc +0 -0
  10. package/src/scripts/__pycache__/nexo-cognitive-decay.cpython-314.pyc +0 -0
  11. package/src/scripts/__pycache__/nexo-daily-self-audit.cpython-314.pyc +0 -0
  12. package/src/scripts/__pycache__/nexo-evolution-run.cpython-314.pyc +0 -0
  13. package/src/scripts/__pycache__/nexo-immune.cpython-314.pyc +0 -0
  14. package/src/scripts/__pycache__/nexo-learning-validator.cpython-314.pyc +0 -0
  15. package/src/scripts/__pycache__/nexo-postmortem-consolidator.cpython-314.pyc +0 -0
  16. package/src/scripts/__pycache__/nexo-reflection.cpython-314.pyc +0 -0
  17. package/src/scripts/__pycache__/nexo-sleep.cpython-314.pyc +0 -0
  18. package/src/scripts/__pycache__/nexo-synthesis.cpython-314.pyc +0 -0
  19. package/src/scripts/check-context.py +257 -0
  20. package/src/scripts/nexo-catchup.py +59 -5
  21. package/src/scripts/nexo-cognitive-decay.py +8 -0
  22. package/src/scripts/nexo-daily-self-audit.py +168 -183
  23. package/src/scripts/nexo-evolution-run.py +584 -0
  24. package/src/scripts/nexo-immune.py +108 -91
  25. package/src/scripts/nexo-learning-validator.py +226 -0
  26. package/src/scripts/nexo-postmortem-consolidator.py +230 -414
  27. package/src/scripts/nexo-sleep.py +283 -503
  28. package/src/scripts/nexo-synthesis.py +141 -432
  29. package/src/tools_sessions.py +20 -12
@@ -4,7 +4,7 @@ NEXO Immune System — Health monitor & auto-repair.
4
4
 
5
5
  Runs every 30 minutes via LaunchAgent. Checks tokens, LaunchAgents, DBs,
6
6
  scripts, logs, disk, and server crons. Auto-repairs what it can, alerts
7
- User via WhatsApp only on NEW failures.
7
+ the user via notification only on NEW failures.
8
8
 
9
9
  Zero external dependencies. Stdlib + sqlite3 + urllib only.
10
10
  """
@@ -14,7 +14,6 @@ import json
14
14
  import os
15
15
  import re
16
16
  import shlex
17
- import shutil
18
17
  import signal
19
18
  import sqlite3
20
19
  import ssl
@@ -56,62 +55,38 @@ SSL_CTX = _make_ssl_context()
56
55
 
57
56
  # ─── Paths ────────────────────────────────────────────────────────────────────
58
57
  HOME = Path.home()
59
- CLAUDE_DIR = HOME / "claude"
58
+ CLAUDE_DIR = HOME / ".nexo"
60
59
  COORD_DIR = CLAUDE_DIR / "coordination"
61
60
  BRAIN_DIR = CLAUDE_DIR / "brain"
62
61
  SCRIPTS_DIR = CLAUDE_DIR / "scripts"
63
- NEXO_HOME = os.environ.get("NEXO_HOME", str(Path.home() / ".nexo"))
64
62
 
65
63
  IMMUNE_STATUS = COORD_DIR / "immune-status.json"
66
64
  IMMUNE_LOG = COORD_DIR / "immune-log.json"
67
65
  LOCK_FILE = COORD_DIR / "immune-process.lock"
68
66
 
69
- WA_NOTIFY = SCRIPTS_DIR / "nexo-whatsapp-notify.sh"
67
+ # Configure your alert script here (optional)
68
+ # ALERT_SCRIPT = SCRIPTS_DIR / "my-notify.sh"
70
69
 
71
70
  CLAUDE_MEM_DB = HOME / ".claude-mem" / "claude-mem.db"
72
71
 
73
72
  LAUNCH_AGENTS_DIR = HOME / "Library" / "LaunchAgents"
73
+ CLAUDE_CLI = HOME / ".local" / "bin" / "claude"
74
74
 
75
75
  NOW = datetime.now()
76
76
  TODAY = date.today()
77
77
 
78
78
  # ─── Config ───────────────────────────────────────────────────────────────────
79
79
 
80
+ # Token checks — NEXO core infrastructure only.
81
+ # Add your own service tokens here if you want immune to monitor them.
82
+ # Supported types: file_text (read file), json_field (read JSON), service_account (gcloud)
80
83
  TOKEN_CHECKS = [
81
- {
82
- "name": "Meta Ads",
83
- "path": "~/.claude/meta_token.txt",
84
- "type": "file_text",
85
- "test_url": "https://graph.facebook.com/v21.0/me?access_token={token}",
86
- },
87
- {
88
- "name": "Instagram",
89
- "path": "~/.claude/instagram_token.txt",
90
- "type": "file_text",
91
- "test_url": "https://graph.instagram.com/v21.0/me?access_token={token}",
92
- },
93
- {
94
- "name": "YouTube",
95
- "path": "~/.claude/youtube_token.json",
96
- "type": "json_field",
97
- },
98
- {
99
- "name": "X/Twitter",
100
- "path": "~/.claude/x_credentials.json",
101
- "type": "json_field",
102
- },
103
- {
104
- "name": "GA4 Service Account",
105
- "path": "~/.claude/ga4-service-account.json",
106
- "type": "service_account",
107
- },
108
- # Example: Shopify Admin token check
84
+ # Example: uncomment and configure for your services
109
85
  # {
110
- # "name": "Shopify Admin",
111
- # "type": "hardcoded",
112
- # "test_url": "https://YOUR_STORE.myshopify.com/admin/api/2024-01/shop.json",
113
- # "token": "YOUR_SHOPIFY_ADMIN_TOKEN",
114
- # "header": "X-Shopify-Access-Token",
86
+ # "name": "My API",
87
+ # "path": "~/.nexo/my_api_token.txt",
88
+ # "type": "file_text",
89
+ # "test_url": "https://api.example.com/health?token={token}",
115
90
  # },
116
91
  ]
117
92
 
@@ -133,7 +108,7 @@ LOG_TRUNCATE_SIZE = 50 * 1024 * 1024 # 50 MB — auto-truncate threshold
133
108
  DISK_WARN_PCT = 85
134
109
  DISK_FAIL_PCT = 95
135
110
 
136
- # Quiet hours — no WhatsApp alerts
111
+ # Quiet hours — no notification alerts
137
112
  QUIET_START = 23 # 23:00
138
113
  QUIET_END = 7 # 07:00
139
114
 
@@ -167,7 +142,7 @@ def save_json(path, data):
167
142
 
168
143
 
169
144
  def is_quiet_hours():
170
- """Check if within WhatsApp quiet hours (23:00 - 07:00)."""
145
+ """Check if within quiet hours (23:00 - 07:00). No alerts sent."""
171
146
  h = NOW.hour
172
147
  if QUIET_START > QUIET_END:
173
148
  return h >= QUIET_START or h < QUIET_END
@@ -175,22 +150,21 @@ def is_quiet_hours():
175
150
 
176
151
 
177
152
  def is_skip_hours():
178
- """Check if within skip hours (00:00 - 06:00)."""
153
+ """Check if within skip hours (00:00 - 06:00). Full immune cycle skipped."""
179
154
  return SKIP_START <= NOW.hour < SKIP_END
180
155
 
181
156
 
182
- def send_wa_alert(title, message):
183
- """Send WhatsApp alert if not in quiet hours."""
157
+ def send_alert(title, message):
158
+ """Send alert for critical failures. Override this for your notification system.
159
+
160
+ Default: prints to stdout (captured by LaunchAgent logs).
161
+ Customize: webhook, email, Slack, etc.
162
+ """
184
163
  if is_quiet_hours():
185
- print(f" [QUIET] Suppressed WA alert: {title}")
164
+ print(f" [QUIET] Suppressed alert: {title}")
186
165
  return False
187
166
  try:
188
- subprocess.run(
189
- [str(WA_NOTIFY), title, message],
190
- timeout=15,
191
- capture_output=True,
192
- )
193
- print(f" [WA] Sent alert: {title}")
167
+ print(f" [ALERT] {title}: {message}")
194
168
  return True
195
169
  except Exception as e:
196
170
  print(f" [WA] Failed to send: {e}")
@@ -369,8 +343,8 @@ def check_databases():
369
343
  results = []
370
344
 
371
345
  dbs = [
372
- ("nexo.db", Path(NEXO_HOME) / "nexo.db"),
373
- ("cognitive.db", Path(NEXO_HOME) / "cognitive.db"),
346
+ ("nexo.db", Path.home() / ".nexo" / "nexo.db"),
347
+ ("cognitive.db", Path.home() / ".nexo" / "cognitive.db"),
374
348
  ("claude-mem.db", CLAUDE_MEM_DB),
375
349
  ]
376
350
 
@@ -549,44 +523,16 @@ def check_disk():
549
523
 
550
524
 
551
525
def check_server_crons():
    """Placeholder for external-server health checks; currently checks nothing.

    This stub reads no configuration and opens no SSH connection: the immune
    run only covers local NEXO infrastructure out of the box. To monitor
    remote servers, implement the checks in this function (for example by
    iterating over a list such as
    ``[{"host": "myserver.com", "port": 22, "command": "uptime"}]``) and
    append one result dict per check.

    Returns:
        tuple: ``(results, ran)`` where ``results`` is the list of check
        results (always a fresh empty list here) and ``ran`` is ``False``
        because no remote check was executed.
    """
    # A new list is built on every call so callers may mutate it freely.
    return [], False
590
536
 
591
537
 
592
538
  # ─── Alerting ─────────────────────────────────────────────────────────────────
@@ -660,11 +606,11 @@ def detect_new_failures(current_results, previous_status):
660
606
 
661
607
 
662
608
  def send_failure_alerts(new_failures):
663
- """Send WhatsApp alerts for new failures. Max 1 alert per 30 min."""
609
+ """Send notification alerts for new failures. Max 1 alert per 30 min."""
664
610
  if not new_failures:
665
611
  return
666
612
 
667
- # Global alert cooldown — max 1 WhatsApp alert per 30 minutes
613
+ # Global alert cooldown — max 1 notification alert per 30 minutes
668
614
  cooldown_file = COORD_DIR / "immune-last-alert.txt"
669
615
  if cooldown_file.exists():
670
616
  try:
@@ -685,7 +631,7 @@ def send_failure_alerts(new_failures):
685
631
  msg = "\n".join(lines)
686
632
  if len(fails) > 5:
687
633
  msg += f"\n... +{len(fails) - 5} more"
688
- sent = send_wa_alert(
634
+ sent = send_alert(
689
635
  "NEXO Immune FAIL",
690
636
  f"{len(fails)} new failure(s):\n{msg}"
691
637
  )
@@ -693,7 +639,7 @@ def send_failure_alerts(new_failures):
693
639
  if warns and not fails:
694
640
  lines = [f"- {f['name']}: {f['detail']}" for f in warns[:3]]
695
641
  msg = "\n".join(lines)
696
- sent = send_wa_alert(
642
+ sent = send_alert(
697
643
  "NEXO Immune WARN",
698
644
  f"{len(warns)} new warning(s):\n{msg}"
699
645
  )
@@ -865,6 +811,77 @@ def _run_checks(lock_fd):
865
811
  print(f"Status saved to {IMMUNE_STATUS}")
866
812
  print(f"Log appended to {IMMUNE_LOG} ({len(log)} entries)")
867
813
 
814
+ # ─── Stage B: CLI interpretation (only when issues found) ────────────
815
+ if counts["FAIL"] > 0 or counts["WARN"] > 2 or repairs:
816
+ _run_cli_triage(all_results, repairs, counts)
817
+
818
+
819
def _run_cli_triage(all_results: dict, repairs: list, counts: dict):
    """Ask the Claude CLI to turn the raw check results into a triage report.

    The findings are serialised to JSON, embedded in a prompt, and handed to
    the CLI, which is instructed to write the report to
    ``COORD_DIR / "immune-triage.md"`` itself (it is granted file tools for
    that purpose). This function only prints progress; it never raises for
    CLI failures.

    Args:
        all_results: Check results, serialised as-is under the "checks" key.
        repairs: Repairs performed during this run, serialised under "repairs".
        counts: Status counters, serialised under "counts".

    Returns:
        None.
    """
    if not CLAUDE_CLI.exists():
        print("[SKIP] Claude CLI not found, skipping triage")
        return

    triage_file = COORD_DIR / "immune-triage.md"
    findings_json = json.dumps({
        "timestamp": NOW.strftime("%Y-%m-%d %H:%M"),
        "counts": counts,
        "repairs": repairs,
        "checks": all_results,
    }, indent=2, default=str)  # default=str: stringify anything JSON cannot encode

    prompt = f"""You are the NEXO Immune System triage analyst.

Below are the raw health check results from a scheduled scan. Your job:

1. Identify which failures are REAL problems vs transient/expected
2. Group related issues (e.g. SSH failure + server cron failure = same root cause)
3. Prioritize: what needs attention NOW vs can wait
4. For each real issue, suggest a specific remediation action
5. Note any patterns across recent runs if visible

Write a concise triage report to: {triage_file}

Format:
## Immune Triage — YYYY-MM-DD HH:MM

### Critical (act now)
- ...

### Monitor (watch next run)
- ...

### Resolved (auto-repaired)
- ...

### Patterns
- ...

Raw findings:
{findings_json}

Write the report. Be concise — max 40 lines."""

    def report_stamp():
        """Return (mtime_ns, size) of the report file, or None if unreadable."""
        try:
            info = triage_file.stat()
        except OSError:
            return None
        return (info.st_mtime_ns, info.st_size)

    print("\n[TRIAGE] Running CLI interpretation...")
    env = os.environ.copy()
    # NOTE(review): presumably removed so the child CLI does not think it is
    # nested inside another Claude Code session — confirm.
    env.pop("CLAUDECODE", None)
    env.pop("CLAUDE_CODE", None)

    # Remember the report's state so a stale file from an earlier run is not
    # mistaken for fresh output.
    stamp_before = report_stamp()

    try:
        result = subprocess.run(
            [str(CLAUDE_CLI), "-p", prompt, "--model", "opus",
             "--allowedTools", "Read,Write,Edit,Glob,Grep"],
            capture_output=True, text=True, timeout=120, env=env
        )
    except subprocess.TimeoutExpired:
        print("[TRIAGE] CLI timed out (120s)")
        return
    except Exception as e:
        # Best-effort stage: triage must never break the immune run itself.
        print(f"[TRIAGE] Error: {e}")
        return

    if result.returncode != 0:
        print(f"[TRIAGE] CLI exited {result.returncode}: {result.stderr[:200]}")
    elif report_stamp() not in (None, stamp_before):
        print(f"[TRIAGE] Report written to {triage_file}")
    else:
        # Exit code 0 only means the CLI finished, not that it wrote the file.
        print(f"[TRIAGE] CLI exited 0 but did not update {triage_file}")
884
+
868
885
 
869
886
  if __name__ == "__main__":
870
887
  main()
@@ -0,0 +1,226 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ NEXO Learning Validator — Cross-checks findings against existing learnings.
4
+
5
+ Wrapper collects the finding + all learnings from SQLite, then passes
6
+ to Claude CLI (opus) to make an intelligent determination of whether
7
+ the finding is known, related, or genuinely new.
8
+
9
+ Usage as CLI:
10
+ python3 nexo-learning-validator.py "finding text to validate"
11
+ python3 nexo-learning-validator.py --category project "finding text"
12
+
13
+ Usage as library:
14
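+ from nexo_learning_validator import validate_finding  # NOTE: needs an importable alias (e.g. a symlink named nexo_learning_validator.py); the hyphenated file name cannot be imported directly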
15
+ result = validate_finding("CRITICAL: message_id column is NULL")
16
+ if result["known"]:
17
+ print(f"Already known: {result['matching_learnings']}")
18
+
19
+ Exit codes:
20
+ 0 = Finding is NEW (not known)
21
+ 1 = Finding is KNOWN (matches existing learning)
22
+ """
23
+
24
+ import json
25
+ import os
26
+ import sqlite3
27
+ import subprocess
28
+ import sys
29
+ from pathlib import Path
30
+
31
+ NEXO_DB = Path.home() / ".nexo" / "nexo.db"
32
+ CLAUDE_CLI = Path.home() / ".local" / "bin" / "claude"
33
+
34
+
35
def get_all_learnings(category: str = None, db_path=None) -> list[dict]:
    """Fetch learnings from the NEXO SQLite database.

    Args:
        category: When truthy, only learnings of this category are returned;
            otherwise every row is returned.
        db_path: Optional path of the database to read. Defaults to the
            module-level ``NEXO_DB``.

    Returns:
        One dict per row with the keys "id", "category", "title", "content".

    Raises:
        sqlite3.Error: If the database cannot be opened or queried (for
            example when the ``learnings`` table does not exist).
    """
    from contextlib import closing  # local import keeps the block self-contained

    target = NEXO_DB if db_path is None else db_path

    query = "SELECT id, category, title, content FROM learnings"
    params = ()
    if category:
        query += " WHERE category = ?"
        params = (category,)

    # sqlite3's own context manager only manages transactions; closing()
    # guarantees the handle is released even when the query raises.
    with closing(sqlite3.connect(str(target), timeout=10)) as conn:
        conn.row_factory = sqlite3.Row
        return [dict(row) for row in conn.execute(query, params).fetchall()]
50
+
51
+
52
def validate_finding(finding: str, category: str = None) -> dict:
    """Decide whether *finding* duplicates an existing learning.

    The learnings are loaded from the database, summarised, and sent to the
    Claude CLI together with the finding. If the CLI is missing, fails, or
    answers with something that is not a usable verdict, the mechanical
    similarity check (``_mechanical_validate``) is used instead.

    Args:
        finding: Free-text description of the finding to check.
        category: Optional learning category to restrict the comparison to.

    Returns:
        {
            "known": bool,
            "confidence": float (0-1),
            "matching_learnings": [{"id": int, "title": str, "similarity": float}],
            "recommendation": str
        }
    """
    learnings = get_all_learnings(category)

    if not learnings:
        return {
            "known": False,
            "confidence": 0,
            "matching_learnings": [],
            "recommendation": "No learnings in DB — finding is new by default"
        }

    # Compact reference for the CLI: content is capped at 300 characters per
    # learning to keep the prompt small.
    learnings_ref = [
        {
            "id": entry["id"],
            "cat": entry["category"],
            "title": entry["title"],
            "content": (entry["content"] or "")[:300],
        }
        for entry in learnings
    ]

    prompt = f"""You are a finding deduplication engine. Compare a new finding against existing learnings and determine if it's already known.

NEW FINDING:
{finding}

EXISTING LEARNINGS ({len(learnings_ref)} total):
{json.dumps(learnings_ref, indent=1)}

Respond with ONLY valid JSON (no markdown, no code fences):
{{
"known": true/false,
"confidence": 0.0-1.0,
"matching_learnings": [
{{"id": <learning_id>, "title": "<title>", "similarity": 0.0-1.0}}
],
"recommendation": "<one line: KNOWN/LIKELY KNOWN/POSSIBLY RELATED/NEW>"
}}

Rules:
- confidence >= 0.7 and same root cause = known: true
- confidence 0.55-0.7 and related topic = known: true, say LIKELY KNOWN
- confidence < 0.55 = known: false
- Max 5 matching_learnings, sorted by similarity descending
- If the finding describes the SAME bug/issue/pattern as a learning, it's known even if worded differently
- Be strict: different symptoms of different bugs are NOT the same even if they mention the same file"""

    # Try the CLI first, fall back to mechanical similarity.
    if CLAUDE_CLI.exists():
        verdict = _cli_verdict(prompt)
        if verdict is not None:
            return verdict

    return _mechanical_validate(finding, learnings)


def _cli_verdict(prompt: str):
    """Run the Claude CLI on *prompt*; return a normalised verdict or None."""
    env = os.environ.copy()
    # NOTE(review): presumably removed so the child CLI does not think it is
    # nested inside another Claude Code session — confirm.
    env.pop("CLAUDECODE", None)
    env.pop("CLAUDE_CODE", None)

    try:
        # NOTE(review): the prompt embeds caller-supplied finding text and
        # only a JSON reply is needed; the Write/Edit tools look broader than
        # necessary — consider narrowing --allowedTools.
        result = subprocess.run(
            [str(CLAUDE_CLI), "-p", prompt, "--model", "opus",
             "--allowedTools", "Read,Write,Edit,Glob,Grep"],
            capture_output=True, text=True, timeout=60, env=env
        )
    except (subprocess.SubprocessError, OSError, ValueError) as exc:
        # Diagnostics go to stderr so `--json` output on stdout stays clean.
        print(f"[validator] CLI call failed, using mechanical fallback: {exc}",
              file=sys.stderr)
        return None

    if result.returncode != 0:
        print(f"[validator] CLI exited {result.returncode}, using mechanical fallback",
              file=sys.stderr)
        return None

    text = result.stdout.strip()
    # Strip markdown fences if present
    if "```json" in text:
        text = text.split("```json")[1].split("```")[0]
    elif "```" in text:
        text = text.split("```")[1].split("```")[0]

    try:
        raw = json.loads(text.strip())
    except ValueError as exc:
        print(f"[validator] CLI reply is not JSON, using mechanical fallback: {exc}",
              file=sys.stderr)
        return None

    verdict = _normalize_verdict(raw)
    if verdict is None:
        print("[validator] CLI reply has an unexpected shape, using mechanical fallback",
              file=sys.stderr)
    return verdict


def _normalize_verdict(raw):
    """Coerce parsed CLI JSON into the documented result shape, or None if unusable."""
    if not isinstance(raw, dict) or not isinstance(raw.get("known"), bool):
        return None

    def ratio(value):
        """Convert *value* to a float clamped to [0, 1]; 0.0 when not numeric."""
        try:
            number = float(value)
        except (TypeError, ValueError):
            return 0.0
        if number != number:  # NaN compares unequal to itself
            return 0.0
        return min(1.0, max(0.0, number))

    candidates = raw.get("matching_learnings")
    matches = []
    for item in candidates if isinstance(candidates, list) else []:
        if not isinstance(item, dict) or "id" not in item:
            continue
        match = dict(item)
        match["title"] = str(item.get("title") or "")
        match["similarity"] = ratio(item.get("similarity"))
        matches.append(match)
    matches.sort(key=lambda m: m["similarity"], reverse=True)

    return {
        "known": raw["known"],
        "confidence": ratio(raw.get("confidence")),
        "matching_learnings": matches[:5],
        "recommendation": str(raw.get("recommendation") or ""),
    }
135
+
136
+
137
def _mechanical_validate(finding: str, learnings: list[dict]) -> dict:
    """Fallback validation using SequenceMatcher when the CLI is unavailable.

    Each learning is scored as
    ``max(title_similarity, content_similarity) * 0.6 + keyword_overlap * 0.4``
    and kept when the score reaches 0.45. The best score decides the verdict:
    >= 0.7 is KNOWN, >= 0.55 is LIKELY KNOWN, anything lower is reported as
    possibly related but not known.

    Args:
        finding: Free-text description of the finding.
        learnings: Rows with the keys "id", "category", "title", "content";
            a ``None`` title or content is treated as an empty string.

    Returns:
        Dict with the keys "known", "confidence", "matching_learnings"
        (at most five, best first) and "recommendation".
    """
    from difflib import SequenceMatcher

    threshold = 0.45
    finding_lower = finding.lower()  # hoisted: identical for every learning
    finding_kw = _extract_keywords(finding)
    matches = []

    for learning in learnings:
        # Both text columns may be NULL in the database.
        title = learning["title"] or ""
        content = learning["content"] or ""

        title_sim = SequenceMatcher(None, finding_lower, title.lower()).ratio()
        content_sim = SequenceMatcher(None, finding_lower, content.lower()).ratio()

        learning_kw = _extract_keywords(f"{title} {content}")
        # Share of the finding's keywords that also appear in the learning.
        if finding_kw and learning_kw:
            kw_overlap = len(finding_kw & learning_kw) / len(finding_kw)
        else:
            kw_overlap = 0

        combined = max(title_sim, content_sim) * 0.6 + kw_overlap * 0.4

        if combined >= threshold:
            matches.append({
                "id": learning["id"],
                "category": learning["category"],
                "title": title,
                "similarity": round(combined, 3),
            })

    matches.sort(key=lambda x: x["similarity"], reverse=True)
    top = matches[:5]

    if not top:
        return {"known": False, "confidence": 0, "matching_learnings": [], "recommendation": "NEW finding"}

    best = top[0]["similarity"]
    if best >= 0.7:
        return {"known": True, "confidence": best, "matching_learnings": top,
                "recommendation": f"KNOWN issue (learning #{top[0]['id']})"}
    if best >= 0.55:
        return {"known": True, "confidence": best, "matching_learnings": top,
                "recommendation": f"LIKELY KNOWN (learning #{top[0]['id']})"}
    return {"known": False, "confidence": best, "matching_learnings": top,
            "recommendation": "POSSIBLY RELATED but different enough to report"}
179
+
180
+
181
+ def _extract_keywords(text: str) -> set:
182
+ """Extract meaningful keywords from text."""
183
+ stop_words = {
184
+ 'the', 'a', 'an', 'is', 'are', 'was', 'were', 'be', 'been', 'being',
185
+ 'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'could',
186
+ 'should', 'may', 'might', 'must', 'shall', 'can', 'need', 'dare',
187
+ 'to', 'of', 'in', 'for', 'on', 'with', 'at', 'by', 'from', 'as',
188
+ 'and', 'but', 'or', 'nor', 'not', 'so', 'yet', 'both', 'either',
189
+ 'error', 'critical', 'warning', 'bug', 'issue', 'problem', 'fix',
190
+ 'el', 'la', 'los', 'las', 'un', 'una', 'de', 'en', 'que', 'por',
191
+ }
192
+ words = set()
193
+ for word in text.lower().split():
194
+ clean = ''.join(c for c in word if c.isalnum() or c == '_')
195
+ if clean and len(clean) > 2 and clean not in stop_words:
196
+ words.add(clean)
197
+ return words
198
+
199
+
200
def main():
    """Command-line entry point: validate one finding and report the verdict.

    Prints either the raw result as JSON (``--json``) or a short
    human-readable summary, then exits with status 1 when the finding is
    KNOWN and 0 when it is NEW.
    """
    import argparse
    parser = argparse.ArgumentParser(description="Validate findings against existing NEXO learnings")
    parser.add_argument("finding", help="The finding text to validate")
    parser.add_argument("--category", "-c", help="Filter learnings by category")
    parser.add_argument("--json", "-j", action="store_true", help="Output as JSON")
    args = parser.parse_args()

    result = validate_finding(args.finding, args.category)

    # The verdict may originate from an LLM reply, so its shape is not
    # guaranteed; degrade to "NEW" instead of crashing while formatting.
    if not isinstance(result, dict):
        print("[validator] unexpected result shape; treating finding as NEW",
              file=sys.stderr)
        result = {
            "known": False,
            "confidence": 0,
            "matching_learnings": [],
            "recommendation": "NEW (no usable verdict)",
        }

    def as_ratio(value):
        """Return *value* as a float for percent formatting; 0.0 if not numeric."""
        try:
            return float(value)
        except (TypeError, ValueError):
            return 0.0

    known = bool(result.get("known"))

    if args.json:
        print(json.dumps(result, indent=2))
    else:
        status = "KNOWN" if known else "NEW"
        print(f"Status: {status} (confidence: {as_ratio(result.get('confidence')):.0%})")
        print(f"Recommendation: {result.get('recommendation', '')}")
        matches = result.get("matching_learnings") or []
        if matches:
            print("Related learnings:")
            for m in matches:
                if not isinstance(m, dict):
                    continue
                # CLI-produced matches carry no category, hence the '?' default.
                cat = m.get('category', '?')
                print(f" #{m.get('id', '?')} [{cat}] {m.get('title', '')} ({as_ratio(m.get('similarity')):.0%})")

    sys.exit(1 if known else 0)
223
+
224
+
225
+ if __name__ == "__main__":
226
+ main()