nexo-brain 1.2.2 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,15 +6,9 @@ and provides stats on error prevention effectiveness.
6
6
  import json
7
7
  import os
8
8
  from datetime import datetime, timedelta
9
- from db import get_db, find_similar_learnings, extract_keywords
9
+ from db import get_db, find_similar_learnings, extract_keywords, search_learnings, search_changes
10
10
 
11
11
 
12
- SCHEMA_CACHE_PATH = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
13
- "nexo-mcp", "schema_cache.json")
14
- # Fallback: same dir as db
15
- if not os.path.exists(SCHEMA_CACHE_PATH):
16
- SCHEMA_CACHE_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "schema_cache.json")
17
-
18
12
 
19
13
  def _load_schema_cache() -> dict:
20
14
  """Load cached DB schemas from schema_cache.json."""
@@ -117,7 +111,8 @@ def handle_guard_check(files: str = "", area: str = "", include_schemas: str = "
117
111
  ).fetchall()
118
112
  for r in rows:
119
113
  if r["id"] not in seen_ids:
120
- result["universal_rules"].append({"id": r["id"], "rule": r["title"]})
114
+ seen_ids.add(r["id"])
115
+ result["universal_rules"].append({"id": r["id"], "rule": r["title"], "category": r["category"]})
121
116
 
122
117
  # 4. DB schemas if files contain SQL keywords
123
118
  if include_schemas_bool and file_list:
@@ -141,16 +136,42 @@ def handle_guard_check(files: str = "", area: str = "", include_schemas: str = "
141
136
  elif "cloud_sql" in cache and table in cache["cloud_sql"]:
142
137
  result["schemas"][table] = cache["cloud_sql"][table]
143
138
 
144
- # 5. Check for blocking rules (5+ repetitions)
145
- for learning in result["learnings"]:
139
+ # 5. Check for blocking rules two paths:
140
+ # (a) 5+ repetitions (existing behavior)
141
+ # (b) Learning contains NUNCA/NEVER/PROHIBIDO and matches semantically (aggressive mode)
142
+ import re
143
+ BLOCKING_KEYWORDS = re.compile(
144
+ r'\bNUNCA\b|\bNEVER\b|\bPROHIBIDO\b|\bNO\s+\w+\b|\bFORBIDDEN\b|\bBLOCKING\b|\bSIEMPRE\b|\bALWAYS\b',
145
+ re.IGNORECASE
146
+ )
147
+ # Check both learnings and universal_rules for blocking
148
+ all_candidates = [(l, "learning") for l in result["learnings"]] + \
149
+ [(u, "universal") for u in result["universal_rules"]]
150
+ blocking_seen = set()
151
+ for learning, source in all_candidates:
146
152
  lid = learning["id"]
153
+ if lid in blocking_seen:
154
+ continue
147
155
  rep_count = conn.execute(
148
156
  "SELECT COUNT(*) as cnt FROM error_repetitions WHERE original_learning_id = ?",
149
157
  (lid,)
150
158
  ).fetchone()["cnt"]
159
+
160
+ # Path (a): 5+ repetitions
151
161
  if rep_count >= 5:
162
+ blocking_seen.add(lid)
152
163
  result["blocking_rules"].append({
153
- "id": lid, "rule": learning["rule"], "repetitions": rep_count
164
+ "id": lid, "rule": learning["rule"], "repetitions": rep_count,
165
+ "reason": "repeated_error"
166
+ })
167
+ continue
168
+
169
+ # Path (b): Aggressive — learning TITLE contains prohibition keywords
170
+ if BLOCKING_KEYWORDS.search(learning["rule"]):
171
+ blocking_seen.add(lid)
172
+ result["blocking_rules"].append({
173
+ "id": lid, "rule": learning["rule"], "repetitions": rep_count,
174
+ "reason": "prohibition_keyword"
154
175
  })
155
176
 
156
177
  # 6. Area repetition rate
@@ -185,15 +206,6 @@ def handle_guard_check(files: str = "", area: str = "", include_schemas: str = "
185
206
  cog_top_k = 3
186
207
  cog_min_score = 0.65
187
208
 
188
- # Somatic risk lowers threshold further
189
- try:
190
- risk_result = cognitive.somatic_get_risk(file_list, area)
191
- if risk_result["max_risk"] > 0.5:
192
- cog_min_score = min(cog_min_score, 0.4)
193
- cog_top_k = max(cog_top_k, 5)
194
- except Exception:
195
- pass
196
-
197
209
  query_parts = []
198
210
  if file_list:
199
211
  query_parts.append(f"editing files: {', '.join(file_list[:5])}")
@@ -241,7 +253,11 @@ def handle_guard_check(files: str = "", area: str = "", include_schemas: str = "
241
253
  if result["blocking_rules"]:
242
254
  lines.append("BLOCKING RULES (resolve BEFORE writing):")
243
255
  for r in result["blocking_rules"]:
244
- lines.append(f" #{r['id']} ({r['repetitions']}x repeated): {r['rule']}")
256
+ reason = r.get("reason", "repeated_error")
257
+ if reason == "prohibition_keyword":
258
+ lines.append(f" #{r['id']} [PROHIBIT]: {r['rule']}")
259
+ else:
260
+ lines.append(f" #{r['id']} ({r['repetitions']}x repeated): {r['rule']}")
245
261
  lines.append("")
246
262
 
247
263
  if result["learnings"]:
@@ -435,10 +451,244 @@ def handle_somatic_stats() -> str:
435
451
  return "Error: {}".format(e)
436
452
 
437
453
 
454
# English/Spanish stopwords ignored when extracting keywords from a finding.
_CROSS_CHECK_STOPWORDS = frozenset({
    "the", "a", "an", "is", "in", "on", "at", "to", "of", "and", "or", "but",
    "for", "with", "that", "this", "it", "as", "are", "was", "be", "by", "not",
    "has", "have", "from", "which", "when", "if", "then", "do", "does", "can",
    "el", "la", "los", "las", "un", "una", "en", "de", "del", "al", "y", "o",
    "que", "se", "no", "es", "por", "con", "su", "pero", "como", "para",
    "este", "esta", "esto", "son", "hay", "más", "ya",
})

# Punctuation trimmed from both ends of a token before it is filtered.
_CROSS_CHECK_PUNCTUATION = ".,;:!?\"'()[]{}"


def _finding_keywords(finding: str, limit: int = 5) -> list:
    """Return up to *limit* distinct lowercase keywords taken from *finding*.

    Tokens are stripped of surrounding punctuation BEFORE the length and
    stopword filters run, so "the," or "(db)" cannot slip through and an
    all-punctuation token never becomes an empty keyword.
    """
    keywords = []
    for token in finding.lower().split():
        word = token.strip(_CROSS_CHECK_PUNCTUATION)
        if len(word) < 4 or word in _CROSS_CHECK_STOPWORDS or word in keywords:
            continue
        keywords.append(word)
        if len(keywords) == limit:
            break
    return keywords


def handle_guard_cross_check(findings: list, area: str = "") -> str:
    """Cross-check audit findings against known learnings to filter false positives.

    Each non-blank finding is reduced to a handful of keywords and looked up
    with ``search_learnings``; findings with at least one match are reported
    as "known", the rest as "new".

    Args:
        findings: List of audit finding strings to cross-check. A bare string
            is treated as a single finding; ``None`` is treated as empty.
        area: System area to narrow the learning search (wazion, shopify, etc.)

    Returns:
        A human-readable report whose last line is ``SUMMARY JSON: ...`` with
        the same data in machine-readable form.
    """
    # A lone string would otherwise be iterated character by character.
    if isinstance(findings, str):
        findings = [findings]
    findings = list(findings or [])

    new_issues = []
    known_issues = []

    for finding in findings:
        if not finding or not finding.strip():
            continue

        matched_learnings = []
        query_keywords = _finding_keywords(finding)
        if query_keywords:
            query = " ".join(query_keywords)
            try:
                results = search_learnings(query, category=area if area else None)
                if not results and area:
                    # Retry without category filter if area-filtered search returns nothing
                    results = search_learnings(query)
                matched_learnings = results[:3]  # Top 3 matches per finding
            except Exception:
                # Deliberate best effort: a failed lookup leaves the finding
                # classified as "new" so it still gets investigated.
                matched_learnings = []

        if matched_learnings:
            refs = [
                {
                    "id": r["id"],
                    # Missing/None titles and categories are normalised to ""
                    # so the report formatting below cannot raise.
                    "title": r.get("title") or "",
                    "category": r.get("category") or "",
                }
                for r in matched_learnings
            ]
            known_issues.append({
                "finding": finding,
                "status": "known",
                "learning_refs": refs,
            })
        else:
            new_issues.append({
                "finding": finding,
                "status": "new",
            })

    # Build output
    lines = [
        f"CROSS-CHECK RESULTS: {len(findings)} findings — "
        f"{len(new_issues)} new, {len(known_issues)} already documented",
        "",
    ]

    if new_issues:
        lines.append(f"NEW ISSUES ({len(new_issues)}) — not in learnings, investigate:")
        for i, item in enumerate(new_issues, 1):
            lines.append(f" {i}. {item['finding']}")
        lines.append("")

    if known_issues:
        lines.append(f"KNOWN ISSUES ({len(known_issues)}) — covered by existing learnings:")
        for i, item in enumerate(known_issues, 1):
            refs_str = ", ".join(
                f"#{r['id']} [{r['category']}] {r['title'][:60]}"
                for r in item["learning_refs"]
            )
            lines.append(f" {i}. {item['finding']}")
            lines.append(f" -> {refs_str}")
        lines.append("")

    summary = {
        "total": len(findings),
        "new_count": len(new_issues),
        "known_count": len(known_issues),
        "new_issues": [i["finding"] for i in new_issues],
        "known_issues": [
            {"finding": i["finding"], "refs": i["learning_refs"]}
            for i in known_issues
        ],
    }
    lines.append(f"SUMMARY JSON: {json.dumps(summary)}")

    return "\n".join(lines)
552
+
553
+
554
def handle_guard_file_check(files: list) -> str:
    """Pre-edit check: surfaces learnings and recent changes for files about to be modified.

    For every path, the file name, its stem and its parent directory name are
    used as search keywords for ``search_learnings``; the file name and stem
    are also passed to ``search_changes`` (last 7 days). Learnings whose title
    or content contains a prohibition keyword are additionally listed as
    BLOCKING warnings.

    Args:
        files: List of file paths about to be edited. A bare string is treated
            as a single path; duplicate paths are processed once.

    Returns:
        A formatted report ending in a ``SUMMARY:`` line, or an error string
        when no files are given.
    """
    from pathlib import Path
    import re

    BLOCKING_KEYWORDS = re.compile(
        r'\bNUNCA\b|\bNEVER\b|\bPROHIBIDO\b|\bFORBIDDEN\b|\bBLOCKING\b',
        re.IGNORECASE
    )

    # A lone string would otherwise be iterated character by character.
    if isinstance(files, str):
        files = [files]
    # De-duplicate while keeping order: because learnings are de-duplicated
    # globally, a repeated path would overwrite its own results with [].
    files = list(dict.fromkeys(files or []))

    if not files:
        return "ERROR: No files provided."

    file_learnings: dict = {}
    recent_changes: dict = {}
    warnings: list = []
    # Learning ids already reported; each learning is shown under the first
    # file that surfaces it.
    seen_learning_ids: set = set()

    for filepath in files:
        p = Path(filepath)

        # Build search keywords: filename, stem, parent directory (deduplicated)
        unique_keywords = list(dict.fromkeys(
            kw for kw in (p.name, p.stem, p.parent.name)
            if kw and kw != "."
        ))

        file_results = []
        for keyword in unique_keywords:
            try:
                rows = search_learnings(keyword)
                for r in rows:
                    lid = r.get("id")
                    if not lid or lid in seen_learning_ids:
                        continue
                    seen_learning_ids.add(lid)
                    # Normalise None to "" so regex search and formatting
                    # never fail on a learning without title/content.
                    title = r.get("title") or ""
                    content = r.get("content") or ""
                    file_results.append({
                        "id": lid,
                        "category": r.get("category", ""),
                        "title": title,
                        "content": content[:300],
                    })
                    # Flag blocking learnings
                    if BLOCKING_KEYWORDS.search(title) or BLOCKING_KEYWORDS.search(content):
                        warnings.append(f"[BLOCKING] #{lid} ({filepath}): {title}")
            except Exception:
                # Deliberate best effort: a failed search for one keyword
                # must not prevent the remaining keywords/files from running.
                pass

        file_learnings[filepath] = file_results

        # Search recent changes (last 7 days) for this file by filename/stem
        file_changes = []
        seen_change_ids: set = set()
        for keyword in unique_keywords[:2]:  # filename + stem are most specific
            try:
                changes = search_changes(files=keyword, days=7)
                for c in changes:
                    cid = c.get("id")
                    if not cid or cid in seen_change_ids:
                        continue
                    seen_change_ids.add(cid)
                    file_changes.append({
                        "id": cid,
                        "files": c.get("files", ""),
                        "what_changed": (c.get("what_changed") or "")[:200],
                        "why": (c.get("why") or "")[:150],
                        "created_at": (c.get("created_at") or "")[:16],
                    })
            except Exception:
                # Same best-effort policy as the learnings search above.
                pass

        recent_changes[filepath] = file_changes

    # Build summary line
    total_learnings = sum(len(v) for v in file_learnings.values())
    total_changes = sum(len(v) for v in recent_changes.values())
    summary_parts = []
    if total_learnings:
        summary_parts.append(f"{total_learnings} learning(s) found")
    if total_changes:
        summary_parts.append(f"{total_changes} recent change(s) in last 7 days")
    if warnings:
        summary_parts.append(f"{len(warnings)} BLOCKING warning(s)")
    summary = ", ".join(summary_parts) if summary_parts else "No relevant learnings or recent changes found."

    # Format output
    lines = []

    if warnings:
        lines.append("WARNINGS — resolve before editing:")
        for w in warnings:
            lines.append(f" {w}")
        lines.append("")

    for filepath in files:
        learnings = file_learnings.get(filepath, [])
        changes = recent_changes.get(filepath, [])
        if not learnings and not changes:
            continue
        lines.append(f"FILE: {filepath}")
        if learnings:
            lines.append(f" Learnings ({len(learnings)}):")
            for entry in learnings[:10]:
                lines.append(f" #{entry['id']} [{entry['category']}] {entry['title']}")
                if entry["content"]:
                    lines.append(f" {entry['content'][:120]}")
        if changes:
            lines.append(f" Recent changes ({len(changes)}, last 7d):")
            for c in changes[:5]:
                lines.append(f" [{c['created_at']}] {c['what_changed'][:100]}")
                if c["why"]:
                    lines.append(f" Why: {c['why'][:80]}")
        lines.append("")

    # The summary line is always present, so the report is never empty.
    lines.append(f"SUMMARY: {summary}")

    return "\n".join(lines)
684
+
685
+
438
686
# Guard tool table: one (handler function, tool name, description) tuple per tool.
TOOLS = [
    (
        handle_guard_check,
        "nexo_guard_check",
        "Check learnings relevant to files/area BEFORE editing code. Call this before any code change.",
    ),
    (
        handle_guard_stats,
        "nexo_guard_stats",
        "Get guard system statistics: repetition rate, trends, top problem areas",
    ),
    (
        handle_guard_log_repetition,
        "nexo_guard_log_repetition",
        "Log a learning repetition (new learning matches existing one)",
    ),
    (
        handle_somatic_check,
        "nexo_somatic_check",
        "View somatic risk scores for files/areas — pain memory",
    ),
    (
        handle_somatic_stats,
        "nexo_somatic_stats",
        "Top 10 riskiest targets + risk distribution",
    ),
    (
        handle_guard_cross_check,
        "nexo_guard_cross_check",
        "Cross-check audit findings against known learnings to filter false positives",
    ),
    (
        handle_guard_file_check,
        "nexo_guard_file_check",
        "Pre-edit check: surfaces learnings and recent changes for files about to be modified",
    ),
]
@@ -37,6 +37,14 @@ def main():
37
37
  except Exception as e:
38
38
  print(f"[{ts}] Quarantine processing error: {e}")
39
39
 
40
+ # 0b. Purge test/dev memories from STM
41
+ try:
42
+ test_purged = cognitive.gc_test_memories()
43
+ if test_purged > 0:
44
+ print(f"[{ts}] Purged {test_purged} test/dev memories from STM.")
45
+ except Exception as e:
46
+ print(f"[{ts}] Test memory purge error: {e}")
47
+
40
48
  # 1. Apply decay
41
49
  cognitive.apply_decay()
42
50
  print(f"[{ts}] Decay applied.")