nexo-brain 7.23.13 → 7.25.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. package/.claude-plugin/plugin.json +1 -1
  2. package/README.md +15 -11
  3. package/bin/nexo-brain.js +42 -235
  4. package/package.json +1 -1
  5. package/src/auto_update.py +30 -0
  6. package/src/automation_supervisor.py +1 -1
  7. package/src/cli.py +255 -9
  8. package/src/cognitive_control_observatory.py +224 -0
  9. package/src/crons/manifest.json +13 -0
  10. package/src/dashboard/app.py +26 -9
  11. package/src/db/__init__.py +2 -0
  12. package/src/db/_fts.py +38 -8
  13. package/src/db/_learnings.py +1 -1
  14. package/src/db/_memory_v2.py +107 -1
  15. package/src/db/_protocol.py +2 -2
  16. package/src/db/_reminders.py +132 -4
  17. package/src/db/_schema.py +48 -2
  18. package/src/doctor/providers/runtime.py +69 -0
  19. package/src/events_bus.py +4 -5
  20. package/src/learning_resolver.py +419 -0
  21. package/src/lifecycle_events.py +9 -9
  22. package/src/local_context/api.py +67 -5
  23. package/src/local_context/usage_events.py +24 -0
  24. package/src/memory_fabric.py +536 -0
  25. package/src/memory_observation_processor.py +28 -0
  26. package/src/memory_retrieval.py +5 -5
  27. package/src/operator_language.py +2 -0
  28. package/src/plugins/backup.py +1 -1
  29. package/src/plugins/cortex.py +21 -21
  30. package/src/plugins/episodic_memory.py +11 -11
  31. package/src/plugins/goal_engine.py +3 -3
  32. package/src/plugins/personal_scripts.py +75 -0
  33. package/src/plugins/protocol.py +10 -1
  34. package/src/pre_answer_router.py +120 -3
  35. package/src/r_catalog.py +4 -5
  36. package/src/saved_not_used_audit.py +31 -31
  37. package/src/script_registry.py +444 -1
  38. package/src/scripts/deep-sleep/apply_findings.py +79 -17
  39. package/src/scripts/nexo-backup.sh +30 -0
  40. package/src/scripts/nexo-daily-self-audit.py +46 -13
  41. package/src/scripts/nexo-email-migrate-config.py +2 -2
  42. package/src/scripts/nexo-email-monitor.py +19 -19
  43. package/src/scripts/nexo-followup-hygiene.py +40 -8
  44. package/src/scripts/nexo-followup-runner.py +31 -31
  45. package/src/scripts/nexo-inbox-hook.sh +1 -1
  46. package/src/scripts/nexo-learning-validator.py +24 -3
  47. package/src/scripts/nexo-memory-fabric.py +45 -0
  48. package/src/server.py +73 -1
  49. package/src/system_catalog.py +31 -31
  50. package/src/tools_learnings.py +96 -65
  51. package/src/tools_memory_v2.py +2 -2
  52. package/src/tools_sessions.py +25 -7
  53. package/src/tools_transcripts.py +50 -8
  54. package/src/transcript_index.py +105 -2
  55. package/src/transcript_utils.py +65 -13
  56. package/templates/core-prompts/postmortem-consolidator.md +3 -3
  57. package/templates/core-prompts/r17-promise-debt-injection.md +1 -1
  58. package/templates/core-prompts/server-mcp-instructions.md +6 -6
  59. package/tool-enforcement-map.json +143 -13
@@ -1,16 +1,25 @@
1
1
  """Learnings CRUD tools: add, search, update, delete, list."""
2
2
 
3
+ import json
3
4
  import os
4
5
  import re
6
+ import unicodedata
5
7
  from datetime import datetime
6
8
 
7
9
  from db import (create_learning, update_learning, delete_learning, search_learnings,
8
10
  list_learnings, find_similar_learnings, get_db, now_epoch, supersede_learning, extract_keywords,
9
11
  resolve_session_correction_requirements)
12
+ from learning_resolver import (
13
+ applies_overlap as _canonical_applies_overlap,
14
+ looks_contradictory as _canonical_looks_contradictory,
15
+ resolve_learning_candidate,
16
+ )
10
17
 
11
18
  NEGATION_PATTERNS = (
12
19
  "do not", "don't", "never", "avoid", "skip", "without", "forbid", "forbidden",
13
20
  "disable", "disabled", "remove", "ban", "bypass",
21
+ " no ", " nunca ", " evita ", " evitar ", " sin ", " prohibe ", " prohibido ",
22
+ " desactiva ", " desactivar ", " elimina ", " eliminar ", " bloquea ", " bloquear ",
14
23
  )
15
24
  CONTRADICTION_PAIRS = (
16
25
  ("enable", "disable"),
@@ -23,6 +32,15 @@ CONTRADICTION_PAIRS = (
23
32
  ("validate", "skip"),
24
33
  ("validate", "bypass"),
25
34
  ("include", "exclude"),
35
+ ("activar", "desactivar"),
36
+ ("usar", "evitar"),
37
+ ("usar", "no usar"),
38
+ ("editar", "no editar"),
39
+ ("tocar", "no tocar"),
40
+ ("anadir", "eliminar"),
41
+ ("permitir", "prohibir"),
42
+ ("validar", "saltar"),
43
+ ("incluir", "excluir"),
26
44
  )
27
45
 
28
46
 
@@ -40,26 +58,13 @@ def _normalize_applies_token(value: str) -> str:
40
58
 
41
59
 
42
60
  def _applies_overlap(left: str, right: str) -> bool:
43
- left_tokens = {_normalize_applies_token(item) for item in _split_applies_to(left)}
44
- right_tokens = {_normalize_applies_token(item) for item in _split_applies_to(right)}
45
- left_tokens.discard("")
46
- right_tokens.discard("")
47
- if not left_tokens or not right_tokens:
48
- return False
49
- if left_tokens & right_tokens:
50
- return True
51
- for left_token in left_tokens:
52
- for right_token in right_tokens:
53
- if "/" in left_token or "/" in right_token:
54
- if left_token.startswith(f"{right_token}/") or right_token.startswith(f"{left_token}/"):
55
- return True
56
- if left_token.endswith(f"/{right_token}") or right_token.endswith(f"/{left_token}"):
57
- return True
58
- return False
61
+ return _canonical_applies_overlap(left, right)
59
62
 
60
63
 
61
64
  def _normalize_text(text: str) -> str:
62
- return re.sub(r"\s+", " ", str(text or "").strip().lower())
65
+ normalized = unicodedata.normalize("NFKD", str(text or ""))
66
+ ascii_text = "".join(ch for ch in normalized if not unicodedata.combining(ch))
67
+ return re.sub(r"\s+", " ", ascii_text.strip().lower())
63
68
 
64
69
 
65
70
  def _tokenize(text: str) -> list[str]:
@@ -67,7 +72,7 @@ def _tokenize(text: str) -> list[str]:
67
72
 
68
73
 
69
74
  def _contains_negation(text: str) -> bool:
70
- lowered = _normalize_text(text)
75
+ lowered = f" {_normalize_text(text)} "
71
76
  return any(token in lowered for token in NEGATION_PATTERNS)
72
77
 
73
78
 
@@ -75,37 +80,16 @@ def _negated_action_verbs(text: str) -> set[str]:
75
80
  lowered = _normalize_text(text)
76
81
  matches = set()
77
82
  for pattern in (
78
- r"(?:never|avoid|skip|disable|remove|forbid|bypass)\s+([a-z0-9_-]+)",
79
- r"(?:do not|don't)\s+([a-z0-9_-]+)",
83
+ r"(?:never|avoid|skip|disable|remove|forbid|bypass|nunca|evita|evitar|desactiva|desactivar|elimina|eliminar|prohibe|prohibir|bloquea|bloquear)\s+([a-z0-9_-]+)",
84
+ r"(?:do not|don't|no)\s+([a-z0-9_-]+)",
85
+ r"(?:without|sin)\s+([a-z0-9_-]+)",
80
86
  ):
81
87
  matches.update(re.findall(pattern, lowered))
82
88
  return {match for match in matches if len(match) > 2}
83
89
 
84
90
 
85
91
  def _looks_contradictory(existing_text: str, new_text: str) -> bool:
86
- existing_norm = _normalize_text(existing_text)
87
- new_norm = _normalize_text(new_text)
88
- if not existing_norm or not new_norm:
89
- return False
90
- existing_tokens = set(_tokenize(existing_norm))
91
- new_tokens = set(_tokenize(new_norm))
92
- if not (existing_tokens & new_tokens):
93
- return False
94
- existing_negated_verbs = _negated_action_verbs(existing_norm)
95
- new_negated_verbs = _negated_action_verbs(new_norm)
96
- if existing_negated_verbs & new_tokens and not existing_negated_verbs & new_negated_verbs:
97
- return True
98
- if new_negated_verbs & existing_tokens and not existing_negated_verbs & new_negated_verbs:
99
- return True
100
- if _contains_negation(existing_norm) != _contains_negation(new_norm):
101
- return True
102
- for positive, negative in CONTRADICTION_PAIRS:
103
- existing_has_pair = positive in existing_norm or negative in existing_norm
104
- new_has_pair = positive in new_norm or negative in new_norm
105
- if existing_has_pair and new_has_pair:
106
- if (positive in existing_norm and negative in new_norm) or (negative in existing_norm and positive in new_norm):
107
- return True
108
- return False
92
+ return _canonical_looks_contradictory(existing_text, new_text)
109
93
 
110
94
 
111
95
  def _find_conflicting_active_learning(conn, *, category: str, title: str, content: str,
@@ -286,7 +270,8 @@ def score_learning_quality(row: dict, conn=None) -> dict:
286
270
 
287
271
  def handle_learning_add(category: str, title: str, content: str, reasoning: str = '',
288
272
  prevention: str = '', applies_to: str = '', review_days: int = 30,
289
- priority: str = 'medium', supersedes_id: int = 0) -> str:
273
+ priority: str = 'medium', supersedes_id: int = 0,
274
+ source_authority: str = 'explicit_instruction') -> str:
290
275
  """Add a new learning entry to the specified category.
291
276
 
292
277
  Args:
@@ -304,15 +289,55 @@ def handle_learning_add(category: str, title: str, content: str, reasoning: str
304
289
  category = category.lower().strip()
305
290
  if not category:
306
291
  return "ERROR: Category cannot be empty."
307
- # Dedup guard: block exact title duplicates in same category
308
292
  conn = get_db()
309
- existing = conn.execute(
310
- "SELECT id, title FROM learnings WHERE LOWER(title) = LOWER(?) AND category = ? AND status = 'active'",
311
- (title.strip(), category)
312
- ).fetchone()
313
- if existing:
314
- _resolve_pending_correction_learning(int(existing["id"]))
315
- return f"Learning #{existing['id']} already exists with same title in {category}: {existing['title']}. Use nexo_learning_update to modify it."
293
+ resolution = resolve_learning_candidate(
294
+ category=category,
295
+ title=title,
296
+ content=content,
297
+ reasoning=reasoning,
298
+ prevention=prevention,
299
+ applies_to=applies_to,
300
+ priority=priority,
301
+ supersedes_id=supersedes_id,
302
+ source_authority=source_authority,
303
+ conn=conn,
304
+ )
305
+ if resolution["action"] == "reject":
306
+ return f"ERROR: Learning candidate rejected: {resolution['reason']}."
307
+ if resolution["action"] == "merge":
308
+ existing_id = int(resolution.get("target_id") or 0)
309
+ existing = conn.execute("SELECT id, title, weight FROM learnings WHERE id = ?", (existing_id,)).fetchone()
310
+ if existing:
311
+ if resolution.get("reason") == "exact_title_duplicate":
312
+ _resolve_pending_correction_learning(existing_id)
313
+ return f"Learning #{existing['id']} already exists with same title in {category}: {existing['title']}. Use nexo_learning_update to modify it."
314
+ old_weight = float(existing["weight"] or 0.0)
315
+ new_weight = min(1.0, old_weight + 0.1)
316
+ conn.execute(
317
+ "UPDATE learnings SET weight = ?, updated_at = ? WHERE id = ?",
318
+ (new_weight, now_epoch(), existing_id),
319
+ )
320
+ conn.commit()
321
+ _resolve_pending_correction_learning(existing_id)
322
+ return (
323
+ f"Learning #{existing_id} resolved as merge ({resolution['reason']}, similarity "
324
+ f"{float(resolution.get('similarity') or 0):.2f}). No duplicate created. "
325
+ f"Weight bumped {old_weight:.2f} -> {new_weight:.2f}. Use nexo_learning_update(id={existing_id}) "
326
+ "to refine the canonical text."
327
+ )
328
+ if resolution["action"] == "conflict_review":
329
+ conflicting = {
330
+ "id": resolution.get("target_id"),
331
+ "title": resolution.get("target_title"),
332
+ "applies_to": applies_to,
333
+ }
334
+ return (
335
+ f"ERROR: Contradictory active learning #{conflicting['id']} already exists for applies_to="
336
+ f"{conflicting.get('applies_to', '')}: {conflicting['title']}. "
337
+ f"Supersede or update the existing canonical rule instead of creating two active file rules."
338
+ )
339
+ if resolution["action"] == "supersede":
340
+ supersedes_id = int(resolution.get("target_id") or supersedes_id or 0)
316
341
 
317
342
  # ── R05 (Fase 2 Protocol Enforcer): auto-merge on high Jaccard similarity ──
318
343
  # When a near-duplicate active learning exists (Jaccard >= R05 threshold),
@@ -356,19 +381,6 @@ def handle_learning_add(category: str, title: str, content: str, reasoning: str
356
381
  f"→ {new_weight:.2f}. Use nexo_learning_update(id={existing_id}) if you need to "
357
382
  "refine the canonical text."
358
383
  )
359
- conflicting = _find_conflicting_active_learning(
360
- conn,
361
- category=category,
362
- title=title,
363
- content=content,
364
- applies_to=applies_to,
365
- )
366
- if conflicting and int(supersedes_id or 0) != int(conflicting["id"]):
367
- return (
368
- f"ERROR: Contradictory active learning #{conflicting['id']} already exists for applies_to="
369
- f"{conflicting.get('applies_to', '')}: {conflicting['title']}. "
370
- f"Supersede or update the existing canonical rule instead of creating two active file rules."
371
- )
372
384
  result = create_learning(
373
385
  category, title, content, reasoning=reasoning, supersedes_id=(int(supersedes_id) if supersedes_id else None)
374
386
  )
@@ -508,6 +520,25 @@ def handle_learning_add(category: str, title: str, content: str, reasoning: str
508
520
  return f"Learning #{result['id']} added in {category}: {title}{meta_str} ✓verified{repetition_msg}{retro_meta_msg}{correction_msg}"
509
521
 
510
522
 
523
+ def handle_learning_resolve_candidate(category: str, title: str, content: str, reasoning: str = '',
524
+ prevention: str = '', applies_to: str = '',
525
+ priority: str = 'medium', supersedes_id: int = 0,
526
+ source_authority: str = 'inference') -> str:
527
+ """Dry-run the canonical learning resolver without mutating state."""
528
+ result = resolve_learning_candidate(
529
+ category=category,
530
+ title=title,
531
+ content=content,
532
+ reasoning=reasoning,
533
+ prevention=prevention,
534
+ applies_to=applies_to,
535
+ priority=priority,
536
+ supersedes_id=supersedes_id,
537
+ source_authority=source_authority,
538
+ )
539
+ return json.dumps(result, ensure_ascii=False, indent=2)
540
+
541
+
511
542
  def handle_learning_search(query: str, category: str = '') -> str:
512
543
  """Search learnings by query string, optionally filtered by category."""
513
544
  results = search_learnings(query, category if category else None)
@@ -173,8 +173,8 @@ def handle_memory_timeline(
173
173
  result = memory_timeline(query, project_hint=project_hint, time_range=time_range, limit=limit)
174
174
  candidates = result.get("candidates") or []
175
175
  if not candidates:
176
- return "No hay eventos suficientes para construir timeline."
177
- lines = [f"MEMORY TIMELINE ({len(candidates)}) — {query or time_range or '(sin query)'}"]
176
+ return "There are not enough events to build a timeline."
177
+ lines = [f"MEMORY TIMELINE ({len(candidates)}) — {query or time_range or '(no query)'}"]
178
178
  for item in candidates:
179
179
  refs = item.get("evidence_refs") or []
180
180
  refs_note = f" refs={', '.join(refs[:3])}" if refs else ""
@@ -1334,10 +1334,21 @@ def handle_context_packet(area: str, files: str = "") -> str:
1334
1334
  parts.append("")
1335
1335
 
1336
1336
  # 3. Active followups for this area
1337
- followups = conn.execute(
1338
- "SELECT id, description, date, verification FROM followups WHERE status = 'PENDING' AND (description LIKE ? OR verification LIKE ?) ORDER BY date ASC LIMIT 10",
1337
+ from db import followup_lifecycle_lane, normalize_followup_status
1338
+
1339
+ followup_rows = conn.execute(
1340
+ "SELECT id, description, date, verification, status, owner FROM followups "
1341
+ "WHERE (description LIKE ? OR verification LIKE ?) ORDER BY date ASC LIMIT 50",
1339
1342
  (f"%{area}%", f"%{area}%")
1340
1343
  ).fetchall()
1344
+ followups = []
1345
+ for row in followup_rows:
1346
+ item = dict(row)
1347
+ item["status"] = normalize_followup_status(item.get("status"))
1348
+ if followup_lifecycle_lane(item) == "active":
1349
+ followups.append(item)
1350
+ if len(followups) >= 10:
1351
+ break
1341
1352
  if followups:
1342
1353
  parts.append("## ACTIVE FOLLOWUPS")
1343
1354
  for f in followups:
@@ -1479,11 +1490,18 @@ def handle_smart_startup_query() -> str:
1479
1490
  sent_email_block = ""
1480
1491
 
1481
1492
  # 1. Pending followups (what NEXO needs to do)
1482
- followups = conn.execute(
1483
- "SELECT description FROM followups WHERE status = 'PENDING' ORDER BY date ASC LIMIT 5"
1484
- ).fetchall()
1485
- for f in followups:
1486
- query_parts.append(f['description'][:100])
1493
+ try:
1494
+ from db import followup_lifecycle_snapshot
1495
+
1496
+ active_followups = (followup_lifecycle_snapshot(limit=500).get("lanes") or {}).get("active", [])[:5]
1497
+ for f in active_followups:
1498
+ query_parts.append(str(f.get("description") or "")[:100])
1499
+ except Exception:
1500
+ followups = conn.execute(
1501
+ "SELECT description FROM followups WHERE status = 'PENDING' ORDER BY date ASC LIMIT 5"
1502
+ ).fetchall()
1503
+ for f in followups:
1504
+ query_parts.append(f['description'][:100])
1487
1505
 
1488
1506
  # 2. Due reminders (what the user needs to know)
1489
1507
  reminders = conn.execute(
@@ -8,26 +8,53 @@ from transcript_utils import (
8
8
  load_transcript,
9
9
  search_transcripts,
10
10
  )
11
+ from transcript_index import ensure_transcript_index, search_transcript_index
11
12
 
12
13
 
13
14
  def handle_transcript_search(query: str = "", hours: int = 24, client: str = "", limit: int = 10) -> str:
14
15
  """Search recent Claude Code / Codex transcripts as a fallback when memory is insufficient."""
15
16
  window = clamp_transcript_hours(hours)
16
- rows = search_transcripts(query or "", hours=window, client=(client or "").strip(), limit=limit)
17
+ clean_client = (client or "").strip()
18
+ ensure_transcript_index(
19
+ hours=window,
20
+ client=clean_client,
21
+ limit=max(200, min(2000, int(limit or 10) * 50)),
22
+ min_user_messages=1,
23
+ )
24
+ rows = search_transcript_index(query or "", hours=window, client=clean_client, limit=limit)
25
+ source = "index"
26
+ if not rows:
27
+ rows = search_transcripts(
28
+ query or "",
29
+ hours=window,
30
+ client=clean_client,
31
+ limit=limit,
32
+ min_user_messages=1,
33
+ )
34
+ source = "raw"
17
35
  if not rows:
18
36
  scope = f"query='{query}'" if query else "recent transcripts"
19
37
  return f"No transcript matches for {scope} in the last {window}h."
20
38
 
21
- lines = [f"TRANSCRIPTS ({len(rows)}) — last {window}h"]
39
+ lines = [f"TRANSCRIPTS ({len(rows)}) — last {window}h ({source})"]
22
40
  for item in rows:
41
+ session_file = item.get("session_file") or item.get("session_id") or item.get("display_name")
42
+ display_name = item.get("display_name") or item.get("path_ref") or item.get("session_path")
43
+ modified = item.get("modified") or item.get("modified_at")
23
44
  lines.append(
24
- f"- {item.get('session_file')}: [{item.get('client')}] {item.get('display_name')} "
25
- f"(modified={item.get('modified')}, messages={item.get('message_count')}, user={item.get('user_message_count')})"
45
+ f"- {session_file}: [{item.get('client') or item.get('source_client')}] {display_name} "
46
+ f"(modified={modified}, messages={item.get('message_count')}, user={item.get('user_message_count')})"
26
47
  )
27
48
  if item.get("cwd"):
28
49
  lines.append(f" cwd: {item['cwd']}")
29
50
  if item.get("session_uid"):
30
51
  lines.append(f" session_uid: {item['session_uid']}")
52
+ if item.get("conversation_id") and item.get("conversation_id") != item.get("session_id"):
53
+ lines.append(f" conversation_id: {item['conversation_id']}")
54
+ if item.get("path_ref"):
55
+ lines.append(f" path: {item['path_ref']}")
56
+ if item.get("sanitized_summary"):
57
+ lines.append(f" summary: {item['sanitized_summary']}")
31
58
  for snippet in item.get("matched_messages") or []:
32
59
  lines.append(
33
60
  f" [{snippet.get('role')}#{snippet.get('index')}] {snippet.get('snippet')}"
@@ -38,15 +65,29 @@ def handle_transcript_search(query: str = "", hours: int = 24, client: str = "",
38
65
  def handle_transcript_recent(hours: int = 24, client: str = "", limit: int = 10) -> str:
39
66
  """List recent transcripts without searching full text."""
40
67
  window = clamp_transcript_hours(hours)
41
- rows = list_recent_transcripts(hours=window, client=(client or "").strip(), limit=limit)
68
+ clean_client = (client or "").strip()
69
+ ensure_transcript_index(
70
+ hours=window,
71
+ client=clean_client,
72
+ limit=max(200, min(2000, int(limit or 10) * 50)),
73
+ min_user_messages=1,
74
+ )
75
+ rows = search_transcript_index("", hours=window, client=clean_client, limit=limit)
76
+ source = "index"
77
+ if not rows:
78
+ rows = list_recent_transcripts(hours=window, client=clean_client, limit=limit, min_user_messages=1)
79
+ source = "raw"
42
80
  if not rows:
43
81
  return f"No transcripts found in the last {window}h."
44
82
 
45
- lines = [f"RECENT TRANSCRIPTS ({len(rows)}) — last {window}h"]
83
+ lines = [f"RECENT TRANSCRIPTS ({len(rows)}) — last {window}h ({source})"]
46
84
  for item in rows:
85
+ session_file = item.get("session_file") or item.get("session_id") or item.get("display_name")
86
+ display_name = item.get("display_name") or item.get("path_ref") or item.get("session_path")
87
+ modified = item.get("modified") or item.get("modified_at")
47
88
  lines.append(
48
- f"- {item.get('session_file')}: [{item.get('client')}] {item.get('display_name')} "
49
- f"(modified={item.get('modified')}, messages={item.get('message_count')}, user={item.get('user_message_count')})"
89
+ f"- {session_file}: [{item.get('client') or item.get('source_client')}] {display_name} "
90
+ f"(modified={modified}, messages={item.get('message_count')}, user={item.get('user_message_count')})"
50
91
  )
51
92
  return "\n".join(lines)
52
93
 
@@ -62,6 +103,7 @@ def handle_transcript_read(
62
103
  session_ref=(session_ref or "").strip(),
63
104
  transcript_path=(transcript_path or "").strip(),
64
105
  client=(client or "").strip(),
106
+ min_user_messages=1,
65
107
  )
66
108
  if not transcript:
67
109
  target = session_ref or transcript_path or "(empty ref)"
@@ -15,9 +15,12 @@ from typing import Any
15
15
  from db import get_db
16
16
  from transcript_utils import (
17
17
  DEFAULT_TRANSCRIPT_HOURS,
18
+ MAX_TRANSCRIPT_HOURS,
18
19
  _score_text_match,
19
20
  _tokenize,
20
21
  _truncate,
22
+ find_claude_session_files,
23
+ find_codex_session_files,
21
24
  list_recent_transcripts,
22
25
  )
23
26
 
@@ -103,6 +106,29 @@ def _sanitized_summary(session: dict[str, Any], *, limit: int = 900) -> str:
103
106
  return _truncate(summary, limit)
104
107
 
105
108
 
109
+ def _row_ref_matches(query: str, row: dict[str, Any]) -> bool:
110
+ clean = str(query or "").strip().lower()
111
+ if len(clean) < 6:
112
+ return False
113
+ values = [
114
+ row.get("session_id"),
115
+ row.get("conversation_id"),
116
+ row.get("display_name"),
117
+ row.get("path_ref"),
118
+ Path(str(row.get("path_ref") or "")).name,
119
+ Path(str(row.get("path_ref") or "")).stem,
120
+ ]
121
+ for value in values:
122
+ candidate = str(value or "").strip().lower()
123
+ if not candidate:
124
+ continue
125
+ if candidate.startswith(clean):
126
+ return True
127
+ if candidate.split(":")[-1].startswith(clean):
128
+ return True
129
+ return False
130
+
131
+
106
132
  def index_transcript_session(session: dict[str, Any]) -> dict[str, Any]:
107
133
  """Upsert a single transcript metadata row and return it."""
108
134
  _ensure_transcript_index_table()
@@ -186,6 +212,81 @@ def index_recent_transcripts(
186
212
  return indexed
187
213
 
188
214
 
215
+ def _latest_source_modified_ts(client: str = "") -> float:
216
+ paths: list[Path] = []
217
+ if not client or client == "claude_code":
218
+ paths.extend(find_claude_session_files())
219
+ if not client or client == "codex":
220
+ paths.extend(find_codex_session_files())
221
+ latest = 0.0
222
+ for path in paths:
223
+ try:
224
+ latest = max(latest, path.stat().st_mtime)
225
+ except OSError:
226
+ continue
227
+ return latest
228
+
229
+
230
+ def _parse_iso_ts(value: str) -> float:
231
+ if not value:
232
+ return 0.0
233
+ try:
234
+ return datetime.fromisoformat(value).timestamp()
235
+ except Exception:
236
+ return 0.0
237
+
238
+
239
+ def ensure_transcript_index(
240
+ *,
241
+ hours: int = MAX_TRANSCRIPT_HOURS,
242
+ client: str = "",
243
+ limit: int = 1000,
244
+ min_user_messages: int = 1,
245
+ force: bool = False,
246
+ ) -> dict[str, Any]:
247
+ """Keep the compact transcript DB index warm enough for fast lookup.
248
+
249
+ This is intentionally bounded. Raw JSONL remains the source of truth, but
250
+ normal MCP searches should hit this table before falling back to slow file
251
+ scans.
252
+ """
253
+ _ensure_transcript_index_table()
254
+ conn = get_db()
255
+ params: list[Any] = []
256
+ where = "1=1"
257
+ if client:
258
+ where += " AND source_client = ?"
259
+ params.append(client)
260
+ before = int(conn.execute(f"SELECT COUNT(*) AS total FROM transcript_index WHERE {where}", tuple(params)).fetchone()["total"] or 0)
261
+ latest_indexed = str(conn.execute(
262
+ f"SELECT MAX(modified_at) AS latest FROM transcript_index WHERE {where}",
263
+ tuple(params),
264
+ ).fetchone()["latest"] or "")
265
+ latest_source_ts = _latest_source_modified_ts(client)
266
+ latest_indexed_ts = _parse_iso_ts(latest_indexed)
267
+ stale = bool(latest_source_ts and latest_source_ts > latest_indexed_ts + 1.0)
268
+ should_index = bool(force or before == 0 or stale)
269
+ indexed: list[dict[str, Any]] = []
270
+ if should_index:
271
+ indexed = index_recent_transcripts(
272
+ hours=hours,
273
+ client=client,
274
+ limit=limit,
275
+ min_user_messages=min_user_messages,
276
+ )
277
+ after = int(conn.execute(f"SELECT COUNT(*) AS total FROM transcript_index WHERE {where}", tuple(params)).fetchone()["total"] or 0)
278
+ return {
279
+ "ok": True,
280
+ "before": before,
281
+ "after": after,
282
+ "indexed": len(indexed),
283
+ "forced": bool(force),
284
+ "stale": stale,
285
+ "hours": hours,
286
+ "client": client,
287
+ }
288
+
289
+
189
290
  def search_transcript_index(
190
291
  query: str = "",
191
292
  *,
@@ -201,7 +302,7 @@ def search_transcript_index(
201
302
  where += " AND source_client = ?"
202
303
  params.append(client)
203
304
  rows = [dict(row) for row in conn.execute(
204
- f"SELECT * FROM transcript_index WHERE {where} ORDER BY modified_at DESC LIMIT 500",
305
+ f"SELECT * FROM transcript_index WHERE {where} ORDER BY modified_at DESC LIMIT 5000",
205
306
  tuple(params),
206
307
  ).fetchall()]
207
308
 
@@ -222,9 +323,11 @@ def search_transcript_index(
222
323
  continue
223
324
  haystack = " ".join(
224
325
  str(row.get(field) or "")
225
- for field in ("sanitized_summary", "display_name", "session_id", "conversation_id", "metadata_json")
326
+ for field in ("sanitized_summary", "display_name", "session_id", "conversation_id", "path_ref", "metadata_json")
226
327
  )
227
328
  score = _score_text_match(query_tokens, haystack)
329
+ if _row_ref_matches(query, row):
330
+ score = max(score, 2.0)
228
331
  if score <= 0:
229
332
  continue
230
333
  row["_score"] = round(score, 4)
@@ -110,7 +110,10 @@ def find_codex_session_files() -> list[Path]:
110
110
  if not root.exists():
111
111
  continue
112
112
  for jsonl in sorted(root.rglob("*.jsonl")):
113
- key = jsonl.name
113
+ try:
114
+ key = str(jsonl.resolve())
115
+ except OSError:
116
+ key = str(jsonl)
114
117
  if key in seen:
115
118
  continue
116
119
  seen.add(key)
@@ -346,8 +349,20 @@ def list_recent_transcripts(
346
349
  return filtered[: max(1, int(limit or 10))]
347
350
 
348
351
 
349
- def search_transcripts(query: str, *, hours: int = DEFAULT_TRANSCRIPT_HOURS, client: str = "", limit: int = 10) -> list[dict]:
350
- rows = list_recent_transcripts(hours=hours, client=client, limit=200)
352
+ def search_transcripts(
353
+ query: str,
354
+ *,
355
+ hours: int = DEFAULT_TRANSCRIPT_HOURS,
356
+ client: str = "",
357
+ limit: int = 10,
358
+ min_user_messages: int = MIN_USER_MESSAGES,
359
+ ) -> list[dict]:
360
+ rows = list_recent_transcripts(
361
+ hours=hours,
362
+ client=client,
363
+ limit=200,
364
+ min_user_messages=min_user_messages,
365
+ )
351
366
  query_tokens = _tokenize(query)
352
367
  if not query_tokens:
353
368
  return rows[: max(1, int(limit or 10))]
@@ -398,7 +413,46 @@ def search_transcripts(query: str, *, hours: int = DEFAULT_TRANSCRIPT_HOURS, cli
398
413
  return matches[: max(1, int(limit or 10))]
399
414
 
400
415
 
401
- def load_transcript(session_ref: str = "", transcript_path: str = "", client: str = "") -> dict | None:
416
+ def _transcript_ref_matches(ref: str, session: dict, path: Path) -> bool:
417
+ clean = str(ref or "").strip()
418
+ if not clean:
419
+ return True
420
+ candidates = {
421
+ str(session.get("session_file", "")),
422
+ str(session.get("display_name", "")),
423
+ str(session.get("session_uid", "")),
424
+ str(session.get("conversation_id", "")),
425
+ str(path),
426
+ path.name,
427
+ path.stem,
428
+ }
429
+ if clean in candidates:
430
+ return True
431
+
432
+ # Operator-facing refs are often short prefixes copied from filenames
433
+ # or session ids. Require a minimum length so common words do not match
434
+ # arbitrary historical transcripts.
435
+ if len(clean) < 6:
436
+ return False
437
+ lowered = clean.lower()
438
+ for candidate in candidates:
439
+ value = str(candidate or "").strip().lower()
440
+ if not value:
441
+ continue
442
+ if value.startswith(lowered):
443
+ return True
444
+ if value.split(":")[-1].startswith(lowered):
445
+ return True
446
+ return False
447
+
448
+
449
+ def load_transcript(
450
+ session_ref: str = "",
451
+ transcript_path: str = "",
452
+ client: str = "",
453
+ *,
454
+ min_user_messages: int = 1,
455
+ ) -> dict | None:
402
456
  ref = str(session_ref or "").strip()
403
457
  path_ref = str(transcript_path or "").strip()
404
458
 
@@ -416,17 +470,15 @@ def load_transcript(session_ref: str = "", transcript_path: str = "", client: st
416
470
  continue
417
471
  except Exception:
418
472
  continue
419
- session = extract_codex_session(path) if detected_client == "codex" else extract_claude_session(path)
473
+ session = (
474
+ extract_codex_session(path, min_user_messages=min_user_messages)
475
+ if detected_client == "codex"
476
+ else extract_claude_session(path, min_user_messages=min_user_messages)
477
+ )
420
478
  if not session:
421
479
  continue
422
- if ref:
423
- if ref not in {
424
- str(session.get("session_file", "")),
425
- str(session.get("display_name", "")),
426
- str(session.get("session_uid", "")),
427
- str(path),
428
- }:
429
- continue
480
+ if ref and not _transcript_ref_matches(ref, session, path):
481
+ continue
430
482
  try:
431
483
  session["modified"] = datetime.fromtimestamp(path.stat().st_mtime).isoformat()
432
484
  except OSError:
@@ -31,8 +31,8 @@ INSTRUCTIONS:
31
31
  - A feedback covering the same topic already exists
32
32
 
33
33
  4. For each rule to promote, create the file with Write en [[memory_dir]]/:
34
- Nombre: feedback_postmortem_[slug_descriptivo].md
35
- Formato:
34
+ Name: feedback_postmortem_[descriptive_slug].md
35
+ Format:
36
36
  ---
37
37
  name: [descriptive title]
38
38
  description: Behavioral rule extracted from self-critique — recurring pattern
@@ -49,7 +49,7 @@ INSTRUCTIONS:
49
49
  Sessions: X | Self-critiques: Y | Promoted: Z
50
50
 
51
51
  ## Today's self-critiques (summary)
52
- [Lista breve]
52
+ [Brief list]
53
53
 
54
54
  ## Promoted to permanent memory
55
55
  [What you promoted and why]