nexo-brain 7.23.13 → 7.25.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +1 -1
- package/README.md +15 -11
- package/bin/nexo-brain.js +42 -235
- package/package.json +1 -1
- package/src/auto_update.py +30 -0
- package/src/automation_supervisor.py +1 -1
- package/src/cli.py +255 -9
- package/src/cognitive_control_observatory.py +224 -0
- package/src/crons/manifest.json +13 -0
- package/src/dashboard/app.py +26 -9
- package/src/db/__init__.py +2 -0
- package/src/db/_fts.py +38 -8
- package/src/db/_learnings.py +1 -1
- package/src/db/_memory_v2.py +107 -1
- package/src/db/_protocol.py +2 -2
- package/src/db/_reminders.py +132 -4
- package/src/db/_schema.py +48 -2
- package/src/doctor/providers/runtime.py +69 -0
- package/src/events_bus.py +4 -5
- package/src/learning_resolver.py +419 -0
- package/src/lifecycle_events.py +9 -9
- package/src/local_context/api.py +67 -5
- package/src/local_context/usage_events.py +24 -0
- package/src/memory_fabric.py +536 -0
- package/src/memory_observation_processor.py +28 -0
- package/src/memory_retrieval.py +5 -5
- package/src/operator_language.py +2 -0
- package/src/plugins/backup.py +1 -1
- package/src/plugins/cortex.py +21 -21
- package/src/plugins/episodic_memory.py +11 -11
- package/src/plugins/goal_engine.py +3 -3
- package/src/plugins/personal_scripts.py +75 -0
- package/src/plugins/protocol.py +10 -1
- package/src/pre_answer_router.py +120 -3
- package/src/r_catalog.py +4 -5
- package/src/saved_not_used_audit.py +31 -31
- package/src/script_registry.py +444 -1
- package/src/scripts/deep-sleep/apply_findings.py +79 -17
- package/src/scripts/nexo-backup.sh +30 -0
- package/src/scripts/nexo-daily-self-audit.py +46 -13
- package/src/scripts/nexo-email-migrate-config.py +2 -2
- package/src/scripts/nexo-email-monitor.py +19 -19
- package/src/scripts/nexo-followup-hygiene.py +40 -8
- package/src/scripts/nexo-followup-runner.py +31 -31
- package/src/scripts/nexo-inbox-hook.sh +1 -1
- package/src/scripts/nexo-learning-validator.py +24 -3
- package/src/scripts/nexo-memory-fabric.py +45 -0
- package/src/server.py +73 -1
- package/src/system_catalog.py +31 -31
- package/src/tools_learnings.py +96 -65
- package/src/tools_memory_v2.py +2 -2
- package/src/tools_sessions.py +25 -7
- package/src/tools_transcripts.py +50 -8
- package/src/transcript_index.py +105 -2
- package/src/transcript_utils.py +65 -13
- package/templates/core-prompts/postmortem-consolidator.md +3 -3
- package/templates/core-prompts/r17-promise-debt-injection.md +1 -1
- package/templates/core-prompts/server-mcp-instructions.md +6 -6
- package/tool-enforcement-map.json +143 -13
package/src/tools_learnings.py
CHANGED
|
@@ -1,16 +1,25 @@
|
|
|
1
1
|
"""Learnings CRUD tools: add, search, update, delete, list."""
|
|
2
2
|
|
|
3
|
+
import json
|
|
3
4
|
import os
|
|
4
5
|
import re
|
|
6
|
+
import unicodedata
|
|
5
7
|
from datetime import datetime
|
|
6
8
|
|
|
7
9
|
from db import (create_learning, update_learning, delete_learning, search_learnings,
|
|
8
10
|
list_learnings, find_similar_learnings, get_db, now_epoch, supersede_learning, extract_keywords,
|
|
9
11
|
resolve_session_correction_requirements)
|
|
12
|
+
from learning_resolver import (
|
|
13
|
+
applies_overlap as _canonical_applies_overlap,
|
|
14
|
+
looks_contradictory as _canonical_looks_contradictory,
|
|
15
|
+
resolve_learning_candidate,
|
|
16
|
+
)
|
|
10
17
|
|
|
11
18
|
NEGATION_PATTERNS = (
|
|
12
19
|
"do not", "don't", "never", "avoid", "skip", "without", "forbid", "forbidden",
|
|
13
20
|
"disable", "disabled", "remove", "ban", "bypass",
|
|
21
|
+
" no ", " nunca ", " evita ", " evitar ", " sin ", " prohibe ", " prohibido ",
|
|
22
|
+
" desactiva ", " desactivar ", " elimina ", " eliminar ", " bloquea ", " bloquear ",
|
|
14
23
|
)
|
|
15
24
|
CONTRADICTION_PAIRS = (
|
|
16
25
|
("enable", "disable"),
|
|
@@ -23,6 +32,15 @@ CONTRADICTION_PAIRS = (
|
|
|
23
32
|
("validate", "skip"),
|
|
24
33
|
("validate", "bypass"),
|
|
25
34
|
("include", "exclude"),
|
|
35
|
+
("activar", "desactivar"),
|
|
36
|
+
("usar", "evitar"),
|
|
37
|
+
("usar", "no usar"),
|
|
38
|
+
("editar", "no editar"),
|
|
39
|
+
("tocar", "no tocar"),
|
|
40
|
+
("anadir", "eliminar"),
|
|
41
|
+
("permitir", "prohibir"),
|
|
42
|
+
("validar", "saltar"),
|
|
43
|
+
("incluir", "excluir"),
|
|
26
44
|
)
|
|
27
45
|
|
|
28
46
|
|
|
@@ -40,26 +58,13 @@ def _normalize_applies_token(value: str) -> str:
|
|
|
40
58
|
|
|
41
59
|
|
|
42
60
|
def _applies_overlap(left: str, right: str) -> bool:
|
|
43
|
-
|
|
44
|
-
right_tokens = {_normalize_applies_token(item) for item in _split_applies_to(right)}
|
|
45
|
-
left_tokens.discard("")
|
|
46
|
-
right_tokens.discard("")
|
|
47
|
-
if not left_tokens or not right_tokens:
|
|
48
|
-
return False
|
|
49
|
-
if left_tokens & right_tokens:
|
|
50
|
-
return True
|
|
51
|
-
for left_token in left_tokens:
|
|
52
|
-
for right_token in right_tokens:
|
|
53
|
-
if "/" in left_token or "/" in right_token:
|
|
54
|
-
if left_token.startswith(f"{right_token}/") or right_token.startswith(f"{left_token}/"):
|
|
55
|
-
return True
|
|
56
|
-
if left_token.endswith(f"/{right_token}") or right_token.endswith(f"/{left_token}"):
|
|
57
|
-
return True
|
|
58
|
-
return False
|
|
61
|
+
return _canonical_applies_overlap(left, right)
|
|
59
62
|
|
|
60
63
|
|
|
61
64
|
def _normalize_text(text: str) -> str:
|
|
62
|
-
|
|
65
|
+
normalized = unicodedata.normalize("NFKD", str(text or ""))
|
|
66
|
+
ascii_text = "".join(ch for ch in normalized if not unicodedata.combining(ch))
|
|
67
|
+
return re.sub(r"\s+", " ", ascii_text.strip().lower())
|
|
63
68
|
|
|
64
69
|
|
|
65
70
|
def _tokenize(text: str) -> list[str]:
|
|
@@ -67,7 +72,7 @@ def _tokenize(text: str) -> list[str]:
|
|
|
67
72
|
|
|
68
73
|
|
|
69
74
|
def _contains_negation(text: str) -> bool:
|
|
70
|
-
lowered = _normalize_text(text)
|
|
75
|
+
lowered = f" {_normalize_text(text)} "
|
|
71
76
|
return any(token in lowered for token in NEGATION_PATTERNS)
|
|
72
77
|
|
|
73
78
|
|
|
@@ -75,37 +80,16 @@ def _negated_action_verbs(text: str) -> set[str]:
|
|
|
75
80
|
lowered = _normalize_text(text)
|
|
76
81
|
matches = set()
|
|
77
82
|
for pattern in (
|
|
78
|
-
r"(?:never|avoid|skip|disable|remove|forbid|bypass)\s+([a-z0-9_-]+)",
|
|
79
|
-
r"(?:do not|don't)\s+([a-z0-9_-]+)",
|
|
83
|
+
r"(?:never|avoid|skip|disable|remove|forbid|bypass|nunca|evita|evitar|desactiva|desactivar|elimina|eliminar|prohibe|prohibir|bloquea|bloquear)\s+([a-z0-9_-]+)",
|
|
84
|
+
r"(?:do not|don't|no)\s+([a-z0-9_-]+)",
|
|
85
|
+
r"(?:without|sin)\s+([a-z0-9_-]+)",
|
|
80
86
|
):
|
|
81
87
|
matches.update(re.findall(pattern, lowered))
|
|
82
88
|
return {match for match in matches if len(match) > 2}
|
|
83
89
|
|
|
84
90
|
|
|
85
91
|
def _looks_contradictory(existing_text: str, new_text: str) -> bool:
|
|
86
|
-
|
|
87
|
-
new_norm = _normalize_text(new_text)
|
|
88
|
-
if not existing_norm or not new_norm:
|
|
89
|
-
return False
|
|
90
|
-
existing_tokens = set(_tokenize(existing_norm))
|
|
91
|
-
new_tokens = set(_tokenize(new_norm))
|
|
92
|
-
if not (existing_tokens & new_tokens):
|
|
93
|
-
return False
|
|
94
|
-
existing_negated_verbs = _negated_action_verbs(existing_norm)
|
|
95
|
-
new_negated_verbs = _negated_action_verbs(new_norm)
|
|
96
|
-
if existing_negated_verbs & new_tokens and not existing_negated_verbs & new_negated_verbs:
|
|
97
|
-
return True
|
|
98
|
-
if new_negated_verbs & existing_tokens and not existing_negated_verbs & new_negated_verbs:
|
|
99
|
-
return True
|
|
100
|
-
if _contains_negation(existing_norm) != _contains_negation(new_norm):
|
|
101
|
-
return True
|
|
102
|
-
for positive, negative in CONTRADICTION_PAIRS:
|
|
103
|
-
existing_has_pair = positive in existing_norm or negative in existing_norm
|
|
104
|
-
new_has_pair = positive in new_norm or negative in new_norm
|
|
105
|
-
if existing_has_pair and new_has_pair:
|
|
106
|
-
if (positive in existing_norm and negative in new_norm) or (negative in existing_norm and positive in new_norm):
|
|
107
|
-
return True
|
|
108
|
-
return False
|
|
92
|
+
return _canonical_looks_contradictory(existing_text, new_text)
|
|
109
93
|
|
|
110
94
|
|
|
111
95
|
def _find_conflicting_active_learning(conn, *, category: str, title: str, content: str,
|
|
@@ -286,7 +270,8 @@ def score_learning_quality(row: dict, conn=None) -> dict:
|
|
|
286
270
|
|
|
287
271
|
def handle_learning_add(category: str, title: str, content: str, reasoning: str = '',
|
|
288
272
|
prevention: str = '', applies_to: str = '', review_days: int = 30,
|
|
289
|
-
priority: str = 'medium', supersedes_id: int = 0
|
|
273
|
+
priority: str = 'medium', supersedes_id: int = 0,
|
|
274
|
+
source_authority: str = 'explicit_instruction') -> str:
|
|
290
275
|
"""Add a new learning entry to the specified category.
|
|
291
276
|
|
|
292
277
|
Args:
|
|
@@ -304,15 +289,55 @@ def handle_learning_add(category: str, title: str, content: str, reasoning: str
|
|
|
304
289
|
category = category.lower().strip()
|
|
305
290
|
if not category:
|
|
306
291
|
return "ERROR: Category cannot be empty."
|
|
307
|
-
# Dedup guard: block exact title duplicates in same category
|
|
308
292
|
conn = get_db()
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
293
|
+
resolution = resolve_learning_candidate(
|
|
294
|
+
category=category,
|
|
295
|
+
title=title,
|
|
296
|
+
content=content,
|
|
297
|
+
reasoning=reasoning,
|
|
298
|
+
prevention=prevention,
|
|
299
|
+
applies_to=applies_to,
|
|
300
|
+
priority=priority,
|
|
301
|
+
supersedes_id=supersedes_id,
|
|
302
|
+
source_authority=source_authority,
|
|
303
|
+
conn=conn,
|
|
304
|
+
)
|
|
305
|
+
if resolution["action"] == "reject":
|
|
306
|
+
return f"ERROR: Learning candidate rejected: {resolution['reason']}."
|
|
307
|
+
if resolution["action"] == "merge":
|
|
308
|
+
existing_id = int(resolution.get("target_id") or 0)
|
|
309
|
+
existing = conn.execute("SELECT id, title, weight FROM learnings WHERE id = ?", (existing_id,)).fetchone()
|
|
310
|
+
if existing:
|
|
311
|
+
if resolution.get("reason") == "exact_title_duplicate":
|
|
312
|
+
_resolve_pending_correction_learning(existing_id)
|
|
313
|
+
return f"Learning #{existing['id']} already exists with same title in {category}: {existing['title']}. Use nexo_learning_update to modify it."
|
|
314
|
+
old_weight = float(existing["weight"] or 0.0)
|
|
315
|
+
new_weight = min(1.0, old_weight + 0.1)
|
|
316
|
+
conn.execute(
|
|
317
|
+
"UPDATE learnings SET weight = ?, updated_at = ? WHERE id = ?",
|
|
318
|
+
(new_weight, now_epoch(), existing_id),
|
|
319
|
+
)
|
|
320
|
+
conn.commit()
|
|
321
|
+
_resolve_pending_correction_learning(existing_id)
|
|
322
|
+
return (
|
|
323
|
+
f"Learning #{existing_id} resolved as merge ({resolution['reason']}, similarity "
|
|
324
|
+
f"{float(resolution.get('similarity') or 0):.2f}). No duplicate created. "
|
|
325
|
+
f"Weight bumped {old_weight:.2f} -> {new_weight:.2f}. Use nexo_learning_update(id={existing_id}) "
|
|
326
|
+
"to refine the canonical text."
|
|
327
|
+
)
|
|
328
|
+
if resolution["action"] == "conflict_review":
|
|
329
|
+
conflicting = {
|
|
330
|
+
"id": resolution.get("target_id"),
|
|
331
|
+
"title": resolution.get("target_title"),
|
|
332
|
+
"applies_to": applies_to,
|
|
333
|
+
}
|
|
334
|
+
return (
|
|
335
|
+
f"ERROR: Contradictory active learning #{conflicting['id']} already exists for applies_to="
|
|
336
|
+
f"{conflicting.get('applies_to', '')}: {conflicting['title']}. "
|
|
337
|
+
f"Supersede or update the existing canonical rule instead of creating two active file rules."
|
|
338
|
+
)
|
|
339
|
+
if resolution["action"] == "supersede":
|
|
340
|
+
supersedes_id = int(resolution.get("target_id") or supersedes_id or 0)
|
|
316
341
|
|
|
317
342
|
# ── R05 (Fase 2 Protocol Enforcer): auto-merge on high Jaccard similarity ──
|
|
318
343
|
# When a near-duplicate active learning exists (Jaccard >= R05 threshold),
|
|
@@ -356,19 +381,6 @@ def handle_learning_add(category: str, title: str, content: str, reasoning: str
|
|
|
356
381
|
f"→ {new_weight:.2f}. Use nexo_learning_update(id={existing_id}) if you need to "
|
|
357
382
|
"refine the canonical text."
|
|
358
383
|
)
|
|
359
|
-
conflicting = _find_conflicting_active_learning(
|
|
360
|
-
conn,
|
|
361
|
-
category=category,
|
|
362
|
-
title=title,
|
|
363
|
-
content=content,
|
|
364
|
-
applies_to=applies_to,
|
|
365
|
-
)
|
|
366
|
-
if conflicting and int(supersedes_id or 0) != int(conflicting["id"]):
|
|
367
|
-
return (
|
|
368
|
-
f"ERROR: Contradictory active learning #{conflicting['id']} already exists for applies_to="
|
|
369
|
-
f"{conflicting.get('applies_to', '')}: {conflicting['title']}. "
|
|
370
|
-
f"Supersede or update the existing canonical rule instead of creating two active file rules."
|
|
371
|
-
)
|
|
372
384
|
result = create_learning(
|
|
373
385
|
category, title, content, reasoning=reasoning, supersedes_id=(int(supersedes_id) if supersedes_id else None)
|
|
374
386
|
)
|
|
@@ -508,6 +520,25 @@ def handle_learning_add(category: str, title: str, content: str, reasoning: str
|
|
|
508
520
|
return f"Learning #{result['id']} added in {category}: {title}{meta_str} ✓verified{repetition_msg}{retro_meta_msg}{correction_msg}"
|
|
509
521
|
|
|
510
522
|
|
|
523
|
+
def handle_learning_resolve_candidate(category: str, title: str, content: str, reasoning: str = '',
|
|
524
|
+
prevention: str = '', applies_to: str = '',
|
|
525
|
+
priority: str = 'medium', supersedes_id: int = 0,
|
|
526
|
+
source_authority: str = 'inference') -> str:
|
|
527
|
+
"""Dry-run the canonical learning resolver without mutating state."""
|
|
528
|
+
result = resolve_learning_candidate(
|
|
529
|
+
category=category,
|
|
530
|
+
title=title,
|
|
531
|
+
content=content,
|
|
532
|
+
reasoning=reasoning,
|
|
533
|
+
prevention=prevention,
|
|
534
|
+
applies_to=applies_to,
|
|
535
|
+
priority=priority,
|
|
536
|
+
supersedes_id=supersedes_id,
|
|
537
|
+
source_authority=source_authority,
|
|
538
|
+
)
|
|
539
|
+
return json.dumps(result, ensure_ascii=False, indent=2)
|
|
540
|
+
|
|
541
|
+
|
|
511
542
|
def handle_learning_search(query: str, category: str = '') -> str:
|
|
512
543
|
"""Search learnings by query string, optionally filtered by category."""
|
|
513
544
|
results = search_learnings(query, category if category else None)
|
package/src/tools_memory_v2.py
CHANGED
|
@@ -173,8 +173,8 @@ def handle_memory_timeline(
|
|
|
173
173
|
result = memory_timeline(query, project_hint=project_hint, time_range=time_range, limit=limit)
|
|
174
174
|
candidates = result.get("candidates") or []
|
|
175
175
|
if not candidates:
|
|
176
|
-
return "
|
|
177
|
-
lines = [f"MEMORY TIMELINE ({len(candidates)}) — {query or time_range or '(
|
|
176
|
+
return "There are not enough events to build a timeline."
|
|
177
|
+
lines = [f"MEMORY TIMELINE ({len(candidates)}) — {query or time_range or '(no query)'}"]
|
|
178
178
|
for item in candidates:
|
|
179
179
|
refs = item.get("evidence_refs") or []
|
|
180
180
|
refs_note = f" refs={', '.join(refs[:3])}" if refs else ""
|
package/src/tools_sessions.py
CHANGED
|
@@ -1334,10 +1334,21 @@ def handle_context_packet(area: str, files: str = "") -> str:
|
|
|
1334
1334
|
parts.append("")
|
|
1335
1335
|
|
|
1336
1336
|
# 3. Active followups for this area
|
|
1337
|
-
|
|
1338
|
-
|
|
1337
|
+
from db import followup_lifecycle_lane, normalize_followup_status
|
|
1338
|
+
|
|
1339
|
+
followup_rows = conn.execute(
|
|
1340
|
+
"SELECT id, description, date, verification, status, owner FROM followups "
|
|
1341
|
+
"WHERE (description LIKE ? OR verification LIKE ?) ORDER BY date ASC LIMIT 50",
|
|
1339
1342
|
(f"%{area}%", f"%{area}%")
|
|
1340
1343
|
).fetchall()
|
|
1344
|
+
followups = []
|
|
1345
|
+
for row in followup_rows:
|
|
1346
|
+
item = dict(row)
|
|
1347
|
+
item["status"] = normalize_followup_status(item.get("status"))
|
|
1348
|
+
if followup_lifecycle_lane(item) == "active":
|
|
1349
|
+
followups.append(item)
|
|
1350
|
+
if len(followups) >= 10:
|
|
1351
|
+
break
|
|
1341
1352
|
if followups:
|
|
1342
1353
|
parts.append("## ACTIVE FOLLOWUPS")
|
|
1343
1354
|
for f in followups:
|
|
@@ -1479,11 +1490,18 @@ def handle_smart_startup_query() -> str:
|
|
|
1479
1490
|
sent_email_block = ""
|
|
1480
1491
|
|
|
1481
1492
|
# 1. Pending followups (what NEXO needs to do)
|
|
1482
|
-
|
|
1483
|
-
|
|
1484
|
-
|
|
1485
|
-
|
|
1486
|
-
|
|
1493
|
+
try:
|
|
1494
|
+
from db import followup_lifecycle_snapshot
|
|
1495
|
+
|
|
1496
|
+
active_followups = (followup_lifecycle_snapshot(limit=500).get("lanes") or {}).get("active", [])[:5]
|
|
1497
|
+
for f in active_followups:
|
|
1498
|
+
query_parts.append(str(f.get("description") or "")[:100])
|
|
1499
|
+
except Exception:
|
|
1500
|
+
followups = conn.execute(
|
|
1501
|
+
"SELECT description FROM followups WHERE status = 'PENDING' ORDER BY date ASC LIMIT 5"
|
|
1502
|
+
).fetchall()
|
|
1503
|
+
for f in followups:
|
|
1504
|
+
query_parts.append(f['description'][:100])
|
|
1487
1505
|
|
|
1488
1506
|
# 2. Due reminders (what the user needs to know)
|
|
1489
1507
|
reminders = conn.execute(
|
package/src/tools_transcripts.py
CHANGED
|
@@ -8,26 +8,53 @@ from transcript_utils import (
|
|
|
8
8
|
load_transcript,
|
|
9
9
|
search_transcripts,
|
|
10
10
|
)
|
|
11
|
+
from transcript_index import ensure_transcript_index, search_transcript_index
|
|
11
12
|
|
|
12
13
|
|
|
13
14
|
def handle_transcript_search(query: str = "", hours: int = 24, client: str = "", limit: int = 10) -> str:
|
|
14
15
|
"""Search recent Claude Code / Codex transcripts as a fallback when memory is insufficient."""
|
|
15
16
|
window = clamp_transcript_hours(hours)
|
|
16
|
-
|
|
17
|
+
clean_client = (client or "").strip()
|
|
18
|
+
ensure_transcript_index(
|
|
19
|
+
hours=window,
|
|
20
|
+
client=clean_client,
|
|
21
|
+
limit=max(200, min(2000, int(limit or 10) * 50)),
|
|
22
|
+
min_user_messages=1,
|
|
23
|
+
)
|
|
24
|
+
rows = search_transcript_index(query or "", hours=window, client=clean_client, limit=limit)
|
|
25
|
+
source = "index"
|
|
26
|
+
if not rows:
|
|
27
|
+
rows = search_transcripts(
|
|
28
|
+
query or "",
|
|
29
|
+
hours=window,
|
|
30
|
+
client=clean_client,
|
|
31
|
+
limit=limit,
|
|
32
|
+
min_user_messages=1,
|
|
33
|
+
)
|
|
34
|
+
source = "raw"
|
|
17
35
|
if not rows:
|
|
18
36
|
scope = f"query='{query}'" if query else "recent transcripts"
|
|
19
37
|
return f"No transcript matches for {scope} in the last {window}h."
|
|
20
38
|
|
|
21
|
-
lines = [f"TRANSCRIPTS ({len(rows)}) — last {window}h"]
|
|
39
|
+
lines = [f"TRANSCRIPTS ({len(rows)}) — last {window}h ({source})"]
|
|
22
40
|
for item in rows:
|
|
41
|
+
session_file = item.get("session_file") or item.get("session_id") or item.get("display_name")
|
|
42
|
+
display_name = item.get("display_name") or item.get("path_ref") or item.get("session_path")
|
|
43
|
+
modified = item.get("modified") or item.get("modified_at")
|
|
23
44
|
lines.append(
|
|
24
|
-
f"- {
|
|
25
|
-
f"(modified={
|
|
45
|
+
f"- {session_file}: [{item.get('client') or item.get('source_client')}] {display_name} "
|
|
46
|
+
f"(modified={modified}, messages={item.get('message_count')}, user={item.get('user_message_count')})"
|
|
26
47
|
)
|
|
27
48
|
if item.get("cwd"):
|
|
28
49
|
lines.append(f" cwd: {item['cwd']}")
|
|
29
50
|
if item.get("session_uid"):
|
|
30
51
|
lines.append(f" session_uid: {item['session_uid']}")
|
|
52
|
+
if item.get("conversation_id") and item.get("conversation_id") != item.get("session_id"):
|
|
53
|
+
lines.append(f" conversation_id: {item['conversation_id']}")
|
|
54
|
+
if item.get("path_ref"):
|
|
55
|
+
lines.append(f" path: {item['path_ref']}")
|
|
56
|
+
if item.get("sanitized_summary"):
|
|
57
|
+
lines.append(f" summary: {item['sanitized_summary']}")
|
|
31
58
|
for snippet in item.get("matched_messages") or []:
|
|
32
59
|
lines.append(
|
|
33
60
|
f" [{snippet.get('role')}#{snippet.get('index')}] {snippet.get('snippet')}"
|
|
@@ -38,15 +65,29 @@ def handle_transcript_search(query: str = "", hours: int = 24, client: str = "",
|
|
|
38
65
|
def handle_transcript_recent(hours: int = 24, client: str = "", limit: int = 10) -> str:
|
|
39
66
|
"""List recent transcripts without searching full text."""
|
|
40
67
|
window = clamp_transcript_hours(hours)
|
|
41
|
-
|
|
68
|
+
clean_client = (client or "").strip()
|
|
69
|
+
ensure_transcript_index(
|
|
70
|
+
hours=window,
|
|
71
|
+
client=clean_client,
|
|
72
|
+
limit=max(200, min(2000, int(limit or 10) * 50)),
|
|
73
|
+
min_user_messages=1,
|
|
74
|
+
)
|
|
75
|
+
rows = search_transcript_index("", hours=window, client=clean_client, limit=limit)
|
|
76
|
+
source = "index"
|
|
77
|
+
if not rows:
|
|
78
|
+
rows = list_recent_transcripts(hours=window, client=clean_client, limit=limit, min_user_messages=1)
|
|
79
|
+
source = "raw"
|
|
42
80
|
if not rows:
|
|
43
81
|
return f"No transcripts found in the last {window}h."
|
|
44
82
|
|
|
45
|
-
lines = [f"RECENT TRANSCRIPTS ({len(rows)}) — last {window}h"]
|
|
83
|
+
lines = [f"RECENT TRANSCRIPTS ({len(rows)}) — last {window}h ({source})"]
|
|
46
84
|
for item in rows:
|
|
85
|
+
session_file = item.get("session_file") or item.get("session_id") or item.get("display_name")
|
|
86
|
+
display_name = item.get("display_name") or item.get("path_ref") or item.get("session_path")
|
|
87
|
+
modified = item.get("modified") or item.get("modified_at")
|
|
47
88
|
lines.append(
|
|
48
|
-
f"- {
|
|
49
|
-
f"(modified={
|
|
89
|
+
f"- {session_file}: [{item.get('client') or item.get('source_client')}] {display_name} "
|
|
90
|
+
f"(modified={modified}, messages={item.get('message_count')}, user={item.get('user_message_count')})"
|
|
50
91
|
)
|
|
51
92
|
return "\n".join(lines)
|
|
52
93
|
|
|
@@ -62,6 +103,7 @@ def handle_transcript_read(
|
|
|
62
103
|
session_ref=(session_ref or "").strip(),
|
|
63
104
|
transcript_path=(transcript_path or "").strip(),
|
|
64
105
|
client=(client or "").strip(),
|
|
106
|
+
min_user_messages=1,
|
|
65
107
|
)
|
|
66
108
|
if not transcript:
|
|
67
109
|
target = session_ref or transcript_path or "(empty ref)"
|
package/src/transcript_index.py
CHANGED
|
@@ -15,9 +15,12 @@ from typing import Any
|
|
|
15
15
|
from db import get_db
|
|
16
16
|
from transcript_utils import (
|
|
17
17
|
DEFAULT_TRANSCRIPT_HOURS,
|
|
18
|
+
MAX_TRANSCRIPT_HOURS,
|
|
18
19
|
_score_text_match,
|
|
19
20
|
_tokenize,
|
|
20
21
|
_truncate,
|
|
22
|
+
find_claude_session_files,
|
|
23
|
+
find_codex_session_files,
|
|
21
24
|
list_recent_transcripts,
|
|
22
25
|
)
|
|
23
26
|
|
|
@@ -103,6 +106,29 @@ def _sanitized_summary(session: dict[str, Any], *, limit: int = 900) -> str:
|
|
|
103
106
|
return _truncate(summary, limit)
|
|
104
107
|
|
|
105
108
|
|
|
109
|
+
def _row_ref_matches(query: str, row: dict[str, Any]) -> bool:
|
|
110
|
+
clean = str(query or "").strip().lower()
|
|
111
|
+
if len(clean) < 6:
|
|
112
|
+
return False
|
|
113
|
+
values = [
|
|
114
|
+
row.get("session_id"),
|
|
115
|
+
row.get("conversation_id"),
|
|
116
|
+
row.get("display_name"),
|
|
117
|
+
row.get("path_ref"),
|
|
118
|
+
Path(str(row.get("path_ref") or "")).name,
|
|
119
|
+
Path(str(row.get("path_ref") or "")).stem,
|
|
120
|
+
]
|
|
121
|
+
for value in values:
|
|
122
|
+
candidate = str(value or "").strip().lower()
|
|
123
|
+
if not candidate:
|
|
124
|
+
continue
|
|
125
|
+
if candidate.startswith(clean):
|
|
126
|
+
return True
|
|
127
|
+
if candidate.split(":")[-1].startswith(clean):
|
|
128
|
+
return True
|
|
129
|
+
return False
|
|
130
|
+
|
|
131
|
+
|
|
106
132
|
def index_transcript_session(session: dict[str, Any]) -> dict[str, Any]:
|
|
107
133
|
"""Upsert a single transcript metadata row and return it."""
|
|
108
134
|
_ensure_transcript_index_table()
|
|
@@ -186,6 +212,81 @@ def index_recent_transcripts(
|
|
|
186
212
|
return indexed
|
|
187
213
|
|
|
188
214
|
|
|
215
|
+
def _latest_source_modified_ts(client: str = "") -> float:
|
|
216
|
+
paths: list[Path] = []
|
|
217
|
+
if not client or client == "claude_code":
|
|
218
|
+
paths.extend(find_claude_session_files())
|
|
219
|
+
if not client or client == "codex":
|
|
220
|
+
paths.extend(find_codex_session_files())
|
|
221
|
+
latest = 0.0
|
|
222
|
+
for path in paths:
|
|
223
|
+
try:
|
|
224
|
+
latest = max(latest, path.stat().st_mtime)
|
|
225
|
+
except OSError:
|
|
226
|
+
continue
|
|
227
|
+
return latest
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
def _parse_iso_ts(value: str) -> float:
|
|
231
|
+
if not value:
|
|
232
|
+
return 0.0
|
|
233
|
+
try:
|
|
234
|
+
return datetime.fromisoformat(value).timestamp()
|
|
235
|
+
except Exception:
|
|
236
|
+
return 0.0
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
def ensure_transcript_index(
|
|
240
|
+
*,
|
|
241
|
+
hours: int = MAX_TRANSCRIPT_HOURS,
|
|
242
|
+
client: str = "",
|
|
243
|
+
limit: int = 1000,
|
|
244
|
+
min_user_messages: int = 1,
|
|
245
|
+
force: bool = False,
|
|
246
|
+
) -> dict[str, Any]:
|
|
247
|
+
"""Keep the compact transcript DB index warm enough for fast lookup.
|
|
248
|
+
|
|
249
|
+
This is intentionally bounded. Raw JSONL remains the source of truth, but
|
|
250
|
+
normal MCP searches should hit this table before falling back to slow file
|
|
251
|
+
scans.
|
|
252
|
+
"""
|
|
253
|
+
_ensure_transcript_index_table()
|
|
254
|
+
conn = get_db()
|
|
255
|
+
params: list[Any] = []
|
|
256
|
+
where = "1=1"
|
|
257
|
+
if client:
|
|
258
|
+
where += " AND source_client = ?"
|
|
259
|
+
params.append(client)
|
|
260
|
+
before = int(conn.execute(f"SELECT COUNT(*) AS total FROM transcript_index WHERE {where}", tuple(params)).fetchone()["total"] or 0)
|
|
261
|
+
latest_indexed = str(conn.execute(
|
|
262
|
+
f"SELECT MAX(modified_at) AS latest FROM transcript_index WHERE {where}",
|
|
263
|
+
tuple(params),
|
|
264
|
+
).fetchone()["latest"] or "")
|
|
265
|
+
latest_source_ts = _latest_source_modified_ts(client)
|
|
266
|
+
latest_indexed_ts = _parse_iso_ts(latest_indexed)
|
|
267
|
+
stale = bool(latest_source_ts and latest_source_ts > latest_indexed_ts + 1.0)
|
|
268
|
+
should_index = bool(force or before == 0 or stale)
|
|
269
|
+
indexed: list[dict[str, Any]] = []
|
|
270
|
+
if should_index:
|
|
271
|
+
indexed = index_recent_transcripts(
|
|
272
|
+
hours=hours,
|
|
273
|
+
client=client,
|
|
274
|
+
limit=limit,
|
|
275
|
+
min_user_messages=min_user_messages,
|
|
276
|
+
)
|
|
277
|
+
after = int(conn.execute(f"SELECT COUNT(*) AS total FROM transcript_index WHERE {where}", tuple(params)).fetchone()["total"] or 0)
|
|
278
|
+
return {
|
|
279
|
+
"ok": True,
|
|
280
|
+
"before": before,
|
|
281
|
+
"after": after,
|
|
282
|
+
"indexed": len(indexed),
|
|
283
|
+
"forced": bool(force),
|
|
284
|
+
"stale": stale,
|
|
285
|
+
"hours": hours,
|
|
286
|
+
"client": client,
|
|
287
|
+
}
|
|
288
|
+
|
|
289
|
+
|
|
189
290
|
def search_transcript_index(
|
|
190
291
|
query: str = "",
|
|
191
292
|
*,
|
|
@@ -201,7 +302,7 @@ def search_transcript_index(
|
|
|
201
302
|
where += " AND source_client = ?"
|
|
202
303
|
params.append(client)
|
|
203
304
|
rows = [dict(row) for row in conn.execute(
|
|
204
|
-
f"SELECT * FROM transcript_index WHERE {where} ORDER BY modified_at DESC LIMIT
|
|
305
|
+
f"SELECT * FROM transcript_index WHERE {where} ORDER BY modified_at DESC LIMIT 5000",
|
|
205
306
|
tuple(params),
|
|
206
307
|
).fetchall()]
|
|
207
308
|
|
|
@@ -222,9 +323,11 @@ def search_transcript_index(
|
|
|
222
323
|
continue
|
|
223
324
|
haystack = " ".join(
|
|
224
325
|
str(row.get(field) or "")
|
|
225
|
-
for field in ("sanitized_summary", "display_name", "session_id", "conversation_id", "metadata_json")
|
|
326
|
+
for field in ("sanitized_summary", "display_name", "session_id", "conversation_id", "path_ref", "metadata_json")
|
|
226
327
|
)
|
|
227
328
|
score = _score_text_match(query_tokens, haystack)
|
|
329
|
+
if _row_ref_matches(query, row):
|
|
330
|
+
score = max(score, 2.0)
|
|
228
331
|
if score <= 0:
|
|
229
332
|
continue
|
|
230
333
|
row["_score"] = round(score, 4)
|
package/src/transcript_utils.py
CHANGED
|
@@ -110,7 +110,10 @@ def find_codex_session_files() -> list[Path]:
|
|
|
110
110
|
if not root.exists():
|
|
111
111
|
continue
|
|
112
112
|
for jsonl in sorted(root.rglob("*.jsonl")):
|
|
113
|
-
|
|
113
|
+
try:
|
|
114
|
+
key = str(jsonl.resolve())
|
|
115
|
+
except OSError:
|
|
116
|
+
key = str(jsonl)
|
|
114
117
|
if key in seen:
|
|
115
118
|
continue
|
|
116
119
|
seen.add(key)
|
|
@@ -346,8 +349,20 @@ def list_recent_transcripts(
|
|
|
346
349
|
return filtered[: max(1, int(limit or 10))]
|
|
347
350
|
|
|
348
351
|
|
|
349
|
-
def search_transcripts(
|
|
350
|
-
|
|
352
|
+
def search_transcripts(
|
|
353
|
+
query: str,
|
|
354
|
+
*,
|
|
355
|
+
hours: int = DEFAULT_TRANSCRIPT_HOURS,
|
|
356
|
+
client: str = "",
|
|
357
|
+
limit: int = 10,
|
|
358
|
+
min_user_messages: int = MIN_USER_MESSAGES,
|
|
359
|
+
) -> list[dict]:
|
|
360
|
+
rows = list_recent_transcripts(
|
|
361
|
+
hours=hours,
|
|
362
|
+
client=client,
|
|
363
|
+
limit=200,
|
|
364
|
+
min_user_messages=min_user_messages,
|
|
365
|
+
)
|
|
351
366
|
query_tokens = _tokenize(query)
|
|
352
367
|
if not query_tokens:
|
|
353
368
|
return rows[: max(1, int(limit or 10))]
|
|
@@ -398,7 +413,46 @@ def search_transcripts(query: str, *, hours: int = DEFAULT_TRANSCRIPT_HOURS, cli
|
|
|
398
413
|
return matches[: max(1, int(limit or 10))]
|
|
399
414
|
|
|
400
415
|
|
|
401
|
-
def
|
|
416
|
+
def _transcript_ref_matches(ref: str, session: dict, path: Path) -> bool:
|
|
417
|
+
clean = str(ref or "").strip()
|
|
418
|
+
if not clean:
|
|
419
|
+
return True
|
|
420
|
+
candidates = {
|
|
421
|
+
str(session.get("session_file", "")),
|
|
422
|
+
str(session.get("display_name", "")),
|
|
423
|
+
str(session.get("session_uid", "")),
|
|
424
|
+
str(session.get("conversation_id", "")),
|
|
425
|
+
str(path),
|
|
426
|
+
path.name,
|
|
427
|
+
path.stem,
|
|
428
|
+
}
|
|
429
|
+
if clean in candidates:
|
|
430
|
+
return True
|
|
431
|
+
|
|
432
|
+
# Operator-facing refs are often short prefixes copied from filenames
|
|
433
|
+
# or session ids. Require a minimum length so common words do not match
|
|
434
|
+
# arbitrary historical transcripts.
|
|
435
|
+
if len(clean) < 6:
|
|
436
|
+
return False
|
|
437
|
+
lowered = clean.lower()
|
|
438
|
+
for candidate in candidates:
|
|
439
|
+
value = str(candidate or "").strip().lower()
|
|
440
|
+
if not value:
|
|
441
|
+
continue
|
|
442
|
+
if value.startswith(lowered):
|
|
443
|
+
return True
|
|
444
|
+
if value.split(":")[-1].startswith(lowered):
|
|
445
|
+
return True
|
|
446
|
+
return False
|
|
447
|
+
|
|
448
|
+
|
|
449
|
+
def load_transcript(
|
|
450
|
+
session_ref: str = "",
|
|
451
|
+
transcript_path: str = "",
|
|
452
|
+
client: str = "",
|
|
453
|
+
*,
|
|
454
|
+
min_user_messages: int = 1,
|
|
455
|
+
) -> dict | None:
|
|
402
456
|
ref = str(session_ref or "").strip()
|
|
403
457
|
path_ref = str(transcript_path or "").strip()
|
|
404
458
|
|
|
@@ -416,17 +470,15 @@ def load_transcript(session_ref: str = "", transcript_path: str = "", client: st
|
|
|
416
470
|
continue
|
|
417
471
|
except Exception:
|
|
418
472
|
continue
|
|
419
|
-
session =
|
|
473
|
+
session = (
|
|
474
|
+
extract_codex_session(path, min_user_messages=min_user_messages)
|
|
475
|
+
if detected_client == "codex"
|
|
476
|
+
else extract_claude_session(path, min_user_messages=min_user_messages)
|
|
477
|
+
)
|
|
420
478
|
if not session:
|
|
421
479
|
continue
|
|
422
|
-
if ref:
|
|
423
|
-
|
|
424
|
-
str(session.get("session_file", "")),
|
|
425
|
-
str(session.get("display_name", "")),
|
|
426
|
-
str(session.get("session_uid", "")),
|
|
427
|
-
str(path),
|
|
428
|
-
}:
|
|
429
|
-
continue
|
|
480
|
+
if ref and not _transcript_ref_matches(ref, session, path):
|
|
481
|
+
continue
|
|
430
482
|
try:
|
|
431
483
|
session["modified"] = datetime.fromtimestamp(path.stat().st_mtime).isoformat()
|
|
432
484
|
except OSError:
|
|
@@ -31,8 +31,8 @@ INSTRUCTIONS:
|
|
|
31
31
|
- A feedback covering the same topic already exists
|
|
32
32
|
|
|
33
33
|
4. For each rule to promote, create the file with Write in [[memory_dir]]/:
|
|
34
|
-
|
|
35
|
-
|
|
34
|
+
Name: feedback_postmortem_[descriptive_slug].md
|
|
35
|
+
Format:
|
|
36
36
|
---
|
|
37
37
|
name: [descriptive title]
|
|
38
38
|
description: Behavioral rule extracted from self-critique — recurring pattern
|
|
@@ -49,7 +49,7 @@ INSTRUCTIONS:
|
|
|
49
49
|
Sessions: X | Self-critiques: Y | Promoted: Z
|
|
50
50
|
|
|
51
51
|
## Today's self-critiques (summary)
|
|
52
|
-
[
|
|
52
|
+
[Brief list]
|
|
53
53
|
|
|
54
54
|
## Promoted to permanent memory
|
|
55
55
|
[What you promoted and why]
|