nexo-brain 7.23.13 → 7.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. package/.claude-plugin/plugin.json +1 -1
  2. package/README.md +13 -11
  3. package/bin/nexo-brain.js +42 -235
  4. package/package.json +1 -1
  5. package/src/automation_supervisor.py +1 -1
  6. package/src/cli.py +255 -9
  7. package/src/cognitive_control_observatory.py +224 -0
  8. package/src/dashboard/app.py +26 -9
  9. package/src/db/__init__.py +2 -0
  10. package/src/db/_learnings.py +1 -1
  11. package/src/db/_memory_v2.py +107 -1
  12. package/src/db/_protocol.py +2 -2
  13. package/src/db/_reminders.py +132 -4
  14. package/src/db/_schema.py +2 -2
  15. package/src/events_bus.py +4 -5
  16. package/src/learning_resolver.py +419 -0
  17. package/src/lifecycle_events.py +9 -9
  18. package/src/local_context/api.py +67 -5
  19. package/src/local_context/usage_events.py +24 -0
  20. package/src/memory_observation_processor.py +28 -0
  21. package/src/memory_retrieval.py +5 -5
  22. package/src/operator_language.py +2 -0
  23. package/src/plugins/backup.py +1 -1
  24. package/src/plugins/cortex.py +21 -21
  25. package/src/plugins/episodic_memory.py +11 -11
  26. package/src/plugins/goal_engine.py +3 -3
  27. package/src/plugins/personal_scripts.py +75 -0
  28. package/src/plugins/protocol.py +10 -1
  29. package/src/pre_answer_router.py +116 -0
  30. package/src/r_catalog.py +4 -5
  31. package/src/saved_not_used_audit.py +31 -31
  32. package/src/script_registry.py +444 -1
  33. package/src/scripts/deep-sleep/apply_findings.py +79 -17
  34. package/src/scripts/nexo-daily-self-audit.py +46 -13
  35. package/src/scripts/nexo-email-migrate-config.py +2 -2
  36. package/src/scripts/nexo-email-monitor.py +19 -19
  37. package/src/scripts/nexo-followup-hygiene.py +40 -8
  38. package/src/scripts/nexo-followup-runner.py +31 -31
  39. package/src/scripts/nexo-inbox-hook.sh +1 -1
  40. package/src/scripts/nexo-learning-validator.py +24 -3
  41. package/src/server.py +73 -1
  42. package/src/system_catalog.py +31 -31
  43. package/src/tools_learnings.py +96 -65
  44. package/src/tools_memory_v2.py +2 -2
  45. package/src/tools_sessions.py +25 -7
  46. package/templates/core-prompts/postmortem-consolidator.md +3 -3
  47. package/templates/core-prompts/r17-promise-debt-injection.md +1 -1
  48. package/templates/core-prompts/server-mcp-instructions.md +6 -6
  49. package/tool-enforcement-map.json +143 -13
@@ -0,0 +1,419 @@
1
+ from __future__ import annotations
2
+
3
+ """Canonical learning candidate resolver.
4
+
5
+ This module decides what should happen to an incoming learning before any
6
+ caller mutates the learnings table. It deliberately returns a decision only;
7
+ MCP tools, Deep Sleep, validators and self-audit own the actual write.
8
+ """
9
+
10
+ import re
11
+ import sqlite3
12
+ import unicodedata
13
+ from typing import Any
14
+
15
+ from db import extract_keywords, get_db
16
+ from db._semantic_similarity import hybrid_similarity_score
17
+
18
+
19
+ AUTHORITY_RANKS: dict[str, int] = {
20
+ "francisco_correction": 100,
21
+ "explicit_instruction": 80,
22
+ "code_test_evidence": 60,
23
+ "deep_sleep": 40,
24
+ "inference": 20,
25
+ }
26
+
27
+ CANONICAL_ACTIONS = ("new", "merge", "supersede", "conflict_review", "reject")
28
+
29
+ NEGATION_PATTERNS = (
30
+ "do not", "don't", "never", "avoid", "skip", "without", "forbid", "forbidden",
31
+ "disable", "disabled", "remove", "ban", "bypass",
32
+ " no ", " nunca ", " evita ", " evitar ", " sin ", " prohibe ", " prohibido ",
33
+ " desactiva ", " desactivar ", " elimina ", " eliminar ", " bloquea ", " bloquear ",
34
+ )
35
+ CONTRADICTION_PAIRS = (
36
+ ("enable", "disable"),
37
+ ("use", "avoid"),
38
+ ("add", "remove"),
39
+ ("allow", "forbid"),
40
+ ("always", "never"),
41
+ ("before", "after"),
42
+ ("require", "skip"),
43
+ ("validate", "skip"),
44
+ ("validate", "bypass"),
45
+ ("include", "exclude"),
46
+ ("activar", "desactivar"),
47
+ ("usar", "evitar"),
48
+ ("usar", "no usar"),
49
+ ("editar", "no editar"),
50
+ ("tocar", "no tocar"),
51
+ ("anadir", "eliminar"),
52
+ ("permitir", "prohibir"),
53
+ ("validar", "saltar"),
54
+ ("incluir", "excluir"),
55
+ )
56
+
57
+
58
def normalize_authority(value: str | None) -> str:
    """Map a free-form authority label onto a key of ``AUTHORITY_RANKS``.

    The label is trimmed, lower-cased and has hyphens and spaces turned into
    underscores; known shorthand aliases are then expanded. Any label that is
    still not a canonical key (including ``None`` and the empty string) falls
    back to ``"inference"``.
    """
    alias_to_canonical = {
        "francisco": "francisco_correction",
        "user_correction": "francisco_correction",
        "correction": "francisco_correction",
        "explicit": "explicit_instruction",
        "operator": "explicit_instruction",
        "manual": "explicit_instruction",
        "code": "code_test_evidence",
        "test": "code_test_evidence",
        "evidence": "code_test_evidence",
        "deep": "deep_sleep",
        "deepsleep": "deep_sleep",
        "overnight": "deep_sleep",
        "inferred": "inference",
    }
    # One translation pass handles both separators ("-" and " " -> "_").
    separators = str.maketrans("- ", "__")
    key = str(value or "").strip().lower().translate(separators)
    key = alias_to_canonical.get(key, key)
    if key in AUTHORITY_RANKS:
        return key
    return "inference"
77
+
78
+
79
def authority_rank(value: str | None) -> int:
    """Return the numeric rank of *value* after alias normalization."""
    canonical = normalize_authority(value)
    return AUTHORITY_RANKS[canonical]
81
+
82
+
83
def _normalize_text(text: str) -> str:
    """Fold *text* for comparison: no combining marks, lower-case, single spaces.

    Falsy input (``None``, ``""``, ``0``) yields the empty string.
    """
    decomposed = unicodedata.normalize("NFKD", str(text or ""))
    # NFKD splits accented letters into base + combining mark; keep the base only.
    without_marks = "".join(
        filter(lambda ch: not unicodedata.combining(ch), decomposed)
    )
    folded = without_marks.strip().lower()
    return re.sub(r"\s+", " ", folded)
87
+
88
+
89
def _tokenize(text: str) -> list[str]:
    """Return the runs of ``[a-z0-9_-]`` found in the normalized form of *text*."""
    folded = _normalize_text(text)
    return [match.group(0) for match in re.finditer(r"[a-z0-9_-]+", folded)]
91
+
92
+
93
def _token_sets_related(left: set[str], right: set[str]) -> bool:
    """Tell whether two token sets share a token or a 5-character stem.

    Two sets are related when they have a token in common, or when a token of
    at least five characters on one side starts with the same five characters
    as such a token on the other side. Shorter tokens only count through
    exact equality.
    """
    if not left.isdisjoint(right):
        return True
    # With both tokens at least 5 characters long, "a starts with b[:5]" is
    # the same as "a[:5] == b[:5]", so a set of stems replaces the pairwise
    # scan without changing the result.
    left_stems = {token[:5] for token in left if len(token) >= 5}
    if not left_stems:
        return False
    return any(len(token) >= 5 and token[:5] in left_stems for token in right)
103
+
104
+
105
def _contains_negation(text: str) -> bool:
    """Return True when the normalized *text* contains any ``NEGATION_PATTERNS`` entry.

    The text is padded with one space on each side so that the space-delimited
    patterns can also match at the very start or end.
    """
    padded = " " + _normalize_text(text) + " "
    # NOTE(review): matching is plain substring containment, so patterns
    # listed without surrounding spaces (e.g. "ban") also hit longer words
    # such as "urban" - confirm whether that is intended.
    for marker in NEGATION_PATTERNS:
        if marker in padded:
            return True
    return False
108
+
109
+
110
# Each pattern captures the word that directly follows a negation keyword.
# The leading ``\b`` makes sure the keyword starts a word: without it "no"
# matched inside "piano lesson" and "sin" inside "basin water".
_NEGATED_VERB_PATTERNS = (
    re.compile(
        r"\b(?:never|avoid|skip|disable|remove|forbid|bypass|nunca|evita|evitar|desactiva|desactivar|elimina|eliminar|prohibe|prohibir|bloquea|bloquear)\s+([a-z0-9_-]+)"
    ),
    re.compile(r"\b(?:do not|don't|no)\s+([a-z0-9_-]+)"),
    re.compile(r"\b(?:without|sin)\s+([a-z0-9_-]+)"),
)


def _negated_action_verbs(text: str) -> set[str]:
    """Collect the words that immediately follow a negation keyword in *text*.

    The text is normalized first. Captured words of two characters or fewer
    are discarded.
    """
    lowered = _normalize_text(text)
    matches: set[str] = set()
    for pattern in _NEGATED_VERB_PATTERNS:
        matches.update(pattern.findall(lowered))
    return {match for match in matches if len(match) > 2}
120
+
121
+
122
def looks_contradictory(existing_text: str, new_text: str) -> bool:
    """Heuristically decide whether *new_text* contradicts *existing_text*.

    Both texts are normalized. They are considered contradictory when they
    are related (share a token or a 5-character stem) and any of these holds:

    * one text negates a word the other text uses, and the two texts do not
      negate a common word;
    * exactly one of the texts contains a negation marker;
    * the texts sit on opposite sides of a ``CONTRADICTION_PAIRS`` entry.

    Returns False when either text is empty, when the texts are unrelated, or
    when both normalize to the same string.
    """
    existing_norm = _normalize_text(existing_text)
    new_norm = _normalize_text(new_text)
    if not existing_norm or not new_norm:
        return False
    # A statement cannot contradict itself. Without this guard a text that
    # mentions both sides of a pair ("before ... after") matched itself.
    if existing_norm == new_norm:
        return False
    existing_tokens = set(_tokenize(existing_norm))
    new_tokens = set(_tokenize(new_norm))
    if not _token_sets_related(existing_tokens, new_tokens):
        return False
    existing_negated = _negated_action_verbs(existing_norm)
    new_negated = _negated_action_verbs(new_norm)
    if not existing_negated & new_negated:
        if existing_negated & new_tokens or new_negated & existing_tokens:
            return True
    if _contains_negation(existing_norm) != _contains_negation(new_norm):
        return True
    for positive, negative in CONTRADICTION_PAIRS:
        existing_positive = _mentions_positive(existing_norm, positive, negative)
        new_positive = _mentions_positive(new_norm, positive, negative)
        if (existing_positive and negative in new_norm) or (
            negative in existing_norm and new_positive
        ):
            return True
    return False


def _mentions_positive(text: str, positive: str, negative: str) -> bool:
    """Return True when *text* contains *positive* outside of *negative*.

    Some pairs embed the positive term in the negative one ("activar" inside
    "desactivar", "usar" inside "no usar"); occurrences of the negative phrase
    are blanked first so they are not counted as the positive term.
    """
    if positive in negative:
        text = text.replace(negative, " ")
    return positive in text
146
+
147
+
148
def _split_applies_to(applies_to: str) -> list[str]:
    """Split a comma-separated scope string into trimmed, non-empty items."""
    pieces = (piece.strip() for piece in str(applies_to or "").split(","))
    return list(filter(None, pieces))
150
+
151
+
152
def _normalize_applies_token(value: str) -> str:
    """Canonicalize one scope item: forward slashes, no trailing slash, lower-case."""
    unified = str(value or "").replace("\\", "/")
    return unified.rstrip("/").lower()
154
+
155
+
156
def applies_overlap(left: str, right: str) -> bool:
    """Tell whether two comma-separated scope strings refer to overlapping targets.

    Items are canonicalized first. Scopes overlap when they share an item, or
    when one item is a path prefix or path suffix of another at a ``/``
    boundary. Two items that both lack a ``/`` only overlap when equal. An
    empty scope on either side never overlaps.
    """
    left_scopes = {_normalize_applies_token(item) for item in _split_applies_to(left)}
    right_scopes = {_normalize_applies_token(item) for item in _split_applies_to(right)}
    # Items such as "/" canonicalize to the empty string; drop them.
    left_scopes.discard("")
    right_scopes.discard("")
    if not (left_scopes and right_scopes):
        return False
    if not left_scopes.isdisjoint(right_scopes):
        return True

    def _nested(first: str, second: str) -> bool:
        if "/" not in first and "/" not in second:
            return False
        return (
            first.startswith(f"{second}/")
            or second.startswith(f"{first}/")
            or first.endswith(f"/{second}")
            or second.endswith(f"/{first}")
        )

    return any(
        _nested(left_item, right_item)
        for left_item in left_scopes
        for right_item in right_scopes
    )
174
+
175
+
176
def _table_columns(conn: sqlite3.Connection, table: str) -> set[str]:
    """Return the column names of *table*, or an empty set on any failure.

    Works with name-addressable rows (``sqlite3.Row``, dict-like factories)
    and with plain tuple rows. The lookup is best-effort: any error yields an
    empty set instead of propagating.
    """
    # Quote the identifier so unusual table names cannot alter the statement.
    quoted = '"' + str(table).replace('"', '""') + '"'
    try:
        rows = conn.execute(f"PRAGMA table_info({quoted})").fetchall()
        names: set[str] = set()
        for row in rows:
            try:
                name = row["name"]
            except (TypeError, IndexError, KeyError):
                # Tuple rows: ``name`` is the second column of table_info.
                name = row[1]
            names.add(str(name))
        return names
    except Exception:
        return set()
181
+
182
+
183
def _row_authority_rank(row: dict[str, Any]) -> int:
    """Estimate the authority rank of an existing learning row.

    A row whose title, content, reasoning or prevention mentions "francisco",
    "correction" or "correccion" gets the ``francisco_correction`` rank.
    Otherwise the rank is derived from the row's priority (critical 85,
    high 70, medium 50, low 30); a missing or unknown priority counts as 50.
    """
    combined = " ".join(
        str(row.get(key) or "")
        for key in ("title", "content", "reasoning", "prevention")
    )
    # Fold accents as well as case, so the correctly spelled "corrección"
    # matches the unaccented marker.
    text = _normalize_text(combined)
    if any(marker in text for marker in ("francisco", "correction", "correccion")):
        return AUTHORITY_RANKS["francisco_correction"]
    priority = str(row.get("priority") or "medium").strip().lower()
    return {
        "critical": 85,
        "high": 70,
        "medium": 50,
        "low": 30,
    }.get(priority, 50)
197
+
198
+
199
def _similarity(candidate_text: str, row: dict[str, Any]) -> float:
    """Score *candidate_text* against the title and content of *row*.

    Returns 0.0 when either side is empty; otherwise the float value of
    ``hybrid_similarity_score`` with this module's thresholds.
    """
    title = row.get("title") or ""
    body = row.get("content") or ""
    existing_text = f"{title} {body}".strip()
    if not (candidate_text and existing_text):
        return 0.0
    score = hybrid_similarity_score(
        candidate_text,
        existing_text,
        keyword_extractor=extract_keywords,
        strong_semantic_threshold=0.82,
        moderate_semantic_threshold=0.74,
        moderate_keyword_floor=0.08,
    )
    return float(score)
213
+
214
+
215
def _decision(
    *,
    action: str,
    reason: str,
    target: dict[str, Any] | None,
    similarity: float = 0.0,
    source_authority: str,
    existing_rank: int = 0,
    candidate: dict[str, Any],
) -> dict[str, Any]:
    """Assemble the decision payload returned to callers.

    ``ok`` is False only for the ``reject`` action. The target fields are
    zero/empty when there is no target row, and the source authority is
    reported in its normalized form together with its rank.
    """
    authority = normalize_authority(source_authority)
    if target:
        target_id = int(target.get("id") or 0)
        target_title = str(target.get("title") or "")
        target_status = str(target.get("status") or "")
    else:
        target_id, target_title, target_status = 0, "", ""
    payload: dict[str, Any] = {}
    payload["ok"] = action != "reject"
    payload["action"] = action
    payload["allowed_actions"] = list(CANONICAL_ACTIONS)
    payload["reason"] = reason
    payload["target_id"] = target_id
    payload["target_title"] = target_title
    payload["target_status"] = target_status
    payload["similarity"] = round(float(similarity or 0.0), 4)
    payload["source_authority"] = authority
    payload["authority_rank"] = AUTHORITY_RANKS[authority]
    payload["existing_authority_rank"] = int(existing_rank or 0)
    payload["candidate"] = candidate
    return payload
240
+
241
+
242
def resolve_learning_candidate(
    *,
    category: str,
    title: str,
    content: str,
    reasoning: str = "",
    prevention: str = "",
    applies_to: str = "",
    priority: str = "medium",
    supersedes_id: int = 0,
    source_authority: str = "inference",
    conn: sqlite3.Connection | None = None,
) -> dict[str, Any]:
    """Return the canonical action for an incoming learning candidate.

    The function only reads the ``learnings`` table and returns a decision
    dict (see ``_decision``); it never writes. Outcomes:

    * ``reject`` - category, title or content is empty.
    * ``new`` - the table cannot be inspected, or no active row matches.
    * ``merge`` - an active row in the category has the same title
      (case-insensitive) or a similarity of at least 0.85.
    * ``supersede`` - a conflicting row is named by ``supersedes_id``, or the
      source authority is allowed to replace it automatically.
    * ``conflict_review`` - a conflicting row exists and may not be replaced
      automatically.

    ``reasoning`` and ``prevention`` are accepted for interface compatibility
    and are not used in the decision. When ``conn`` is omitted the connection
    comes from ``get_db()``; this function does not close it.
    """
    clean_category = str(category or "").strip().lower()
    clean_title = str(title or "").strip()
    clean_content = str(content or "").strip()
    requested_supersedes = int(supersedes_id or 0)
    candidate = {
        "category": clean_category,
        "title": clean_title,
        "content_preview": clean_content[:240],
        "applies_to": str(applies_to or "").strip(),
        "priority": str(priority or "medium").strip().lower(),
        "supersedes_id": requested_supersedes,
    }

    def decide(
        action: str,
        reason: str,
        target: dict[str, Any] | None = None,
        similarity: float = 0.0,
        existing_rank: int = 0,
    ) -> dict[str, Any]:
        # Every decision shares the same authority and candidate payload.
        return _decision(
            action=action,
            reason=reason,
            target=target,
            similarity=similarity,
            source_authority=source_authority,
            existing_rank=existing_rank,
            candidate=candidate,
        )

    if not clean_category:
        return decide("reject", "category_required")
    if not clean_title or not clean_content:
        return decide("reject", "title_and_content_required")

    conn = conn or get_db()
    columns = _table_columns(conn, "learnings")
    if not columns:
        return decide("new", "learnings_table_unavailable")
    # Older schemas may lack ``status`` / ``updated_at``; adapt the query.
    status_filter = " AND COALESCE(status, 'active') = 'active'" if "status" in columns else ""
    order_by = "updated_at DESC, id DESC" if "updated_at" in columns else "id DESC"
    rows = conn.execute(
        f"""
        SELECT *
        FROM learnings
        WHERE category = ?
        {status_filter}
        ORDER BY {order_by}
        LIMIT 500
        """,
        (clean_category,),
    ).fetchall()
    active_rows = [dict(row) for row in rows]

    incoming_text = f"{clean_title} {clean_content}".strip()
    incoming_rank = authority_rank(source_authority)
    best_score = 0.0
    best_row: dict[str, Any] | None = None
    conflict: dict[str, Any] | None = None
    conflict_similarity = 0.0

    for row in active_rows:
        row_title = str(row.get("title") or "").strip()
        row_content = str(row.get("content") or "").strip()
        if row_title.lower() == clean_title.lower():
            return decide(
                "merge",
                "exact_title_duplicate",
                target=row,
                similarity=1.0,
                existing_rank=_row_authority_rank(row),
            )

        # A conflict needs both sides to declare a scope, and the scopes to overlap.
        row_applies = str(row.get("applies_to") or "")
        scoped_overlap = bool(applies_to and row_applies and applies_overlap(row_applies, applies_to))
        if scoped_overlap and looks_contradictory(f"{row_title} {row_content}", incoming_text):
            conflict = row
            conflict_similarity = _similarity(incoming_text, row)
            break

        score = _similarity(incoming_text, row)
        if score > best_score:
            best_score, best_row = score, row

    if conflict:
        existing_rank = _row_authority_rank(conflict)
        normalized_authority = normalize_authority(source_authority)
        conflict_id = int(conflict.get("id") or 0)
        # Require a real id: 0 means "no supersedes_id given" and must not
        # match a row whose id is missing or zero.
        if requested_supersedes and requested_supersedes == conflict_id:
            return decide(
                "supersede",
                "explicit_supersedes_conflict",
                target=conflict,
                similarity=conflict_similarity,
                existing_rank=existing_rank,
            )
        can_auto_supersede = (
            normalized_authority == "francisco_correction"
            or (
                normalized_authority == "explicit_instruction"
                and incoming_rank >= existing_rank
                and existing_rank < AUTHORITY_RANKS["code_test_evidence"]
            )
        )
        if can_auto_supersede:
            return decide(
                "supersede",
                "higher_authority_conflict",
                target=conflict,
                similarity=conflict_similarity,
                existing_rank=existing_rank,
            )
        return decide(
            "conflict_review",
            "conflicting_active_learning",
            target=conflict,
            similarity=conflict_similarity,
            existing_rank=existing_rank,
        )

    if best_row and best_score >= 0.85:
        return decide(
            "merge",
            "high_similarity",
            target=best_row,
            similarity=best_score,
            existing_rank=_row_authority_rank(best_row),
        )

    return decide("new", "no_active_match", similarity=best_score)
409
+
410
+
411
+ __all__ = [
412
+ "AUTHORITY_RANKS",
413
+ "CANONICAL_ACTIONS",
414
+ "applies_overlap",
415
+ "authority_rank",
416
+ "looks_contradictory",
417
+ "normalize_authority",
418
+ "resolve_learning_candidate",
419
+ ]
@@ -574,17 +574,17 @@ def write_fallback_diary_for_lifecycle_event(
574
574
  technical_reason = str(reason or lifecycle_reason or "fallback-diary").strip()
575
575
  diary_session_id = _preferred_diary_session_id(conn, session_id)
576
576
  summary = (
577
- "Diario automatico de emergencia generado por NEXO Desktop al cerrar "
578
- f"'{title}'. No se confirmo un diario escrito por el agente vivo, asi "
579
- "que se preserva el snapshot disponible para continuidad."
577
+ "Automatic emergency diary generated by NEXO Desktop when closing "
578
+ f"'{title}'. No diary written by the live agent was confirmed, so "
579
+ "the available snapshot is preserved for continuity."
580
580
  )
581
581
  decisions = (
582
- f"Accion de ciclo de vida: {action}. Evento: {event_id}. "
583
- f"Motivo tecnico: {technical_reason}."
582
+ f"Lifecycle action: {action}. Event: {event_id}. "
583
+ f"Technical reason: {technical_reason}."
584
584
  )
585
585
  pending = str(payload.get("current_goal") or payload.get("last_user_message") or "").strip()
586
586
  if not pending:
587
- pending = "Revisar la conversacion al reabrir y continuar desde el snapshot preservado."
587
+ pending = "Review the conversation on reopen and continue from the preserved snapshot."
588
588
  context_next_parts = [
589
589
  f"conversation_id={conversation_id}",
590
590
  f"session_id={session_id}",
@@ -602,10 +602,10 @@ def write_fallback_diary_for_lifecycle_event(
602
602
  discarded="",
603
603
  pending=pending,
604
604
  context_next=context_next,
605
- mental_state="Fallback automatico: el agente vivo no confirmo el cierre dentro del timeout.",
605
+ mental_state="Automatic fallback: the live agent did not confirm closure within the timeout.",
606
606
  domain="nexo-desktop",
607
- user_signals="Cierre/archivo de conversacion; preservar informacion antes de salir.",
608
- self_critique="El cierre no debe depender exclusivamente de que el agente responda a tiempo.",
607
+ user_signals="Conversation close/archive; preserve information before exit.",
608
+ self_critique="Closure must not depend exclusively on the agent responding in time.",
609
609
  source=source or "desktop-lifecycle-fallback",
610
610
  )
611
611
  return {
@@ -3177,7 +3177,61 @@ def _entity_matches_for_query(conn, query: str, *, limit: int) -> tuple[list[dic
3177
3177
  return matches[: int(limit)], boosts
3178
3178
 
3179
3179
 
3180
- def _context_candidate_rows(conn, entity_asset_ids: list[str], *, base_limit: int = 5000) -> list:
3180
def _context_prefilter_limit(default: int = 1200) -> int:
    """Return the prefilter row limit, clamped to the range 100..5000.

    The value is read from the ``NEXO_LOCAL_CONTEXT_PREFILTER_LIMIT``
    environment variable; *default* is used when the variable is unset or
    does not parse as an integer.
    """
    raw = os.environ.get("NEXO_LOCAL_CONTEXT_PREFILTER_LIMIT", str(default))
    try:
        value = int(raw)
    except (TypeError, ValueError):
        # Only a failed conversion falls back; other errors are not hidden.
        value = default
    return max(100, min(value, 5000))
3187
+
3188
+
3189
+ def _context_candidate_rows(
3190
+ conn,
3191
+ entity_asset_ids: list[str],
3192
+ *,
3193
+ search_query: str = "",
3194
+ base_limit: int = 5000,
3195
+ ) -> list:
3196
+ terms = _query_terms(search_query)[:6]
3197
+ prefilter_limit = min(int(base_limit or 5000), _context_prefilter_limit())
3198
+ prefilter_rows = []
3199
+ if terms:
3200
+ term_clauses = []
3201
+ params: list[str] = []
3202
+ for term in terms:
3203
+ term_clauses.append("(lower(a.path) LIKE ? OR lower(COALESCE(v.summary, '')) LIKE ? OR lower(c.text) LIKE ?)")
3204
+ like = f"%{term}%"
3205
+ params.extend([like, like, like])
3206
+ prefilter_rows = conn.execute(
3207
+ f"""
3208
+ SELECT c.chunk_id, c.asset_id, c.text, a.path, a.file_type, a.privacy_class, v.summary,
3209
+ e.vector_json, e.model_id, e.model_revision, e.dimension
3210
+ FROM local_chunks c
3211
+ JOIN local_assets a ON a.asset_id = c.asset_id
3212
+ LEFT JOIN local_asset_versions v ON v.version_id = c.version_id
3213
+ LEFT JOIN local_embeddings e ON e.chunk_id = c.chunk_id
3214
+ WHERE a.status='active'
3215
+ AND a.privacy_class='normal'
3216
+ AND ({" OR ".join(term_clauses)})
3217
+ ORDER BY
3218
+ CASE
3219
+ WHEN {" OR ".join("lower(a.path) LIKE ?" for _ in terms)} THEN 0
3220
+ WHEN {" OR ".join("lower(COALESCE(v.summary, '')) LIKE ?" for _ in terms)} THEN 1
3221
+ ELSE 2
3222
+ END,
3223
+ c.created_at DESC
3224
+ LIMIT ?
3225
+ """,
3226
+ [
3227
+ *params,
3228
+ *(f"%{term}%" for term in terms),
3229
+ *(f"%{term}%" for term in terms),
3230
+ prefilter_limit,
3231
+ ],
3232
+ ).fetchall()
3233
+
3234
+ fallback_limit = prefilter_limit if not terms else max(120, min(500, prefilter_limit // 3))
3181
3235
  base_rows = conn.execute(
3182
3236
  """
3183
3237
  SELECT c.chunk_id, c.asset_id, c.text, a.path, a.file_type, a.privacy_class, v.summary,
@@ -3191,10 +3245,18 @@ def _context_candidate_rows(conn, entity_asset_ids: list[str], *, base_limit: in
3191
3245
  ORDER BY c.created_at DESC
3192
3246
  LIMIT ?
3193
3247
  """,
3194
- (int(base_limit),),
3248
+ (int(fallback_limit),),
3195
3249
  ).fetchall()
3196
3250
  if not entity_asset_ids:
3197
- return base_rows
3251
+ rows = []
3252
+ seen_chunks = set()
3253
+ for row in [*prefilter_rows, *base_rows]:
3254
+ chunk_id = row["chunk_id"]
3255
+ if chunk_id in seen_chunks:
3256
+ continue
3257
+ seen_chunks.add(chunk_id)
3258
+ rows.append(row)
3259
+ return rows
3198
3260
 
3199
3261
  placeholders = ",".join("?" for _ in entity_asset_ids)
3200
3262
  entity_rows = conn.execute(
@@ -3216,7 +3278,7 @@ def _context_candidate_rows(conn, entity_asset_ids: list[str], *, base_limit: in
3216
3278
 
3217
3279
  rows = []
3218
3280
  seen_chunks = set()
3219
- for row in [*entity_rows, *base_rows]:
3281
+ for row in [*entity_rows, *prefilter_rows, *base_rows]:
3220
3282
  chunk_id = row["chunk_id"]
3221
3283
  if chunk_id in seen_chunks:
3222
3284
  continue
@@ -3618,7 +3680,7 @@ def _context_query_conn(
3618
3680
  query_embedding = embeddings.embed_record(search_query)
3619
3681
  qvec = query_embedding["vector"]
3620
3682
  entities_payload, entity_boosts = _entity_matches_for_query(conn, search_query, limit=max(int(limit), 1))
3621
- rows = _context_candidate_rows(conn, list(entity_boosts.keys()), base_limit=5000)
3683
+ rows = _context_candidate_rows(conn, list(entity_boosts.keys()), search_query=search_query, base_limit=5000)
3622
3684
  scored = []
3623
3685
  stale_embedding_seen = False
3624
3686
  for row in rows:
@@ -310,6 +310,8 @@ def _empty_summary(*, since: float, window_seconds: int, store_path: Path) -> di
310
310
  "latest_event_at": 0.0,
311
311
  "latest_used_before_response_at": 0.0,
312
312
  "by_intent": {},
313
+ "by_source": {},
314
+ "by_route_stage": {},
313
315
  }
314
316
 
315
317
 
@@ -371,6 +373,26 @@ def summarize_usage(
371
373
  """,
372
374
  (since,),
373
375
  ).fetchall()
376
+ source_rows = conn.execute(
377
+ f"""
378
+ SELECT source, COUNT(*) AS total
379
+ FROM {USAGE_TABLE}
380
+ WHERE created_at >= ?
381
+ GROUP BY source
382
+ ORDER BY total DESC, source ASC
383
+ """,
384
+ (since,),
385
+ ).fetchall()
386
+ stage_rows = conn.execute(
387
+ f"""
388
+ SELECT route_stage, COUNT(*) AS total
389
+ FROM {USAGE_TABLE}
390
+ WHERE created_at >= ?
391
+ GROUP BY route_stage
392
+ ORDER BY total DESC, route_stage ASC
393
+ """,
394
+ (since,),
395
+ ).fetchall()
374
396
  finally:
375
397
  conn.close()
376
398
  return {
@@ -386,6 +408,8 @@ def summarize_usage(
386
408
  "latest_event_at": float(totals["latest_event_at"] or 0.0),
387
409
  "latest_used_before_response_at": float(totals["latest_used_before_response_at"] or 0.0),
388
410
  "by_intent": {str(row["intent"]): int(row["total"] or 0) for row in intent_rows},
411
+ "by_source": {str(row["source"]): int(row["total"] or 0) for row in source_rows},
412
+ "by_route_stage": {str(row["route_stage"]): int(row["total"] or 0) for row in stage_rows},
389
413
  }
390
414
 
391
415
 
@@ -16,6 +16,8 @@ import db
16
16
  DEFAULT_BACKFILL_LIMIT = 100
17
17
  DEFAULT_PENDING_SLA_SECONDS = 3600
18
18
  DEFAULT_PROCESS_LIMIT = 100
19
+ DEFAULT_INTRADAY_PROCESS_LIMIT = 20
20
+ DEFAULT_INTRADAY_BACKFILL_LIMIT = 20
19
21
  MAX_BATCH_SIZE = 1000
20
22
 
21
23
 
@@ -275,3 +277,29 @@ def process_incremental(
275
277
  "processed": processed,
276
278
  "health": health,
277
279
  }
280
+
281
+
282
def process_intraday_cycle(
    *,
    process_limit: int = DEFAULT_INTRADAY_PROCESS_LIMIT,
    backfill_limit: int = DEFAULT_INTRADAY_BACKFILL_LIMIT,
    pending_sla_seconds: int = DEFAULT_PENDING_SLA_SECONDS,
    now: float | None = None,
) -> dict:
    """Run the low-limit daytime path for evidence-backed intraday facts.

    Delegates to ``process_incremental`` with clamped limits and returns its
    result extended with ``mode``, ``limits`` and ``promotion`` entries.
    """
    outcome = dict(
        process_incremental(
            process_limit=_clamp_limit(process_limit, DEFAULT_INTRADAY_PROCESS_LIMIT),
            backfill_limit=_clamp_limit(backfill_limit, DEFAULT_INTRADAY_BACKFILL_LIMIT),
            pending_sla_seconds=pending_sla_seconds,
            now=now,
        )
    )
    outcome["mode"] = "intraday"
    # NOTE(review): the SLA reported here is floored at 1, while the raw value
    # is what gets passed to process_incremental above - confirm they agree.
    outcome["limits"] = {
        "process_limit": _clamp_limit(process_limit, DEFAULT_INTRADAY_PROCESS_LIMIT),
        "backfill_limit": _clamp_limit(backfill_limit, DEFAULT_INTRADAY_BACKFILL_LIMIT),
        "pending_sla_seconds": max(1, int(pending_sla_seconds or DEFAULT_PENDING_SLA_SECONDS)),
    }
    outcome["promotion"] = "hot_context_intraday_fact_only"
    return outcome
@@ -246,8 +246,8 @@ def memory_timeline(query: str = "", *, project_hint: str = "", time_range: str
246
246
  def format_memory_search(result: dict) -> str:
247
247
  candidates = result.get("candidates") or []
248
248
  if not candidates:
249
- return "No hay evidencia suficiente en Memory Observations para esa consulta."
250
- lines = [f"MEMORY SEARCH ({len(candidates)}) — {result.get('query') or '(sin query)'}"]
249
+ return "There is not enough evidence in Memory Observations for that query."
250
+ lines = [f"MEMORY SEARCH ({len(candidates)}) — {result.get('query') or '(no query)'}"]
251
251
  for item in candidates:
252
252
  refs = item.get("evidence_refs") or []
253
253
  refs_note = f" refs={', '.join(refs[:3])}" if refs else " refs=none"
@@ -269,10 +269,10 @@ def answer_memory_question(query: str, *, project_hint: str = "", time_range: st
269
269
  candidates = result.get("candidates") or []
270
270
  evidence_candidates = [item for item in candidates if item.get("evidence_refs")]
271
271
  if not evidence_candidates:
272
- return "No tengo evidencia suficiente en la memoria nueva para responder eso sin inventar."
273
- lines = ["Respuesta basada en evidencia:"]
272
+ return "There is not enough evidence in new memory to answer that without inventing."
273
+ lines = ["Evidence-based answer:"]
274
274
  for item in evidence_candidates[:limit]:
275
275
  refs = item.get("evidence_refs") or []
276
- refs_note = ", ".join(refs[:3]) if refs else "sin refs"
276
+ refs_note = ", ".join(refs[:3]) if refs else "no refs"
277
277
  lines.append(f"- {item.get('summary')} ({refs_note})")
278
278
  return "\n".join(lines)
@@ -13,6 +13,8 @@ _LANGUAGE_LABELS = {
13
13
  "en": "English (en)",
14
14
  "es": "Spanish (es)",
15
15
  "fr": "French (fr)",
16
+ "gl": "Galician (gl)",
17
+ "eu": "Basque (eu)",
16
18
  "it": "Italian (it)",
17
19
  "ja": "Japanese (ja)",
18
20
  "pt": "Portuguese (pt)",