nexo-brain 5.3.10 → 5.3.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -14,6 +14,21 @@ from protocol_settings import get_protocol_strictness
14
14
  READ_LIKE_TOOLS = {"Read"}
15
15
  WRITE_LIKE_TOOLS = {"Edit", "MultiEdit", "Write"}
16
16
  DELETE_LIKE_TOOLS = {"Delete"}
17
# Short tool names that count as "non-trivial work": protocol warnings are
# only collected when the tool's short name is in this set.
NON_TRIVIAL_PROTOCOL_TOOLS = {
    "Read",
    "Bash",
    "Grep",
    "Glob",
    "Edit",
    "MultiEdit",
    "Write",
    "Delete",
}

# Short tool names for which protocol warnings are never collected.
PROTOCOL_SKIP_TOOLS = {
    "nexo_startup",
    "nexo_smart_startup",
    "nexo_stop",
    "nexo_heartbeat",
    "nexo_task_open",
    "nexo_task_close",
    "nexo_workflow_open",
    "nexo_workflow_update",
    "nexo_guard_check",
    "nexo_guard_file_check",
    "nexo_rules_check",
}

# Task types that the protocol checks treat as action tasks.
ACTION_TASK_TYPES = {
    "edit",
    "execute",
    "delegate",
}
17
32
  NEXO_CODE_ROOT = Path(os.environ.get("NEXO_CODE", str(Path(__file__).resolve().parent))).expanduser().resolve()
18
33
  LIVE_REPO_ROOT = NEXO_CODE_ROOT.parent if NEXO_CODE_ROOT.name == "src" else NEXO_CODE_ROOT
19
34
  PUBLIC_REPO_DIRS = {
@@ -50,6 +65,11 @@ def _operation_kind(tool_name: str) -> str:
50
65
  return "other"
51
66
 
52
67
 
68
+ def _short_tool_name(tool_name: str) -> str:
69
+ clean = str(tool_name or "").strip()
70
+ return clean.rsplit("__", 1)[-1] if "__" in clean else clean
71
+
72
+
53
73
  def _normalize_file_path(path: str) -> str:
54
74
  return _normalize_path_token(str(Path(path)))
55
75
 
@@ -154,7 +174,8 @@ def _resolve_nexo_sid(conn, external_session_id: str) -> str:
154
174
  def _find_open_task_for_file(conn, sid: str, filepath: str) -> dict | None:
155
175
  target = _normalize_file_path(filepath)
156
176
  rows = conn.execute(
157
- """SELECT task_id, files, guard_has_blocking
177
+ """SELECT task_id, files, guard_has_blocking, task_type, plan, unknowns,
178
+ verification_step, opened_with_guard, must_change_log, must_verify
158
179
  FROM protocol_tasks
159
180
  WHERE session_id = ? AND status = 'open'
160
181
  ORDER BY opened_at DESC""",
@@ -173,7 +194,8 @@ def _find_open_task_for_file(conn, sid: str, filepath: str) -> dict | None:
173
194
 
174
195
  def _find_any_open_task(conn, sid: str) -> dict | None:
175
196
  row = conn.execute(
176
- """SELECT task_id, files, guard_has_blocking
197
+ """SELECT task_id, files, guard_has_blocking, task_type, plan, unknowns,
198
+ verification_step, opened_with_guard, must_change_log, must_verify
177
199
  FROM protocol_tasks
178
200
  WHERE session_id = ? AND status = 'open'
179
201
  ORDER BY opened_at DESC
@@ -183,6 +205,26 @@ def _find_any_open_task(conn, sid: str) -> dict | None:
183
205
  return dict(row) if row else None
184
206
 
185
207
 
208
+ def _find_any_open_workflow(conn, sid: str) -> dict | None:
209
+ row = conn.execute(
210
+ """SELECT run_id, protocol_task_id, current_step_key
211
+ FROM workflow_runs
212
+ WHERE session_id = ? AND status IN ('open', 'running', 'blocked', 'waiting_approval')
213
+ ORDER BY updated_at DESC, run_id DESC
214
+ LIMIT 1""",
215
+ (sid,),
216
+ ).fetchone()
217
+ return dict(row) if row else None
218
+
219
+
220
+ def _session_has_guard_check(conn, sid: str) -> bool:
221
+ row = conn.execute(
222
+ "SELECT 1 FROM guard_checks WHERE session_id = ? LIMIT 1",
223
+ (sid,),
224
+ ).fetchone()
225
+ return bool(row)
226
+
227
+
186
228
  def _find_open_debt(conn, *, session_id: str, task_id: str, debt_type: str, file_token: str) -> dict | None:
187
229
  row = conn.execute(
188
230
  """SELECT *
@@ -241,6 +283,98 @@ def _ensure_protocol_debt(
241
283
  )
242
284
 
243
285
 
286
+ def _task_list_field(task: dict | None, key: str) -> list:
287
+ if not task:
288
+ return []
289
+ try:
290
+ parsed = json.loads(task.get(key) or "[]")
291
+ except Exception:
292
+ return []
293
+ return parsed if isinstance(parsed, list) else []
294
+
295
+
296
def _task_needs_workflow(task: dict | None) -> bool:
    """Decide whether an open task looks multi-step enough to need a workflow.

    Only tasks whose ``task_type`` is in ``ACTION_TASK_TYPES`` qualify. Such a
    task needs a workflow when it has more than one plan item, at least one
    unknown, more than one file, or a non-blank ``verification_step``.

    Args:
        task: Task mapping, or ``None``.

    Returns:
        ``True`` when any of the criteria above holds, ``False`` otherwise.
    """
    if not task:
        return False

    task_type = str(task.get("task_type") or "").strip()
    if task_type not in ACTION_TASK_TYPES:
        return False

    looks_multi_step = (
        len(_task_list_field(task, "plan")) > 1
        or len(_task_list_field(task, "unknowns")) > 0
        or len(_task_list_field(task, "files")) > 1
    )
    if looks_multi_step:
        return True

    verification = str(task.get("verification_step") or "").strip()
    return bool(verification)
308
+
309
+
310
+ def _append_protocol_warning(warnings: list[dict], message: str) -> None:
311
+ clean = (message or "").strip()
312
+ if not clean:
313
+ return
314
+ if any((item.get("message") or "").strip() == clean for item in warnings):
315
+ return
316
+ warnings.append({"message": clean})
317
+
318
+
319
def _collect_protocol_warnings(conn, *, sid: str, tool_name: str) -> list[dict]:
    """Build protocol reminders for a tool call based on session state.

    Args:
        conn: Database connection passed through to the lookup helpers.
        sid: Session id; may be empty when no session is known.
        tool_name: Raw tool name; only its short form is inspected.

    Returns:
        A list of ``{"message": ...}`` dicts. The list is empty when the
        tool's short name is in ``PROTOCOL_SKIP_TOOLS`` or not in
        ``NON_TRIVIAL_PROTOCOL_TOOLS``.
    """
    short_name = _short_tool_name(tool_name)
    is_monitored = (
        short_name not in PROTOCOL_SKIP_TOOLS
        and short_name in NON_TRIVIAL_PROTOCOL_TOOLS
    )
    if not is_monitored:
        return []

    reminders: list[dict] = []

    # No session id: a single startup reminder is all we can say.
    if not sid:
        _append_protocol_warning(
            reminders,
            "Trabajo no trivial detectado antes de `nexo_startup(...)`. Arranca NEXO, abre `nexo_task_open(...)`, y si esto va a durar varias fases abre también `nexo_workflow_open(...)` antes de seguir.",
        )
        return reminders

    task = _find_any_open_task(conn, sid)
    has_guard = _session_has_guard_check(conn, sid)

    # Session exists but no task is open.
    if not task:
        guard_note = ""
        if short_name in {"Read", "Bash", "Grep", "Glob"} and not has_guard:
            guard_note = " Ejecuta `nexo_guard_check(...)` antes de leer código condicionado o compartido."
        _append_protocol_warning(
            reminders,
            "Trabajo no trivial detectado sin `nexo_task_open(...)`. Ábrelo ahora y, si esto va a cruzar varios pasos o mensajes, añade `nexo_workflow_open(...)`." + guard_note,
        )
        _append_protocol_warning(
            reminders,
            "Recordatorio protocolario: mantén `nexo_heartbeat(...)` al día y no cierres en optimista; si hay cambios reales, registra `nexo_change_log(...)` o cierra con `nexo_task_close(...)` más evidencia.",
        )
        return reminders

    task_id = str(task.get("task_id") or "").strip()
    is_action_task = str(task.get("task_type") or "").strip() in ACTION_TASK_TYPES
    guard_seen = task.get("opened_with_guard") or has_guard

    if is_action_task and not guard_seen:
        _append_protocol_warning(
            reminders,
            f"La tarea {task_id} está activa sin guard visible. Ejecuta `nexo_guard_check(...)` antes de tocar código condicionado o compartido.",
        )

    workflow = _find_any_open_workflow(conn, sid)
    if _task_needs_workflow(task) and not workflow:
        _append_protocol_warning(
            reminders,
            f"La tarea {task_id} ya tiene pinta de multi-step y sigue sin `nexo_workflow_open(...)`. Ábrelo para que checkpoints, resume y replay no dependan de memoria implícita.",
        )

    if is_action_task and short_name in {"Bash", "Edit", "MultiEdit", "Write", "Delete"}:
        if task.get("must_change_log"):
            change_note = " Si editas de verdad y no vas a usar `nexo_task_close(...)` inmediatamente, captura `nexo_change_log(...)`."
        else:
            change_note = ""
        _append_protocol_warning(
            reminders,
            f"Recordatorio protocolario para {task_id}: mantén `nexo_heartbeat(...)` al día y ciérrala con `nexo_task_close(...)` más evidencia antes de decir que está resuelta.{change_note}",
        )

    return reminders
376
+
377
+
244
378
  def _collect_automation_live_repo_blocks(
245
379
  conn,
246
380
  *,
@@ -429,21 +563,20 @@ def process_pre_tool_event(payload: dict) -> dict:
429
563
  def process_tool_event(payload: dict) -> dict:
430
564
  tool_name = str(payload.get("tool_name", "")).strip()
431
565
  op = _operation_kind(tool_name)
432
- if op == "other":
433
- return {"ok": True, "skipped": True, "reason": "tool not monitored"}
434
-
435
566
  tool_input = payload.get("tool_input")
436
567
  files = _extract_touched_files(tool_input)
437
- if not files:
438
- return {"ok": True, "skipped": True, "reason": "no touched files found"}
439
-
440
568
  conn = get_db()
441
569
  sid = _resolve_nexo_sid(conn, str(payload.get("session_id", "")))
442
- if not sid:
570
+ warnings = _collect_protocol_warnings(conn, sid=sid, tool_name=tool_name)
571
+
572
+ if op == "other" and not warnings:
573
+ return {"ok": True, "skipped": True, "reason": "tool not monitored"}
574
+ if not files and op in {"read", "write", "delete"} and not warnings:
575
+ return {"ok": True, "skipped": True, "reason": "no touched files found"}
576
+ if not sid and not warnings:
443
577
  return {"ok": True, "skipped": True, "reason": "session not mapped to nexo"}
444
578
 
445
- conditioned = _load_conditioned_learnings(conn, files)
446
- warnings: list[dict] = []
579
+ conditioned = _load_conditioned_learnings(conn, files) if sid else {}
447
580
  violations: list[dict] = []
448
581
 
449
582
  for filepath in files:
@@ -545,6 +678,9 @@ def format_hook_message(result: dict) -> str:
545
678
  return ""
546
679
  lines = ["NEXO DISCIPLINE:"]
547
680
  for item in result.get("warnings", []):
681
+ if item.get("message") and not item.get("learning_ids"):
682
+ lines.append(f"- PROTOCOL REMINDER: {item['message']}")
683
+ continue
548
684
  if item.get("debt_id"):
549
685
  lines.append(
550
686
  f"- REVIEW FILE RULES: {item['file']} -> learnings {item['learning_ids']}. "
@@ -22,6 +22,8 @@ import time
22
22
  from datetime import datetime, timedelta
23
23
  from pathlib import Path
24
24
 
25
+ from db import VALID_IMPACT_LEVELS, VALID_TASK_TYPES, validate_impact_level, validate_task_type
26
+
25
27
 
26
28
  def _get_db():
27
29
  from db import get_db
@@ -734,9 +736,19 @@ def handle_cortex_check(
734
736
  Returns:
735
737
  Mode (ask/propose/act), available tools, warnings, and relevant Core Rules
736
738
  """
739
+ try:
740
+ clean_type = validate_task_type(task_type)
741
+ except ValueError as exc:
742
+ return "\n".join(
743
+ [
744
+ f"ERROR: {exc}",
745
+ f"Valid task types: {', '.join(sorted(VALID_TASK_TYPES))}",
746
+ ]
747
+ )
748
+
737
749
  state = {
738
750
  "goal": goal.strip() if goal else "",
739
- "task_type": task_type if task_type in ("answer", "analyze", "edit", "execute", "delegate") else "answer",
751
+ "task_type": clean_type,
740
752
  "plan": _parse_json_list(plan),
741
753
  "known_facts": _parse_json_list(known_facts),
742
754
  "unknowns": _parse_json_list(unknowns),
@@ -860,8 +872,30 @@ def handle_cortex_decide(
860
872
  indent=2,
861
873
  )
862
874
 
863
- clean_type = task_type if task_type in {"answer", "analyze", "edit", "execute", "delegate"} else "execute"
864
- clean_level = impact_level if impact_level in {"medium", "high", "critical"} else "high"
875
+ try:
876
+ clean_type = validate_task_type(task_type)
877
+ except ValueError as exc:
878
+ return json.dumps(
879
+ {
880
+ "ok": False,
881
+ "error": str(exc),
882
+ "valid_task_types": sorted(VALID_TASK_TYPES),
883
+ },
884
+ ensure_ascii=False,
885
+ indent=2,
886
+ )
887
+ try:
888
+ clean_level = validate_impact_level(impact_level)
889
+ except ValueError as exc:
890
+ return json.dumps(
891
+ {
892
+ "ok": False,
893
+ "error": str(exc),
894
+ "valid_impact_levels": sorted(VALID_IMPACT_LEVELS),
895
+ },
896
+ ensure_ascii=False,
897
+ indent=2,
898
+ )
865
899
  parsed_constraints = _parse_json_list(constraints)
866
900
  parsed_evidence = _parse_json_list(evidence_refs)
867
901
  try:
@@ -11,13 +11,14 @@ if str(SRC_DIR) not in sys.path:
11
11
  sys.path.insert(0, str(SRC_DIR))
12
12
 
13
13
 
14
- def handle_doctor(tier: str = "boot", fix: bool = False, output: str = "text") -> str:
14
+ def handle_doctor(tier: str = "boot", fix: bool = False, output: str = "text", plane: str = "") -> str:
15
15
  """Unified diagnostic report for boot/runtime/deep health.
16
16
 
17
17
  Args:
18
18
  tier: Diagnostic tier — boot, runtime, deep, or all (default: boot)
19
19
  fix: Apply deterministic fixes (default: False)
20
20
  output: Output format — text or json (default: text)
21
+ plane: Diagnostic plane — runtime_personal, installation_live, or database_real
21
22
  """
22
23
  from doctor.orchestrator import run_doctor
23
24
  from doctor.formatters import format_report
@@ -27,7 +28,7 @@ def handle_doctor(tier: str = "boot", fix: bool = False, output: str = "text") -
27
28
  if output not in ("text", "json"):
28
29
  return f"Invalid output '{output}'. Use: text, json"
29
30
 
30
- report = run_doctor(tier=tier, fix=fix)
31
+ report = run_doctor(tier=tier, fix=fix, plane=plane)
31
32
  return format_report(report, fmt=output)
32
33
 
33
34
 
@@ -10,6 +10,8 @@ import secrets
10
10
  import time
11
11
 
12
12
  from db import (
13
+ VALID_TASK_TYPES,
14
+ VALID_CLOSE_OUTCOMES,
13
15
  close_protocol_task,
14
16
  create_followup,
15
17
  latest_cortex_evaluation_for_task,
@@ -28,6 +30,8 @@ from db import (
28
30
  resolve_protocol_debts,
29
31
  search_learnings,
30
32
  task_has_cortex_evaluation,
33
+ validate_close_outcome,
34
+ validate_task_type,
31
35
  )
32
36
  from plugins.cortex import evaluate_cortex_state
33
37
  from plugins.guard import handle_guard_check
@@ -651,7 +655,18 @@ def handle_confidence_check(
651
655
  clean_goal = (goal or "").strip()
652
656
  if not clean_goal:
653
657
  return json.dumps({"ok": False, "error": "goal is required"}, ensure_ascii=False, indent=2)
654
- clean_type = task_type if task_type in {"answer", "analyze", "edit", "execute", "delegate"} else "answer"
658
+ try:
659
+ clean_type = validate_task_type(task_type)
660
+ except ValueError as exc:
661
+ return json.dumps(
662
+ {
663
+ "ok": False,
664
+ "error": str(exc),
665
+ "valid_task_types": sorted(VALID_TASK_TYPES),
666
+ },
667
+ ensure_ascii=False,
668
+ indent=2,
669
+ )
655
670
  result = evaluate_response_confidence(
656
671
  goal=clean_goal,
657
672
  task_type=clean_type,
@@ -693,7 +708,18 @@ def handle_task_open(
693
708
  if not clean_goal:
694
709
  return json.dumps({"ok": False, "error": "goal is required"}, ensure_ascii=False, indent=2)
695
710
 
696
- clean_type = task_type if task_type in {"answer", "analyze", "edit", "execute", "delegate"} else "answer"
711
+ try:
712
+ clean_type = validate_task_type(task_type)
713
+ except ValueError as exc:
714
+ return json.dumps(
715
+ {
716
+ "ok": False,
717
+ "error": str(exc),
718
+ "valid_task_types": sorted(VALID_TASK_TYPES),
719
+ },
720
+ ensure_ascii=False,
721
+ indent=2,
722
+ )
697
723
  files_list = _parse_list(files)
698
724
  protocol_strictness = get_protocol_strictness()
699
725
  if protocol_strictness in {"strict", "learning"} and clean_type == "edit" and not files_list:
@@ -949,7 +975,19 @@ def handle_task_close(
949
975
  indent=2,
950
976
  )
951
977
 
952
- clean_outcome = outcome if outcome in {"done", "partial", "blocked", "failed", "cancelled"} else "failed"
978
+ try:
979
+ clean_outcome = validate_close_outcome(outcome)
980
+ except ValueError as exc:
981
+ return json.dumps(
982
+ {
983
+ "ok": False,
984
+ "error": str(exc),
985
+ "task_id": task_id,
986
+ "valid_outcomes": sorted(VALID_CLOSE_OUTCOMES),
987
+ },
988
+ ensure_ascii=False,
989
+ indent=2,
990
+ )
953
991
  clean_evidence = (evidence or "").strip()
954
992
  files_changed_list = _parse_list(files_changed)
955
993
  planned_files = _parse_list(task.get("files") or "[]")
@@ -20,6 +20,9 @@ from script_registry import (
20
20
  get_declared_schedule,
21
21
  )
22
22
 
23
+ LEGACY_BACKUP_CRON_ID = "backup"
24
+ LEGACY_BACKUP_SUMMARY = "legacy backup file evidence"
25
+
23
26
 
24
27
  def handle_schedule_status(hours: int = 24, cron_id: str = '') -> str:
25
28
  """Show cron execution status — what ran, what failed, durations.
@@ -30,6 +33,8 @@ def handle_schedule_status(hours: int = 24, cron_id: str = '') -> str:
30
33
  """
31
34
  if cron_id:
32
35
  runs = cron_runs_recent(hours, cron_id)
36
+ if cron_id == LEGACY_BACKUP_CRON_ID:
37
+ runs = _select_backup_runs(runs, hours)
33
38
  if not runs:
34
39
  return f"No runs for '{cron_id}' in the last {hours}h."
35
40
  schedule_meta = get_personal_script_schedule(cron_id) or {}
@@ -53,7 +58,7 @@ def handle_schedule_status(hours: int = 24, cron_id: str = '') -> str:
53
58
  return "\n".join(lines)
54
59
 
55
60
  # Summary view — one line per cron
56
- summary = cron_runs_summary(hours)
61
+ summary = _merge_legacy_summaries(cron_runs_summary(hours), hours)
57
62
  if not summary:
58
63
  return f"No cron executions recorded in the last {hours}h."
59
64
 
@@ -82,6 +87,119 @@ def handle_schedule_status(hours: int = 24, cron_id: str = '') -> str:
82
87
  return "\n".join(lines)
83
88
 
84
89
 
90
def _select_backup_runs(db_runs: list[dict], hours: int) -> list[dict]:
    """Choose between database runs and file-derived legacy backup runs.

    Args:
        db_runs: Runs already loaded from the database.
        hours: Look-back window handed to ``_legacy_backup_runs``.

    Returns:
        The legacy runs when ``_prefer_legacy_over_db`` says so, otherwise
        ``db_runs`` unchanged.
    """
    file_runs = _legacy_backup_runs(hours)
    use_legacy = _prefer_legacy_over_db(db_runs, file_runs)
    return file_runs if use_legacy else db_runs
95
+
96
+
97
def _merge_legacy_summaries(summary_rows: list[dict], hours: int) -> list[dict]:
    """Merge the legacy backup summary into the per-cron summary rows.

    Args:
        summary_rows: Summary rows (one per cron id); may be ``None`` or empty.
        hours: Look-back window handed to ``_legacy_backup_summary``.

    Returns:
        Shallow copies of the input rows when no legacy summary exists.
        Otherwise the rows keyed by ``cron_id`` (with the backup entry
        replaced when ``_prefer_legacy_summary`` says so), sorted by
        ``last_run`` in descending order.
    """
    merged = [dict(row) for row in (summary_rows or [])]
    legacy = _legacy_backup_summary(hours)
    if not legacy:
        return merged

    indexed: dict = {}
    for row in merged:
        indexed[row["cron_id"]] = row

    if _prefer_legacy_summary(indexed.get(LEGACY_BACKUP_CRON_ID), legacy):
        indexed[LEGACY_BACKUP_CRON_ID] = legacy

    ordered = list(indexed.values())
    ordered.sort(key=lambda row: row.get("last_run") or "", reverse=True)
    return ordered
111
+
112
+
113
def _prefer_legacy_over_db(db_runs: list[dict], legacy_runs: list[dict]) -> bool:
    """Decide whether legacy backup runs should replace the database runs.

    Only the first entry of each list is compared, using the parsed
    ``started_at`` values.

    Returns:
        ``False`` when there are no legacy runs or the first legacy timestamp
        cannot be parsed; ``True`` when there are no database runs or the
        first database timestamp cannot be parsed; otherwise whether the
        legacy timestamp is strictly later.
    """
    if not legacy_runs:
        return False
    if not db_runs:
        return True

    newest_db = _parse_db_timestamp(db_runs[0].get("started_at"))
    newest_legacy = _parse_db_timestamp(legacy_runs[0].get("started_at"))
    if newest_legacy is None:
        return False
    return newest_db is None or newest_legacy > newest_db
125
+
126
+
127
def _prefer_legacy_summary(existing: dict | None, legacy: dict) -> bool:
    """Decide whether the legacy summary should replace the existing one.

    The comparison uses the parsed ``last_run`` value of each summary.

    Returns:
        ``False`` when ``legacy`` is falsy or its ``last_run`` cannot be
        parsed; ``True`` when ``existing`` is falsy or its ``last_run``
        cannot be parsed; otherwise whether the legacy timestamp is strictly
        later.
    """
    if not legacy:
        return False
    if not existing:
        return True

    newest_existing = _parse_db_timestamp(existing.get("last_run"))
    newest_legacy = _parse_db_timestamp(legacy.get("last_run"))
    if newest_legacy is None:
        return False
    return newest_existing is None or newest_legacy > newest_existing
139
+
140
+
141
def _legacy_backup_summary(hours: int) -> dict | None:
    """Summarise the legacy backup runs found within the look-back window.

    Returns:
        The summary built by ``_build_summary_from_runs`` for the legacy
        backup cron id, or ``None`` when ``_legacy_backup_runs`` finds nothing.
    """
    file_runs = _legacy_backup_runs(hours)
    return _build_summary_from_runs(LEGACY_BACKUP_CRON_ID, file_runs) if file_runs else None
146
+
147
+
148
def _legacy_backup_runs(hours: int) -> list[dict]:
    """Synthesise run records from backup files on disk.

    Looks for ``nexo-*.db`` files in ``<NEXO_HOME>/backups`` (``NEXO_HOME``
    defaults to ``~/.nexo``) and turns every file modified within the last
    ``hours`` hours into a run record. The file's modification time, in UTC,
    is used as both ``started_at`` and ``ended_at``. Exit code, summary and
    duration are fixed placeholder values, since a file carries no such data.

    Args:
        hours: Size of the look-back window in hours.

    Returns:
        Run dicts sorted by ``started_at`` descending; empty when the backup
        directory does not exist or no file falls inside the window.
    """
    default_home = str(Path.home() / ".nexo")
    backup_dir = Path(os.environ.get("NEXO_HOME", default_home)) / "backups"
    if not backup_dir.exists():
        return []

    oldest_allowed = _now_utc().timestamp() - (hours * 3600)
    found: list[dict] = []
    for candidate in backup_dir.glob("nexo-*.db"):
        try:
            mtime = candidate.stat().st_mtime
        except OSError:
            # File vanished or is unreadable: skip it and keep scanning.
            continue
        if mtime < oldest_allowed:
            continue
        moment = datetime.fromtimestamp(mtime, tz=timezone.utc).replace(microsecond=0)
        stamp = moment.strftime("%Y-%m-%d %H:%M:%S")
        found.append(
            {
                "cron_id": LEGACY_BACKUP_CRON_ID,
                "started_at": stamp,
                "ended_at": stamp,
                "exit_code": 0,
                "summary": LEGACY_BACKUP_SUMMARY,
                "error": "",
                "duration_secs": 1.0,
            }
        )
    return sorted(found, key=lambda row: row["started_at"], reverse=True)
177
+
178
+
179
+ def _build_summary_from_runs(cron_id: str, runs: list[dict]) -> dict:
180
+ completed_runs = [
181
+ row for row in runs if row.get("exit_code") is not None and row.get("ended_at")
182
+ ]
183
+ duration_values = [
184
+ float(row["duration_secs"])
185
+ for row in completed_runs
186
+ if row.get("duration_secs") is not None
187
+ ]
188
+ return {
189
+ "cron_id": cron_id,
190
+ "total_runs": len(runs),
191
+ "succeeded": sum(1 for row in completed_runs if row.get("exit_code") == 0),
192
+ "completed_runs": len(completed_runs),
193
+ "failed": sum(1 for row in completed_runs if row.get("exit_code") not in (None, 0)),
194
+ "open_runs": len(runs) - len(completed_runs),
195
+ "avg_duration": round(sum(duration_values) / len(duration_values), 1) if duration_values else None,
196
+ "last_run": runs[0].get("started_at"),
197
+ "last_exit_code": runs[0].get("exit_code"),
198
+ "last_ended_at": runs[0].get("ended_at"),
199
+ "last_summary": next((row.get("summary") for row in runs if row.get("summary")), ""),
200
+ }
201
+
202
+
85
203
  def _summary_has_warning(summary: str = "") -> bool:
86
204
  lowered = str(summary or "").strip().lower()
87
205
  if not lowered:
@@ -67,8 +67,9 @@ MACOS_FDA_PROBE_PATHS = (
67
67
  )
68
68
  DEFAULT_CLAUDE_CODE_MODEL = "claude-opus-4-6[1m]"
69
69
  DEFAULT_CLAUDE_CODE_REASONING_EFFORT = ""
70
- DEFAULT_CODEX_MODEL = "gpt-5.4"
71
- DEFAULT_CODEX_REASONING_EFFORT = "xhigh"
70
+ # Codex defaults mirror the user's primary model — no hardcoded third-party models.
71
+ DEFAULT_CODEX_MODEL = DEFAULT_CLAUDE_CODE_MODEL
72
+ DEFAULT_CODEX_REASONING_EFFORT = ""
72
73
 
73
74
 
74
75
  def _schedule_defaults() -> dict:
@@ -18,9 +18,9 @@ What this script does (idempotent and best-effort):
18
18
  3. Detects degradation signals on the 7-day window. The criteria are
19
19
  intentionally conservative to avoid false alarms on small samples:
20
20
  a. recommendation_accept_rate < 50% AND total_evaluations >= 10
21
- b. linked_outcome_success_rate < 50% AND linked_outcomes_total >= 5
21
+ b. linked_outcome_success_rate < 50% AND linked_outcomes_resolved >= 5
22
22
  c. override_success_rate > recommended_success_rate by >= 20pp
23
- AND linked_outcomes_total >= 5
23
+ AND linked_outcomes_resolved >= 5
24
24
  4. Opens (or refreshes) NF-CORTEX-QUALITY-DROP followup with the offending
25
25
  metrics when degradation is detected. Idempotent: if a non-PENDING /
26
26
  resolved followup of the same id already exists, it is updated in
@@ -96,6 +96,13 @@ def detect_quality_signals(summary: dict) -> list[dict]:
96
96
  total = int(summary.get("total_evaluations") or 0)
97
97
  accept_rate = float(summary.get("recommendation_accept_rate") or 0.0)
98
98
  linked_total = int(summary.get("linked_outcomes_total") or 0)
99
+ linked_met = int(summary.get("linked_outcomes_met") or 0)
100
+ linked_missed = int(summary.get("linked_outcomes_missed") or 0)
101
+ linked_pending = int(summary.get("linked_outcomes_pending") or 0)
102
+ linked_resolved = linked_met + linked_missed
103
+ if linked_resolved <= 0 and linked_total > 0:
104
+ # Older callers may omit the met/missed counters; fall back to total minus pending.
105
+ linked_resolved = max(0, linked_total - linked_pending)
99
106
  linked_success = float(summary.get("linked_outcome_success_rate") or 0.0)
100
107
  recommended_success = float(summary.get("recommended_success_rate") or 0.0)
101
108
  override_success = float(summary.get("override_success_rate") or 0.0)
@@ -114,21 +121,25 @@ def detect_quality_signals(summary: dict) -> list[dict]:
114
121
  ),
115
122
  })
116
123
 
117
- if linked_total >= LINKED_MIN_SAMPLE and linked_success < LINKED_SUCCESS_FLOOR:
124
+ linked_scope = f"{linked_resolved} resolved linked outcomes"
125
+ if linked_pending > 0:
126
+ linked_scope += f" ({linked_total} total, {linked_pending} pending)"
127
+
128
+ if linked_resolved >= LINKED_MIN_SAMPLE and linked_success < LINKED_SUCCESS_FLOOR:
118
129
  signals.append({
119
130
  "kind": "linked_success",
120
131
  "severity": "warn",
121
132
  "metric_value": linked_success,
122
133
  "threshold": LINKED_SUCCESS_FLOOR,
123
- "sample_size": linked_total,
134
+ "sample_size": linked_resolved,
124
135
  "message": (
125
136
  f"Cortex linked-outcome success rate {linked_success:.1f}% on "
126
- f"{linked_total} linked outcomes is below the "
137
+ f"{linked_scope} is below the "
127
138
  f"{LINKED_SUCCESS_FLOOR:.0f}% floor."
128
139
  ),
129
140
  })
130
141
 
131
- if linked_total >= LINKED_MIN_SAMPLE:
142
+ if linked_resolved >= LINKED_MIN_SAMPLE:
132
143
  gap = override_success - recommended_success
133
144
  if gap >= OVERRIDE_GAP_THRESHOLD:
134
145
  signals.append({
@@ -136,12 +147,12 @@ def detect_quality_signals(summary: dict) -> list[dict]:
136
147
  "severity": "error",
137
148
  "metric_value": gap,
138
149
  "threshold": OVERRIDE_GAP_THRESHOLD,
139
- "sample_size": linked_total,
150
+ "sample_size": linked_resolved,
140
151
  "message": (
141
152
  f"Cortex overrides outperform recommendations by {gap:.1f}pp "
142
153
  f"(override {override_success:.1f}% vs recommended "
143
- f"{recommended_success:.1f}% on {linked_total} linked "
144
- "outcomes). The recommender is mis-ranking choices."
154
+ f"{recommended_success:.1f}% on {linked_scope}). The "
155
+ "recommender is mis-ranking choices."
145
156
  ),
146
157
  })
147
158
 
@@ -171,15 +182,37 @@ def _upsert_quality_followup(signals: list[dict]) -> str:
171
182
  resolved, a fresh row is inserted with the same id (REPLACE) so the
172
183
  new degradation pattern is visible.
173
184
  """
174
- if not signals:
175
- return "no_signal"
176
-
177
185
  try:
178
- from db import get_followup, get_db
186
+ from db import complete_followup, get_followup, get_db
179
187
  except Exception as e:
180
188
  _log(f"WARN: cannot import db helpers: {e}")
181
189
  return "skipped_no_db"
182
190
 
191
+ try:
192
+ existing = get_followup(FOLLOWUP_ID)
193
+ except Exception as e:
194
+ _log(f"WARN: get_followup raised: {e}")
195
+ existing = None
196
+
197
+ if not signals:
198
+ if not existing:
199
+ return "no_signal"
200
+ status = str(existing.get("status") or "").upper()
201
+ if status.startswith("COMPLETED") or status in {"DELETED", "ARCHIVED", "BLOCKED", "WAITING", "CANCELLED"}:
202
+ return "no_signal"
203
+ try:
204
+ complete_followup(
205
+ FOLLOWUP_ID,
206
+ result=(
207
+ "Auto-resolved by cortex-cycle: no active degradation signals in the "
208
+ "current 7d window."
209
+ ),
210
+ )
211
+ except Exception as e:
212
+ _log(f"WARN: failed to close followup: {e}")
213
+ return "failed_close"
214
+ return "closed"
215
+
183
216
  summary_lines = ["Cortex continuous validation found quality degradation:"]
184
217
  for sig in signals:
185
218
  summary_lines.append(
@@ -197,12 +230,6 @@ def _upsert_quality_followup(signals: list[dict]) -> str:
197
230
  )
198
231
  now_epoch = datetime.now().timestamp()
199
232
 
200
- try:
201
- existing = get_followup(FOLLOWUP_ID)
202
- except Exception as e:
203
- _log(f"WARN: get_followup raised: {e}")
204
- existing = None
205
-
206
233
  try:
207
234
  conn = get_db()
208
235
  conn.execute(
@@ -255,8 +282,8 @@ def run() -> int:
255
282
  f"signals={len(signals)}"
256
283
  )
257
284
 
258
- if signals:
259
- action = _upsert_quality_followup(signals)
285
+ action = _upsert_quality_followup(signals)
286
+ if signals or action not in {"no_signal"}:
260
287
  _log(f"Cortex cycle: followup {FOLLOWUP_ID} {action} ({len(signals)} signal(s))")
261
288
 
262
289
  return 0