nexo-brain 2.7.0 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. package/.claude-plugin/plugin.json +1 -1
  2. package/README.md +66 -12
  3. package/hooks/hooks.json +79 -0
  4. package/package.json +1 -1
  5. package/src/agent_runner.py +290 -6
  6. package/src/cli.py +111 -0
  7. package/src/client_preferences.py +94 -0
  8. package/src/client_sync.py +202 -2
  9. package/src/cognitive/__init__.py +1 -1
  10. package/src/cognitive/_search.py +39 -19
  11. package/src/dashboard/app.py +140 -0
  12. package/src/dashboard/templates/base.html +4 -0
  13. package/src/dashboard/templates/protocol.html +199 -0
  14. package/src/db/__init__.py +23 -1
  15. package/src/db/_learnings.py +31 -4
  16. package/src/db/_personal_scripts.py +12 -0
  17. package/src/db/_protocol.py +303 -0
  18. package/src/db/_schema.py +248 -0
  19. package/src/db/_watchers.py +173 -0
  20. package/src/db/_workflow.py +952 -0
  21. package/src/doctor/providers/runtime.py +918 -7
  22. package/src/evolution_cycle.py +62 -0
  23. package/src/hook_guardrails.py +308 -0
  24. package/src/hooks/protocol-guardrail.sh +10 -0
  25. package/src/nexo_sdk.py +103 -0
  26. package/src/plugins/cognitive_memory.py +18 -0
  27. package/src/plugins/cortex.py +55 -35
  28. package/src/plugins/guard.py +132 -16
  29. package/src/plugins/protocol.py +911 -0
  30. package/src/plugins/schedule.py +40 -6
  31. package/src/plugins/simple_api.py +103 -0
  32. package/src/plugins/skills.py +67 -0
  33. package/src/plugins/state_watchers.py +79 -0
  34. package/src/plugins/workflow.py +588 -0
  35. package/src/public_contribution.py +86 -12
  36. package/src/script_registry.py +142 -0
  37. package/src/scripts/deep-sleep/apply_findings.py +204 -0
  38. package/src/scripts/deep-sleep/collect.py +49 -4
  39. package/src/scripts/nexo-agent-run.py +2 -0
  40. package/src/scripts/nexo-daily-self-audit.py +843 -5
  41. package/src/scripts/nexo-evolution-run.py +343 -1
  42. package/src/server.py +92 -6
  43. package/src/skills_runtime.py +151 -0
  44. package/src/state_watchers_runtime.py +334 -0
  45. package/src/tools_learnings.py +345 -7
  46. package/src/tools_sessions.py +183 -0
  47. package/templates/CLAUDE.md.template +9 -1
  48. package/templates/CODEX.AGENTS.md.template +10 -2
@@ -120,52 +120,93 @@ def save_public_contribution_config(config: dict) -> dict:
120
120
 
121
121
 
122
122
def _gh(*args: str, cwd: Path | None = None, timeout: int = 20) -> subprocess.CompletedProcess:
    """Run the GitHub CLI (``gh``) with ``args`` and capture its output.

    The child process receives a copy of ``os.environ``.  A token is resolved
    from ``GH_TOKEN``, then ``GITHUB_TOKEN``, then
    ``_github_token_from_credentials()``; when one is found it is exported to
    the child as ``GH_TOKEN``.

    Args:
        *args: Arguments placed after ``gh`` on the command line.
        cwd: Optional working directory for the child process.
        timeout: Seconds passed to ``subprocess.run`` as its timeout.

    Returns:
        The ``CompletedProcess`` with text-mode ``stdout``/``stderr``.  The
        exit status is not checked here; callers inspect ``returncode``.
    """
    env = os.environ.copy()

    # Strip each variable on its own: a blank GH_TOKEN must not hide a usable
    # GITHUB_TOKEN (or-ing the raw values first would pick the blank one).
    token = ""
    for name in ("GH_TOKEN", "GITHUB_TOKEN"):
        token = str(env.get(name) or "").strip()
        if token:
            break
    if not token:
        token = _github_token_from_credentials()
    if token:
        env["GH_TOKEN"] = token

    return subprocess.run(
        ["gh", *args],
        cwd=str(cwd) if cwd else None,
        capture_output=True,
        text=True,
        timeout=timeout,
        env=env,
    )
130
138
 
131
139
 
140
+ def _github_token_from_credentials() -> str:
141
+ try:
142
+ from db import get_credential
143
+ except Exception:
144
+ return ""
145
+ for key in ("token", "gh_token", "github_token"):
146
+ try:
147
+ matches = get_credential("github", key)
148
+ except Exception:
149
+ continue
150
+ for item in matches or []:
151
+ value = str(item.get("value") or "").strip()
152
+ if value:
153
+ return value
154
+ return ""
155
+
156
+
132
157
def github_auth_status() -> dict:
    """Report whether the GitHub CLI is installed and authenticated.

    Runs ``gh api user`` through ``_gh`` and reads the ``login`` field of the
    JSON reply.

    Returns:
        A dict with the keys ``ok`` (bool), ``message`` (str), ``login``
        (str) and ``code`` (str).  ``code`` is one of:

        * ``"gh_missing"`` - ``gh`` is not on ``PATH``.
        * ``"gh_error"`` - invoking ``gh`` raised an exception.
        * ``"keychain_blocked"`` - ``gh`` failed and its output mentions
          ``keychain``.
        * ``"auth_missing"`` - any other ``gh`` failure, or a successful
          call whose reply carries no login.
        * ``"ok"`` - a login was returned.
    """
    if not shutil.which("gh"):
        return {"ok": False, "message": "GitHub CLI not found.", "login": "", "code": "gh_missing"}
    try:
        result = _gh("api", "user", timeout=20)
    except Exception as e:
        return {"ok": False, "message": str(e), "login": "", "code": "gh_error"}

    if result.returncode != 0:
        message = (result.stderr or result.stdout or "").strip()
        # Every non-keychain failure maps to "auth_missing"; the former
        # token/authentication/login branch assigned that same value and was
        # therefore a no-op.
        code = "keychain_blocked" if "keychain" in message.lower() else "auth_missing"
        return {"ok": False, "message": message, "login": "", "code": code}

    try:
        payload = json.loads(result.stdout or "{}")
        login = str(payload.get("login") or "").strip()
    except Exception:
        # Unparseable or non-object output is treated as "no login".
        login = ""
    return {"ok": bool(login), "message": "", "login": login, "code": "ok" if login else "auth_missing"}
147
179
 
148
180
 
149
181
def ensure_fork(login: str) -> dict:
    """Make sure ``<login>/nexo`` exists on GitHub, forking upstream if needed.

    First checks the fork with ``gh repo view``; when that fails, runs
    ``gh repo fork UPSTREAM_REPO --clone=false --remote=false``.

    Args:
        login: GitHub user name that should own the fork.  Surrounding
            whitespace is ignored; a blank value is rejected.

    Returns:
        A dict with the keys ``ok``, ``message``, ``fork_repo`` and ``code``.
        ``code`` is ``"ok"``, ``"missing_login"``, ``"gh_missing"``,
        ``"fork_unavailable"`` (both ``gh`` calls exited non-zero) or
        ``"fork_error"`` (an exception was raised while calling ``gh``).
    """
    # Normalise first so a whitespace-only login cannot produce the invalid
    # repository slug "   /nexo".
    login = str(login or "").strip()
    if not login:
        return {"ok": False, "message": "Missing GitHub login.", "fork_repo": "", "code": "missing_login"}
    fork_repo = f"{login}/nexo"
    if not shutil.which("gh"):
        return {"ok": False, "message": "GitHub CLI not found.", "fork_repo": "", "code": "gh_missing"}
    try:
        check = _gh("repo", "view", fork_repo, "--json", "nameWithOwner", timeout=20)
        if check.returncode == 0:
            return {"ok": True, "message": "", "fork_repo": fork_repo, "code": "ok"}
        create = _gh("repo", "fork", UPSTREAM_REPO, "--clone=false", "--remote=false", timeout=60)
        if create.returncode == 0:
            return {"ok": True, "message": "", "fork_repo": fork_repo, "code": "ok"}
        # Prefer the fork attempt's output; fall back to the view attempt's.
        detail = create.stderr or create.stdout or check.stderr or check.stdout or ""
        return {
            "ok": False,
            "message": detail.strip(),
            "fork_repo": "",
            "code": "fork_unavailable",
        }
    except Exception as e:
        return {"ok": False, "message": str(e), "fork_repo": "", "code": "fork_error"}
202
+
203
+
204
def _set_pending_auth(config: dict, message: str) -> dict:
    """Flag ``config`` as waiting for GitHub auth and hand it back.

    Sets ``status`` to ``STATUS_PENDING_AUTH`` and ``last_result`` to
    ``"pending_auth:<message>"``, passes the dict to
    ``save_public_contribution_config``, then stores the bare ``message``
    under ``config["message"]``.

    Args:
        config: Public-contribution config dict; mutated in place.
        message: Human-readable reason for the pending state.

    Returns:
        The same ``config`` object.
    """
    config.update(
        status=STATUS_PENDING_AUTH,
        last_result=f"pending_auth:{message}",
    )
    save_public_contribution_config(config)
    # Set only after the save call, so the dict given to the saver does not
    # yet carry this function's "message" value.
    config["message"] = message
    return config
169
210
 
170
211
 
171
212
  def _parse_iso(ts: str | None) -> datetime | None:
@@ -336,13 +377,45 @@ def refresh_public_contribution_state(config: dict | None = None) -> dict:
336
377
  config["status"] = STATUS_COOLDOWN
337
378
  save_public_contribution_config(config)
338
379
  return config
380
+ return _set_pending_auth(
381
+ config,
382
+ f"GitHub Draft PR status check failed: {(result.stderr or result.stdout).strip() or 'unknown gh error'}",
383
+ )
339
384
 
340
385
  cooldown_until = _parse_iso(config.get("cooldown_until"))
341
386
  if cooldown_until and cooldown_until > _utcnow():
342
387
  config["status"] = STATUS_COOLDOWN
343
- elif config["mode"] == MODE_PENDING_AUTH:
388
+ save_public_contribution_config(config)
389
+ return config
390
+
391
+ auth = github_auth_status()
392
+ if not auth.get("ok"):
393
+ return _set_pending_auth(
394
+ config,
395
+ auth.get("message") or "GitHub authentication is missing for public contribution.",
396
+ )
397
+ login = str(auth.get("login") or "").strip()
398
+ configured_login = str(config.get("github_user") or "").strip()
399
+ if configured_login and login and configured_login.lower() != login.lower():
400
+ return _set_pending_auth(
401
+ config,
402
+ f"GitHub login drift detected: configured {configured_login}, current {login}. Reconfirm public contribution credentials.",
403
+ )
404
+ if login and not configured_login:
405
+ config["github_user"] = login
406
+
407
+ if not str(config.get("fork_repo") or "").strip():
408
+ fork = ensure_fork(login)
409
+ if not fork.get("ok"):
410
+ return _set_pending_auth(
411
+ config,
412
+ fork.get("message") or "GitHub fork setup is missing for public contribution.",
413
+ )
414
+ config["fork_repo"] = str(fork.get("fork_repo") or "").strip()
415
+
416
+ if config["mode"] == MODE_PENDING_AUTH:
344
417
  config["status"] = STATUS_PENDING_AUTH
345
- elif config["mode"] == MODE_DRAFT_PRS:
418
+ else:
346
419
  config["status"] = STATUS_ACTIVE
347
420
  save_public_contribution_config(config)
348
421
  return config
@@ -351,7 +424,8 @@ def refresh_public_contribution_state(config: dict | None = None) -> dict:
351
424
  def can_run_public_contribution(config: dict | None = None) -> tuple[bool, str, dict]:
352
425
  config = refresh_public_contribution_state(config)
353
426
  if config["mode"] == MODE_PENDING_AUTH or config["status"] == STATUS_PENDING_AUTH:
354
- return False, "github authentication or fork setup is pending", config
427
+ detail = str(config.get("message") or config.get("last_result") or "").strip()
428
+ return False, detail or "github authentication or fork setup is pending", config
355
429
  if config["mode"] != MODE_DRAFT_PRS or not config.get("enabled"):
356
430
  return False, "public contribution is disabled", config
357
431
  if config["status"] == STATUS_PAUSED_OPEN_PR:
@@ -662,6 +662,105 @@ def _canonical_schedule_value(schedule_type: str, schedule_value: str | dict | l
662
662
  return str(schedule_value or "")
663
663
 
664
664
 
665
+ def _extract_launchctl_value(output: str, prefixes: str | tuple[str, ...]) -> str | None:
666
+ if isinstance(prefixes, str):
667
+ prefixes = (prefixes,)
668
+ for raw_line in output.splitlines():
669
+ line = raw_line.strip()
670
+ for prefix in prefixes:
671
+ if line.startswith(prefix):
672
+ return line[len(prefix):].strip()
673
+ return None
674
+
675
+
676
def _launchctl_service_state(label: str) -> dict:
    """Query ``launchctl print gui/<uid>/<label>`` and summarise the result.

    Args:
        label: launchd service label to inspect.

    Returns:
        A dict with the keys ``loaded``, ``pid``, ``state``,
        ``last_exit_status`` and ``error``.  ``loaded`` is ``None`` when the
        platform is not ``Darwin`` (launchctl is not called), ``False`` when
        the call raised, exited non-zero or printed "Could not find
        service", and ``True`` otherwise.
    """

    def _snapshot(**overrides) -> dict:
        # Start from the blank record and overlay whatever this outcome knows.
        blank = {
            "loaded": None,
            "pid": "",
            "state": "",
            "last_exit_status": "",
            "error": "",
        }
        blank.update(overrides)
        return blank

    if platform.system() != "Darwin":
        return _snapshot()

    try:
        target = f"gui/{os.getuid()}/{label}"
        completed = subprocess.run(
            ["launchctl", "print", target],
            capture_output=True,
            text=True,
            timeout=3,
        )
    except Exception as exc:
        return _snapshot(loaded=False, error=str(exc))

    # stdout and stderr are searched together.
    combined = (completed.stdout or "") + (completed.stderr or "")
    failed = completed.returncode != 0 or "Could not find service" in combined
    if failed:
        return _snapshot(loaded=False, error=combined.strip() or "not loaded")

    pid_text = _extract_launchctl_value(combined, ("pid = ", "PID = "))
    state_text = _extract_launchctl_value(combined, "state = ")
    exit_text = _extract_launchctl_value(
        combined,
        ("last exit code = ", "last exit status = ", "LastExitStatus = "),
    )
    return _snapshot(
        loaded=True,
        pid=pid_text or "",
        state=state_text or "",
        last_exit_status=exit_text or "",
    )
711
+
712
+
713
+ def _keep_alive_runtime_snapshot(record: dict) -> dict:
714
+ if record.get("schedule_type") != "keep_alive":
715
+ return {
716
+ "runtime_state": "unknown",
717
+ "runtime_summary": "",
718
+ "runtime_problems": [],
719
+ }
720
+
721
+ label = record.get("launchd_label") or f"com.nexo.{record.get('cron_id', '')}"
722
+ service = _launchctl_service_state(str(label))
723
+ problems: list[str] = []
724
+
725
+ if service.get("loaded") is False:
726
+ problems.append("keep_alive service not loaded in launchd")
727
+ return {
728
+ "runtime_state": "stale",
729
+ "runtime_summary": "keep_alive service not loaded",
730
+ "runtime_problems": problems,
731
+ }
732
+
733
+ pid = str(service.get("pid", "") or "").strip()
734
+ service_state = str(service.get("state", "") or "").strip().lower()
735
+ last_exit = str(service.get("last_exit_status", "") or "").strip()
736
+ if pid:
737
+ return {
738
+ "runtime_state": "alive",
739
+ "runtime_summary": f"running with pid {pid}",
740
+ "runtime_problems": [],
741
+ }
742
+ if service_state in {"running", "spawned"}:
743
+ return {
744
+ "runtime_state": "alive",
745
+ "runtime_summary": f"launchd state {service_state}",
746
+ "runtime_problems": [],
747
+ }
748
+ if last_exit and last_exit != "0":
749
+ problems.append(f"keep_alive daemon exited with status {last_exit}")
750
+ return {
751
+ "runtime_state": "degraded",
752
+ "runtime_summary": f"last exit {last_exit}",
753
+ "runtime_problems": problems,
754
+ }
755
+
756
+ problems.append("keep_alive service is loaded but has no active pid")
757
+ return {
758
+ "runtime_state": "degraded",
759
+ "runtime_summary": "loaded but not running",
760
+ "runtime_problems": problems,
761
+ }
762
+
763
+
665
764
  def _discover_personal_schedule_records() -> list[dict]:
666
765
  """Inspect macOS LaunchAgents and return raw personal schedule records."""
667
766
  if platform.system() != "Darwin":
@@ -737,6 +836,12 @@ def audit_personal_schedules() -> dict:
737
836
  "healthy": 0,
738
837
  "problems": 0,
739
838
  "managed_registered": 0,
839
+ "keep_alive": 0,
840
+ "runtime_alive": 0,
841
+ "runtime_degraded": 0,
842
+ "runtime_duplicated": 0,
843
+ "runtime_stale": 0,
844
+ "runtime_unknown": 0,
740
845
  }
741
846
 
742
847
  for record in _discover_personal_schedule_records():
@@ -790,6 +895,7 @@ def audit_personal_schedules() -> dict:
790
895
  schedule_state = "orphaned"
791
896
 
792
897
  audited_record = dict(record)
898
+ runtime_snapshot = _keep_alive_runtime_snapshot(record)
793
899
  audited_record.update({
794
900
  "schedule_origin": schedule_origin,
795
901
  "schedule_declared": declared_valid,
@@ -799,6 +905,7 @@ def audit_personal_schedules() -> dict:
799
905
  "problems": problems,
800
906
  "script_name": script.get("name", "") if script else "",
801
907
  "declared_schedule": declared if script else {},
908
+ **runtime_snapshot,
802
909
  })
803
910
  audited.append(audited_record)
804
911
  summary[schedule_origin] += 1
@@ -808,6 +915,41 @@ def audit_personal_schedules() -> dict:
808
915
  else:
809
916
  summary["problems"] += 1
810
917
 
918
+ duplicate_cron_ids: dict[str, int] = {}
919
+ duplicate_script_paths: dict[str, int] = {}
920
+ for record in audited:
921
+ if record.get("schedule_type") != "keep_alive":
922
+ continue
923
+ cron_id = str(record.get("cron_id", "") or "")
924
+ script_path = str(record.get("script_path", "") or "")
925
+ if cron_id:
926
+ duplicate_cron_ids[cron_id] = duplicate_cron_ids.get(cron_id, 0) + 1
927
+ if script_path:
928
+ duplicate_script_paths[script_path] = duplicate_script_paths.get(script_path, 0) + 1
929
+
930
+ for record in audited:
931
+ if record.get("schedule_type") == "keep_alive":
932
+ cron_id = str(record.get("cron_id", "") or "")
933
+ script_path = str(record.get("script_path", "") or "")
934
+ duplicated = (
935
+ (cron_id and duplicate_cron_ids.get(cron_id, 0) > 1)
936
+ or (script_path and duplicate_script_paths.get(script_path, 0) > 1)
937
+ )
938
+ if duplicated:
939
+ runtime_problems = list(record.get("runtime_problems", []))
940
+ runtime_problems.append("duplicate keep_alive schedules discovered for the same cron/script")
941
+ record["runtime_state"] = "duplicated"
942
+ record["runtime_summary"] = "multiple keep_alive schedules discovered"
943
+ record["runtime_problems"] = runtime_problems
944
+
945
+ if record.get("schedule_type") == "keep_alive":
946
+ summary["keep_alive"] += 1
947
+ runtime_state = str(record.get("runtime_state", "unknown") or "unknown")
948
+ key = f"runtime_{runtime_state}"
949
+ if key not in summary:
950
+ summary[key] = 0
951
+ summary[key] += 1
952
+
811
953
  return {
812
954
  "schedules": audited,
813
955
  "summary": summary,
@@ -1080,6 +1080,10 @@ def _aggregate_delivery_metrics(applied_logs: list[dict]) -> dict:
1080
1080
  "skipped_dedupe": 0,
1081
1081
  "errors": 0,
1082
1082
  "engineering_followups": 0,
1083
+ "followup_dedupe_matches": 0,
1084
+ "learning_reinforcements": 0,
1085
+ "learning_duplicate_skips": 0,
1086
+ "learning_contradiction_reviews": 0,
1083
1087
  }
1084
1088
  for payload in applied_logs:
1085
1089
  stats = payload.get("stats") or {}
@@ -1093,6 +1097,16 @@ def _aggregate_delivery_metrics(applied_logs: list[dict]) -> dict:
1093
1097
  description = str(details.get("description", "") or "") + " " + str(details.get("reasoning", "") or "")
1094
1098
  if "engineering" in description.lower() or "guardrail" in description.lower():
1095
1099
  totals["engineering_followups"] += 1
1100
+ if details.get("outcome") == "matched_existing_followup":
1101
+ totals["followup_dedupe_matches"] += 1
1102
+ elif action.get("action_type") == "learning_add":
1103
+ outcome = str(details.get("outcome", "") or "")
1104
+ if outcome == "reinforced_learning":
1105
+ totals["learning_reinforcements"] += 1
1106
+ elif outcome == "duplicate_learning":
1107
+ totals["learning_duplicate_skips"] += 1
1108
+ elif outcome == "contradiction_review":
1109
+ totals["learning_contradiction_reviews"] += 1
1096
1110
 
1097
1111
  attempted = totals["applied_actions"] + totals["deferred_actions"] + totals["skipped_dedupe"] + totals["errors"]
1098
1112
  totals["dedupe_rate_pct"] = _safe_pct(totals["skipped_dedupe"], attempted)
@@ -1100,6 +1114,147 @@ def _aggregate_delivery_metrics(applied_logs: list[dict]) -> dict:
1100
1114
  return totals
1101
1115
 
1102
1116
 
1117
+ def _semantic_duplicate_metrics(items: list[tuple[str, str]], *, threshold: float = 0.82) -> dict:
1118
+ filtered = [(item_id, _normalize_text(text)) for item_id, text in items if _normalize_text(text)]
1119
+ if len(filtered) < 2:
1120
+ return {
1121
+ "cluster_count": 0,
1122
+ "duplicate_items": 0,
1123
+ "duplicate_excess": 0,
1124
+ "sample_clusters": [],
1125
+ }
1126
+
1127
+ used: set[int] = set()
1128
+ clusters: list[list[tuple[str, str]]] = []
1129
+ for index, (item_id, text) in enumerate(filtered):
1130
+ if index in used:
1131
+ continue
1132
+ cluster = [(item_id, text)]
1133
+ for other_index in range(index + 1, len(filtered)):
1134
+ if other_index in used:
1135
+ continue
1136
+ other_id, other_text = filtered[other_index]
1137
+ if _text_similarity(text, other_text) >= threshold:
1138
+ cluster.append((other_id, other_text))
1139
+ used.add(other_index)
1140
+ if len(cluster) > 1:
1141
+ used.add(index)
1142
+ clusters.append(cluster)
1143
+
1144
+ return {
1145
+ "cluster_count": len(clusters),
1146
+ "duplicate_items": sum(len(cluster) for cluster in clusters),
1147
+ "duplicate_excess": sum(max(0, len(cluster) - 1) for cluster in clusters),
1148
+ "sample_clusters": [
1149
+ [item_id for item_id, _ in cluster[:4]]
1150
+ for cluster in clusters[:5]
1151
+ ],
1152
+ }
1153
+
1154
+
1155
def _followup_deduplication_metrics() -> dict:
    """Measure near-duplicate descriptions among open followups in ``NEXO_DB``.

    Reads the ``followups`` table (``description`` plus ``id`` and
    ``status`` when those columns exist).  Rows whose status is ``done``,
    ``completed``, ``cancelled`` or ``resolved`` are excluded; a missing or
    empty status counts as ``pending``.

    Returns:
        A dict with ``open_followups``, ``duplicate_clusters``,
        ``duplicate_open_followups`` (items beyond the first in each
        cluster), ``duplicate_rate_pct`` and ``sample_clusters``.  When the
        table has no ``description`` column, zeroed metrics are returned
        with ``duplicate_rate_pct`` set to ``None``.
    """
    cols = _table_columns(NEXO_DB, "followups")
    if "description" not in cols:
        return {
            "open_followups": 0,
            "duplicate_clusters": 0,
            "duplicate_open_followups": 0,
            "duplicate_rate_pct": None,
            "sample_clusters": [],
        }

    # Column names come from the fixed list below, never from user input.
    select_cols = ["description"]
    if "id" in cols:
        select_cols.append("id")
    if "status" in cols:
        select_cols.append("status")

    conn = sqlite3.connect(str(NEXO_DB))
    try:
        conn.row_factory = sqlite3.Row
        rows = [dict(row) for row in conn.execute(f"SELECT {', '.join(select_cols)} FROM followups").fetchall()]
    finally:
        # Close even when the query or row conversion raises.
        conn.close()

    open_rows = []
    for row in rows:
        status = str(row.get("status", "pending") or "pending").strip().lower()
        if status in {"done", "completed", "cancelled", "resolved"}:
            continue
        # Fall back to the description, then to a positional label, when the
        # row has no usable id.
        identifier = str(row.get("id") or row.get("description") or f"followup-{len(open_rows)+1}")
        open_rows.append((identifier, str(row.get("description", "") or "")))

    duplicates = _semantic_duplicate_metrics(open_rows)
    return {
        "open_followups": len(open_rows),
        "duplicate_clusters": duplicates["cluster_count"],
        "duplicate_open_followups": duplicates["duplicate_excess"],
        "duplicate_rate_pct": _safe_pct(duplicates["duplicate_excess"], len(open_rows)),
        "sample_clusters": duplicates["sample_clusters"],
    }
1193
+
1194
+
1195
def _learning_consolidation_metrics() -> dict:
    """Measure weak and near-duplicate active learnings in ``NEXO_DB``.

    Reads the ``learnings`` table (``title`` and ``content`` plus any of
    ``id``, ``status``, ``weight``, ``reasoning``, ``prevention``,
    ``applies_to``, ``guard_hits`` that exist).  Only rows whose status is
    ``active`` are considered; a missing or empty status counts as active.

    An active learning is counted as weak when the first matching rule holds:
    its numeric ``weight`` is below 1.0; it has neither ``reasoning`` nor
    ``prevention``; or it has ``applies_to`` set but ``guard_hits`` <= 0.

    Returns:
        A dict with ``active_learnings``, ``weak_active_learnings``,
        ``duplicate_clusters``, ``duplicate_active_learnings``,
        ``noise_pressure`` (weak + duplicate excess), ``noise_rate_pct`` and
        ``sample_clusters``.  When ``title`` or ``content`` is missing from
        the table, zeroed metrics are returned with ``noise_rate_pct`` set
        to ``None``.
    """
    cols = _table_columns(NEXO_DB, "learnings")
    if not {"title", "content"}.issubset(cols):
        return {
            "active_learnings": 0,
            "weak_active_learnings": 0,
            "duplicate_clusters": 0,
            "duplicate_active_learnings": 0,
            "noise_pressure": 0,
            "noise_rate_pct": None,
            "sample_clusters": [],
        }

    # Column names come from the fixed lists below, never from user input.
    select_cols = ["title", "content"]
    if "id" in cols:
        select_cols.append("id")
    for field in ("status", "weight", "reasoning", "prevention", "applies_to", "guard_hits"):
        if field in cols:
            select_cols.append(field)

    conn = sqlite3.connect(str(NEXO_DB))
    try:
        conn.row_factory = sqlite3.Row
        rows = [dict(row) for row in conn.execute(f"SELECT {', '.join(select_cols)} FROM learnings").fetchall()]
    finally:
        # Close even when the query or row conversion raises.
        conn.close()

    active_rows = []
    weak_active = 0
    for row in rows:
        status = str(row.get("status", "active") or "active").strip().lower()
        if status != "active":
            continue
        active_rows.append(row)
        weight = row.get("weight")
        reasoning = str(row.get("reasoning", "") or "").strip()
        prevention = str(row.get("prevention", "") or "").strip()
        guard_hits = int(row.get("guard_hits", 0) or 0)
        applies_to = str(row.get("applies_to", "") or "").strip()
        # The rules are mutually exclusive, so a row is counted at most once.
        if isinstance(weight, (int, float)) and float(weight) < 1.0:
            weak_active += 1
        elif not reasoning and not prevention:
            weak_active += 1
        elif applies_to and guard_hits <= 0:
            weak_active += 1

    duplicates = _semantic_duplicate_metrics(
        [
            (str(row.get("id") or f"learning-{index}"), f"{row.get('title', '')} {row.get('content', '')}")
            for index, row in enumerate(active_rows, 1)
        ],
        threshold=0.8,
    )
    noise_pressure = weak_active + duplicates["duplicate_excess"]
    return {
        "active_learnings": len(active_rows),
        "weak_active_learnings": weak_active,
        "duplicate_clusters": duplicates["cluster_count"],
        "duplicate_active_learnings": duplicates["duplicate_excess"],
        "noise_pressure": noise_pressure,
        "noise_rate_pct": _safe_pct(noise_pressure, len(active_rows)),
        "sample_clusters": duplicates["sample_clusters"],
    }
1256
+
1257
+
1103
1258
  def _load_previous_period_summary(kind: str, label: str) -> dict | None:
1104
1259
  pattern = f"*-{kind}-summary.json"
1105
1260
  candidates: list[tuple[str, Path]] = []
@@ -1169,6 +1324,10 @@ def _build_period_trend(summary: dict, previous_summary: dict | None) -> dict:
1169
1324
  "avg_trust_delta": None,
1170
1325
  "total_corrections_delta": None,
1171
1326
  "protocol_compliance_delta": None,
1327
+ "followup_duplicate_open_delta": None,
1328
+ "followup_duplicate_rate_delta": None,
1329
+ "learning_noise_delta": None,
1330
+ "learning_noise_rate_delta": None,
1172
1331
  }
1173
1332
 
1174
1333
  current_protocol = summary.get("protocol_summary", {}).get("overall_compliance_pct")
@@ -1177,6 +1336,14 @@ def _build_period_trend(summary: dict, previous_summary: dict | None) -> dict:
1177
1336
  previous_mood = previous_summary.get("avg_mood_score")
1178
1337
  current_trust = summary.get("avg_trust_score")
1179
1338
  previous_trust = previous_summary.get("avg_trust_score")
1339
+ current_followup = (summary.get("followup_deduplication") or {}).get("duplicate_open_followups")
1340
+ previous_followup = (previous_summary.get("followup_deduplication") or {}).get("duplicate_open_followups")
1341
+ current_followup_rate = (summary.get("followup_deduplication") or {}).get("duplicate_rate_pct")
1342
+ previous_followup_rate = (previous_summary.get("followup_deduplication") or {}).get("duplicate_rate_pct")
1343
+ current_learning_noise = (summary.get("learning_consolidation") or {}).get("noise_pressure")
1344
+ previous_learning_noise = (previous_summary.get("learning_consolidation") or {}).get("noise_pressure")
1345
+ current_learning_rate = (summary.get("learning_consolidation") or {}).get("noise_rate_pct")
1346
+ previous_learning_rate = (previous_summary.get("learning_consolidation") or {}).get("noise_rate_pct")
1180
1347
 
1181
1348
  return {
1182
1349
  "has_previous": True,
@@ -1184,6 +1351,10 @@ def _build_period_trend(summary: dict, previous_summary: dict | None) -> dict:
1184
1351
  "avg_trust_delta": round(current_trust - previous_trust, 1) if isinstance(current_trust, (int, float)) and isinstance(previous_trust, (int, float)) else None,
1185
1352
  "total_corrections_delta": int(summary.get("total_corrections", 0) or 0) - int(previous_summary.get("total_corrections", 0) or 0),
1186
1353
  "protocol_compliance_delta": round(current_protocol - previous_protocol, 1) if isinstance(current_protocol, (int, float)) and isinstance(previous_protocol, (int, float)) else None,
1354
+ "followup_duplicate_open_delta": int(current_followup or 0) - int(previous_followup or 0) if current_followup is not None or previous_followup is not None else None,
1355
+ "followup_duplicate_rate_delta": round(float(current_followup_rate) - float(previous_followup_rate), 1) if isinstance(current_followup_rate, (int, float)) and isinstance(previous_followup_rate, (int, float)) else None,
1356
+ "learning_noise_delta": int(current_learning_noise or 0) - int(previous_learning_noise or 0) if current_learning_noise is not None or previous_learning_noise is not None else None,
1357
+ "learning_noise_rate_delta": round(float(current_learning_rate) - float(previous_learning_rate), 1) if isinstance(current_learning_rate, (int, float)) and isinstance(previous_learning_rate, (int, float)) else None,
1187
1358
  }
1188
1359
 
1189
1360
 
@@ -1236,6 +1407,8 @@ def _build_period_summary(target_date: str, synthesis: dict, *, kind: str, windo
1236
1407
  ]
1237
1408
  protocol_summary = _aggregate_protocol_summary(extractions)
1238
1409
  delivery_metrics = _aggregate_delivery_metrics(applied_logs)
1410
+ followup_deduplication = _followup_deduplication_metrics()
1411
+ learning_consolidation = _learning_consolidation_metrics()
1239
1412
  previous_summary = _load_previous_period_summary(kind, label)
1240
1413
  project_pulse = _build_project_pulse(top_projects, previous_summary)
1241
1414
 
@@ -1267,6 +1440,8 @@ def _build_period_summary(target_date: str, synthesis: dict, *, kind: str, windo
1267
1440
  "recurring_agenda": recurring_agenda,
1268
1441
  "protocol_summary": protocol_summary,
1269
1442
  "delivery_metrics": delivery_metrics,
1443
+ "followup_deduplication": followup_deduplication,
1444
+ "learning_consolidation": learning_consolidation,
1270
1445
  "summary": summary,
1271
1446
  }
1272
1447
  period_summary["trend"] = _build_period_trend(period_summary, previous_summary)
@@ -1328,6 +1503,27 @@ def _render_period_summary_markdown(summary: dict) -> str:
1328
1503
  lines.append(f"- Dedupe rate: {delivery_metrics['dedupe_rate_pct']:.1f}%")
1329
1504
  if delivery_metrics.get("error_rate_pct") is not None:
1330
1505
  lines.append(f"- Error rate: {delivery_metrics['error_rate_pct']:.1f}%")
1506
+ lines.append(f"- Followup dedupe matches: {delivery_metrics.get('followup_dedupe_matches', 0)}")
1507
+ lines.append(f"- Learning reinforcements: {delivery_metrics.get('learning_reinforcements', 0)}")
1508
+ lines.append(f"- Learning duplicate skips: {delivery_metrics.get('learning_duplicate_skips', 0)}")
1509
+ lines.append(f"- Learning contradiction reviews: {delivery_metrics.get('learning_contradiction_reviews', 0)}")
1510
+ lines.append("")
1511
+
1512
+ followup_deduplication = summary.get("followup_deduplication") or {}
1513
+ learning_consolidation = summary.get("learning_consolidation") or {}
1514
+ if followup_deduplication or learning_consolidation:
1515
+ lines.append("## Prevention Quality")
1516
+ lines.append("")
1517
+ if followup_deduplication:
1518
+ lines.append(f"- Open followups: {followup_deduplication.get('open_followups', 0)}")
1519
+ lines.append(f"- Duplicate open followups: {followup_deduplication.get('duplicate_open_followups', 0)}")
1520
+ if followup_deduplication.get("duplicate_rate_pct") is not None:
1521
+ lines.append(f"- Duplicate followup rate: {followup_deduplication['duplicate_rate_pct']:.1f}%")
1522
+ if learning_consolidation:
1523
+ lines.append(f"- Active learnings: {learning_consolidation.get('active_learnings', 0)}")
1524
+ lines.append(f"- Learning noise pressure: {learning_consolidation.get('noise_pressure', 0)}")
1525
+ if learning_consolidation.get("noise_rate_pct") is not None:
1526
+ lines.append(f"- Learning noise rate: {learning_consolidation['noise_rate_pct']:.1f}%")
1331
1527
  lines.append("")
1332
1528
 
1333
1529
  if summary.get("top_projects"):
@@ -1379,6 +1575,14 @@ def _render_period_summary_markdown(summary: dict) -> str:
1379
1575
  lines.append(f"- Corrections delta: {trend['total_corrections_delta']:+d}")
1380
1576
  if trend.get("protocol_compliance_delta") is not None:
1381
1577
  lines.append(f"- Protocol delta: {trend['protocol_compliance_delta']:+.1f}%")
1578
+ if trend.get("followup_duplicate_open_delta") is not None:
1579
+ lines.append(f"- Duplicate followups delta: {trend['followup_duplicate_open_delta']:+d}")
1580
+ if trend.get("followup_duplicate_rate_delta") is not None:
1581
+ lines.append(f"- Duplicate followup rate delta: {trend['followup_duplicate_rate_delta']:+.1f}%")
1582
+ if trend.get("learning_noise_delta") is not None:
1583
+ lines.append(f"- Learning noise delta: {trend['learning_noise_delta']:+d}")
1584
+ if trend.get("learning_noise_rate_delta") is not None:
1585
+ lines.append(f"- Learning noise rate delta: {trend['learning_noise_rate_delta']:+.1f}%")
1382
1586
  lines.append("")
1383
1587
 
1384
1588
  return "\n".join(lines).rstrip() + "\n"