nexo-brain 7.23.2 → 7.23.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +1 -1
- package/README.md +9 -1
- package/package.json +1 -1
- package/scripts/sync_release_artifacts.py +28 -0
- package/src/auto_update.py +27 -49
- package/src/automation_reconciler.py +383 -0
- package/src/automation_supervisor.py +86 -9
- package/src/backup_retention.py +70 -0
- package/src/cli.py +55 -2
- package/src/cognitive/_core.py +4 -3
- package/src/cognitive_paths.py +194 -0
- package/src/dashboard/app.py +2 -1
- package/src/db/_episodic.py +85 -7
- package/src/db/_schema.py +81 -0
- package/src/db/_skills.py +3 -3
- package/src/disk_recovery/__init__.py +11 -0
- package/src/disk_recovery/handlers/__init__.py +1 -0
- package/src/disk_recovery/handlers/common.py +37 -0
- package/src/disk_recovery/handlers/macos.py +39 -0
- package/src/disk_recovery/handlers/windows.py +49 -0
- package/src/disk_recovery/registry.py +135 -0
- package/src/doctor/providers/boot.py +115 -15
- package/src/kg_populate.py +2 -5
- package/src/paths.py +321 -5
- package/src/plugins/update.py +14 -36
- package/src/pre_answer_router.py +21 -0
- package/src/runtime_service.py +30 -3
- package/src/runtime_versioning.py +272 -10
- package/src/script_registry.py +3 -2
- package/src/scripts/backfill_task_owner.py +10 -4
- package/src/scripts/deep-sleep/apply_findings.py +2 -5
- package/src/scripts/deep-sleep/collect.py +2 -5
- package/src/scripts/nexo-cognitive-decay.py +2 -1
- package/src/scripts/nexo-daily-self-audit.py +36 -10
- package/src/scripts/nexo-followup-runner.py +1 -1
- package/src/scripts/nexo-immune.py +2 -1
- package/src/scripts/nexo-migrate.py +2 -3
- package/src/scripts/post_disk_recovery_sweep.py +75 -0
- package/src/scripts/prune_runtime_backups.py +78 -11
- package/src/server.py +13 -1
- package/src/storage_router.py +2 -3
- package/src/support_snapshot.py +25 -0
- package/src/transcript_index.py +234 -0
- package/src/transcript_utils.py +31 -8
- package/src/user_data_portability.py +2 -3
- package/tool-enforcement-map.json +15 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "nexo-brain",
|
|
3
|
-
"version": "7.23.2",
|
|
3
|
+
"version": "7.23.5",
|
|
4
4
|
"description": "Local cognitive runtime for Claude Code \u2014 persistent memory, overnight learning, doctor diagnostics, personal scripts, recovery-aware jobs, startup preflight, and optional dashboard/power helper.",
|
|
5
5
|
"author": {
|
|
6
6
|
"name": "NEXO Brain",
|
package/README.md
CHANGED
|
@@ -18,7 +18,15 @@
|
|
|
18
18
|
|
|
19
19
|
[Watch the overview video](https://nexo-brain.com/watch/) · [Watch on YouTube](https://www.youtube.com/watch?v=i2lkGhKyVqI) · [Open the infographic](https://nexo-brain.com/assets/nexo-brain-infographic-v5.png)
|
|
20
20
|
|
|
21
|
-
Version `7.23.
|
|
21
|
+
Version `7.23.5` is the current packaged-runtime line. Patch over v7.23.4 - `nexo update` keeps external CLI maintenance summary copy in English.
|
|
22
|
+
|
|
23
|
+
Previously in `7.23.4`: patch over v7.23.3 - release tags now fail closed when npm publication fails and OpenClaw lockfile metadata stays synchronized with the release version.
|
|
24
|
+
|
|
25
|
+
Previously in `7.23.3`: patch over v7.23.2 - Followup runner skips DONE terminal statuses so already-finished followups do not re-enter executable batches.
|
|
26
|
+
|
|
27
|
+
Previously in `7.23.2`: patch over v7.23.1 - Desktop-facing version checks refresh stale latest-version cache entries so a just-published Brain update can still appear inside NEXO Desktop.
|
|
28
|
+
|
|
29
|
+
Previously in `7.23.1`: express patch over v7.23.0 - headless automations no longer hang on silent Claude children, synthetic followup prompts no longer trigger session-end loops, and runtime backups self-prune under a hard cap before creating new large artifacts.
|
|
22
30
|
|
|
23
31
|
Previously in `7.23.0`: minor release over v7.22.0 - pre-answer routing now consults continuity evidence before visible replies, Memory Observations queue processing converges through a bounded processor, and audits expose saved-but-not-used stores, automation drift, MCP live/catalog gaps, artifact location and transcript coverage.
|
|
24
32
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "nexo-brain",
|
|
3
|
-
"version": "7.23.2",
|
|
3
|
+
"version": "7.23.5",
|
|
4
4
|
"mcpName": "io.github.wazionapps/nexo",
|
|
5
5
|
"description": "NEXO Brain — Shared brain for AI agents. Persistent memory, semantic RAG, natural forgetting, metacognitive guard, trust scoring, 150+ MCP tools. Works with Claude Code, Codex, Claude Desktop & any MCP client. 100% local, free.",
|
|
6
6
|
"homepage": "https://nexo-brain.com",
|
|
@@ -12,6 +12,7 @@ ROOT_PACKAGE_JSON = ROOT / "package.json"
|
|
|
12
12
|
CLAUDE_PLUGIN_JSON = ROOT / ".claude-plugin" / "plugin.json"
|
|
13
13
|
CLAWHUB_SKILL_MD = ROOT / "clawhub-skill" / "SKILL.md"
|
|
14
14
|
OPENCLAW_PACKAGE_JSON = ROOT / "openclaw-plugin" / "package.json"
|
|
15
|
+
OPENCLAW_PACKAGE_LOCK = ROOT / "openclaw-plugin" / "package-lock.json"
|
|
15
16
|
OPENCLAW_MCP_BRIDGE = ROOT / "openclaw-plugin" / "src" / "mcp-bridge.ts"
|
|
16
17
|
|
|
17
18
|
|
|
@@ -37,6 +38,30 @@ def sync_json_version(path: Path, expected_version: str, label: str) -> bool:
|
|
|
37
38
|
return True
|
|
38
39
|
|
|
39
40
|
|
|
41
|
+
def sync_openclaw_package_lock(path: Path, expected_version: str) -> bool:
    """Align both version fields of the OpenClaw package-lock with the release.

    The lockfile records the version twice: at the top level and on the root
    package entry stored under ``packages[""]``. Each field is rewritten only
    when it differs from ``expected_version``.

    Args:
        path: Location of the OpenClaw ``package-lock.json``.
        expected_version: Version string both fields must carry.

    Returns:
        ``True`` when the file was rewritten, ``False`` when it was already
        in sync and left untouched.
    """
    lock_data = load_json(path)

    top_level_stale = lock_data.get("version") != expected_version
    if top_level_stale:
        lock_data["version"] = expected_version

    root_entry = lock_data.get("packages", {}).get("")
    if not isinstance(root_entry, dict):
        # NOTE(review): `fail` is defined elsewhere in this script and is
        # presumed to abort the run; confirm before relying on it.
        fail("OpenClaw package-lock.json is missing packages['']")

    root_entry_stale = root_entry.get("version") != expected_version
    if root_entry_stale:
        root_entry["version"] = expected_version

    if top_level_stale or root_entry_stale:
        dump_json(path, lock_data)
        print(f"[sync-release-artifacts] synced OpenClaw package-lock -> {expected_version}")
        return True
    return False
|
|
63
|
+
|
|
64
|
+
|
|
40
65
|
def sync_clawhub_skill(skill_path: Path, expected_version: str) -> bool:
|
|
41
66
|
text = skill_path.read_text()
|
|
42
67
|
updated = text
|
|
@@ -99,6 +124,7 @@ def main() -> None:
|
|
|
99
124
|
CLAUDE_PLUGIN_JSON: CLAUDE_PLUGIN_JSON.read_text(),
|
|
100
125
|
CLAWHUB_SKILL_MD: CLAWHUB_SKILL_MD.read_text(),
|
|
101
126
|
OPENCLAW_PACKAGE_JSON: OPENCLAW_PACKAGE_JSON.read_text(),
|
|
127
|
+
OPENCLAW_PACKAGE_LOCK: OPENCLAW_PACKAGE_LOCK.read_text(),
|
|
102
128
|
OPENCLAW_MCP_BRIDGE: OPENCLAW_MCP_BRIDGE.read_text(),
|
|
103
129
|
}
|
|
104
130
|
|
|
@@ -109,6 +135,8 @@ def main() -> None:
|
|
|
109
135
|
changed.append("clawhub-skill/SKILL.md")
|
|
110
136
|
if sync_json_version(OPENCLAW_PACKAGE_JSON, root_version, "OpenClaw package"):
|
|
111
137
|
changed.append("openclaw-plugin/package.json")
|
|
138
|
+
if sync_openclaw_package_lock(OPENCLAW_PACKAGE_LOCK, root_version):
|
|
139
|
+
changed.append("openclaw-plugin/package-lock.json")
|
|
112
140
|
if sync_openclaw_bridge(OPENCLAW_MCP_BRIDGE, root_version):
|
|
113
141
|
changed.append("openclaw-plugin/src/mcp-bridge.ts")
|
|
114
142
|
|
package/src/auto_update.py
CHANGED
|
@@ -115,33 +115,16 @@ def _env_int(name: str, default: int) -> int:
|
|
|
115
115
|
return default
|
|
116
116
|
|
|
117
117
|
|
|
118
|
-
BACKUP_MAX_BYTES =
|
|
119
|
-
BACKUP_MIN_FREE_BYTES =
|
|
118
|
+
BACKUP_MAX_BYTES = paths.backup_retention_cap_bytes()
|
|
119
|
+
BACKUP_MIN_FREE_BYTES = paths.backup_min_free_bytes()
|
|
120
120
|
LOCAL_CONTEXT_MAX_BACKUP_BYTES = _env_int("NEXO_LOCAL_CONTEXT_MAX_BACKUP_BYTES", 2 * 1024 * 1024 * 1024)
|
|
121
121
|
_LAST_BACKUP_ERROR = ""
|
|
122
122
|
|
|
123
123
|
|
|
124
124
|
def _run_runtime_backup_prune() -> None:
|
|
125
|
-
|
|
126
|
-
if not
|
|
127
|
-
|
|
128
|
-
try:
|
|
129
|
-
subprocess.run(
|
|
130
|
-
[
|
|
131
|
-
sys.executable,
|
|
132
|
-
str(script),
|
|
133
|
-
"--root",
|
|
134
|
-
str(paths.backups_dir()),
|
|
135
|
-
"--apply",
|
|
136
|
-
"--max-bytes",
|
|
137
|
-
str(BACKUP_MAX_BYTES),
|
|
138
|
-
],
|
|
139
|
-
capture_output=True,
|
|
140
|
-
text=True,
|
|
141
|
-
timeout=120,
|
|
142
|
-
)
|
|
143
|
-
except Exception as e:
|
|
144
|
-
_log(f"Backup self-clean warning: {e}")
|
|
125
|
+
result = paths.run_runtime_backup_prune(max_bytes=BACKUP_MAX_BYTES)
|
|
126
|
+
if result.get("ok") is False and not result.get("skipped"):
|
|
127
|
+
_log(f"Backup self-clean warning: {result.get('error') or result.get('stderr') or 'unknown'}")
|
|
145
128
|
|
|
146
129
|
|
|
147
130
|
def _backup_free_bytes() -> int | None:
|
|
@@ -154,14 +137,7 @@ def _backup_free_bytes() -> int | None:
|
|
|
154
137
|
|
|
155
138
|
|
|
156
139
|
def _backup_space_error() -> str | None:
|
|
157
|
-
|
|
158
|
-
free = _backup_free_bytes()
|
|
159
|
-
if free is not None and free < BACKUP_MIN_FREE_BYTES:
|
|
160
|
-
return (
|
|
161
|
-
"free disk below NEXO backup safety floor after automatic cleanup "
|
|
162
|
-
f"({free}B < {BACKUP_MIN_FREE_BYTES}B)"
|
|
163
|
-
)
|
|
164
|
-
return None
|
|
140
|
+
return paths.backup_space_error(reason="auto_update")
|
|
165
141
|
|
|
166
142
|
|
|
167
143
|
def _should_include_local_context_backup(path: Path) -> bool:
|
|
@@ -283,15 +259,15 @@ def _cleanup_legacy_root_db_stubs(runtime_root: Path = NEXO_HOME, *, dry_run: bo
|
|
|
283
259
|
continue
|
|
284
260
|
|
|
285
261
|
if backup_root is None:
|
|
286
|
-
|
|
287
|
-
backup_root = paths.backups_dir() / f"legacy-root-db-stubs-{timestamp}"
|
|
288
|
-
backup_root.mkdir(parents=True, exist_ok=True)
|
|
262
|
+
backup_root = paths.create_backup_dir("legacy-root-db-stubs")
|
|
289
263
|
target = backup_root / candidate.name
|
|
290
264
|
try:
|
|
291
265
|
shutil.move(str(candidate), str(target))
|
|
292
266
|
report["archived"].append({"path": str(candidate), "backup_path": str(target)})
|
|
293
267
|
except Exception as exc:
|
|
294
268
|
report["errors"].append({"path": str(candidate), "error": str(exc)})
|
|
269
|
+
if backup_root is not None:
|
|
270
|
+
paths.finalize_backup_snapshot(backup_root)
|
|
295
271
|
return report
|
|
296
272
|
|
|
297
273
|
|
|
@@ -374,9 +350,7 @@ def _cleanup_empty_personal_brain_db_stubs(runtime_root: Path = NEXO_HOME, *, dr
|
|
|
374
350
|
continue
|
|
375
351
|
|
|
376
352
|
if backup_root is None:
|
|
377
|
-
|
|
378
|
-
backup_root = paths.backups_dir() / f"legacy-personal-brain-db-stubs-{timestamp}"
|
|
379
|
-
backup_root.mkdir(parents=True, exist_ok=True)
|
|
353
|
+
backup_root = paths.create_backup_dir("legacy-personal-brain-db-stubs")
|
|
380
354
|
target = backup_root / candidate.name
|
|
381
355
|
try:
|
|
382
356
|
shutil.move(str(candidate), str(target))
|
|
@@ -387,6 +361,8 @@ def _cleanup_empty_personal_brain_db_stubs(runtime_root: Path = NEXO_HOME, *, dr
|
|
|
387
361
|
})
|
|
388
362
|
except Exception as exc:
|
|
389
363
|
report["errors"].append({"path": str(candidate), "error": str(exc)})
|
|
364
|
+
if backup_root is not None:
|
|
365
|
+
paths.finalize_backup_snapshot(backup_root)
|
|
390
366
|
return report
|
|
391
367
|
|
|
392
368
|
|
|
@@ -1613,7 +1589,7 @@ def _self_heal_if_wiped() -> dict | None:
|
|
|
1613
1589
|
return report
|
|
1614
1590
|
|
|
1615
1591
|
# Snapshot the current (wiped) state so the heal is reversible.
|
|
1616
|
-
pre_heal_dir = paths.
|
|
1592
|
+
pre_heal_dir = paths.create_backup_dir("pre-heal")
|
|
1617
1593
|
try:
|
|
1618
1594
|
import shutil as _shutil
|
|
1619
1595
|
pre_heal_dir.mkdir(parents=True, exist_ok=True)
|
|
@@ -1623,6 +1599,7 @@ def _self_heal_if_wiped() -> dict | None:
|
|
|
1623
1599
|
_shutil.copy2(str(sidecar), str(pre_heal_dir / sidecar.name))
|
|
1624
1600
|
except Exception as e:
|
|
1625
1601
|
_log(f"self-heal: pre-heal snapshot warning: {e}")
|
|
1602
|
+
paths.finalize_backup_snapshot(pre_heal_dir)
|
|
1626
1603
|
|
|
1627
1604
|
# Clear stale WAL/SHM before the restore so the new DB starts clean.
|
|
1628
1605
|
for suffix in ("-wal", "-shm"):
|
|
@@ -2141,8 +2118,7 @@ def _backup_dbs() -> str | None:
|
|
|
2141
2118
|
# path selection and turn into empty shells in the backup, breaking both
|
|
2142
2119
|
# validation and rollback paths. Safe no-op when there are none.
|
|
2143
2120
|
_purge_zero_byte_db_files()
|
|
2144
|
-
|
|
2145
|
-
backup_dir = paths.backups_dir() / f"pre-autoupdate-{timestamp}"
|
|
2121
|
+
backup_dir: Path | None = None
|
|
2146
2122
|
|
|
2147
2123
|
db_files = list(DATA_DIR.glob("*.db")) if DATA_DIR.is_dir() else []
|
|
2148
2124
|
local_context_db = paths.memory_dir() / "local-context.db"
|
|
@@ -2162,7 +2138,7 @@ def _backup_dbs() -> str | None:
|
|
|
2162
2138
|
_log(f"DB backup aborted: {space_err}")
|
|
2163
2139
|
return None
|
|
2164
2140
|
|
|
2165
|
-
backup_dir.
|
|
2141
|
+
backup_dir = paths.create_backup_dir("pre-autoupdate")
|
|
2166
2142
|
for db_file in db_files:
|
|
2167
2143
|
src_conn = None
|
|
2168
2144
|
dst_conn = None
|
|
@@ -2187,6 +2163,7 @@ def _backup_dbs() -> str | None:
|
|
|
2187
2163
|
_rotate_auto_update_backups("pre-autoupdate-")
|
|
2188
2164
|
except Exception as e:
|
|
2189
2165
|
_log(f"Backup rotation warning (pre-autoupdate): {e}")
|
|
2166
|
+
paths.finalize_backup_snapshot(backup_dir)
|
|
2190
2167
|
return str(backup_dir)
|
|
2191
2168
|
|
|
2192
2169
|
|
|
@@ -2665,8 +2642,7 @@ def _promote_packaged_runtime_code_to_core() -> None:
|
|
|
2665
2642
|
def _conflict_dir() -> Path:
|
|
2666
2643
|
nonlocal conflict_root
|
|
2667
2644
|
if conflict_root is None:
|
|
2668
|
-
conflict_root = paths.
|
|
2669
|
-
conflict_root.mkdir(parents=True, exist_ok=True)
|
|
2645
|
+
conflict_root = paths.create_backup_dir("packaged-code-f06-conflicts")
|
|
2670
2646
|
return conflict_root
|
|
2671
2647
|
|
|
2672
2648
|
def _same_file(a: Path, b: Path) -> bool:
|
|
@@ -2765,6 +2741,8 @@ def _promote_packaged_runtime_code_to_core() -> None:
|
|
|
2765
2741
|
shutil.move(str(source), str(canonical))
|
|
2766
2742
|
except Exception as exc:
|
|
2767
2743
|
_log(f"[F0.6 packaged-code] move {source} -> {canonical} failed: {exc}")
|
|
2744
|
+
if conflict_root is not None:
|
|
2745
|
+
paths.finalize_backup_snapshot(conflict_root)
|
|
2768
2746
|
|
|
2769
2747
|
|
|
2770
2748
|
def _ensure_f06_legacy_shims() -> None:
|
|
@@ -2783,8 +2761,7 @@ def _ensure_f06_legacy_shims() -> None:
|
|
|
2783
2761
|
def _conflict_dir() -> Path:
|
|
2784
2762
|
nonlocal conflict_root
|
|
2785
2763
|
if conflict_root is None:
|
|
2786
|
-
conflict_root = paths.
|
|
2787
|
-
conflict_root.mkdir(parents=True, exist_ok=True)
|
|
2764
|
+
conflict_root = paths.create_backup_dir("legacy-shim-conflicts")
|
|
2788
2765
|
return conflict_root
|
|
2789
2766
|
|
|
2790
2767
|
def _same_file(a: Path, b: Path) -> bool:
|
|
@@ -2950,6 +2927,8 @@ def _ensure_f06_legacy_shims() -> None:
|
|
|
2950
2927
|
marker.write_text("F0.6\n", encoding="utf-8")
|
|
2951
2928
|
except Exception:
|
|
2952
2929
|
pass
|
|
2930
|
+
if conflict_root is not None:
|
|
2931
|
+
paths.finalize_backup_snapshot(conflict_root)
|
|
2953
2932
|
|
|
2954
2933
|
|
|
2955
2934
|
def _rewrite_f06_launch_agents() -> int:
|
|
@@ -3526,7 +3505,7 @@ def _format_external_clis_results(results: dict) -> list[str]:
|
|
|
3526
3505
|
any_updated = True
|
|
3527
3506
|
lines.append(
|
|
3528
3507
|
f" CLI updated: {pkg} {entry.get('old')} -> {entry.get('new')} "
|
|
3529
|
-
f"—
|
|
3508
|
+
f"— restart the terminal to activate"
|
|
3530
3509
|
)
|
|
3531
3510
|
elif status == "already_latest":
|
|
3532
3511
|
any_checked_latest = True
|
|
@@ -3539,7 +3518,7 @@ def _format_external_clis_results(results: dict) -> list[str]:
|
|
|
3539
3518
|
# CLIs that the operator never installed shouldn't spam the summary.
|
|
3540
3519
|
|
|
3541
3520
|
if not any_updated and not any_failed and any_checked_latest:
|
|
3542
|
-
lines.append(" CLIs
|
|
3521
|
+
lines.append(" External CLIs: already on latest versions")
|
|
3543
3522
|
|
|
3544
3523
|
return lines
|
|
3545
3524
|
|
|
@@ -4430,9 +4409,7 @@ def _installed_scripts_classification(dest: Path) -> dict[str, str]:
|
|
|
4430
4409
|
|
|
4431
4410
|
|
|
4432
4411
|
def _backup_runtime_tree(dest: Path = NEXO_HOME) -> str:
|
|
4433
|
-
|
|
4434
|
-
backup_dir = paths.backups_dir() / f"runtime-tree-{timestamp}"
|
|
4435
|
-
backup_dir.mkdir(parents=True, exist_ok=True)
|
|
4412
|
+
backup_dir = paths.create_backup_dir("runtime-tree")
|
|
4436
4413
|
|
|
4437
4414
|
code_dirs = [
|
|
4438
4415
|
"hooks",
|
|
@@ -4471,6 +4448,7 @@ def _backup_runtime_tree(dest: Path = NEXO_HOME) -> str:
|
|
|
4471
4448
|
_rotate_auto_update_backups("runtime-tree-")
|
|
4472
4449
|
except Exception as e:
|
|
4473
4450
|
_log(f"Backup rotation warning (runtime-tree): {e}")
|
|
4451
|
+
paths.finalize_backup_snapshot(backup_dir)
|
|
4474
4452
|
return str(backup_dir)
|
|
4475
4453
|
|
|
4476
4454
|
|
|
@@ -0,0 +1,383 @@
|
|
|
1
|
+
"""Safe reconciliation plan/apply contract for NEXO automations.
|
|
2
|
+
|
|
3
|
+
The reconciler never touches LaunchAgents and never deletes spool files. It
|
|
4
|
+
only closes retryable stale cron rows and archives terminal spool records when
|
|
5
|
+
the dry-run plan proves the action is deterministic.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from dataclasses import dataclass
|
|
11
|
+
from datetime import datetime, timezone
|
|
12
|
+
import hashlib
|
|
13
|
+
import json
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
import shutil
|
|
16
|
+
import sqlite3
|
|
17
|
+
from typing import Any, Mapping
|
|
18
|
+
|
|
19
|
+
from automation_supervisor import (
|
|
20
|
+
AutomationSupervisorConfig,
|
|
21
|
+
audit_automation,
|
|
22
|
+
default_config as supervisor_default_config,
|
|
23
|
+
load_job_contracts,
|
|
24
|
+
_is_retryable,
|
|
25
|
+
_normalise_now,
|
|
26
|
+
_spool_cron_id,
|
|
27
|
+
)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
DEFAULT_SPOOL_STALE_SECONDS = 60 * 60
|
|
31
|
+
TERMINAL_SPOOL_STATUSES = {"done", "completed", "failed", "cancelled", "terminal"}
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@dataclass(frozen=True)
class AutomationReconcileConfig:
    """Immutable settings shared by the plan and apply steps of the reconciler.

    Every path defaults to ``None``; ``default_config()`` fills them in from
    the automation supervisor's default configuration.
    """

    # SQLite database opened by ``_close_cron_run`` to update ``cron_runs`` rows.
    nexo_db_path: Path | None = None
    # Manifest handed to ``load_job_contracts`` to obtain the cron contracts.
    manifest_path: Path | None = None
    # Directory whose top-level ``*.json`` files are classified as spool items.
    cron_spool_dir: Path | None = None
    # Root for archived spool files; ``_archive_spool_file`` falls back to
    # ``cron_spool_dir / "archive"`` when this is unset.
    cron_spool_archive_dir: Path | None = None
    # Reference time; passed through ``_normalise_now`` before it is used.
    now: datetime | None = None
    # Age threshold (seconds) beyond which a non-terminal spool item stops
    # being classified as "pending".
    spool_stale_seconds: int = DEFAULT_SPOOL_STALE_SECONDS
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def default_config() -> AutomationReconcileConfig:
    """Derive the reconciler configuration from the supervisor's defaults.

    The database, manifest and spool locations are copied from the supervisor
    configuration. The archive directory is ``<spool>/archive`` when a spool
    directory is configured, otherwise ``None``.
    """
    supervisor_cfg = supervisor_default_config()
    if supervisor_cfg.cron_spool_dir:
        archive_dir = supervisor_cfg.cron_spool_dir / "archive"
    else:
        archive_dir = None
    return AutomationReconcileConfig(
        nexo_db_path=supervisor_cfg.nexo_db_path,
        manifest_path=supervisor_cfg.manifest_path,
        cron_spool_dir=supervisor_cfg.cron_spool_dir,
        cron_spool_archive_dir=archive_dir,
    )
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def build_reconciliation_plan(config: AutomationReconcileConfig | None = None) -> dict[str, Any]:
    """Produce a dry-run plan describing how open runs and spool files could be reconciled.

    Open runs reported by the supervisor audit become ``close_cron_run``
    actions when classified ``retryable`` (and carrying a run id), or
    ``manual_review_open_run`` actions when ``stuck``/``abandoned``. Spool
    items become ``archive_spool_file`` actions when terminal, or
    ``manual_review_spool_file`` actions when orphaned, stale or retryable.
    Only actions flagged ``safe_apply`` are eligible for
    ``apply_reconciliation_plan``.

    Args:
        config: Reconciler settings; ``default_config()`` is used when omitted.

    Returns:
        A dict with ``ok``, ``generated_at``, ``dry_run``, the ``actions``
        list, the classified ``spool_items`` and a ``summary`` of counts.
    """
    settings = config or default_config()
    reference_time = _normalise_now(settings.now)
    audit = audit_automation(
        AutomationSupervisorConfig(
            nexo_db_path=settings.nexo_db_path,
            manifest_path=settings.manifest_path,
            cron_spool_dir=settings.cron_spool_dir,
            now=reference_time,
        )
    )
    contracts, _excluded = load_job_contracts(settings.manifest_path)
    planned: list[dict[str, Any]] = []

    # --- open cron runs -----------------------------------------------------
    for run in audit.get("open_runs") or []:
        run_status = str(run.get("status") or "")
        run_id = run.get("run_id")
        cron_id = str(run.get("cron_id") or "")

        if run_status == "retryable" and run_id is not None:
            # started_at is recorded so the apply step can prove the row is
            # still the same run before closing it.
            planned.append(
                {
                    "action": "close_cron_run",
                    "safe_apply": True,
                    "run_id": run_id,
                    "cron_id": cron_id,
                    "started_at": str(run.get("started_at") or ""),
                    "classification": run_status,
                    "reason": run.get("reason", ""),
                    "then": "scheduler may retry according to recovery_policy",
                }
            )
            continue

        if run_status in {"stuck", "abandoned"}:
            planned.append(
                {
                    "action": "manual_review_open_run",
                    "safe_apply": False,
                    "run_id": run_id,
                    "cron_id": cron_id,
                    "classification": run_status,
                    "reason": run.get("reason", ""),
                }
            )

    # --- spool files --------------------------------------------------------
    spool_items = _classify_spool_items(
        settings.cron_spool_dir,
        contracts=contracts,
        now=reference_time,
        stale_seconds=settings.spool_stale_seconds,
    )
    for entry in spool_items:
        if entry["classification"] == "terminal":
            # content_hash lets the apply step detect a file that changed
            # after the plan was generated.
            planned.append(
                {
                    "action": "archive_spool_file",
                    "safe_apply": True,
                    "cron_id": entry["cron_id"],
                    "path": entry["path"],
                    "content_hash": entry.get("content_hash", ""),
                    "classification": "terminal",
                    "reason": entry["reason"],
                }
            )
        elif entry["classification"] in {"orphaned", "stale", "retryable"}:
            planned.append(
                {
                    "action": "manual_review_spool_file",
                    "safe_apply": False,
                    "cron_id": entry["cron_id"],
                    "path": entry["path"],
                    "classification": entry["classification"],
                    "reason": entry["reason"],
                }
            )

    safe_total = sum(1 for step in planned if step.get("safe_apply"))
    summary = {
        "actions": len(planned),
        "safe_actions": safe_total,
        "manual_actions": len(planned) - safe_total,
        "spool_items": len(spool_items),
    }
    return {
        "ok": True,
        "generated_at": reference_time.isoformat(),
        "dry_run": True,
        "actions": planned,
        "spool_items": spool_items,
        "summary": summary,
    }
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def apply_reconciliation_plan(plan: Mapping[str, Any], config: AutomationReconcileConfig | None = None) -> dict[str, Any]:
    """Execute the ``safe_apply`` actions of a previously built plan.

    Entries that are not mappings, or not flagged ``safe_apply``, are skipped
    with reason ``not_safe_apply``. Safe entries of an unrecognised kind are
    skipped with reason ``unknown_safe_action``. Every other entry is handed
    to its handler, which re-validates it against current state.

    Args:
        plan: Mapping with an ``actions`` list, as returned by
            ``build_reconciliation_plan``.
        config: Reconciler settings; ``default_config()`` is used when omitted.

    Returns:
        A dict with overall ``ok`` (``False`` if any handler reported
        ``ok: False``), the ``applied`` handler results, the ``skipped``
        entries, and a ``summary`` of counts.
    """
    settings = config or default_config()
    reference_time = _normalise_now(settings.now)
    handlers = {
        "close_cron_run": _close_cron_run,
        "archive_spool_file": _archive_spool_file,
    }
    applied: list[dict[str, Any]] = []
    skipped: list[dict[str, Any]] = []

    for step in plan.get("actions") or []:
        if not isinstance(step, Mapping):
            # Malformed entries are echoed back unchanged for diagnosis.
            skipped.append({"action": step, "reason": "not_safe_apply"})
            continue
        if not step.get("safe_apply"):
            skipped.append({"action": dict(step), "reason": "not_safe_apply"})
            continue

        handler = handlers.get(str(step.get("action") or ""))
        if handler is None:
            skipped.append({"action": dict(step), "reason": "unknown_safe_action"})
        else:
            applied.append(handler(settings, step, now=reference_time))

    error_count = sum(1 for outcome in applied if outcome.get("ok") is False)
    return {
        "ok": error_count == 0,
        "applied": applied,
        "skipped": skipped,
        "summary": {
            "applied": len(applied),
            "skipped": len(skipped),
            "errors": error_count,
        },
    }
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
def _classify_spool_items(
    spool_dir: Path | None,
    *,
    contracts: Mapping[str, Any],
    now: datetime,
    stale_seconds: int,
) -> list[dict[str, Any]]:
    """Classify every top-level ``*.json`` file in the spool directory.

    Classification precedence, first match wins:

    1. ``orphaned``  - the resolved cron id has no entry in ``contracts``;
    2. ``terminal``  - payload has ``terminal: true`` or a terminal status;
    3. ``retryable`` / ``stale`` - older than the stale threshold, split by
       whether the contract is retryable;
    4. ``pending``   - anything else.

    Args:
        spool_dir: Directory to scan; ``None`` or a missing path yields ``[]``.
        contracts: Cron contracts keyed by cron id.
        now: Reference time used to compute each file's age from its mtime.
        stale_seconds: Age threshold; falsy values fall back to
            ``DEFAULT_SPOOL_STALE_SECONDS`` and the threshold is at least 1.

    Returns:
        One dict per file (sorted by path) with ``cron_id``, ``path``,
        ``age_seconds`` (clamped at 0), ``classification``, ``reason`` and
        ``content_hash``.
    """
    if spool_dir is None or not spool_dir.exists():
        return []

    classified: list[dict[str, Any]] = []
    for spool_file in sorted(spool_dir.glob("*.json")):
        if not spool_file.is_file():
            continue

        payload = _load_json(spool_file)
        cron_id = _spool_cron_id(spool_file, contracts)
        contract = contracts.get(cron_id)
        modified_at = datetime.fromtimestamp(spool_file.stat().st_mtime, tz=timezone.utc)
        age_seconds = int((now - modified_at).total_seconds())

        if isinstance(payload, dict):
            status = str(payload.get("status") or "").strip().lower()
            flagged_terminal = payload.get("terminal") is True
        else:
            status = ""
            flagged_terminal = False
        is_terminal = flagged_terminal or status in TERMINAL_SPOOL_STATUSES

        if contract is None:
            verdict = ("orphaned", "spool item does not match a declared non-Evolution cron")
        elif is_terminal:
            verdict = ("terminal", "spool item is marked terminal and can be archived")
        elif age_seconds <= max(1, int(stale_seconds or DEFAULT_SPOOL_STALE_SECONDS)):
            verdict = ("pending", "spool item is recent and pending normal processing")
        elif _is_retryable(contract):
            verdict = ("retryable", "stale spool item belongs to a retryable/idempotent cron")
        else:
            verdict = ("stale", "stale spool item has no retry contract")

        classification, reason = verdict
        classified.append(
            {
                "cron_id": cron_id,
                "path": str(spool_file),
                # A file with an mtime in the future reports age 0.
                "age_seconds": max(age_seconds, 0),
                "classification": classification,
                "reason": reason,
                "content_hash": _file_sha256(spool_file),
            }
        )
    return classified
|
|
219
|
+
|
|
220
|
+
|
|
221
|
+
def _close_cron_run(cfg: AutomationReconcileConfig, action: Mapping[str, Any], *, now: datetime) -> dict[str, Any]:
    """Close one stale retryable ``cron_runs`` row described by a plan action.

    The action is re-validated before anything is written: the database must
    exist, the run id must be an integer, the plan must carry ``cron_id``,
    ``classification`` and ``started_at``, and a fresh audit must still show
    the same run, open and ``retryable``.

    The plan-evidence check runs before the fresh audit, so a structurally
    incomplete action is rejected without paying for an audit. The returned
    dicts are the same as before.

    Args:
        cfg: Reconciler settings; ``nexo_db_path`` locates the database.
        action: Plan entry produced for ``close_cron_run``.
        now: Timestamp written to ``ended_at`` (microseconds dropped).

    Returns:
        ``{"ok": True, "action": "close_cron_run", "run_id", "rows"}`` on
        success, where ``rows`` is the number of rows updated. Otherwise a
        dict with ``ok: False`` and an ``error`` code: ``db_missing``,
        ``invalid_run_id``, ``missing_plan_evidence``,
        ``stale_plan_run_not_open`` or ``stale_plan_run_changed``.

    Raises:
        sqlite3.Error: If the UPDATE or commit fails; the connection is
            closed before the error propagates.
    """

    def _refuse(error: str, **details: Any) -> dict[str, Any]:
        # Uniform shape for every rejection path.
        return {"ok": False, "action": "close_cron_run", "error": error, **details}

    db_path = cfg.nexo_db_path
    if db_path is None or not db_path.is_file():
        return _refuse("db_missing")

    raw_run_id = action.get("run_id")
    try:
        run_id = int(raw_run_id)
    except Exception:
        return _refuse("invalid_run_id", run_id=raw_run_id)

    planned_cron_id = str(action.get("cron_id") or "")
    planned_status = str(action.get("classification") or "")
    planned_started_at = str(action.get("started_at") or "")
    # Cheap structural check first: without this evidence the action can never
    # be applied, so there is no point running a full audit.
    if not (planned_cron_id and planned_status and planned_started_at):
        return _refuse("missing_plan_evidence", run_id=run_id)

    live_run = _current_open_run(cfg, run_id)
    if not live_run:
        return _refuse("stale_plan_run_not_open", run_id=run_id)

    still_matches = (
        str(live_run.get("cron_id") or "") == planned_cron_id
        and str(live_run.get("status") or "") == planned_status
        and str(live_run.get("started_at") or "") == planned_started_at
        # Only retryable runs may ever be closed automatically.
        and planned_status == "retryable"
    )
    if not still_matches:
        return _refuse(
            "stale_plan_run_changed",
            run_id=run_id,
            expected_cron_id=planned_cron_id,
            current_cron_id=live_run.get("cron_id", ""),
            current_status=live_run.get("status", ""),
            current_started_at=live_run.get("started_at", ""),
        )

    conn = sqlite3.connect(str(db_path), timeout=5)
    try:
        # The WHERE clause repeats the identity checks so a row that changed
        # after the audit is left untouched. Existing exit_code, summary and
        # error values are preserved.
        cursor = conn.execute(
            """
            UPDATE cron_runs
            SET ended_at = ?, exit_code = COALESCE(exit_code, 75),
            summary = CASE WHEN COALESCE(summary, '') = '' THEN ? ELSE summary END,
            error = CASE WHEN COALESCE(error, '') = '' THEN ? ELSE error END
            WHERE id = ? AND cron_id = ? AND started_at = ? AND (ended_at IS NULL OR exit_code IS NULL)
            """,
            (
                now.replace(microsecond=0).isoformat(),
                "closed by automation reconciler",
                str(action.get("reason") or "stale retryable run"),
                run_id,
                planned_cron_id,
                planned_started_at,
            ),
        )
        conn.commit()
        return {"ok": True, "action": "close_cron_run", "run_id": run_id, "rows": cursor.rowcount}
    finally:
        conn.close()
|
|
282
|
+
|
|
283
|
+
|
|
284
|
+
def _archive_spool_file(
    cfg: AutomationReconcileConfig,
    action: Mapping[str, Any],
    *,
    now: datetime,
) -> dict[str, Any]:
    """Move one terminal spool file into the dated archive directory.

    The file is moved, never deleted. Before moving, the action is
    re-validated: the path must resolve to an existing location inside the
    spool directory, the plan must carry a ``content_hash``, and a fresh
    classification must still report the file as ``terminal`` with the same
    cron id and hash.

    The ``content_hash`` presence check runs before the fresh classification,
    which re-reads and re-hashes the spool directory, so an action that lacks
    evidence is rejected without that scan. The returned dicts are the same
    as before.

    Args:
        cfg: Reconciler settings; ``cron_spool_dir`` bounds the allowed
            sources and ``cron_spool_archive_dir`` (default
            ``<spool>/archive``) is the archive root.
        action: Plan entry produced for ``archive_spool_file``.
        now: Reference time; selects the ``YYYYMMDD`` sub-directory and the
            numeric suffix used when the target name is already taken.

    Returns:
        ``{"ok": True, "action": "archive_spool_file", "from", "to"}`` on
        success. Otherwise a dict with ``ok: False`` and an ``error`` code:
        ``spool_missing``, ``unsafe_path``, ``missing_plan_evidence`` or
        ``stale_plan_spool_changed``.

    Raises:
        OSError: If creating the archive directory or moving the file fails.
    """

    def _refuse(error: str, **details: Any) -> dict[str, Any]:
        # Uniform shape for every rejection path.
        return {"ok": False, "action": "archive_spool_file", "error": error, **details}

    spool_dir = cfg.cron_spool_dir
    archive_dir = cfg.cron_spool_archive_dir or (spool_dir / "archive" if spool_dir else None)
    if spool_dir is None or archive_dir is None:
        return _refuse("spool_missing")

    source = Path(str(action.get("path") or ""))
    try:
        # strict=True makes a vanished file fail here; relative_to rejects any
        # path (including via symlinks) that resolves outside the spool.
        source_resolved = source.resolve(strict=True)
        source_resolved.relative_to(spool_dir.resolve(strict=True))
    except Exception:
        return _refuse("unsafe_path", path=str(source))

    expected_hash = str(action.get("content_hash") or "").strip()
    # Cheap structural check first: without a hash the action can never be
    # applied, so skip the directory re-scan entirely.
    if not expected_hash:
        return _refuse("missing_plan_evidence", path=str(source))

    current = _current_spool_item(cfg, source_resolved, now=now)
    plan_still_valid = (
        bool(current)
        and current.get("classification") == "terminal"
        and current.get("cron_id") == str(action.get("cron_id") or "")
        and current.get("content_hash") == expected_hash
    )
    if not plan_still_valid:
        return _refuse("stale_plan_spool_changed", path=str(source), current=current or {})

    dated_dir = archive_dir / now.strftime("%Y%m%d")
    dated_dir.mkdir(parents=True, exist_ok=True)
    target = dated_dir / source.name
    if target.exists():
        # Disambiguate a same-day name clash with the epoch second.
        target = dated_dir / f"{source.stem}-{int(now.timestamp())}{source.suffix}"
    shutil.move(str(source_resolved), str(target))
    return {"ok": True, "action": "archive_spool_file", "from": str(source), "to": str(target)}
|
|
330
|
+
|
|
331
|
+
|
|
332
|
+
def _load_json(path: Path) -> dict[str, Any]:
    """Read ``path`` as UTF-8 JSON and return it only if it is an object.

    Best-effort: a file that cannot be read or parsed, or whose top-level
    value is not a JSON object, yields an empty dict.
    """
    try:
        decoded = json.loads(path.read_text(encoding="utf-8"))
    except Exception:
        return {}
    if isinstance(decoded, dict):
        return decoded
    return {}
|
|
338
|
+
|
|
339
|
+
|
|
340
|
+
def _current_open_run(cfg: AutomationReconcileConfig, run_id: int) -> dict[str, Any] | None:
    """Re-run the supervisor audit and return the open run with ``run_id``.

    Returns a copy of the matching ``open_runs`` row, or ``None`` when the
    audit no longer lists that run. Rows whose ``run_id`` cannot be converted
    to ``int`` are ignored.
    """
    audit = audit_automation(
        AutomationSupervisorConfig(
            nexo_db_path=cfg.nexo_db_path,
            manifest_path=cfg.manifest_path,
            cron_spool_dir=cfg.cron_spool_dir,
            now=_normalise_now(cfg.now),
        )
    )
    for candidate in audit.get("open_runs") or []:
        try:
            if int(candidate.get("run_id")) != run_id:
                continue
            return dict(candidate)
        except Exception:
            # Malformed row: skip it rather than abort the lookup.
            continue
    return None
|
|
355
|
+
|
|
356
|
+
|
|
357
|
+
def _current_spool_item(
    cfg: AutomationReconcileConfig,
    source_resolved: Path,
    *,
    now: datetime,
) -> dict[str, Any] | None:
    """Re-classify the spool directory and return the entry for one file.

    Args:
        cfg: Reconciler settings (manifest, spool directory, stale threshold).
        source_resolved: Already-resolved path of the file being looked up.
        now: Reference time forwarded to the classifier.

    Returns:
        A copy of the classification dict whose resolved path equals
        ``source_resolved``, or ``None`` when no current item matches.
    """
    contracts, _excluded = load_job_contracts(cfg.manifest_path)
    current_items = _classify_spool_items(
        cfg.cron_spool_dir,
        contracts=contracts,
        now=now,
        stale_seconds=cfg.spool_stale_seconds,
    )
    for candidate in current_items:
        try:
            candidate_path = Path(str(candidate.get("path") or "")).resolve(strict=True)
            if candidate_path == source_resolved:
                return dict(candidate)
        except Exception:
            # The file vanished or cannot be resolved: not a match.
            continue
    return None
|
|
377
|
+
|
|
378
|
+
|
|
379
|
+
def _file_sha256(path: Path) -> str:
    """Return the hex SHA-256 digest of the file's bytes, or ``""`` on any failure."""
    try:
        digest = hashlib.sha256()
        digest.update(path.read_bytes())
        return digest.hexdigest()
    except Exception:
        # Best-effort: an unreadable file yields an empty hash.
        return ""
|