nexo-brain 7.21.0 → 7.23.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +1 -1
- package/README.md +5 -1
- package/package.json +1 -1
- package/src/artifact_locator.py +89 -0
- package/src/automation_supervisor.py +570 -0
- package/src/cli.py +165 -14
- package/src/continuity_sources.py +103 -0
- package/src/email_memory_bridge.py +86 -0
- package/src/evidence_ledger.py +1042 -0
- package/src/local_context/__init__.py +2 -0
- package/src/local_context/api.py +804 -30
- package/src/local_context/db.py +51 -0
- package/src/local_context/extractors.py +68 -8
- package/src/local_context/health.py +242 -0
- package/src/local_context/usage_events.py +448 -0
- package/src/mcp_live_audit.py +476 -0
- package/src/mcp_required_tools.py +2 -0
- package/src/mcp_write_queue.py +354 -0
- package/src/memory_observation_processor.py +277 -0
- package/src/pre_answer_router.py +1451 -0
- package/src/pre_answer_runtime.py +100 -0
- package/src/runtime_versioning.py +2 -0
- package/src/saved_not_used_audit.py +917 -0
- package/src/server.py +223 -4
- package/src/tools_sessions.py +326 -71
- package/src/transcript_coverage.py +148 -0
- package/tool-enforcement-map.json +133 -7
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "nexo-brain",
|
|
3
|
-
"version": "7.21.0",
|
|
3
|
+
"version": "7.23.0",
|
|
4
4
|
"description": "Local cognitive runtime for Claude Code \u2014 persistent memory, overnight learning, doctor diagnostics, personal scripts, recovery-aware jobs, startup preflight, and optional dashboard/power helper.",
|
|
5
5
|
"author": {
|
|
6
6
|
"name": "NEXO Brain",
|
package/README.md
CHANGED
|
@@ -18,7 +18,11 @@
|
|
|
18
18
|
|
|
19
19
|
[Watch the overview video](https://nexo-brain.com/watch/) · [Watch on YouTube](https://www.youtube.com/watch?v=i2lkGhKyVqI) · [Open the infographic](https://nexo-brain.com/assets/nexo-brain-infographic-v5.png)
|
|
20
20
|
|
|
21
|
-
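Version `7.21.0` is the current packaged-runtime line. Minor release over v7.20.25 - MCP now starts through a thin compatibility adapter backed by one resident local Runtime Service, reducing duplicate Brain processes and SQLite contention across Claude Code, Codex, Claude Desktop, and NEXO Desktop.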
|
|
21
|
+
Version `7.23.0` is the current packaged-runtime line. Minor release over v7.22.0 - pre-answer routing now consults continuity evidence before visible replies, Memory Observations queue processing converges through a bounded processor, and audits expose saved-but-not-used stores, automation drift, MCP live/catalog gaps, artifact location and transcript coverage.
|
|
22
|
+
|
|
23
|
+
Previously in `7.22.0`: minor release over v7.21.0 - heartbeat stays fast in Desktop-managed sessions, MCP writes can be accepted through a durable file-backed queue before SQLite commit, Brain exposes compliance state for Desktop gates, and Local Context adds Entity Dossier for open-domain local evidence aggregation.
|
|
24
|
+
|
|
25
|
+
Previously in `7.21.0`: minor release over v7.20.25 - MCP now starts through a thin compatibility adapter backed by one resident local Runtime Service, reducing duplicate Brain processes and SQLite contention across Claude Code, Codex, Claude Desktop, and NEXO Desktop.
|
|
22
26
|
|
|
23
27
|
Previously in `7.20.25`: patch release over v7.20.24 — Local Context now uses the pinned local BGE embedding model when available, automatically refreshes old hash embeddings, prioritizes known documents before lower-value files, and treats the Desktop-owned Qwen local-presence model as optional in standalone Brain installs.
|
|
24
28
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "nexo-brain",
|
|
3
|
-
"version": "7.21.0",
|
|
3
|
+
"version": "7.23.0",
|
|
4
4
|
"mcpName": "io.github.wazionapps/nexo",
|
|
5
5
|
"description": "NEXO Brain — Shared brain for AI agents. Persistent memory, semantic RAG, natural forgetting, metacognitive guard, trust scoring, 150+ MCP tools. Works with Claude Code, Codex, Claude Desktop & any MCP client. 100% local, free.",
|
|
6
6
|
"homepage": "https://nexo-brain.com",
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
"""Project/artifact locator helpers with Project Atlas as authority."""
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Callable, Iterable
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
FallbackSearch = Callable[[str, int], Iterable[dict]]
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def load_project_atlas(path: str | Path) -> dict:
    """Read the Project Atlas JSON file at *path* and return it as a dict.

    A leading ``~`` in *path* is expanded. Any error while building the path,
    reading the file or decoding the JSON yields ``{}``; so does a document
    whose top-level value is not a JSON object.
    """
    try:
        raw_text = Path(path).expanduser().read_text(encoding="utf-8")
        decoded = json.loads(raw_text)
    except Exception:
        # Best effort: a missing or malformed atlas is treated as empty.
        return {}
    if isinstance(decoded, dict):
        return decoded
    return {}
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _projects(atlas: dict) -> dict:
|
|
22
|
+
if not isinstance(atlas, dict):
|
|
23
|
+
return {}
|
|
24
|
+
if isinstance(atlas.get("projects"), dict):
|
|
25
|
+
return atlas["projects"]
|
|
26
|
+
return atlas
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def resolve_project(atlas: dict, query: str) -> dict | None:
    """Find the atlas project that best matches *query*.

    Matching is case-insensitive and the query is stripped first. Two tiers
    are considered, each in atlas iteration order:

    1. an entry whose key or one of its aliases equals the query;
    2. an entry whose "key aliases description" text contains the query.

    An exact match anywhere in the atlas wins over an earlier substring match,
    so a short query such as ``"app"`` no longer resolves to ``"webapp"`` when
    a project literally named ``"app"`` exists.

    Returns a new dict made of the entry's fields plus ``"key"`` (a ``"key"``
    field already present in the entry overrides the added one), or ``None``
    when the query is empty or nothing matches.
    """
    clean_query = str(query or "").strip().lower()
    if not clean_query:
        return None
    substring_hit: dict | None = None
    for key, entry in _projects(atlas).items():
        if not isinstance(entry, dict):
            continue
        declared = entry.get("aliases") or []
        if isinstance(declared, str):
            # A lone string is one alias, not a sequence of characters.
            declared = [declared]
        try:
            names = [str(key), *(str(alias) for alias in declared)]
        except TypeError:
            # Non-iterable aliases value (e.g. a number): ignore it.
            names = [str(key)]
        lowered = [name.lower() for name in names]
        if clean_query in lowered:
            return {"key": str(key), **entry}
        if substring_hit is None:
            description = str(entry.get("description") or "").lower()
            if clean_query in " ".join([*lowered, description]):
                # Remember the first loose match but keep looking for an exact one.
                substring_hit = {"key": str(key), **entry}
    return substring_hit
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def project_locations(project: dict | None) -> dict:
    """Return the ``"locations"`` mapping of a resolved project.

    Yields ``{}`` when *project* is empty/``None`` or when its ``"locations"``
    value is not a dict.
    """
    candidate = project.get("locations") if project else None
    if isinstance(candidate, dict):
        return candidate
    return {}
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def locate_artifact(
    *,
    atlas: dict,
    query: str,
    artifact_kind: str = "",
    fallback_search: FallbackSearch | None = None,
    limit: int = 5,
) -> dict:
    """Locate artifacts for *query*, preferring Project Atlas locations.

    The project is resolved from *atlas*; each of its ``locations`` whose name
    contains *artifact_kind* (or every location when *artifact_kind* is empty)
    becomes a match with confidence ``1.0``. Only when that produces no match
    and *fallback_search* is supplied is the fallback called with
    ``(query, limit)``; at most *limit* of its rows are considered and
    non-dict rows are skipped.

    Returns a dict with ``query``, ``artifact_kind``, ``project_key`` (empty
    string when no project resolved), ``matches`` and ``used_fallback``.
    ``used_fallback`` is True exactly when the fallback callable was invoked,
    including the case where the project resolved but none of its locations
    matched *artifact_kind*.
    """
    project = resolve_project(atlas, query)
    matches: list[dict] = [
        {
            "source": "project_atlas",
            "project_key": project["key"],
            "kind": str(name),
            "path": str(value),
            "confidence": 1.0,
        }
        for name, value in project_locations(project).items()
        if not artifact_kind or artifact_kind in str(name)
    ]

    used_fallback = False
    if not matches and fallback_search:
        used_fallback = True
        for row in list(fallback_search(query, limit))[:limit]:
            if not isinstance(row, dict):
                continue
            raw_score = row.get("confidence") or row.get("score") or 0.4
            try:
                confidence = float(raw_score)
            except (TypeError, ValueError):
                # A non-numeric score must not abort the whole lookup.
                confidence = 0.4
            matches.append({
                "source": str(row.get("source") or "fallback"),
                "project_key": str(row.get("project_key") or ""),
                "kind": str(row.get("kind") or artifact_kind or "artifact"),
                "path": str(row.get("path") or row.get("file") or ""),
                "confidence": confidence,
            })

    return {
        "query": query,
        "artifact_kind": artifact_kind,
        "project_key": project["key"] if project else "",
        "matches": matches,
        "used_fallback": used_fallback,
    }
|
|
@@ -0,0 +1,570 @@
|
|
|
1
|
+
"""Side-effect free supervisor for NEXO automations.
|
|
2
|
+
|
|
3
|
+
The supervisor reconciles the cron manifest, LaunchAgent inventory,
|
|
4
|
+
``cron_runs`` open rows and the cron-spool directory. It deliberately reports
|
|
5
|
+
instead of repairing so tests and future wiring can run without touching live
|
|
6
|
+
LaunchAgents or production cron state.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from dataclasses import asdict, dataclass, field
|
|
12
|
+
from datetime import datetime, timezone
|
|
13
|
+
import json
|
|
14
|
+
import os
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
import sqlite3
|
|
17
|
+
from typing import Any, Iterable, Mapping
|
|
18
|
+
|
|
19
|
+
try:
|
|
20
|
+
import paths
|
|
21
|
+
except Exception: # pragma: no cover - keeps ad-hoc imports usable.
|
|
22
|
+
paths = None # type: ignore[assignment]
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
DEFAULT_STUCK_AFTER_SECONDS = 60 * 60
|
|
26
|
+
DEFAULT_SPOOL_WARN_THRESHOLD = 0
|
|
27
|
+
TERMINAL_SEVERITIES = {"P0", "P1", "P2"}
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@dataclass(frozen=True)
class AutomationSupervisorConfig:
    """Immutable inputs for one supervisor audit.

    Every path may be ``None``; the corresponding loader then reports nothing
    for that source.
    """

    # SQLite database inspected for open ``cron_runs`` rows.
    nexo_db_path: Path | None = None
    # JSON cron manifest from which job contracts are built.
    manifest_path: Path | None = None
    # Directory scanned for ``*.json`` cron-spool files.
    cron_spool_dir: Path | None = None
    # Inventory of LaunchAgent labels; ``None`` skips the LaunchAgent check.
    launchagent_labels: frozenset[str] | None = None
    # Reference time for age calculations; ``None`` means the current UTC time.
    now: datetime | None = None
    # SLA applied when a manifest entry does not yield one.
    default_stuck_after_seconds: int = DEFAULT_STUCK_AFTER_SECONDS
    # A job with more spool files than this is flagged.
    spool_warn_threshold: int = DEFAULT_SPOOL_WARN_THRESHOLD
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
@dataclass(frozen=True)
class JobContract:
    """Immutable description of one manifest job as seen by the supervisor."""

    # Manifest identifier of the job.
    cron_id: str
    # LaunchAgent label expected in the supplied inventory.
    launchagent_label: str
    # Job flavour, e.g. "daemon", "scheduled" or "oneshot".
    run_type: str
    # Age in seconds an open run may reach before it counts as over SLA.
    sla_seconds: int
    # Recovery policy name taken from the manifest (may be empty).
    recovery_policy: str
    # Whether the manifest marks the job as idempotent.
    idempotent: bool
    # Whether an open cron_runs row is acceptable for this job.
    open_run_allowed: bool
    # String form of the manifest path the contract was read from.
    source: str
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
@dataclass(frozen=True)
class OpenRunClassification:
    """Verdict for a single open ``cron_runs`` row."""

    # Row id coerced to int, or None when it cannot be coerced.
    run_id: int | None
    # Job identifier stored on the row.
    cron_id: str
    # Raw started_at text from the row.
    started_at: str
    # Age of the run in seconds, or None when started_at could not be parsed.
    age_seconds: int | None
    # One of "running", "retryable", "stuck" or "abandoned".
    status: str
    # "OK" or a priority level such as "P1".
    severity: str
    # Human-readable explanation of the verdict.
    reason: str
    # Suggested next step for an operator.
    recovery_action: str
    # Supporting row/contract details.
    evidence: dict[str, Any] = field(default_factory=dict)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
@dataclass(frozen=True)
class LaunchAgentClassification:
    """Verdict on whether a job's expected LaunchAgent is in the inventory."""

    # Job identifier the LaunchAgent belongs to.
    cron_id: str
    # Label that was looked up in the supplied inventory.
    launchagent_label: str
    # "loaded" when the label is present, "missing" otherwise.
    status: str
    # "OK" or a priority level such as "P1".
    severity: str
    # Human-readable explanation of the verdict.
    reason: str
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
@dataclass(frozen=True)
class CronSpoolClassification:
    """Verdict for the cron-spool JSON files attributed to one job."""

    # Job identifier the spool files were attributed to.
    cron_id: str
    # Number of spool files found for the job.
    files: int
    # Path of the spool file with the smallest modification time.
    oldest_path: str
    # ISO-8601 UTC modification time of that file.
    oldest_mtime: str
    # "unreconciled" when flagged, "ok" otherwise.
    status: str
    # "OK" or a priority level such as "P1".
    severity: str
    # Human-readable explanation of the verdict.
    reason: str
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def default_config() -> AutomationSupervisorConfig:
    """Build the configuration used when the caller supplies none.

    When the project ``paths`` module was importable, the three locations come
    from its helpers, each wrapped so that a failing helper yields ``None``.
    Otherwise they fall back to a ``runtime/`` layout under ``NEXO_HOME``
    (default ``~/.nexo``).
    """
    nexo_home = Path(os.environ.get("NEXO_HOME", str(Path.home() / ".nexo")))
    if paths is None:
        runtime_root = nexo_home / "runtime"
        db_file = runtime_root / "data" / "nexo.db"
        manifest_file = runtime_root / "crons" / "manifest.json"
        spool_folder = runtime_root / "operations" / "cron-spool"
    else:
        db_file = _safe_path_call(paths.resolve_db_path)
        manifest_file = _safe_path_call(lambda: paths.crons_dir() / "manifest.json")
        spool_folder = _safe_path_call(lambda: paths.operations_dir() / "cron-spool")
    return AutomationSupervisorConfig(
        nexo_db_path=db_file,
        manifest_path=manifest_file,
        cron_spool_dir=spool_folder,
    )
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def audit_automation(config: AutomationSupervisorConfig | None = None) -> dict[str, Any]:
    """Run the full read-only audit and return a JSON-friendly report.

    Loads job contracts from the manifest, classifies open ``cron_runs`` rows,
    LaunchAgent presence and cron-spool files, then gathers every non-OK
    verdict into ``findings``. ``ok`` is False when any finding carries a
    severity listed in ``TERMINAL_SEVERITIES``.
    """
    settings = config or default_config()
    moment = _normalise_now(settings.now)

    job_contracts, skipped_jobs = load_job_contracts(
        settings.manifest_path,
        default_stuck_after_seconds=settings.default_stuck_after_seconds,
    )
    run_verdicts = classify_open_runs(
        settings.nexo_db_path,
        contracts=job_contracts,
        now=moment,
        default_stuck_after_seconds=settings.default_stuck_after_seconds,
    )
    agent_verdicts = classify_launchagents(job_contracts, settings.launchagent_labels)
    spool_verdicts = classify_cron_spool(
        settings.cron_spool_dir,
        contracts=job_contracts,
        now=moment,
        warn_threshold=settings.spool_warn_threshold,
    )
    issues = _collect_findings(run_verdicts, agent_verdicts, spool_verdicts)

    def count_at(level: str) -> int:
        # Number of findings reported at exactly this severity.
        return sum(1 for issue in issues if issue.get("severity") == level)

    has_terminal = any(issue.get("severity") in TERMINAL_SEVERITIES for issue in issues)
    summary = {
        "jobs": len(job_contracts),
        "open_runs": len(run_verdicts),
        "launchagents_checked": settings.launchagent_labels is not None,
        "cron_spool_jobs": len(spool_verdicts),
        "findings": len(issues),
        "p0": count_at("P0"),
        "p1": count_at("P1"),
        "p2": count_at("P2"),
        "excluded_jobs": sorted(skipped_jobs),
    }
    return {
        "ok": not has_terminal,
        "generated_at": moment.isoformat(),
        "jobs": [asdict(job) for job in job_contracts.values()],
        "open_runs": [asdict(verdict) for verdict in run_verdicts],
        "launchagents": [asdict(verdict) for verdict in agent_verdicts],
        "cron_spool": [asdict(verdict) for verdict in spool_verdicts],
        "findings": issues,
        "summary": summary,
    }
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def load_job_contracts(
    manifest_path: Path | None,
    *,
    default_stuck_after_seconds: int = DEFAULT_STUCK_AFTER_SECONDS,
) -> tuple[dict[str, JobContract], set[str]]:
    """Turn the cron manifest into job contracts.

    Returns ``(contracts, excluded)``: contracts keyed by cron id, and the set
    of cron ids skipped because ``_is_evolution`` matched them. Entries that
    are not mappings or have no ``id`` are ignored, and a manifest whose
    ``crons`` value is not a list yields two empty containers.
    """
    document = _load_json(manifest_path, default={"crons": []})
    raw_entries = document.get("crons") if isinstance(document, Mapping) else []
    job_contracts: dict[str, JobContract] = {}
    skipped: set[str] = set()
    if not isinstance(raw_entries, list):
        return job_contracts, skipped

    manifest_source = str(manifest_path or "")
    for raw in raw_entries:
        if not isinstance(raw, Mapping):
            continue
        job_id = str(raw.get("id") or "").strip()
        if not job_id:
            continue
        if _is_evolution(job_id):
            skipped.add(job_id)
            continue

        kind = str(raw.get("run_type") or _infer_run_type(raw)).strip() or "scheduled"
        keeps_row_open = bool(
            raw.get("open_run_allowed") or raw.get("allow_open_run") or kind == "daemon"
        )
        # The first truthy source wins: explicit SLA, legacy key, then inference.
        declared_sla = (
            raw.get("sla_seconds")
            or raw.get("stuck_after_seconds")
            or _infer_sla_seconds(raw, kind, default_stuck_after_seconds)
        )
        sla = _coerce_int(declared_sla, default_stuck_after_seconds)

        job_contracts[job_id] = JobContract(
            cron_id=job_id,
            launchagent_label=str(raw.get("launchagent_label") or f"com.nexo.{job_id}"),
            run_type=kind,
            sla_seconds=max(1, sla),  # never allow a zero or negative SLA
            recovery_policy=str(raw.get("recovery_policy") or ""),
            idempotent=bool(raw.get("idempotent")),
            open_run_allowed=keeps_row_open,
            source=manifest_source,
        )
    return job_contracts, skipped
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
def classify_open_runs(
    db_path: Path | None,
    *,
    contracts: Mapping[str, JobContract],
    now: datetime | None = None,
    default_stuck_after_seconds: int = DEFAULT_STUCK_AFTER_SECONDS,
) -> list[OpenRunClassification]:
    """Classify every open ``cron_runs`` row against its job contract.

    Rows whose cron id matches ``_is_evolution`` are skipped. For the rest the
    first applicable rule decides the verdict:

    * no contract for the cron id       -> abandoned / P1
    * unparsable ``started_at``         -> abandoned / P1
    * contract allows an open row       -> running / OK
    * age within the contract SLA       -> running / OK
    * over SLA and contract retryable   -> retryable / P1
    * over SLA otherwise                -> stuck / P1

    The result is sorted with P1 verdicts first, then by status, cron id and
    run id. ``default_stuck_after_seconds`` is accepted but not consulted by
    this function.
    """
    reference_time = _normalise_now(now)
    verdicts: list[OpenRunClassification] = []

    for record in _load_open_cron_rows(db_path):
        job_id = str(record.get("cron_id") or "")
        if _is_evolution(job_id):
            continue
        started_text = str(record.get("started_at") or "")
        started_dt = _parse_timestamp(started_text)
        age = None if started_dt is None else int((reference_time - started_dt).total_seconds())
        job = contracts.get(job_id)

        if job is None:
            status, severity = "abandoned", "P1"
            reason = "cron_runs row is open but cron_id is not declared in the non-Evolution manifest"
            action = "reconcile row manually before relaunching an unknown automation"
        elif age is None:
            status, severity = "abandoned", "P1"
            reason = "cron_runs row has an unparsable started_at timestamp"
            action = "inspect the row and close or rewrite it with a valid timestamp"
        elif job.open_run_allowed:
            status, severity = "running", "OK"
            reason = f"{job.run_type} job allows an open cron_runs row"
            action = "none"
        elif age <= job.sla_seconds:
            status, severity = "running", "OK"
            reason = f"open row is within SLA ({job.sla_seconds}s)"
            action = "wait for normal completion"
        elif _is_retryable(job):
            status, severity = "retryable", "P1"
            reason = f"SLA exceeded ({age}s > {job.sla_seconds}s) and recovery policy permits retry"
            action = f"close stale row, then retry via {job.recovery_policy or 'idempotent replay'}"
        else:
            status, severity = "stuck", "P1"
            reason = f"SLA exceeded ({age}s > {job.sla_seconds}s) without retry contract"
            action = "inspect process/logs before closing row or relaunching"

        verdicts.append(
            OpenRunClassification(
                run_id=_coerce_optional_int(record.get("id")),
                cron_id=job_id,
                started_at=started_text,
                age_seconds=age,
                status=status,
                severity=severity,
                reason=reason,
                recovery_action=action,
                evidence=_row_evidence(record, contract=job),
            )
        )

    verdicts.sort(key=lambda v: (v.severity != "P1", v.status, v.cron_id, v.run_id or 0))
    return verdicts
|
|
296
|
+
|
|
297
|
+
|
|
298
|
+
def classify_launchagents(
    contracts: Mapping[str, JobContract],
    launchagent_labels: frozenset[str] | set[str] | list[str] | tuple[str, ...] | None,
) -> list[LaunchAgentClassification]:
    """Check each contract's LaunchAgent label against the supplied inventory.

    ``None`` for *launchagent_labels* means no inventory was provided and the
    check is skipped (empty list). Otherwise every contract yields one
    verdict: ``loaded``/``OK`` when its label is in the inventory, else
    ``missing``/``P1``. P1 verdicts sort first, then by cron id.
    """
    if launchagent_labels is None:
        return []
    known_labels = {str(label) for label in launchagent_labels}
    outcome: list[LaunchAgentClassification] = []
    for job in contracts.values():
        present = job.launchagent_label in known_labels
        outcome.append(
            LaunchAgentClassification(
                cron_id=job.cron_id,
                launchagent_label=job.launchagent_label,
                status="loaded" if present else "missing",
                severity="OK" if present else "P1",
                reason=(
                    "expected non-Evolution LaunchAgent is present in supplied inventory"
                    if present
                    else "expected non-Evolution LaunchAgent is absent from supplied inventory"
                ),
            )
        )
    outcome.sort(key=lambda verdict: (verdict.severity != "P1", verdict.cron_id))
    return outcome
|
|
328
|
+
|
|
329
|
+
|
|
330
|
+
def classify_cron_spool(
    spool_dir: Path | None,
    *,
    contracts: Mapping[str, JobContract],
    now: datetime | None = None,
    warn_threshold: int = DEFAULT_SPOOL_WARN_THRESHOLD,
) -> list[CronSpoolClassification]:
    """Group ``*.json`` files in the cron-spool directory by job and flag backlogs.

    Each file is attributed to a cron id via ``_spool_cron_id``; files whose
    cron id matches ``_is_evolution`` are ignored. A job with more than
    *warn_threshold* files is reported as ``unreconciled``/``P1``, otherwise
    ``ok``/``OK``. P1 verdicts sort first, then by cron id.

    Every file is stat'ed once, and a file that disappears or becomes
    unreadable between listing and stat is skipped instead of aborting the
    audit. *now* is accepted for signature compatibility and is not used.
    """
    if spool_dir is None or not spool_dir.exists():
        return []

    # cron id -> [(mtime, path), ...] in sorted path order.
    grouped: dict[str, list[tuple[float, Path]]] = {}
    for path in sorted(spool_dir.glob("*.json")):
        try:
            if not path.is_file():
                continue
            mtime = path.stat().st_mtime
        except OSError:
            # The spool is live; a consumer may have removed the file already.
            continue
        cron_id = _spool_cron_id(path, contracts)
        if _is_evolution(cron_id):
            continue
        grouped.setdefault(cron_id, []).append((mtime, path))

    results: list[CronSpoolClassification] = []
    for cron_id, stamped in grouped.items():
        # min() keeps the first entry on ties, i.e. the lowest sorted path.
        oldest_mtime, oldest = min(stamped, key=lambda pair: pair[0])
        backlog = len(stamped)
        flagged = backlog > warn_threshold
        results.append(
            CronSpoolClassification(
                cron_id=cron_id,
                files=backlog,
                oldest_path=str(oldest),
                oldest_mtime=datetime.fromtimestamp(oldest_mtime, tz=timezone.utc).isoformat(),
                status="unreconciled" if flagged else "ok",
                severity="P1" if flagged else "OK",
                reason=(
                    f"{backlog} cron-spool JSON file(s) waiting for reconciliation"
                    if flagged
                    else "cron-spool count is within threshold"
                ),
            )
        )
    return sorted(results, key=lambda item: (item.severity != "P1", item.cron_id))
|
|
370
|
+
|
|
371
|
+
|
|
372
|
+
def format_markdown(report: Mapping[str, Any]) -> str:
    """Render an audit report as two Markdown tables (summary and findings).

    *report* is expected to have the shape returned by ``audit_automation``,
    but the renderer is tolerant: a missing or non-mapping ``summary`` is
    treated as empty, a missing or non-list ``findings`` as no findings, and
    finding items that are not mappings are skipped. When no finding remains,
    a single placeholder row is emitted.
    """
    is_mapping = isinstance(report, Mapping)
    raw_summary = report.get("summary") if is_mapping else None
    summary = raw_summary if isinstance(raw_summary, Mapping) else {}
    raw_findings = report.get("findings") if is_mapping else None
    findings = (
        [item for item in raw_findings if isinstance(item, Mapping)]
        if isinstance(raw_findings, (list, tuple))
        else []
    )

    excluded = summary.get("excluded_jobs") or []
    if isinstance(excluded, str):
        excluded = [excluded]
    elif not isinstance(excluded, (list, tuple, set, frozenset)):
        excluded = []
    # Coerce to str so a stray non-string id cannot break the join.
    excluded_text = ", ".join(str(name) for name in excluded) or "si"

    lines = [
        "### G13 Automation supervisor sin Evolution",
        "",
        "| Area | Resultado |",
        "|---|---|",
        f"| Jobs no Evolution | {summary.get('jobs', 0)} |",
        f"| Open runs clasificadas | {summary.get('open_runs', 0)} |",
        f"| Cron-spool jobs con JSON | {summary.get('cron_spool_jobs', 0)} |",
        f"| Hallazgos P1 | {summary.get('p1', 0)} |",
        f"| Evolution excluido | {excluded_text} |",
    ]
    lines.extend(["", "| Hallazgo | Severidad | Razon |", "|---|---|---|"])
    for item in findings:
        lines.append(
            "| {kind}:{key} | {severity} | {reason} |".format(
                kind=_md(item.get("kind")),
                key=_md(item.get("key")),
                severity=_md(item.get("severity")),
                reason=_md(item.get("reason")),
            )
        )
    if not findings:
        lines.append("| ninguno | OK | Sin open rows/spool/LaunchAgents pendientes en fixtures |")
    return "\n".join(lines)
|
|
399
|
+
|
|
400
|
+
|
|
401
|
+
def _collect_findings(
|
|
402
|
+
open_runs: Iterable[OpenRunClassification],
|
|
403
|
+
launchagents: Iterable[LaunchAgentClassification],
|
|
404
|
+
cron_spool: Iterable[CronSpoolClassification],
|
|
405
|
+
) -> list[dict[str, Any]]:
|
|
406
|
+
findings: list[dict[str, Any]] = []
|
|
407
|
+
for item in open_runs:
|
|
408
|
+
if item.severity != "OK":
|
|
409
|
+
findings.append({"kind": "open_run", "key": f"{item.cron_id}:{item.run_id}", **asdict(item)})
|
|
410
|
+
for item in launchagents:
|
|
411
|
+
if item.severity != "OK":
|
|
412
|
+
findings.append({"kind": "launchagent", "key": item.launchagent_label, **asdict(item)})
|
|
413
|
+
for item in cron_spool:
|
|
414
|
+
if item.severity != "OK":
|
|
415
|
+
findings.append({"kind": "cron_spool", "key": item.cron_id, **asdict(item)})
|
|
416
|
+
severity_order = {"P0": 0, "P1": 1, "P2": 2, "OK": 3}
|
|
417
|
+
return sorted(findings, key=lambda item: (severity_order.get(str(item.get("severity")), 9), str(item.get("kind")), str(item.get("key"))))
|
|
418
|
+
|
|
419
|
+
|
|
420
|
+
def _load_open_cron_rows(db_path: Path | None) -> list[dict[str, Any]]:
|
|
421
|
+
if db_path is None or not db_path.is_file():
|
|
422
|
+
return []
|
|
423
|
+
conn = None
|
|
424
|
+
try:
|
|
425
|
+
conn = sqlite3.connect(str(db_path), timeout=2)
|
|
426
|
+
conn.row_factory = sqlite3.Row
|
|
427
|
+
table = conn.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='cron_runs'").fetchone()
|
|
428
|
+
if table is None:
|
|
429
|
+
return []
|
|
430
|
+
rows = conn.execute(
|
|
431
|
+
"""
|
|
432
|
+
SELECT id, cron_id, started_at, ended_at, exit_code, summary, error, duration_secs
|
|
433
|
+
FROM cron_runs
|
|
434
|
+
WHERE ended_at IS NULL OR exit_code IS NULL
|
|
435
|
+
ORDER BY started_at ASC, id ASC
|
|
436
|
+
"""
|
|
437
|
+
).fetchall()
|
|
438
|
+
return [dict(row) for row in rows]
|
|
439
|
+
except sqlite3.Error:
|
|
440
|
+
return []
|
|
441
|
+
finally:
|
|
442
|
+
if conn is not None:
|
|
443
|
+
conn.close()
|
|
444
|
+
|
|
445
|
+
|
|
446
|
+
def _spool_cron_id(path: Path, contracts: Mapping[str, JobContract]) -> str:
    """Work out which cron id a spool file belongs to.

    The first non-blank string found under ``cron_id``, ``job_id``, ``id`` or
    ``name`` in the file's JSON wins. Failing that, the file stem is matched
    against known cron ids (longest first) as an exact name or as a prefix
    followed by ``-`` or ``_``. If nothing matches, the stem itself is used.
    """
    payload = _load_json(path, default={})
    if isinstance(payload, Mapping):
        for field_name in ("cron_id", "job_id", "id", "name"):
            candidate = payload.get(field_name)
            if isinstance(candidate, str) and candidate.strip():
                return candidate.strip()

    file_stem = path.stem
    # Longest ids first so "sync-full" is preferred over "sync".
    for known_id in sorted(contracts, key=len, reverse=True):
        if file_stem == known_id:
            return known_id
        if file_stem.startswith((f"{known_id}-", f"{known_id}_")):
            return known_id
    return file_stem
|
|
458
|
+
|
|
459
|
+
|
|
460
|
+
def _row_evidence(row: Mapping[str, Any], *, contract: JobContract | None) -> dict[str, Any]:
|
|
461
|
+
evidence = {
|
|
462
|
+
"ended_at": row.get("ended_at"),
|
|
463
|
+
"exit_code": row.get("exit_code"),
|
|
464
|
+
"summary": row.get("summary") or "",
|
|
465
|
+
"error": row.get("error") or "",
|
|
466
|
+
}
|
|
467
|
+
if contract is not None:
|
|
468
|
+
evidence.update(
|
|
469
|
+
{
|
|
470
|
+
"run_type": contract.run_type,
|
|
471
|
+
"sla_seconds": contract.sla_seconds,
|
|
472
|
+
"recovery_policy": contract.recovery_policy,
|
|
473
|
+
"idempotent": contract.idempotent,
|
|
474
|
+
"launchagent_label": contract.launchagent_label,
|
|
475
|
+
}
|
|
476
|
+
)
|
|
477
|
+
return evidence
|
|
478
|
+
|
|
479
|
+
|
|
480
|
+
def _is_retryable(contract: JobContract) -> bool:
|
|
481
|
+
policy = contract.recovery_policy.lower()
|
|
482
|
+
return contract.idempotent or policy in {"catchup", "restart", "retry", "replay"}
|
|
483
|
+
|
|
484
|
+
|
|
485
|
+
def _infer_run_type(entry: Mapping[str, Any]) -> str:
|
|
486
|
+
if entry.get("daemon") or entry.get("open_run_allowed") or entry.get("allow_open_run"):
|
|
487
|
+
return "daemon"
|
|
488
|
+
if entry.get("schedule") or entry.get("schedule_strategy"):
|
|
489
|
+
return "scheduled"
|
|
490
|
+
if entry.get("interval_seconds"):
|
|
491
|
+
return "scheduled"
|
|
492
|
+
return "oneshot"
|
|
493
|
+
|
|
494
|
+
|
|
495
|
+
def _infer_sla_seconds(entry: Mapping[str, Any], run_type: str, default: int) -> int:
|
|
496
|
+
if run_type == "daemon":
|
|
497
|
+
return default
|
|
498
|
+
interval = _coerce_optional_int(entry.get("interval_seconds"))
|
|
499
|
+
if interval is not None:
|
|
500
|
+
return max(default, interval * 2)
|
|
501
|
+
return default
|
|
502
|
+
|
|
503
|
+
|
|
504
|
+
def _is_evolution(cron_id: str) -> bool:
|
|
505
|
+
return "evolution" in cron_id.lower()
|
|
506
|
+
|
|
507
|
+
|
|
508
|
+
def _load_json(path: Path | None, *, default: Any) -> Any:
|
|
509
|
+
if path is None or not path.is_file():
|
|
510
|
+
return default
|
|
511
|
+
try:
|
|
512
|
+
return json.loads(path.read_text(encoding="utf-8"))
|
|
513
|
+
except Exception:
|
|
514
|
+
return default
|
|
515
|
+
|
|
516
|
+
|
|
517
|
+
def _normalise_now(now: datetime | None) -> datetime:
|
|
518
|
+
current = now or datetime.now(timezone.utc)
|
|
519
|
+
if current.tzinfo is None:
|
|
520
|
+
return current.replace(tzinfo=timezone.utc)
|
|
521
|
+
return current.astimezone(timezone.utc)
|
|
522
|
+
|
|
523
|
+
|
|
524
|
+
def _parse_timestamp(value: str) -> datetime | None:
|
|
525
|
+
text = (value or "").strip()
|
|
526
|
+
if not text:
|
|
527
|
+
return None
|
|
528
|
+
if text.endswith("Z"):
|
|
529
|
+
text = f"{text[:-1]}+00:00"
|
|
530
|
+
for candidate in (text, text.replace(" ", "T")):
|
|
531
|
+
try:
|
|
532
|
+
parsed = datetime.fromisoformat(candidate)
|
|
533
|
+
if parsed.tzinfo is None:
|
|
534
|
+
return parsed.replace(tzinfo=timezone.utc)
|
|
535
|
+
return parsed.astimezone(timezone.utc)
|
|
536
|
+
except ValueError:
|
|
537
|
+
continue
|
|
538
|
+
for fmt in ("%Y-%m-%d %H:%M:%S", "%Y-%m-%d %H:%M:%S.%f"):
|
|
539
|
+
try:
|
|
540
|
+
return datetime.strptime(value, fmt).replace(tzinfo=timezone.utc)
|
|
541
|
+
except ValueError:
|
|
542
|
+
continue
|
|
543
|
+
return None
|
|
544
|
+
|
|
545
|
+
|
|
546
|
+
def _coerce_int(value: Any, default: int) -> int:
    """Coerce *value* to int, falling back to *default* when that fails."""
    coerced = _coerce_optional_int(value)
    if coerced is None:
        return default
    return coerced
|
|
549
|
+
|
|
550
|
+
|
|
551
|
+
def _coerce_optional_int(value: Any) -> int | None:
|
|
552
|
+
if value is None:
|
|
553
|
+
return None
|
|
554
|
+
try:
|
|
555
|
+
return int(value)
|
|
556
|
+
except (TypeError, ValueError):
|
|
557
|
+
return None
|
|
558
|
+
|
|
559
|
+
|
|
560
|
+
def _safe_path_call(fn: Any) -> Path | None:
|
|
561
|
+
try:
|
|
562
|
+
value = fn()
|
|
563
|
+
return Path(value) if value is not None else None
|
|
564
|
+
except Exception:
|
|
565
|
+
return None
|
|
566
|
+
|
|
567
|
+
|
|
568
|
+
def _md(value: Any) -> str:
|
|
569
|
+
text = "" if value is None else str(value)
|
|
570
|
+
return text.replace("|", "\\|").replace("\n", " ")
|