nexo-brain 7.33.0 → 7.34.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +1 -1
- package/README.md +1 -1
- package/package.json +1 -1
- package/src/db/__init__.py +2 -1
- package/src/db/_episodic.py +32 -0
- package/src/db/_protocol.py +35 -0
- package/src/db/_schema.py +105 -0
- package/src/local_context/usage_events.py +2 -0
- package/src/message_batch_preview.py +290 -0
- package/src/plugins/protocol.py +195 -3
- package/src/ppr.py +473 -0
- package/src/pre_answer_router.py +239 -3
- package/src/pre_answer_runtime.py +156 -1
- package/src/resolution_cache.py +1119 -0
- package/src/scripts/deep-sleep/apply_findings.py +86 -9
- package/src/scripts/deep-sleep/rewrite.py +625 -0
- package/src/scripts/nexo-deep-sleep.sh +10 -0
- package/src/scripts/nexo-morning-agent.py +43 -2
- package/src/self_error_detector.py +414 -0
- package/src/semantic_layers.py +30 -3
- package/templates/core-prompts/morning-agent.md +3 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "nexo-brain",
|
|
3
|
-
"version": "7.33.0",
|
|
3
|
+
"version": "7.34.0",
|
|
4
4
|
"description": "Local cognitive runtime for Claude Code \u2014 persistent memory, overnight learning, doctor diagnostics, personal scripts, recovery-aware jobs, startup preflight, and optional dashboard/power helper.",
|
|
5
5
|
"author": {
|
|
6
6
|
"name": "NEXO Brain",
|
package/README.md
CHANGED
|
@@ -18,7 +18,7 @@
|
|
|
18
18
|
|
|
19
19
|
[Watch the overview video](https://nexo-brain.com/watch/) · [Watch on YouTube](https://www.youtube.com/watch?v=i2lkGhKyVqI) · [Open the infographic](https://nexo-brain.com/assets/nexo-brain-infographic-v5.png)
|
|
20
20
|
|
|
21
|
-
Version `7.
|
|
21
|
+
Version `7.34.0` is the current packaged-runtime line. Minor release - Cognitive OS Ola 2: a working-memory `resolution_cache` fast-path avoids re-resolving what was just resolved (never-stale, fail-closed: content-snapshot + global watermark + 15-min TTL, repo-map for code), a later action that reveals a prior self-error auto-captures a learning + prevention, the associative graph (Personalized PageRank) connects the dots multi-hop over the KG at answer time (anti-hub, fail-open, per-process cache), Deep Sleep gains a nightly phase that safely merges duplicate learnings (reversible, zero hard-delete, fail-closed backup, daily cap), and a reproducible memory-recall eval bank (recall@k/MRR) lands with a baseline. Builds on v7.33.0 (semantic recall + graph-at-answer + reliability).
|
|
22
22
|
|
|
23
23
|
Previously in `7.31.9`: patch release over v7.31.8 - UI release closeout now has to prove the original reported symptom was reopened with observable evidence before claiming the release is ready.
|
|
24
24
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "nexo-brain",
|
|
3
|
-
"version": "7.33.0",
|
|
3
|
+
"version": "7.34.0",
|
|
4
4
|
"mcpName": "io.github.wazionapps/nexo",
|
|
5
5
|
"description": "NEXO Brain — Shared brain for AI agents. Persistent memory, semantic RAG, natural forgetting, metacognitive guard, trust scoring, 150+ MCP tools. Works with Claude Code, Codex, Claude Desktop & any MCP client. 100% local, free.",
|
|
6
6
|
"homepage": "https://nexo-brain.com",
|
package/src/db/__init__.py
CHANGED
|
@@ -168,7 +168,7 @@ from db._entities import (
|
|
|
168
168
|
# Episodic memory
|
|
169
169
|
from db._episodic import (
|
|
170
170
|
cleanup_old_changes, change_log_retention_days, change_log_retention_policy,
|
|
171
|
-
log_change, search_changes, update_change_commit, auto_resolve_followups,
|
|
171
|
+
log_change, search_changes, get_change_watermark, update_change_commit, auto_resolve_followups,
|
|
172
172
|
cleanup_old_decisions, log_decision, update_decision_outcome,
|
|
173
173
|
get_memory_review_queue, find_decisions_by_context_ref, search_decisions,
|
|
174
174
|
cleanup_old_diaries, write_session_diary,
|
|
@@ -197,6 +197,7 @@ from db._protocol import (
|
|
|
197
197
|
VALID_TASK_TYPES,
|
|
198
198
|
VALID_CLOSE_OUTCOMES,
|
|
199
199
|
create_protocol_task, get_protocol_task, close_protocol_task,
|
|
200
|
+
list_recent_closed_tasks,
|
|
200
201
|
set_protocol_task_guard_acknowledged,
|
|
201
202
|
create_protocol_debt, resolve_protocol_debts, list_protocol_debts,
|
|
202
203
|
record_session_correction_requirement, list_session_correction_requirements,
|
package/src/db/_episodic.py
CHANGED
|
@@ -93,6 +93,38 @@ def search_changes(query: str = '', files: str = '', days: int = 30) -> list[dic
|
|
|
93
93
|
return [dict(r) for r in rows]
|
|
94
94
|
|
|
95
95
|
|
|
96
|
+
def get_change_watermark(sid: str | None = None) -> int:
    """Return the highest ``change_log.id`` as a cheap "did anything change?" marker.

    The value is ``MAX(id)`` over the ``change_log`` table, optionally
    restricted to rows whose ``session_id`` equals ``sid``. According to the
    original author's notes it is consumed by the resolution cache as an
    invalidation signal: a cached answer is only trusted while this number
    has not moved.

    Args:
        sid: Optional session id. When truthy, only that session's rows are
            considered; otherwise the whole table is.

    Returns:
        The maximum id as an ``int``, or ``0`` when the query fails, the
        table has no matching rows, or the stored value is not convertible
        to an integer.
    """
    try:
        db = get_db()
        if sid:
            cursor = db.execute(
                "SELECT MAX(id) FROM change_log WHERE session_id = ?", (str(sid),)
            )
        else:
            cursor = db.execute("SELECT MAX(id) FROM change_log")
        record = cursor.fetchone()
    except Exception:
        # Deliberate best-effort: an unavailable ledger is reported as 0
        # rather than propagating the database error to the caller.
        return 0

    # MAX() over zero rows yields a single row holding NULL.
    newest_id = record[0] if record else None
    if newest_id is None:
        return 0
    try:
        return int(newest_id)
    except (TypeError, ValueError):
        return 0
|
|
126
|
+
|
|
127
|
+
|
|
96
128
|
def auto_resolve_followups(change: dict) -> list[str]:
|
|
97
129
|
"""Cross-reference a change_log entry with open followups. Auto-completes matches.
|
|
98
130
|
|
package/src/db/_protocol.py
CHANGED
|
@@ -436,6 +436,41 @@ def close_protocol_task(
|
|
|
436
436
|
return get_protocol_task(task_id) or {}
|
|
437
437
|
|
|
438
438
|
|
|
439
|
+
def list_recent_closed_tasks(
    *,
    outcome: str = "done",
    exclude_task_id: str = "",
    limit: int = 200,
    within_days: int = 0,
) -> list[dict]:
    """List closed protocol tasks, newest closure first.

    Read-only query over ``protocol_tasks``. Rows must have a non-NULL
    ``closed_at`` and a ``status`` equal to ``outcome``. Per the original
    notes, the self-error detector compares a just-closed task against
    these earlier closures.

    Args:
        outcome: Status value to match; blank input falls back to ``"done"``.
        exclude_task_id: When non-empty, rows with this ``task_id`` (after
            stripping whitespace) are left out.
        limit: Maximum number of rows; values below 1 are raised to 1.
        within_days: When positive, only rows closed within that many days
            of the database's ``now`` are returned.

    Returns:
        One ``dict`` per matching row, ordered by ``closed_at`` descending.
    """
    conn = get_db()

    conditions = ["status = ?", "closed_at IS NOT NULL"]
    bindings: list[object] = [str(outcome).strip() or "done"]

    if exclude_task_id:
        conditions += ["task_id != ?"]
        bindings += [exclude_task_id.strip()]

    if within_days and within_days > 0:
        # SQLite modifier syntax: datetime('now', '-7 days').
        conditions += ["closed_at >= datetime('now', ?)"]
        bindings += [f"-{int(within_days)} days"]

    where = " AND ".join(conditions)
    bindings.append(max(1, int(limit)))

    # Only fixed clause fragments are interpolated; every value is bound.
    cursor = conn.execute(
        f"""SELECT * FROM protocol_tasks
            WHERE {where}
            ORDER BY closed_at DESC
            LIMIT ?""",
        tuple(bindings),
    )
    return [dict(record) for record in cursor.fetchall()]
|
|
472
|
+
|
|
473
|
+
|
|
439
474
|
def create_protocol_debt(
|
|
440
475
|
session_id: str,
|
|
441
476
|
debt_type: str,
|
package/src/db/_schema.py
CHANGED
|
@@ -3213,6 +3213,108 @@ def _m83_observation_embeddings(conn):
|
|
|
3213
3213
|
conn.commit()
|
|
3214
3214
|
|
|
3215
3215
|
|
|
3216
|
+
def _m85_eval_runs(conn):
    """Create the ``eval_runs`` table and its indexes if they do not exist.

    The table holds one row per (run, metric) so results can be queried as a
    time series. ``ola1_enabled`` and ``model_warm`` are integer flags; per
    the original notes, ``model_warm`` separates numbers produced with the
    real embedding model (1) from the deterministic offline fallback (0).

    Every statement uses ``IF NOT EXISTS``, so running the migration again
    is harmless. The connection is committed before returning.
    """
    conn.execute(
        """
        CREATE TABLE IF NOT EXISTS eval_runs (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            suite TEXT NOT NULL DEFAULT '',
            case_set_id TEXT DEFAULT '',
            case_set_version TEXT DEFAULT '',
            fixture_hash TEXT DEFAULT '',
            metric TEXT NOT NULL DEFAULT '',
            value REAL NOT NULL DEFAULT 0.0,
            ola1_enabled INTEGER NOT NULL DEFAULT 1,
            model_warm INTEGER NOT NULL DEFAULT 0,
            created_at TEXT DEFAULT (datetime('now'))
        )
        """
    )
    index_statements = (
        "CREATE INDEX IF NOT EXISTS idx_eval_runs_suite ON eval_runs(suite)",
        "CREATE INDEX IF NOT EXISTS idx_eval_runs_metric ON eval_runs(suite, metric)",
        "CREATE INDEX IF NOT EXISTS idx_eval_runs_created ON eval_runs(created_at)",
    )
    for statement in index_statements:
        conn.execute(statement)
    conn.commit()
|
|
3249
|
+
|
|
3250
|
+
|
|
3251
|
+
def _m86_resolution_cache(conn):
    """Create the ``resolution_cache`` table and its lookup indexes.

    Each row is keyed by ``cache_key`` and stores a serialized result
    (``result_json``) together with the bookkeeping columns used to decide
    whether it may still be served: ``expires_at``, ``status``,
    ``source_fingerprint`` and ``change_watermark``. Per the original notes
    the table is non-authoritative (canonical facts live elsewhere) and the
    freshness decision itself is made in the read path
    (``resolution_cache.is_valid``), not here.

    CHECK constraints restrict ``kind`` to ``route``/``repo_map`` and
    ``status`` to ``fresh``/``stale``/``expired``/``invalid``. The
    connection is committed before returning.
    """
    conn.execute(
        """
        CREATE TABLE IF NOT EXISTS resolution_cache (
            cache_key TEXT PRIMARY KEY,
            kind TEXT NOT NULL DEFAULT 'route',
            intent TEXT NOT NULL DEFAULT '',
            area TEXT NOT NULL DEFAULT '',
            sid TEXT NOT NULL DEFAULT '',
            result_json TEXT NOT NULL,
            source_fingerprint TEXT NOT NULL,
            source_refs_json TEXT NOT NULL DEFAULT '[]',
            change_watermark INTEGER NOT NULL DEFAULT 0,
            status TEXT NOT NULL DEFAULT 'fresh',
            policy_version TEXT NOT NULL DEFAULT '',
            resolved_at REAL NOT NULL,
            expires_at REAL NOT NULL DEFAULT 0,
            hit_count INTEGER NOT NULL DEFAULT 0,
            CHECK(kind IN ('route', 'repo_map')),
            CHECK(status IN ('fresh', 'stale', 'expired', 'invalid'))
        )
        """
    )
    # (index name, indexed columns), created in this order.
    index_specs = (
        ("idx_resolution_cache_status_exp", "status, expires_at"),
        ("idx_resolution_cache_kind", "kind, sid"),
        ("idx_resolution_cache_fingerprint", "source_fingerprint"),
    )
    for index_name, columns in index_specs:
        _migrate_add_index(conn, index_name, "resolution_cache", columns)
    conn.commit()
|
|
3294
|
+
|
|
3295
|
+
|
|
3296
|
+
def _m87_resolution_cache_content_snapshot(conn):
    """Add the ``content_snapshot_json`` column to ``resolution_cache``.

    The column is declared ``TEXT NOT NULL DEFAULT '{}'``. Per the original
    notes it stores an explicit ``{ref: version}`` map captured from the real
    rows at write time, which the read path re-checks row by row. This covers
    the case where the aggregate ``source_fingerprint`` was inert because the
    router's source-name refs (``followups:``) did not match the canonical
    prefixes (``followup:``) used for versioning.

    The ALTER is delegated to ``_migrate_add_column`` (described by the
    original author as idempotent and non-destructive), after which the
    connection is committed.
    """
    table_name = "resolution_cache"
    column_name = "content_snapshot_json"
    column_ddl = "TEXT NOT NULL DEFAULT '{}'"
    _migrate_add_column(conn, table_name, column_name, column_ddl)
    conn.commit()
|
|
3316
|
+
|
|
3317
|
+
|
|
3216
3318
|
MIGRATIONS = [
|
|
3217
3319
|
(1, "learnings_columns", _m1_learnings_columns),
|
|
3218
3320
|
(2, "followups_reasoning", _m2_followups_reasoning),
|
|
@@ -3297,6 +3399,9 @@ MIGRATIONS = [
|
|
|
3297
3399
|
(81, "core_rules_product_metadata", _m81_core_rules_product_metadata),
|
|
3298
3400
|
(82, "confidence_checks", _m82_confidence_checks),
|
|
3299
3401
|
(83, "observation_embeddings", _m83_observation_embeddings),
|
|
3402
|
+
(85, "eval_runs", _m85_eval_runs),
|
|
3403
|
+
(86, "resolution_cache", _m86_resolution_cache),
|
|
3404
|
+
(87, "resolution_cache_content_snapshot", _m87_resolution_cache_content_snapshot),
|
|
3300
3405
|
]
|
|
3301
3406
|
|
|
3302
3407
|
|
|
@@ -352,6 +352,7 @@ def record_router_usage(
|
|
|
352
352
|
elapsed_ms: int | None = None,
|
|
353
353
|
deadline_ms: int | None = None,
|
|
354
354
|
used_before_response: bool = True,
|
|
355
|
+
cache_hit: bool = False,
|
|
355
356
|
db_path: str | os.PathLike[str] | None = None,
|
|
356
357
|
) -> dict[str, Any]:
|
|
357
358
|
evidence_refs = router_payload.get("evidence_refs") or []
|
|
@@ -374,6 +375,7 @@ def record_router_usage(
|
|
|
374
375
|
"escalated_from": router_payload.get("escalated_from") or budget_policy.get("escalated_from") or "",
|
|
375
376
|
"escalated_to": router_payload.get("escalated_to") or budget_policy.get("escalated_to") or "",
|
|
376
377
|
"route_cache_key": budget_policy.get("route_cache_key") or "",
|
|
378
|
+
"cache_hit": bool(cache_hit or router_payload.get("cache_hit")),
|
|
377
379
|
"max_sources": budget_policy.get("max_sources") or 0,
|
|
378
380
|
"max_source_timeout_ms": budget_policy.get("max_source_timeout_ms") or 0,
|
|
379
381
|
"allowed_sources": budget_policy.get("allowed_sources") or [],
|
|
@@ -0,0 +1,290 @@
|
|
|
1
|
+
"""Build safe HTML previews before real WhatsApp/email batch sends.
|
|
2
|
+
|
|
3
|
+
This module is intentionally send-agnostic: it reads code/log/queue artifacts,
|
|
4
|
+
separates internal or test messages from deliverable candidates, renders a
|
|
5
|
+
sanitized HTML review document, and enforces a hard cap on real sends.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import argparse
|
|
11
|
+
import json
|
|
12
|
+
import re
|
|
13
|
+
from dataclasses import dataclass
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
from typing import Any, Iterable
|
|
16
|
+
|
|
17
|
+
from email_presentation import compose_html_document, text_to_html_fragment
|
|
18
|
+
from tools_email_guard import should_block_email_send
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
# Default cap on how many deliverable messages a single batch may include.
DEFAULT_REAL_SEND_LIMIT = 10
# Lowercase substrings; is_internal_or_test() searches for them in the
# lowercased channel, recipient, subject, body and serialized metadata.
INTERNAL_MARKERS = (
    "[internal]",
    "internal:",
    "nexo_internal",
    "solo interno",
    "nota interna",
    "mensaje interno",
    "test:",
    "[test]",
    "dry-run",
    "dry_run",
    "prueba",
)
# Regexes applied to the stripped recipient to spot test destinations.
TEST_RECIPIENT_PATTERNS = (
    # "example", "test" or "localhost" at the start or right after "@",
    # followed by a dot or the end of the string.
    re.compile(r"(^|@)(example|test|localhost)(\.|$)", re.I),
    # A "+test" tag ending at a word boundary.
    re.compile(r"\+test\b", re.I),
    # All-zero numbers and two fixed placeholder numbers.
    re.compile(r"^(?:0+|123456789|600000000)$"),
)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
@dataclass(frozen=True)
class PreviewMessage:
    """One candidate message extracted from an inspected artifact.

    Attributes:
        source: Where the message came from (a path, or ``path:index``).
        channel: Delivery channel label such as ``email`` or ``whatsapp``.
        recipient: Destination address or number; may be empty.
        body: Message text.
        subject: Optional subject line.
        metadata: Any extra fields carried by the source row, or ``None``.
    """

    source: str
    channel: str
    recipient: str
    body: str
    subject: str = ""
    metadata: dict[str, Any] | None = None

    @property
    def fingerprint(self) -> str:
        """Return a stable identity for duplicate detection.

        The digest covers the channel and recipient (trimmed and
        lower-cased), the trimmed subject, and the body with every run of
        whitespace collapsed to one space. Two messages differing only in
        those normalized aspects share a fingerprint.

        A SHA-256 hex digest is used instead of the builtin ``hash()``:
        string hashing is salted per interpreter process, so ``hash()``
        yields a different value on every run, which made the fingerprint
        written to the JSON summary impossible to compare between runs.
        """
        import hashlib  # local import keeps this block self-contained

        # \x1f (unit separator) keeps adjacent fields from running together.
        base = "\x1f".join([
            self.channel.strip().lower(),
            self.recipient.strip().lower(),
            self.subject.strip(),
            " ".join(self.body.split()),
        ])
        # surrogatepass: JSON input may decode to lone surrogates, which a
        # strict UTF-8 encode would reject.
        return hashlib.sha256(base.encode("utf-8", "surrogatepass")).hexdigest()
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
@dataclass(frozen=True)
class PreviewResult:
    """Outcome of classifying a batch of candidate messages.

    Attributes:
        deliverable: Messages considered safe to send, in input order.
        internal_or_test: Messages set aside as internal or test traffic.
        blocked: One ``{"source", "recipient", "reason"}`` dict per rejection.
        real_send_limit: Maximum number of deliverable messages to expose.
    """

    deliverable: list[PreviewMessage]
    internal_or_test: list[PreviewMessage]
    blocked: list[dict[str, str]]
    real_send_limit: int

    @property
    def capped_deliverable(self) -> list[PreviewMessage]:
        """The first ``real_send_limit`` deliverable messages."""
        cap = self.real_send_limit
        return self.deliverable[:cap]

    @property
    def over_limit_count(self) -> int:
        """How many deliverable messages fall beyond the cap (never negative)."""
        surplus = len(self.deliverable) - self.real_send_limit
        return surplus if surplus > 0 else 0

    def to_dict(self) -> dict[str, Any]:
        """Serialize counts and message lists into a JSON-friendly dict.

        Only the capped deliverable messages are listed under
        ``"deliverable"``; ``"deliverable_count"`` still reports the
        uncapped total.
        """
        summary: dict[str, Any] = {}
        summary["deliverable_count"] = len(self.deliverable)
        summary["capped_deliverable_count"] = len(self.capped_deliverable)
        summary["internal_or_test_count"] = len(self.internal_or_test)
        summary["blocked_count"] = len(self.blocked)
        summary["real_send_limit"] = self.real_send_limit
        summary["over_limit_count"] = self.over_limit_count
        summary["deliverable"] = [
            _message_to_dict(item) for item in self.capped_deliverable
        ]
        summary["internal_or_test"] = [
            _message_to_dict(item) for item in self.internal_or_test
        ]
        summary["blocked"] = self.blocked
        return summary
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def _message_to_dict(message: PreviewMessage) -> dict[str, Any]:
    """Flatten a message into a plain dict for the JSON summary.

    A missing (``None``) or empty metadata mapping is emitted as ``{}``, and
    the message's ``fingerprint`` is included alongside its fields.
    """
    payload: dict[str, Any] = {
        field: getattr(message, field)
        for field in ("source", "channel", "recipient", "subject", "body")
    }
    payload["metadata"] = message.metadata if message.metadata else {}
    payload["fingerprint"] = message.fingerprint
    return payload
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def read_messages(paths: Iterable[Path | str]) -> list[PreviewMessage]:
    """Load and parse every artifact in ``paths`` into preview messages.

    Files are read as UTF-8 with undecodable bytes replaced, then handed to
    ``_parse_artifact``. Results are concatenated in input order.

    Raises:
        FileNotFoundError: If an entry does not exist or is not a regular
            file; the message is the offending path.
    """
    collected: list[PreviewMessage] = []
    for entry in paths:
        artifact = Path(entry)
        if not (artifact.exists() and artifact.is_file()):
            raise FileNotFoundError(str(artifact))
        content = artifact.read_text(encoding="utf-8", errors="replace")
        collected += _parse_artifact(artifact, content)
    return collected
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def _parse_artifact(path: Path, text: str) -> list[PreviewMessage]:
    """Turn one artifact's text into preview messages.

    Dispatch is by file suffix (case-insensitive):

    * ``.jsonl``: each non-blank line is parsed as JSON.
    * ``.json``: a top-level list is used as the rows; a top-level object
      contributes its ``messages``, ``items`` or ``queue`` value (first
      truthy one), or is itself treated as a single row; any other JSON
      value yields no rows.
    * anything else: the whole stripped text becomes one ``log`` message
      with no recipient.

    Rows that are not JSON objects are skipped in both JSON branches.
    Previously only ``.json`` filtered them, so a scalar or array line in a
    ``.jsonl`` file raised ``AttributeError`` in ``_row_to_message``. A
    non-list value under ``messages``/``items``/``queue`` now also yields no
    rows instead of failing on non-iterable scalars.

    The index embedded in each message's ``source`` is the row's 1-based
    position before filtering.

    Raises:
        json.JSONDecodeError: If a JSON/JSONL artifact is malformed.
    """
    stripped = text.strip()
    if not stripped:
        return []

    suffix = path.suffix.lower()
    if suffix == ".jsonl":
        rows = [json.loads(line) for line in stripped.splitlines() if line.strip()]
    elif suffix == ".json":
        payload = json.loads(stripped)
        if isinstance(payload, list):
            rows = payload
        elif isinstance(payload, dict):
            rows = payload.get("messages") or payload.get("items") or payload.get("queue") or [payload]
            if not isinstance(rows, list):
                # Strings and objects never produced rows; scalars crashed.
                rows = []
        else:
            rows = []
    else:
        return [PreviewMessage(source=str(path), channel="log", recipient="", body=stripped)]

    return [
        _row_to_message(row, path, index)
        for index, row in enumerate(rows, start=1)
        if isinstance(row, dict)
    ]
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def _row_to_message(row: dict[str, Any], path: Path, index: int) -> PreviewMessage:
    """Build a ``PreviewMessage`` from one parsed JSON object.

    The recipient is the first truthy value among ``recipient``, ``to``,
    ``phone`` and ``email``; the body is the first truthy value among
    ``body``, ``message``, ``text`` and ``html``. The channel comes from
    ``channel`` or ``type``, falling back to ``_infer_channel(recipient)``,
    and finally to ``"unknown"`` if it is blank after trimming. All fields
    except the body-carrying keys are preserved as metadata.

    Args:
        row: The parsed JSON object.
        path: Artifact the row came from.
        index: Row position, recorded in ``source`` as ``path:index``.
    """
    body_keys = {"body", "message", "text", "html"}

    def first_truthy(*keys: str) -> Any:
        # Mirrors an ``a or b or ... or ""`` chain over row.get(key).
        for key in keys:
            candidate = row.get(key)
            if candidate:
                return candidate
        return ""

    recipient = str(first_truthy("recipient", "to", "phone", "email")).strip()
    body = str(first_truthy("body", "message", "text", "html")).strip()

    raw_channel = row.get("channel") or row.get("type") or _infer_channel(recipient)
    channel = str(raw_channel).strip().lower()
    subject = str(row.get("subject") or "").strip()

    extras = {key: value for key, value in row.items() if key not in body_keys}
    return PreviewMessage(
        source=f"{path}:{index}",
        channel=channel if channel else "unknown",
        recipient=recipient,
        subject=subject,
        body=body,
        metadata=extras,
    )
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def _infer_channel(recipient: str) -> str:
    """Guess the channel from the recipient's shape.

    Anything containing ``@`` is ``email``; any other non-empty value is
    ``whatsapp``; an empty recipient is ``unknown``.
    """
    if not recipient:
        return "unknown"
    return "email" if "@" in recipient else "whatsapp"
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def is_internal_or_test(message: PreviewMessage) -> bool:
    """Report whether a message looks like internal or test traffic.

    Two checks run in order:

    1. Any ``INTERNAL_MARKERS`` substring appears in the lowercased
       concatenation of channel, recipient, subject, body and the metadata
       serialized as key-sorted JSON.
    2. The trimmed recipient matches any ``TEST_RECIPIENT_PATTERNS`` regex.
    """
    searchable = " ".join((
        message.channel,
        message.recipient,
        message.subject,
        message.body,
        json.dumps(message.metadata or {}, ensure_ascii=False, sort_keys=True),
    )).lower()
    for marker in INTERNAL_MARKERS:
        if marker in searchable:
            return True

    destination = message.recipient.strip()
    for pattern in TEST_RECIPIENT_PATTERNS:
        if pattern.search(destination):
            return True
    return False
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
def build_preview(messages: Iterable[PreviewMessage], *, real_send_limit: int = DEFAULT_REAL_SEND_LIMIT) -> PreviewResult:
    """Sort candidate messages into deliverable, set-aside and blocked.

    Each message goes through these gates in order:

    1. Internal/test messages (``is_internal_or_test``) are set aside.
    2. Messages whose subject, body or metadata trip
       ``should_block_email_send`` are blocked with the reason it returns.
    3. Messages whose fingerprint was already accepted are blocked as
       ``"duplicate message"``.
    4. Everything else is deliverable.

    Args:
        messages: Candidates in the order they should be considered.
        real_send_limit: Cap recorded on the result; must be at least 1.

    Raises:
        ValueError: If ``real_send_limit`` is below 1.
    """
    if real_send_limit < 1:
        raise ValueError("real_send_limit must be >= 1")

    sendable: list[PreviewMessage] = []
    set_aside: list[PreviewMessage] = []
    rejected: list[dict[str, str]] = []
    accepted_fingerprints: set[str] = set()

    def reject(candidate: PreviewMessage, why: str) -> None:
        rejected.append({"source": candidate.source, "recipient": candidate.recipient, "reason": why})

    for candidate in messages:
        if is_internal_or_test(candidate):
            set_aside.append(candidate)
            continue

        scan_text = "\n".join([
            candidate.subject,
            candidate.body,
            json.dumps(candidate.metadata or {}, ensure_ascii=False),
        ])
        has_secret, why = should_block_email_send(scan_text)
        if has_secret:
            reject(candidate, why)
            continue

        digest = candidate.fingerprint
        if digest in accepted_fingerprints:
            reject(candidate, "duplicate message")
            continue

        accepted_fingerprints.add(digest)
        sendable.append(candidate)

    return PreviewResult(
        deliverable=sendable,
        internal_or_test=set_aside,
        blocked=rejected,
        real_send_limit=real_send_limit,
    )
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
def render_preview_html(result: PreviewResult) -> str:
    """Render the review document for a classified batch.

    The page has a summary table of counts followed by three sections: the
    capped deliverable messages, the internal/test messages, and the blocked
    entries. The assembled fragment is wrapped by ``compose_html_document``.
    User-facing labels are intentionally in Spanish.
    """
    summary_rows = (
        ("Enviables", len(result.deliverable)),
        ("Incluidos por límite", len(result.capped_deliverable)),
        ("Internos/tests separados", len(result.internal_or_test)),
        ("Bloqueados", len(result.blocked)),
        ("Exceso de lote", result.over_limit_count),
    )

    fragments = ["<h1>Previsualización de lote</h1>", "<table><tbody>"]
    for label, count in summary_rows:
        fragments.append(f"<tr><th>{label}</th><td>{count}</td></tr>")
    fragments.append("</tbody></table>")

    fragments.append("<h2>Candidatos a envío real</h2>")
    fragments.append(_render_message_list(result.capped_deliverable))
    fragments.append("<h2>Separados: internos/tests</h2>")
    fragments.append(_render_message_list(result.internal_or_test))
    fragments.append("<h2>Bloqueados</h2>")
    fragments.append(_render_blocked(result.blocked))

    return compose_html_document("".join(fragments))
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
def _render_message_list(messages: list[PreviewMessage]) -> str:
    """Render messages as an HTML table, or a placeholder when empty.

    Columns are channel, recipient, subject (falling back to the source when
    the subject is empty) and the first 1200 characters of the body. Every
    cell value goes through ``text_to_html_fragment`` (presumably the
    escaping step; confirm against that helper).
    """
    if not messages:
        return "<p>Ninguno.</p>"

    table_open = "<table><thead><tr><th>Canal</th><th>Destino</th><th>Asunto/fuente</th><th>Mensaje</th></tr></thead><tbody>"
    rendered: list[str] = []
    for entry in messages:
        # Long bodies are truncated so the preview stays reviewable.
        excerpt = text_to_html_fragment(entry.body[:1200])
        cells = (
            text_to_html_fragment(entry.channel),
            text_to_html_fragment(entry.recipient or '(sin destinatario)'),
            text_to_html_fragment(entry.subject or entry.source),
            excerpt,
        )
        rendered.append("<tr>" + "".join(f"<td>{cell}</td>" for cell in cells) + "</tr>")
    return table_open + "".join(rendered) + "</tbody></table>"
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
def _render_blocked(blocked: list[dict[str, str]]) -> str:
    """Render blocked entries as an HTML table, or a placeholder when empty.

    Each entry contributes its ``source``, ``recipient`` and ``reason``
    (missing keys render as empty text), each passed through
    ``text_to_html_fragment``.
    """
    if not blocked:
        return "<p>Ninguno.</p>"

    table_open = "<table><thead><tr><th>Fuente</th><th>Destino</th><th>Motivo</th></tr></thead><tbody>"
    rendered: list[str] = []
    for entry in blocked:
        cells = [
            text_to_html_fragment(entry.get(field, ''))
            for field in ("source", "recipient", "reason")
        ]
        rendered.append("<tr>" + "".join(f"<td>{cell}</td>" for cell in cells) + "</tr>")
    return table_open + "".join(rendered) + "</tbody></table>"
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
def main(argv: list[str] | None = None) -> int:
    """Command-line entry point: build a preview and write it to disk.

    Reads the given artifacts, classifies their messages, writes the HTML
    preview to ``--html-out`` and, when ``--json-out`` is given, the JSON
    summary as well. A one-line JSON report with the output paths and counts
    is printed to stdout.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read
            ``sys.argv``.

    Returns:
        ``0`` on success.
    """
    cli = argparse.ArgumentParser(description="Generate a safe HTML preview for WhatsApp/email batch candidates.")
    cli.add_argument("paths", nargs="+", help="JSON, JSONL, log, or text artifacts to inspect.")
    cli.add_argument("--limit", type=int, default=DEFAULT_REAL_SEND_LIMIT, help="Maximum real sends allowed in one batch.")
    cli.add_argument("--html-out", required=True, help="Destination HTML preview file.")
    cli.add_argument("--json-out", default="", help="Optional JSON summary destination.")
    options = cli.parse_args(argv)

    preview = build_preview(read_messages(options.paths), real_send_limit=options.limit)

    html_target = Path(options.html_out)
    html_target.write_text(render_preview_html(preview), encoding="utf-8")

    if options.json_out:
        summary_text = json.dumps(preview.to_dict(), indent=2, ensure_ascii=False)
        Path(options.json_out).write_text(summary_text, encoding="utf-8")

    report = {
        "html_out": options.html_out,
        "json_out": options.json_out,
        "deliverable": len(preview.deliverable),
        "capped_deliverable": len(preview.capped_deliverable),
        "internal_or_test": len(preview.internal_or_test),
        "blocked": len(preview.blocked),
        "over_limit": preview.over_limit_count,
    }
    print(json.dumps(report, ensure_ascii=False))
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
|