nexo-brain 7.34.0 → 7.36.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,763 @@
1
+ """Ola 4 — SCHEMA-ABSTRACTION: distill recurring incident archetypes into
2
+ reusable diagnostic templates that prime the COMPLETE diagnosis instantly.
3
+
4
+ Francisco's ask, in his words: when the SAME class of incident keeps coming back
5
+ (the canonical one being "cron exit 0 but the tool failed in SILENCE" — the
6
+ wrapper swallowed the error with ``|| echo {}`` and review badges stayed frozen
7
+ for three weeks), the system should not re-diagnose it from scratch every time.
8
+ It should already KNOW the diagnosis: "check the cron actually executed the tool
9
+ and did not swallow the error". That primed checklist is a *diagnostic template*.
10
+
11
+ This module sits on top of the existing failure-prevention substrate (~70% built):
12
+
13
+ * ``failure_prevention_cases`` — the per-incident ledger. Today ``failure_uid``
14
+ is hashed on the EXACT normalized symptom (``failure_prevention._stable_uid``),
15
+ so two differently-worded reports of the same archetype create separate UIDs
16
+ and separate frequency counters — there is NO clustering. That gap is exactly
17
+ what this module closes.
18
+ * ``self_error_detector`` — already the prototype of the silent-failure /
19
+ "shipped but a step was missing" archetype, and the family we start with.
20
+ * ``learnings`` with ``source_authority='code_test_evidence'`` — the learnings
21
+ the self-error detector fires.
22
+
23
+ What this module adds, and ONLY this (narrow slice, precision-first):
24
+
25
+ 1. CLUSTERING — group incidents by symptom similarity (NOT exact-hash), reusing
26
+ the resolver's own ``candidate_similarity`` math so we stay in lockstep with
27
+ dedup/merge thresholds used everywhere else.
28
+ 2. DISTILLATION — when a cluster of the SAME archetype reaches the recurrence
29
+ threshold (``MIN_CLUSTER_SIZE`` distinct incidents, high confidence), mint a
30
+ diagnostic template: {archetype, symptom pattern, complete diagnosis steps,
31
+ prevention}. Idempotent: deduped by a stable ``archetype_key``.
32
+ 3. (injection lives in ``db/_hot_context.build_pre_action_context``; this module
33
+ exposes ``match_templates_for_action`` which that injection point calls.)
34
+
35
+ Anti-noise contract — PRECISION OVER RECALL (Francisco hates spurious templates)
36
+ --------------------------------------------------------------------------------
37
+ A spurious template is strictly worse than none. Therefore:
38
+ * A template is minted ONLY from a cluster of >= ``MIN_CLUSTER_SIZE`` DISTINCT
39
+ incidents (distinct ``failure_uid``) of the SAME archetype, above
40
+ ``MIN_TEMPLATE_CONFIDENCE``.
41
+ * One-off incidents, disparate symptoms, and below-threshold clusters mint
42
+ NOTHING (no active template; at most an internal cluster observation).
43
+ * We start with the single archetype the self-error detector already covers
44
+ (``silent_failure``), not a general pattern engine.
45
+ * Cross-area symptom collisions are kept apart by an area gate, mirroring the
46
+ self-error detector.
47
+
48
+ Everything here is best-effort and non-authoritative: templates are GUIDANCE,
49
+ they never block an action, and they never touch high-authority rules. The
50
+ distiller is idempotent and safe to re-run (deep-sleep phase or on demand).
51
+ """
52
+ from __future__ import annotations
53
+
54
+ import hashlib
55
+ import json
56
+ import re
57
+ import sqlite3
58
+ import time
59
+ from typing import Any
60
+
61
+ from db import get_db
62
+ from failure_prevention import redact_value
63
+
64
# Version tag of this distillation policy. It is hashed into every template_uid
# and written to the policy_version column of each template row.
POLICY_VERSION: str = "schema_abstraction.v1"

# ── Recurrence / precision tunables (conservative by design) ──────────────
# A genuinely recurring class needs at least this many DISTINCT incidents.
MIN_CLUSTER_SIZE: int = 3
# Two incidents join the same cluster only at/above this symptom similarity.
# Note: incidents are already gated into the SAME (archetype, area) bucket by
# objective markers before this threshold applies, so the threshold only has to
# separate genuinely different sub-symptoms WITHIN one archetype — not provide
# the precision (the archetype/area gate does that). We mirror the resolver's
# own relatedness floor (``find_similar_learnings`` keeps matches > 0.3) so
# paraphrases of the same incident cluster while unrelated text does not.
CLUSTER_SIMILARITY_THRESHOLD: float = 0.32
# A cluster only mints an ACTIVE template at/above this confidence. Confidence
# rises with incident count and intra-cluster cohesion.
MIN_TEMPLATE_CONFIDENCE: float = 0.7
# An action only gets a template injected on a CLEAR archetype match. The value
# is compared against the token-overlap ratio plus a fixed 0.4 archetype bonus
# (see ``match_templates_for_action``).
INJECT_MATCH_THRESHOLD: float = 0.55
82
+
83
+
84
# ── Archetype taxonomy (start narrow: only the silent-failure family) ─────
# Each archetype is a recognizable incident SHAPE, independent of wording. We
# detect it from objective lexical markers in the symptom/missed-signal text.
# The silent_failure archetype is the one the self_error_detector already covers
# ("cron exit 0 but the tool failed silently" / "shipped but a step was missing").
#
# STRONG markers self-evidently describe a silent failure or a missing step
# (the self-error archetype). Any single one is enough to classify, because on
# its own it already names error-hiding (`|| echo`, "swallowed") or an omitted
# step ("forgot the cron", "missing the trigger") — exactly the cron/deploy
# omissions Francisco wants primed before he repeats them.
_SILENT_FAILURE_STRONG = [
    # `|| echo {}` / `|| true` / `|| :`. The word alternatives need a trailing
    # word boundary; the shell no-op `:` is punctuation, so it gets a negative
    # lookahead instead (a `\b` after `:` can never match before a space or at
    # end of input, which made `|| :` undetectable).
    re.compile(r"\|\|\s*(?:(?:echo|true)\b|:(?!\w))", re.IGNORECASE),
    # NOTE(review): `masked?` only matches "masked" in practice (never "mask" or
    # "masking") and `hid(?:den)?` skips "hide"/"hiding" — confirm that is intended.
    re.compile(r"\b(?:swallow(?:ed|ing)?|suppress(?:ed|ing)?|masked?|hid(?:den)?)\b", re.IGNORECASE),
    # shipped-but-a-step-missing shape
    re.compile(r"\b(?:forgot|forgotten|missed|omitted|never\s+(?:created|added|set\s*up|configured|ran|deployed))\b", re.IGNORECASE),
    re.compile(r"\b(?:was\s+(?:never|not)\s+(?:created|added|configured|deployed|wired|registered|executed))\b", re.IGNORECASE),
    re.compile(r"\b(?:missing\s+(?:the\s+)?(?:cron|step|trigger|hook|migration|index|webhook|deploy|alert))\b", re.IGNORECASE),
    re.compile(r"\b(?:ran\s+but\s+(?:did\s*n.?t|never)|appeared\s+(?:to\s+)?(?:work|succeed)\s+but)\b", re.IGNORECASE),
    # Scheduler word followed within 60 chars by a failure stem (English and
    # Spanish stems share this pattern: fail / fall- / pet- / empty / vac-).
    re.compile(r"\b(?:cron|scheduled|wrapper|launchd|launchagent)\b.{0,60}\b(?:fail|fall|pet|empty|vac)", re.IGNORECASE),
    # Spanish
    re.compile(r"\b(?:silenci|trag(?:ó|aba|a)|tap(?:ó|aba)|enmascar|ocult)", re.IGNORECASE),
    # `falt[óoa](?:ba)?` makes the whole "ba" suffix optional so "faltó",
    # "falta" and "faltaba" all match (the previous `ba?` required the "b").
    re.compile(r"\b(?:olvid[éeè]|falt[óoa](?:ba)?|no\s+(?:se\s+)?(?:cre[óo]|configur[óo]|despleg[óo]|registr[óo]|ejecut[óo]))\b", re.IGNORECASE),
    re.compile(r"\b(?:corr[íi]a?\s+pero|parec[íi]a\s+(?:que\s+)?(?:funcionaba|iba\s+bien)\s+pero)\b", re.IGNORECASE),
]
108
+
109
# WEAK markers cannot classify an incident alone: healthy scripts also
# "exit 0", routine deploys can be "silent", and "no alerts" may simply mean
# nothing broke. A single weak hit must NOT seed a template (that caused the
# spurious injection on "exit 0 on success" / "silent deploy"). It takes two
# weak hits, or any strong hit, to classify.
_SILENT_FAILURE_WEAK = [
    re.compile(source, re.IGNORECASE)
    for source in (
        r"\b(?:exit(?:ed)?\s*0|exit\s*code\s*0|returned?\s*0|status\s*0)\b",
        r"\b(?:silent(?:ly)?|in\s+silence)\b",
        r"\b(?:no\s+(?:error|alert|alarm|warning)|without\s+(?:error|alert|failing))\b",
    )
]
119
+
120
# Backwards-compatible flat view (introspection / any future consumer):
# the strong patterns followed by the weak ones, in that order.
_SILENT_FAILURE_MARKERS = _SILENT_FAILURE_STRONG + _SILENT_FAILURE_WEAK

# Registry of known archetypes, keyed by archetype name. Each spec carries:
#   label            human-readable headline (stored in the template metadata)
#   markers          flat pattern list, used by the legacy any-marker rule
#   strong_markers   patterns where a single hit classifies the text
#   weak_markers     patterns that only classify when two or more hit
#   diagnosis_steps  the primed checklist copied into every minted template
#   prevention       one-paragraph prevention guidance copied into the template
#   match_tokens     vocabulary compared against the tokens of a current action
ARCHETYPES: dict[str, dict[str, Any]] = {
    "silent_failure": {
        "label": "Silent failure — the job reported success but the real work did not happen",
        "markers": _SILENT_FAILURE_MARKERS,
        "strong_markers": _SILENT_FAILURE_STRONG,
        "weak_markers": _SILENT_FAILURE_WEAK,
        # The COMPLETE diagnosis, primed instantly. This is the load-bearing
        # payload: when the archetype reappears, prime these checks first.
        "diagnosis_steps": [
            "Confirm the scheduled job (cron/launchd/wrapper) ACTUALLY executed the underlying tool — not just that the scheduler ran. Check the tool's own log/output, not the wrapper exit code.",
            "Verify the exit code is real: a wrapper that ends with `|| echo {}` / `|| true` / a swallowed exception will exit 0 even when the tool crashed. Grep the command for error-swallowing constructs.",
            "Compare the produced artifact/output against last-known-good. A silent failure typically leaves a STALE or EMPTY result (frozen badges, empty scrape, unchanged file) while everything 'looks' green.",
            "Check that every required side artifact actually exists and ran: the cron entry, the deploy, the webhook, the migration, the browser/runtime dependency. Code landing is necessary, not sufficient.",
            "Confirm there is an ALERT path that escalates on a missing/empty source, so the next occurrence is detected by an alarm, not by the operator noticing weeks later.",
        ],
        "prevention": (
            "For any scheduled/automated path, do not trust the wrapper exit code: assert the tool ran "
            "and produced a fresh, non-empty result, and escalate (email/alert) when a source is missing — "
            "never let an error be swallowed by `|| echo`/`|| true` or a bare exception handler."
        ),
        # Tokens used to match a CURRENT action against this archetype.
        # Matching is exact-token (see ``_tokenize``), so inflected forms such
        # as "swallowed" do not hit the "swallow" entry.
        "match_tokens": [
            "cron", "launchd", "launchagent", "scheduled", "wrapper", "scrape",
            "exit", "silent", "swallow", "echo", "deploy", "webhook", "trigger",
            "freeze", "frozen", "stale", "empty", "alert", "health",
        ],
    },
}
151
+
152
+
153
def _now() -> float:
    """Return the current time as seconds since the Unix epoch."""
    timestamp = time.time()
    return timestamp
155
+
156
+
157
def _stable_uid(*parts: object) -> str:
    """Return the SHA-256 hex digest of the parts joined by NUL bytes.

    Falsy parts (``None``, ``0``, ``""`` …) contribute an empty string; every
    other part contributes its ``str()`` form.
    """
    pieces = [str(piece) if piece else "" for piece in parts]
    hasher = hashlib.sha256()
    hasher.update("\0".join(pieces).encode("utf-8"))
    return hasher.hexdigest()
160
+
161
+
162
def _json(value: Any) -> str:
    """Serialize ``value`` as compact, key-sorted JSON without ASCII escaping."""
    dump_options = {
        "ensure_ascii": False,
        "sort_keys": True,
        "separators": (",", ":"),
    }
    return json.dumps(value, **dump_options)
164
+
165
+
166
def _load_json(value: str, default: Any) -> Any:
    """Parse ``value`` as JSON, returning ``default`` when parsing fails.

    A falsy ``value`` is treated as the empty string, which is not valid JSON
    and therefore yields ``default``. Best-effort by design: any exception
    raised while decoding falls back to ``default``.
    """
    raw = value if value else ""
    try:
        parsed = json.loads(raw)
    except Exception:
        return default
    return parsed
171
+
172
+
173
def _table_exists(conn: sqlite3.Connection, table: str) -> bool:
    """Report whether ``sqlite_master`` lists a table named ``table``."""
    cursor = conn.execute(
        "SELECT 1 FROM sqlite_master WHERE type='table' AND name = ?",
        (table,),
    )
    return cursor.fetchone() is not None
179
+
180
+
181
def _ensure_tables(conn: sqlite3.Connection) -> None:
    """Run the schema migrations unless both required tables already exist.

    The migration module is imported lazily, only when a table is missing.
    """
    required_tables = ("diagnostic_templates", "failure_prevention_cases")
    if all(_table_exists(conn, name) for name in required_tables):
        return

    from db._schema import run_migrations

    run_migrations(conn)
187
+
188
+
189
def _normalize(value: object) -> str:
    """Lower-case ``value``, trim it and collapse whitespace runs to one space.

    Falsy values (``None``, ``0``, ``""`` …) normalize to the empty string.
    """
    text = str(value) if value else ""
    lowered = text.strip().lower()
    return re.sub(r"\s+", " ", lowered)
191
+
192
+
193
def classify_archetype(text: str) -> str:
    """Return the key of the first archetype the text matches, or ``""``.

    For an archetype that defines ``strong_markers`` and/or ``weak_markers``,
    the text matches on one strong hit or on at least two weak hits. A single
    weak hit ("exit 0", "silent") is ambiguous — healthy runs look the same —
    and therefore never classifies. An archetype that defines neither list
    falls back to the legacy rule: any entry of ``markers`` matching is enough.

    Blank or empty text yields ``""``. Archetypes are tried in the iteration
    order of ``ARCHETYPES``.
    """
    candidate = str(text or "")
    if not candidate.strip():
        return ""

    for name, definition in ARCHETYPES.items():
        strong_patterns = definition.get("strong_markers")
        weak_patterns = definition.get("weak_markers")

        if strong_patterns is None and weak_patterns is None:
            # Legacy archetype without a strong/weak split: any marker wins.
            for pattern in definition["markers"]:
                if pattern.search(candidate):
                    return name
            continue

        for pattern in strong_patterns or ():
            if pattern.search(candidate):
                return name

        weak_hits = 0
        for pattern in weak_patterns or ():
            if pattern.search(candidate):
                weak_hits += 1
        if weak_hits >= 2:
            return name

    return ""
223
+
224
+
225
+ # ── Incident harvesting ───────────────────────────────────────────────────
226
+
227
+
228
def _case_symptom_text(case_row: sqlite3.Row) -> str:
    """Build the searchable incident text from a failure-case row.

    Reads the symptom, missed-signal, root-cause and corrective-action JSON
    columns. A decoded dict contributes its ``value_redacted`` entry; any other
    decoded value contributes its string form. Non-empty fragments are joined
    with single spaces so similarity sees the whole incident, not only the
    headline.
    """
    columns = (
        "symptom_json",
        "missed_signal_json",
        "root_cause_json",
        "corrective_action_json",
    )
    fragments: list[str] = []
    for column in columns:
        decoded = _load_json(str(case_row[column] or ""), {})
        raw = decoded.get("value_redacted") if isinstance(decoded, dict) else decoded
        fragments.append(str(raw or ""))
    return " ".join(filter(None, fragments)).strip()
243
+
244
+
245
def _harvest_case_incidents(conn: sqlite3.Connection, seen: set[str]) -> list[dict[str, Any]]:
    """Collect archetype-classified incidents from ``failure_prevention_cases``.

    Rows whose status is rejected, false_positive or expired are skipped. A
    missing table yields no incidents instead of raising. ``seen`` is updated
    in place with every uid that is returned.
    """
    if not _table_exists(conn, "failure_prevention_cases"):
        return []
    rows = conn.execute(
        """
        SELECT failure_uid, area, failure_type, privacy_level,
               symptom_json, missed_signal_json, root_cause_json, corrective_action_json
        FROM failure_prevention_cases
        WHERE status NOT IN ('rejected','false_positive','expired')
        """
    ).fetchall()

    found: list[dict[str, Any]] = []
    for row in rows:
        text = _case_symptom_text(row)
        archetype = classify_archetype(text)
        if not archetype:
            continue
        uid = f"case:{row['failure_uid']}"
        if uid in seen:
            continue
        seen.add(uid)
        found.append(
            {
                "uid": uid,
                "archetype": archetype,
                "area": _normalize(row["area"]),
                "text": text,
                "privacy_level": str(row["privacy_level"] or "normal"),
            }
        )
    return found


def _harvest_learning_incidents(conn: sqlite3.Connection, seen: set[str]) -> list[dict[str, Any]]:
    """Collect archetype-classified self-error learnings from ``learnings``.

    The persisted ``learnings`` table does NOT carry a source_authority column
    (that value is consumed by the resolver/cognitive ingest, not stored on the
    row), so it cannot be filtered on. The self-error detector instead tags its
    learnings with a distinctive ``reasoning`` marker ("Auto-detected by the
    self-error detector ..."); only rows carrying that marker are incidents.
    Only columns present in this schema revision are queried.
    """
    if not _table_exists(conn, "learnings"):
        return []
    cols = {info[1] for info in conn.execute("PRAGMA table_info(learnings)").fetchall()}
    if "reasoning" not in cols:
        # No reasoning column means no row can carry the detector marker, so
        # scanning the table could never produce an incident.
        return []

    has_prevention = "prevention" in cols
    select_cols = ["id", "category", "title", "content", "reasoning"]
    if has_prevention:
        select_cols.append("prevention")
    where = "WHERE COALESCE(status,'active') = 'active'" if "status" in cols else ""
    rows = conn.execute(
        f"SELECT {', '.join(select_cols)} FROM learnings {where}"
    ).fetchall()

    found: list[dict[str, Any]] = []
    for row in rows:
        reasoning = str(row["reasoning"] or "")
        # Only treat a learning as a self-error INCIDENT when it is objectively
        # one (the detector's marker). Generic learnings are not incidents.
        if "self-error detector" not in reasoning.lower():
            continue
        # NULL columns must contribute nothing; interpolating them directly
        # would inject the literal word "None" into the similarity text.
        title = str(row["title"] or "")
        content = str(row["content"] or "")
        prevention = str(row["prevention"] or "") if has_prevention else ""
        text = " ".join(part for part in (title, content, prevention) if part).strip()
        archetype = classify_archetype(text)
        if not archetype:
            continue
        uid = f"learning:{row['id']}"
        if uid in seen:
            continue
        seen.add(uid)
        found.append(
            {
                "uid": uid,
                "archetype": archetype,
                "area": _normalize(row["category"]),
                "text": text,
                "privacy_level": "normal",
            }
        )
    return found


def harvest_incidents(conn: sqlite3.Connection) -> list[dict[str, Any]]:
    """Collect candidate incidents from the substrate (no mutation).

    Two sources are unified into the common shape
    ``{uid, archetype, area, text, privacy_level}``:

    * ``failure_prevention_cases`` — the per-incident ledger (uid prefix
      ``case:``), excluding rejected / false-positive / expired cases.
    * self-error learnings — rows of ``learnings`` whose ``reasoning`` carries
      the self-error detector marker (uid prefix ``learning:``).

    Only incidents that classify into a known archetype are returned; the rest
    are dropped here (anti-noise at the source). A missing source table simply
    contributes nothing. Case incidents come first, then learning incidents.

    Args:
        conn: Open SQLite connection. Rows are accessed by column name, so the
            connection is expected to use ``sqlite3.Row`` as row factory.

    Returns:
        The list of incident dicts, without duplicate uids.
    """
    seen: set[str] = set()
    incidents = _harvest_case_incidents(conn, seen)
    incidents.extend(_harvest_learning_incidents(conn, seen))
    return incidents
336
+
337
+
338
+ # ── Clustering ──────────────────────────────────────────────────────────
339
+
340
+
341
def _similarity(text_a: str, text_b: str) -> float:
    """Score how similar two symptom texts are, using the resolver's own math.

    The resolver is imported on each call rather than at module load so the
    conftest's repo-import isolation can reload it against the temp DB before
    the first use.
    """
    from learning_resolver import candidate_similarity as resolver_similarity

    score = resolver_similarity(text_a, text_b)
    return float(score)
350
+
351
+
352
def cluster_incidents(incidents: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Greedily cluster incidents inside each (archetype, area) bucket.

    Incidents are first sorted by (archetype, area, uid) and bucketed by
    (archetype, area), so a cluster never spans two archetypes or two areas
    (the area gate mirrors the self-error detector's same-area requirement).
    Within a bucket each incident joins the FIRST existing cluster that holds
    at least one member whose similarity to it is at/above
    ``CLUSTER_SIMILARITY_THRESHOLD``; otherwise it opens a new cluster.
    Existing clusters are never merged afterwards.

    Returns:
        A list of ``{archetype, area, members, cohesion}`` dicts, where
        ``cohesion`` is the mean pairwise similarity of the members rounded to
        three decimals (``1.0`` for a single-member cluster).
    """
    ordered = sorted(
        incidents,
        key=lambda item: (item["archetype"], item["area"], item["uid"]),
    )
    grouped: dict[tuple[str, str], list[dict[str, Any]]] = {}
    for incident in ordered:
        bucket_key = (incident["archetype"], incident["area"])
        if bucket_key not in grouped:
            grouped[bucket_key] = []
        grouped[bucket_key].append(incident)

    result: list[dict[str, Any]] = []
    for (archetype, area), bucket in grouped.items():
        bucket_clusters: list[dict[str, Any]] = []
        for incident in bucket:
            home = None
            for candidate in bucket_clusters:
                # One sufficiently similar member is enough to join.
                for existing in candidate["members"]:
                    if _similarity(incident["text"], existing["text"]) >= CLUSTER_SIMILARITY_THRESHOLD:
                        home = candidate
                        break
                if home is not None:
                    break
            if home is None:
                bucket_clusters.append({"archetype": archetype, "area": area, "members": [incident]})
            else:
                home["members"].append(incident)
        result.extend(bucket_clusters)

    # Cohesion feeds the confidence score computed later.
    for cluster in result:
        group = cluster["members"]
        if len(group) < 2:
            cluster["cohesion"] = 1.0 if group else 0.0
            continue
        pair_scores = [
            _similarity(first["text"], second["text"])
            for position, first in enumerate(group)
            for second in group[position + 1:]
        ]
        cluster["cohesion"] = round(sum(pair_scores) / len(pair_scores), 3) if pair_scores else 0.0
    return result
397
+
398
+
399
def _archetype_key(archetype: str, area: str) -> str:
    """Build the dedup key ``"<archetype>|<normalized area>"``."""
    normalized_area = _normalize(area)
    return "{}|{}".format(archetype, normalized_area)
401
+
402
+
403
def _cluster_confidence(cluster: dict[str, Any]) -> float:
    """Score how confident we are that a cluster is a recurring archetype.

    A cluster with fewer than ``MIN_CLUSTER_SIZE`` distinct uids scores 0.0.
    Otherwise the score is recurrence-led: it starts at 0.70 and gains 0.05 per
    distinct incident beyond the minimum, because several incidents that all
    classify into the same objectively-marked archetype in the same area are
    themselves the recurrence signal. Cohesion only adds a small bonus of up to
    0.15. The result is capped at 0.95 (never certainty) and rounded to three
    decimals.
    """
    distinct_uids = {member["uid"] for member in cluster["members"]}
    extra_incidents = len(distinct_uids) - MIN_CLUSTER_SIZE
    if extra_incidents < 0:
        return 0.0
    recurrence = 0.70 + 0.05 * extra_incidents
    cohesion = float(cluster.get("cohesion") or 0.0)
    return round(min(0.95, recurrence + 0.15 * cohesion), 3)
423
+
424
+
425
+ # ── Distillation ──────────────────────────────────────────────────────────
426
+
427
+
428
def _build_symptom_pattern(cluster: dict[str, Any]) -> str:
    """Return the first member's text, redacted and cut to 300 characters."""
    first_member = cluster["members"][0]
    redacted = redact_value(first_member["text"])
    return redacted[:300]
432
+
433
+
434
def distill_template_payload(cluster: dict[str, Any]) -> dict[str, Any] | None:
    """Build the diagnostic-template payload for a cluster, or ``None``.

    ``None`` is the anti-noise gate: it is returned when the archetype is not
    registered in ``ARCHETYPES``, when the cluster has fewer than
    ``MIN_CLUSTER_SIZE`` distinct uids, or when its confidence is below
    ``MIN_TEMPLATE_CONFIDENCE``. Otherwise the payload copies the archetype's
    primed diagnosis (steps, prevention, match tokens, label) and adds the
    cluster-specific fields. ``template_uid`` is derived from the policy
    version and the ``archetype|area`` key only.
    """
    kind = cluster["archetype"]
    definition = ARCHETYPES.get(kind)
    if definition is None:
        return None

    uids = sorted({member["uid"] for member in cluster["members"]})
    if len(uids) < MIN_CLUSTER_SIZE:
        return None

    score = _cluster_confidence(cluster)
    if score < MIN_TEMPLATE_CONFIDENCE:
        return None

    scope = cluster["area"]
    key = _archetype_key(kind, scope)
    if kind == "silent_failure":
        failure_type = "tool"
    else:
        failure_type = "other"

    payload: dict[str, Any] = {
        "template_uid": _stable_uid(POLICY_VERSION, key),
        "archetype": kind,
        "archetype_key": key,
        "failure_type": failure_type,
        "area": scope,
        "symptom_pattern": _build_symptom_pattern(cluster),
        "diagnosis_steps": list(definition["diagnosis_steps"]),
        "prevention": definition["prevention"],
        "match_tokens": list(definition["match_tokens"]),
        "member_uids": uids,
        "incident_count": len(uids),
        "confidence": score,
        "label": definition["label"],
    }
    return payload
467
+
468
+
469
def _upsert_template(conn: sqlite3.Connection, payload: dict[str, Any]) -> dict[str, Any]:
    """Insert a template row, or refresh the existing row with the same uid.

    A new uid is inserted with status ``active`` and privacy level ``normal``.
    An existing uid has only its mutable fields overwritten (symptom pattern,
    steps, prevention, tokens, members, count, confidence, ``updated_at``);
    ``created_at`` and ``status`` are left alone, so a retired template is NOT
    silently re-activated here. The change is committed before returning.

    Returns:
        ``{"created", "refreshed", "template_uid"}``. ``refreshed`` is true
        only when an existing row's member set differs from the payload's.
    """
    stamp = _now()
    uid = payload["template_uid"]
    found = conn.execute(
        "SELECT id, status, member_uids_json FROM diagnostic_templates WHERE template_uid = ?",
        (uid,),
    ).fetchone()

    if found is not None:
        # Compare member sets before overwriting so the caller learns whether
        # this pass actually changed the template.
        stored_members = set(_load_json(str(found["member_uids_json"] or ""), []))
        members_changed = set(payload["member_uids"]) != stored_members
        update_values = (
            payload["symptom_pattern"],
            _json(payload["diagnosis_steps"]),
            payload["prevention"],
            _json(payload["match_tokens"]),
            _json(payload["member_uids"]),
            payload["incident_count"],
            payload["confidence"],
            stamp,
            uid,
        )
        conn.execute(
            """
            UPDATE diagnostic_templates
            SET symptom_pattern = ?, diagnosis_steps_json = ?, prevention = ?,
                match_tokens_json = ?, member_uids_json = ?, incident_count = ?,
                confidence = ?, updated_at = ?
            WHERE template_uid = ?
            """,
            update_values,
        )
        conn.commit()
        return {"created": False, "refreshed": members_changed, "template_uid": uid}

    insert_values = (
        uid,
        POLICY_VERSION,
        payload["archetype"],
        payload["archetype_key"],
        payload["failure_type"],
        payload["area"],
        payload["symptom_pattern"],
        _json(payload["diagnosis_steps"]),
        payload["prevention"],
        _json(payload["match_tokens"]),
        _json(payload["member_uids"]),
        payload["incident_count"],
        payload["confidence"],
        stamp,
        stamp,
        _json({"label": payload["label"]}),
    )
    conn.execute(
        """
        INSERT INTO diagnostic_templates (
            template_uid, policy_version, archetype, archetype_key,
            failure_type, area, symptom_pattern, diagnosis_steps_json,
            prevention, match_tokens_json, member_uids_json, incident_count,
            confidence, status, privacy_level, created_at, updated_at,
            metadata_json
        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 'active', 'normal', ?, ?, ?)
        """,
        insert_values,
    )
    conn.commit()
    return {"created": True, "refreshed": False, "template_uid": uid}
539
+
540
+
541
def distill_templates(conn: sqlite3.Connection | None = None) -> dict[str, Any]:
    """Full distillation pass: harvest → cluster → distill → idempotent upsert.

    Intended to be safe to re-run (deep-sleep phase or on demand).

    ``template_uid`` depends only on the policy version and the
    ``archetype|area`` key, while one (archetype, area) bucket can hold several
    qualifying clusters. Upserting each of them would make the later cluster
    overwrite the earlier one, list the same uid twice in the report, and flag
    a "refresh" on every run. Therefore only the strongest cluster per uid
    (highest incident count, then confidence; first wins ties) is upserted. The
    others are reported under ``skipped_low_signal`` with reason
    ``superseded_by_stronger_cluster``.

    Args:
        conn: SQLite connection; defaults to ``get_db()``.

    Returns:
        A report dict with ``ok``, ``incidents``, ``clusters``,
        ``templates_created``, ``templates_refreshed``, ``templates_minted``
        and ``skipped_low_signal``.
    """
    conn = conn or get_db()
    _ensure_tables(conn)
    incidents = harvest_incidents(conn)
    clusters = cluster_incidents(incidents)

    skipped_low: list[dict[str, Any]] = []
    # template_uid → strongest qualifying payload (insertion order preserved).
    best_by_uid: dict[str, dict[str, Any]] = {}
    for cluster in clusters:
        payload = distill_template_payload(cluster)
        if payload is None:
            n = len({m["uid"] for m in cluster["members"]})
            skipped_low.append(
                {
                    "archetype": cluster["archetype"],
                    "area": cluster["area"],
                    "incident_count": n,
                    "reason": "below_min_cluster_size" if n < MIN_CLUSTER_SIZE else "below_min_confidence",
                }
            )
            continue

        uid = payload["template_uid"]
        incumbent = best_by_uid.get(uid)
        if incumbent is None:
            best_by_uid[uid] = payload
            continue
        # Two qualifying clusters map to the same template: keep the stronger.
        challenger_rank = (payload["incident_count"], payload["confidence"])
        incumbent_rank = (incumbent["incident_count"], incumbent["confidence"])
        if challenger_rank > incumbent_rank:
            best_by_uid[uid] = payload
            superseded = incumbent
        else:
            superseded = payload
        skipped_low.append(
            {
                "archetype": superseded["archetype"],
                "area": superseded["area"],
                "incident_count": superseded["incident_count"],
                "reason": "superseded_by_stronger_cluster",
            }
        )

    created = 0
    refreshed = 0
    minted: list[dict[str, Any]] = []
    for payload in best_by_uid.values():
        result = _upsert_template(conn, payload)
        if result["created"]:
            created += 1
        elif result["refreshed"]:
            refreshed += 1
        minted.append(
            {
                "template_uid": payload["template_uid"],
                "archetype": payload["archetype"],
                "area": payload["area"],
                "incident_count": payload["incident_count"],
                "confidence": payload["confidence"],
            }
        )

    return {
        "ok": True,
        "incidents": len(incidents),
        "clusters": len(clusters),
        "templates_created": created,
        "templates_refreshed": refreshed,
        "templates_minted": minted,
        "skipped_low_signal": skipped_low,
    }
593
+
594
+
595
+ # ── Matching / injection support ──────────────────────────────────────────
596
+
597
+
598
def _tokenize(text: str) -> set[str]:
    """Return the distinct ``[a-z0-9_]+`` words of the normalized text that are longer than two characters."""
    words = re.findall(r"[a-z0-9_]+", _normalize(text))
    return set(filter(lambda word: len(word) > 2, words))
600
+
601
+
602
def match_templates_for_action(
    *,
    query: str = "",
    area: str = "",
    files: str = "",
    conn: sqlite3.Connection | None = None,
    limit: int = 1,
) -> list[dict[str, Any]]:
    """Return the active templates that CLEARLY match the current action.

    Called by ``build_pre_action_context`` to prime the diagnosis. The action
    text is ``query`` and ``files`` joined by a space. Precision-first: an
    active template is returned only when all of the following hold:

    * the action text classifies into the template's archetype (hard gate —
      shared vocabulary alone never qualifies, since ordinary actions such as
      "deploy the webhook trigger" share tokens with the archetype without
      being incidents);
    * the areas do not contradict (an empty area on either side is permissive);
    * at least one token is shared and the overlap ratio plus the fixed 0.4
      archetype bonus reaches ``INJECT_MATCH_THRESHOLD``.

    Results are ordered by score, then confidence, both descending, and capped
    at ``limit`` (minimum 1). An empty action text, an action with no usable
    tokens, or a missing ``diagnostic_templates`` table yields ``[]``.
    """
    conn = conn or get_db()
    if not _table_exists(conn, "diagnostic_templates"):
        return []

    action_text = " ".join(str(piece or "") for piece in (query, files)).strip()
    if not action_text:
        return []
    action_archetype = classify_archetype(action_text)
    action_tokens = _tokenize(action_text)
    if not action_tokens:
        return []
    wanted_area = _normalize(area)

    candidates = conn.execute(
        """
        SELECT template_uid, archetype, archetype_key, area, failure_type,
               symptom_pattern, diagnosis_steps_json, prevention,
               match_tokens_json, incident_count, confidence, metadata_json
        FROM diagnostic_templates
        WHERE status = 'active'
        """
    ).fetchall()

    ranked: list[tuple[float, dict[str, Any]]] = []
    for record in candidates:
        vocabulary = set(_load_json(str(record["match_tokens_json"] or ""), []))
        if not vocabulary:
            continue
        shared = action_tokens & vocabulary
        overlap_ratio = len(shared) / max(1, min(len(action_tokens), len(vocabulary)))

        # Hard precondition: a spurious template is worse than none, so the
        # action itself must classify into this template's archetype.
        if not action_archetype or action_archetype != record["archetype"]:
            continue

        # Only two non-empty, different areas count as a contradiction.
        template_area = record["area"]
        if wanted_area and template_area and wanted_area != template_area:
            continue

        # With the archetype agreed, some concrete shared vocabulary is still
        # required; the bonus keeps the archetype signal weighted highest.
        score = overlap_ratio + 0.4
        if not shared or score < INJECT_MATCH_THRESHOLD:
            continue

        rounded_score = round(score, 3)
        metadata = _load_json(str(record["metadata_json"] or ""), {})
        ranked.append(
            (
                rounded_score,
                {
                    "template_uid": record["template_uid"],
                    "archetype": record["archetype"],
                    "area": record["area"],
                    "failure_type": record["failure_type"],
                    "label": metadata.get("label", ""),
                    "symptom_pattern": record["symptom_pattern"],
                    "diagnosis_steps": _load_json(str(record["diagnosis_steps_json"] or ""), []),
                    "prevention": record["prevention"],
                    "incident_count": int(record["incident_count"] or 0),
                    "confidence": float(record["confidence"] or 0.0),
                    "match_score": rounded_score,
                    "matched_tokens": sorted(shared),
                },
            )
        )

    ranked.sort(key=lambda entry: (entry[0], entry[1]["confidence"]), reverse=True)
    keep = max(1, int(limit or 1))
    return [template for _score, template in ranked[:keep]]
690
+
691
+
692
def list_templates(*, status: str = "active", limit: int = 50, conn: sqlite3.Connection | None = None) -> list[dict[str, Any]]:
    """List template rows as dicts, highest confidence and most recent first.

    An empty ``status`` disables the status filter. ``limit`` is clamped to at
    least 1 (a falsy limit means 50). Each ``*_json`` column is decoded and
    stored under its name without the ``_json`` suffix; undecodable values
    become ``[]`` (steps, tokens, members) or ``{}`` (metadata). A missing
    ``diagnostic_templates`` table yields ``[]``.
    """
    conn = conn or get_db()
    if not _table_exists(conn, "diagnostic_templates"):
        return []

    where = "WHERE status = ?" if status else ""
    params: list[Any] = [status] if status else []
    params.append(max(1, int(limit or 50)))
    rows = conn.execute(
        f"SELECT * FROM diagnostic_templates {where} ORDER BY confidence DESC, updated_at DESC LIMIT ?",
        params,
    ).fetchall()

    # JSON column → factory producing a fresh fallback value.
    fallback_factories = {
        "diagnosis_steps_json": list,
        "match_tokens_json": list,
        "member_uids_json": list,
        "metadata_json": dict,
    }
    templates: list[dict[str, Any]] = []
    for row in rows:
        record = dict(row)
        for column, make_fallback in fallback_factories.items():
            raw = str(record.pop(column) or "")
            record[column[: -len("_json")]] = _load_json(raw, make_fallback())
        templates.append(record)
    return templates
714
+
715
+
716
def retire_template(template_uid: str, *, reason: str = "", conn: sqlite3.Connection | None = None) -> dict[str, Any]:
    """Mark an active template as retired (lifecycle). Never deletes incidents.

    The reason is redacted and cut to 240 characters before it is stored. Only
    a row that is currently ``active`` is updated; the change is committed.

    Returns:
        ``{"ok": True, "retired": <bool>, "template_uid": ...}``, where
        ``retired`` tells whether a row was actually updated, or
        ``{"ok": False, "error": ...}`` when the table is missing.
    """
    conn = conn or get_db()
    if not _table_exists(conn, "diagnostic_templates"):
        return {"ok": False, "error": "diagnostic_templates_table_missing"}

    stamp = _now()
    stored_reason = redact_value(reason)[:240]
    lookup_uid = str(template_uid or "").strip()
    cursor = conn.execute(
        "UPDATE diagnostic_templates SET status='retired', retired_at=?, retired_reason=?, updated_at=? WHERE template_uid=? AND status='active'",
        (stamp, stored_reason, stamp, lookup_uid),
    )
    conn.commit()
    was_retired = cursor.rowcount > 0
    return {"ok": True, "retired": was_retired, "template_uid": template_uid}
728
+
729
+
730
def format_templates_for_injection(templates: list[dict[str, Any]]) -> str:
    """Render matched templates as a primed-diagnosis text block.

    Returns ``""`` for an empty list. Otherwise emits a header line, then per
    template a headline (label, falling back to the archetype), up to five
    diagnosis steps, and the prevention line when one is present.
    """
    if not templates:
        return ""

    rendered = ["PRIMED DIAGNOSIS (recurring incident archetype matched — diagnose this FIRST):"]
    for template in templates:
        headline = template.get("label") or template.get("archetype") or "archetype"
        summary = (
            f"- [{template.get('archetype')}] {headline} "
            f"(seen {template.get('incident_count')}x, conf={template.get('confidence')})"
        )
        rendered.append(summary)
        steps = template.get("diagnosis_steps") or []
        rendered.extend(f" • {step}" for step in steps[:5])
        prevention = template.get("prevention")
        if prevention:
            rendered.append(f" ⇒ prevent: {prevention}")
    return "\n".join(rendered)
747
+
748
+
749
# Public API of this module. Underscore-prefixed helpers and the two
# thresholds CLUSTER_SIMILARITY_THRESHOLD / INJECT_MATCH_THRESHOLD are
# deliberately left out of the star-import surface.
__all__ = [
    "POLICY_VERSION",
    "MIN_CLUSTER_SIZE",
    "MIN_TEMPLATE_CONFIDENCE",
    "ARCHETYPES",
    "classify_archetype",
    "harvest_incidents",
    "cluster_incidents",
    "distill_template_payload",
    "distill_templates",
    "match_templates_for_action",
    "list_templates",
    "retire_template",
    "format_templates_for_injection",
]