@simbimbo/brainstem 0.0.3 → 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,55 +1,136 @@
1
1
  from __future__ import annotations
2
2
 
3
- from dataclasses import asdict
3
+ import json
4
+ from dataclasses import asdict, dataclass, field
4
5
  from datetime import datetime
5
6
  from pathlib import Path
6
- from typing import Iterable, List
7
+ from typing import Callable, Iterable, List, Optional
7
8
 
9
+ from .config import get_runtime_config
8
10
  from .fingerprint import fingerprint_event, normalize_message
9
- from .models import CanonicalEvent, Event, RawInputEnvelope, Signature
11
+ from .models import Candidate, CanonicalEvent, Event, RawInputEnvelope, Signature
12
+ from .recurrence import build_recurrence_candidates
13
+ from .source_drivers import parse_source_payloads
14
+ from .storage import (
15
+ get_raw_envelopes_by_ids,
16
+ init_db,
17
+ RAW_ENVELOPE_STATUSES,
18
+ set_raw_envelope_status,
19
+ store_candidates,
20
+ store_events,
21
+ store_raw_envelopes,
22
+ store_signatures,
23
+ )
10
24
 
11
25
 
12
- def parse_syslog_line(line: str, *, tenant_id: str, source_path: str = "") -> CanonicalEvent:
13
- return canonicalize_raw_input_envelope(
14
- parse_syslog_envelope(line, tenant_id=tenant_id, source_path=source_path)
26
# Callback invoked for an input that could not be processed: receives the
# raised exception and the offending raw text.
ErrorHandler = Callable[[Exception, str], None]
27
+
28
+
29
@dataclass
class IngestionItemResult:
    """Per-envelope outcome recorded by ``run_ingest_pipeline``."""

    # Position of the envelope within the batch handed to the pipeline.
    index: int
    # Outcome label; the pipeline writes "canonicalized" or "parse_failed".
    status: str
    # Identifying fields copied from the raw envelope.
    tenant_id: str
    source_type: str
    source_id: str
    source_name: str
    # Stored raw-envelope id, or None when no id is known for the item.
    raw_envelope_id: Optional[int]
    # Text of the exception for failed items; None otherwise.
    failure_reason: Optional[str] = None
39
+
40
+
41
@dataclass
class IngestionResult:
    """Aggregate outcome of one ``run_ingest_pipeline`` call."""

    # The input envelopes, materialised into a list.
    raw_envelopes: List[RawInputEnvelope]
    # Ids returned by raw-envelope storage; empty when nothing was stored.
    raw_envelope_ids: List[int]
    # Envelopes that canonicalized successfully.
    events: List[CanonicalEvent]
    # Signatures and recurrence candidates derived from ``events``.
    signatures: List[Signature]
    candidates: List[Candidate]
    # Number of envelopes whose canonicalization raised.
    parse_failed: int
    # One entry per processed envelope, in input order.
    item_results: List[IngestionItemResult] = field(default_factory=list)
50
+
51
+
52
@dataclass
class ReplayAttempt:
    """Record of a raw envelope that a replay request did not process."""

    raw_envelope_id: int
    # Why the envelope was skipped, e.g. "not_found" or "not_replayable".
    reason: str
    # Status associated with the skip ("missing" for unknown ids, otherwise
    # the stored canonicalization status); None when not supplied.
    status: Optional[str] = None
57
+
58
+
59
@dataclass
class ReplayResult:
    """Outcome of ``replay_raw_envelopes_by_ids``."""

    # Usable ids from the request, de-duplicated in first-seen order.
    requested_raw_envelope_ids: List[int]
    # Ids whose stored envelopes were actually fed back into the pipeline.
    attempted_raw_envelope_ids: List[int]
    # Ids that were not replayed, each with the reason.
    skipped: List[ReplayAttempt]
    # Pipeline outputs for the replayed envelopes.
    events: List[CanonicalEvent]
    signatures: List[Signature]
    candidates: List[Candidate]
    # Number of replayed envelopes that failed canonicalization.
    parse_failed: int
68
+
69
+
70
def _ingestion_item_result_from_event(
    index: int,
    raw_event: RawInputEnvelope,
    raw_envelope_id: int | None,
    status: str,
    *,
    failure_reason: str | None = None,
) -> IngestionItemResult:
    """Build the per-item result for ``raw_event``.

    The tenant and source identification fields are copied from the envelope;
    everything else comes from the arguments.
    """
    copied_fields = {
        name: getattr(raw_event, name)
        for name in ("tenant_id", "source_type", "source_id", "source_name")
    }
    return IngestionItemResult(
        index=index,
        status=status,
        raw_envelope_id=raw_envelope_id,
        failure_reason=failure_reason,
        **copied_fields,
    )
16
88
 
17
89
 
90
def parse_syslog_line(line: str, *, tenant_id: str, source_path: str = "") -> CanonicalEvent:
    """Parse one syslog line into an envelope and canonicalize it."""
    envelope = parse_syslog_envelope(line, tenant_id=tenant_id, source_path=source_path)
    return canonicalize_raw_input_envelope(envelope)
92
+
93
+
18
94
  def parse_syslog_envelope(line: str, *, tenant_id: str, source_path: str = "") -> RawInputEnvelope:
19
- text = (line or "").rstrip("\n")
20
- timestamp = datetime.utcnow().isoformat() + "Z"
21
- host = ""
22
- service = ""
23
- message = text
24
-
25
- parts = text.split()
26
- if len(parts) >= 5:
27
- host = parts[3]
28
- rest = " ".join(parts[4:])
29
- if ":" in rest:
30
- svc, _, msg = rest.partition(":")
31
- service = svc.strip()
32
- message = msg.strip() or rest.strip()
33
- else:
34
- message = rest.strip()
95
+ return parse_source_payloads("syslog", [line], tenant_id=tenant_id, source_path=source_path)[0]
35
96
 
36
- return RawInputEnvelope(
97
+
98
def parse_file_line(line: str, *, tenant_id: str, source_path: str = "") -> RawInputEnvelope:
    """Return the raw envelope produced by the "file" source driver for ``line``."""
    # The driver works on batches; wrap the single line and take the first result.
    envelopes = parse_source_payloads("file", [line], tenant_id=tenant_id, source_path=source_path)
    return envelopes[0]
100
+
101
+
102
def parse_syslog_envelopes(lines: Iterable[str], *, tenant_id: str, source_path: str = "") -> List[RawInputEnvelope]:
    """Parse every non-blank line into a syslog raw envelope, preserving order."""
    envelopes = []
    for line in lines:
        # Lines that are empty or whitespace-only are skipped.
        if str(line).strip():
            envelopes.append(parse_syslog_envelope(line, tenant_id=tenant_id, source_path=source_path))
    return envelopes
105
+
106
+
107
def parse_file_envelopes(lines: Iterable[str], *, tenant_id: str, source_path: str = "") -> List[RawInputEnvelope]:
    """Parse the non-blank ``lines`` with the "file" source driver in one batch."""
    non_blank_lines = [line for line in lines if str(line).strip()]
    return parse_source_payloads(
        "file",
        non_blank_lines,
        tenant_id=tenant_id,
        source_path=source_path,
    )
46
114
 
47
115
 
48
- def parse_syslog_envelopes(lines: Iterable[str], *, tenant_id: str, source_path: str = "") -> List[RawInputEnvelope]:
49
- return [parse_syslog_envelope(line, tenant_id=tenant_id, source_path=source_path) for line in lines if str(line).strip()]
116
+ def _coerce_raw_envelope_id(value: object) -> int | None:
117
+ if isinstance(value, bool):
118
+ return None
119
+ if isinstance(value, int):
120
+ return value
121
+ if isinstance(value, str):
122
+ value = value.strip()
123
+ if not value.isdigit():
124
+ return None
125
+ return int(value)
126
+ return None
50
127
 
51
128
 
52
- def canonicalize_raw_input_envelope(raw: RawInputEnvelope) -> CanonicalEvent:
129
+ def canonicalize_raw_input_envelope(
130
+ raw: RawInputEnvelope,
131
+ *,
132
+ raw_envelope_id: int | None = None,
133
+ ) -> CanonicalEvent:
53
134
  parse_error = (raw.metadata or {}).get("parse_error")
54
135
  if parse_error:
55
136
  raise ValueError(f"parse_error: {parse_error}")
@@ -57,14 +138,22 @@ def canonicalize_raw_input_envelope(raw: RawInputEnvelope) -> CanonicalEvent:
57
138
  if not (raw.message_raw or "").strip():
58
139
  raise ValueError("message_raw is empty and cannot be canonicalized")
59
140
 
141
+ resolved_raw_envelope_id = _coerce_raw_envelope_id(raw_envelope_id)
142
+ if resolved_raw_envelope_id is None:
143
+ resolved_raw_envelope_id = _coerce_raw_envelope_id(raw.metadata.get("raw_envelope_id"))
144
+
60
145
  message_normalized = normalize_message(raw.message_raw)
61
146
  metadata = dict(raw.metadata or {})
62
147
  metadata.setdefault("canonicalization_source", raw.source_type)
63
148
  metadata["raw_input_seen"] = True
149
+ if resolved_raw_envelope_id is not None:
150
+ metadata["raw_envelope_id"] = resolved_raw_envelope_id
151
+
64
152
  return CanonicalEvent(
65
153
  tenant_id=raw.tenant_id,
66
154
  source_type=raw.source_type,
67
155
  timestamp=raw.timestamp,
156
+ raw_envelope_id=resolved_raw_envelope_id,
68
157
  host=raw.host,
69
158
  service=raw.service,
70
159
  severity=raw.severity,
@@ -88,20 +177,338 @@ def canonicalize_raw_input_envelopes(events: Iterable[RawInputEnvelope]) -> List
88
177
  return [canonicalize_raw_input_envelope(raw_event) for raw_event in events]
89
178
 
90
179
 
180
+ def _parse_json_map(value: str | None) -> dict:
181
+ if not value:
182
+ return {}
183
+ try:
184
+ parsed = json.loads(value)
185
+ except json.JSONDecodeError:
186
+ return {}
187
+ if isinstance(parsed, dict):
188
+ return parsed
189
+ return {}
190
+
191
+
192
def _raw_envelope_from_row(row) -> RawInputEnvelope:
    """Rebuild a ``RawInputEnvelope`` from a stored raw-envelope row.

    Falsy text columns fall back to ``""`` (``"info"`` for severity), JSON
    columns are decoded with ``_parse_json_map``, and the row id is written
    into the envelope metadata under ``"raw_envelope_id"``.
    """

    def column(name: str, fallback: str = "") -> str:
        # Replace NULL/empty column values with the fallback.
        return row[name] or fallback

    envelope_metadata = _parse_json_map(row["metadata_json"])
    envelope_metadata["raw_envelope_id"] = int(row["id"])

    return RawInputEnvelope(
        tenant_id=row["tenant_id"],
        source_type=row["source_type"],
        source_id=column("source_id"),
        source_name=column("source_name"),
        timestamp=row["timestamp"],
        host=column("host"),
        service=column("service"),
        severity=column("severity", "info"),
        asset_id=column("asset_id"),
        source_path=column("source_path"),
        message_raw=column("message_raw"),
        facility=column("facility"),
        structured_fields=_parse_json_map(row["structured_fields_json"]),
        correlation_keys=_parse_json_map(row["correlation_keys_json"]),
        metadata=envelope_metadata,
    )
212
+
213
+
214
def replay_raw_envelopes_by_ids(
    raw_envelope_ids: Iterable[int | str | object],
    *,
    db_path: str,
    threshold: int | None = None,
    on_event: Optional[Callable[[CanonicalEvent], None]] = None,
    on_parse_error: Optional[ErrorHandler] = None,
    force: bool = False,
    allowed_statuses: Iterable[str] = ("received", "parse_failed"),
) -> ReplayResult:
    """Re-run stored raw envelopes through the ingest pipeline.

    Args:
        raw_envelope_ids: Ids to replay. Values that cannot be coerced to an
            int are dropped; duplicates are removed, keeping first-seen order.
        db_path: Database holding the stored raw envelopes.
        threshold: Recurrence threshold; defaults to the runtime config's
            ``replay_threshold``.
        on_event: Forwarded to ``run_ingest_pipeline``.
        on_parse_error: Forwarded to ``run_ingest_pipeline``.
        force: When true, replay rows regardless of their stored status.
        allowed_statuses: Canonicalization statuses eligible for replay when
            ``force`` is false.

    Returns:
        A ``ReplayResult`` listing requested, attempted and skipped ids along
        with the pipeline outputs for the replayed envelopes.

    Raises:
        ValueError: If ``allowed_statuses`` contains a value outside
            ``RAW_ENVELOPE_STATUSES`` (only checked when at least one usable
            id was requested).
    """
    if threshold is None:
        threshold = get_runtime_config().defaults.replay_threshold

    # Normalise ids, discard unusable ones, and de-duplicate in input order.
    coerced_ids = (_coerce_raw_envelope_id(item) for item in raw_envelope_ids)
    requested_raw_envelope_ids = list(
        dict.fromkeys(item for item in coerced_ids if item is not None)
    )

    if not requested_raw_envelope_ids:
        return ReplayResult(
            requested_raw_envelope_ids=[],
            attempted_raw_envelope_ids=[],
            skipped=[],
            events=[],
            signatures=[],
            candidates=[],
            parse_failed=0,
        )

    allowed_status_set = set(allowed_statuses)
    if any(status not in RAW_ENVELOPE_STATUSES for status in allowed_status_set):
        raise ValueError(
            "allowed_statuses must only include one of: "
            + ", ".join(RAW_ENVELOPE_STATUSES)
        )

    raw_rows_by_id = {
        int(row["id"]): row
        for row in get_raw_envelopes_by_ids(requested_raw_envelope_ids, db_path=db_path)
    }

    replay_rows = []
    skipped: List[ReplayAttempt] = []
    for raw_envelope_id in requested_raw_envelope_ids:
        row = raw_rows_by_id.get(raw_envelope_id)
        if row is None:
            skipped.append(
                ReplayAttempt(
                    raw_envelope_id=raw_envelope_id,
                    reason="not_found",
                    status="missing",
                )
            )
            continue
        if not force and row["canonicalization_status"] not in allowed_status_set:
            skipped.append(
                ReplayAttempt(
                    raw_envelope_id=raw_envelope_id,
                    reason="not_replayable",
                    status=row["canonicalization_status"],
                )
            )
            continue
        replay_rows.append(row)

    replay_envelopes = [_raw_envelope_from_row(row) for row in replay_rows]
    # store_raw=False: the envelopes already exist in storage; each carries its
    # row id in metadata so the pipeline can update the stored status.
    raw_pipeline_result = run_ingest_pipeline(
        replay_envelopes,
        threshold=threshold,
        db_path=db_path,
        on_event=on_event,
        on_parse_error=on_parse_error,
        store_raw=False,
    )

    return ReplayResult(
        requested_raw_envelope_ids=requested_raw_envelope_ids,
        # Coerce like the lookup keys above so attempted ids always compare
        # equal to the requested ints.
        attempted_raw_envelope_ids=[int(row["id"]) for row in replay_rows],
        skipped=skipped,
        events=raw_pipeline_result.events,
        signatures=raw_pipeline_result.signatures,
        candidates=raw_pipeline_result.candidates,
        parse_failed=raw_pipeline_result.parse_failed,
    )
296
+
297
+
298
def run_ingest_pipeline(
    raw_envelopes: Iterable[RawInputEnvelope],
    *,
    threshold: int | None = None,
    db_path: str | None = None,
    store_raw: bool = True,
    on_event: Optional[Callable[[CanonicalEvent], None]] = None,
    on_parse_error: Optional[ErrorHandler] = None,
) -> IngestionResult:
    """Canonicalize raw envelopes, derive signatures/candidates and persist them.

    Args:
        raw_envelopes: Envelopes to process; consumed once into a list.
        threshold: Recurrence threshold; defaults to the runtime config's
            ``ingest_threshold``.
        db_path: When given, the database is initialised and results are
            stored there.
        store_raw: When true (and ``db_path`` is given), the raw envelopes are
            stored first and the returned ids are attached to the events.
        on_event: Called with each successfully canonicalized event.
        on_parse_error: Called with the exception and the raw line (or raw
            message) of each envelope that failed canonicalization.

    Returns:
        An ``IngestionResult`` with events, signatures, candidates, the
        failure count and one ``IngestionItemResult`` per input envelope.
    """
    if threshold is None:
        threshold = get_runtime_config().defaults.ingest_threshold

    raw_envelopes_list = list(raw_envelopes)
    raw_envelope_ids: List[int] = []
    if db_path:
        init_db(db_path)
        if store_raw:
            raw_envelope_ids = store_raw_envelopes(raw_envelopes_list, db_path)

    canonical_events: List[CanonicalEvent] = []
    parse_failed = 0
    item_results: List[IngestionItemResult] = []
    for idx, raw_event in enumerate(raw_envelopes_list):
        # Metadata may be None (canonicalization guards for that too); treat
        # it as empty so one malformed envelope cannot abort the whole batch.
        event_metadata = raw_event.metadata or {}

        # Prefer the id from this run's storage; otherwise fall back to an id
        # already carried in the envelope metadata.
        raw_envelope_id = raw_envelope_ids[idx] if idx < len(raw_envelope_ids) else None
        if raw_envelope_id is None:
            raw_envelope_id = _coerce_raw_envelope_id(event_metadata.get("raw_envelope_id"))

        try:
            canonical_event = canonicalize_raw_input_envelope(raw_event, raw_envelope_id=raw_envelope_id)
        except Exception as exc:
            # Deliberate per-item boundary: any failure is recorded against
            # this envelope and the rest of the batch continues.
            failure_reason = str(exc)
            parse_failed += 1
            item_results.append(
                _ingestion_item_result_from_event(
                    idx,
                    raw_event,
                    raw_envelope_id=raw_envelope_id,
                    status="parse_failed",
                    failure_reason=failure_reason,
                )
            )
            if raw_envelope_id is not None:
                # NOTE(review): db_path may be None here when the id came from
                # metadata; confirm set_raw_envelope_status tolerates that.
                set_raw_envelope_status(
                    raw_envelope_id,
                    "parse_failed",
                    db_path=db_path,
                    failure_reason=failure_reason,
                )
            if on_parse_error is not None:
                on_parse_error(exc, event_metadata.get("raw_line", raw_event.message_raw))
            continue

        canonical_events.append(canonical_event)
        item_results.append(
            _ingestion_item_result_from_event(
                idx,
                raw_event,
                raw_envelope_id=raw_envelope_id,
                status="canonicalized",
            )
        )
        if raw_envelope_id is not None:
            set_raw_envelope_status(raw_envelope_id, "canonicalized", db_path=db_path)
        if on_event is not None:
            on_event(canonical_event)

    if not canonical_events:
        # Nothing canonicalized: skip signature/candidate work and storage.
        return IngestionResult(
            raw_envelopes=raw_envelopes_list,
            raw_envelope_ids=raw_envelope_ids,
            events=[],
            signatures=[],
            candidates=[],
            parse_failed=parse_failed,
            item_results=item_results,
        )

    signatures = signatures_for_events(canonical_events)
    candidates = build_recurrence_candidates(canonical_events, signatures, threshold=threshold)
    if db_path:
        store_events(canonical_events, db_path)
        store_signatures(signatures, db_path)
        store_candidates(candidates, db_path)

    return IngestionResult(
        raw_envelopes=raw_envelopes_list,
        raw_envelope_ids=raw_envelope_ids,
        events=canonical_events,
        signatures=signatures,
        candidates=candidates,
        parse_failed=parse_failed,
        item_results=item_results,
    )
389
+
390
+
391
def run_ingest_source_payload(
    source_type: str,
    payload: object,
    *,
    tenant_id: str,
    source_path: str,
    threshold: int | None = None,
    db_path: Optional[str] = None,
    on_event: Optional[Callable[[CanonicalEvent], None]] = None,
    on_parse_error: Optional[ErrorHandler] = None,
) -> IngestionResult:
    """Parse ``payload`` with the ``source_type`` driver and ingest the result.

    ``on_parse_error`` is handed to both the source driver and the pipeline.
    """
    envelopes = parse_source_payloads(
        source_type,
        payload,
        tenant_id=tenant_id,
        source_path=source_path,
        on_parse_error=on_parse_error,
    )
    return run_ingest_pipeline(
        envelopes,
        threshold=threshold,
        db_path=db_path,
        on_event=on_event,
        on_parse_error=on_parse_error,
    )
415
+
416
+
417
def run_ingest_logicmonitor_events(
    events: Iterable[object],
    *,
    tenant_id: str,
    source_path: str = "/logicmonitor/ingest",
    threshold: int | None = None,
    db_path: Optional[str] = None,
    on_event: Optional[Callable[[CanonicalEvent], None]] = None,
    on_parse_error: Optional[ErrorHandler] = None,
) -> IngestionResult:
    """Ingest ``events`` through the "logicmonitor" source driver."""
    # Materialise the iterable so the driver receives a concrete list.
    event_payloads = list(events)
    return run_ingest_source_payload(
        "logicmonitor",
        event_payloads,
        tenant_id=tenant_id,
        source_path=source_path,
        threshold=threshold,
        db_path=db_path,
        on_event=on_event,
        on_parse_error=on_parse_error,
    )
437
+
438
+
439
def run_ingest_file_lines(
    lines: Iterable[str],
    *,
    tenant_id: str,
    source_path: str,
    threshold: int | None = None,
    db_path: str | None = None,
    on_event: Optional[Callable[[CanonicalEvent], None]] = None,
    on_parse_error: Optional[ErrorHandler] = None,
) -> IngestionResult:
    """Ingest the non-blank ``lines`` through the "file" source driver."""
    # Empty and whitespace-only lines are dropped before parsing.
    non_blank_lines = [line for line in lines if str(line).strip()]
    return run_ingest_source_payload(
        "file",
        non_blank_lines,
        tenant_id=tenant_id,
        source_path=source_path,
        threshold=threshold,
        db_path=db_path,
        on_event=on_event,
        on_parse_error=on_parse_error,
    )
459
+
460
+
91
461
  def ingest_syslog_lines(lines: Iterable[str], *, tenant_id: str, source_path: str = "") -> List[CanonicalEvent]:
92
462
  return canonicalize_raw_input_envelopes(
93
463
  parse_syslog_envelopes(lines, tenant_id=tenant_id, source_path=source_path),
94
464
  )
95
465
 
96
466
 
467
def ingest_file_lines(lines: Iterable[str], *, tenant_id: str, source_path: str = "") -> List[CanonicalEvent]:
    """Parse ``lines`` as file envelopes and canonicalize each one."""
    envelopes = parse_file_envelopes(lines, tenant_id=tenant_id, source_path=source_path)
    return canonicalize_raw_input_envelopes(envelopes)
471
+
472
+
97
473
  def ingest_syslog_file(path: str, *, tenant_id: str) -> List[Event]:
98
474
  file_path = Path(path)
99
475
  lines = file_path.read_text(encoding="utf-8", errors="ignore").splitlines()
100
476
  return ingest_syslog_lines(lines, tenant_id=tenant_id, source_path=str(file_path))
101
477
 
102
478
 
479
def run_ingest_file(
    path: str,
    *,
    tenant_id: str,
    threshold: int | None = None,
    db_path: Optional[str] = None,
    on_event: Optional[Callable[[CanonicalEvent], None]] = None,
    on_parse_error: Optional[ErrorHandler] = None,
) -> IngestionResult:
    """Read the file at ``path`` and run its lines through the file ingest pipeline.

    The file is decoded as UTF-8 with undecodable bytes ignored; the path is
    used as the source path.
    """
    source_file = Path(path)
    content = source_file.read_text(encoding="utf-8", errors="ignore")
    return run_ingest_file_lines(
        content.splitlines(),
        tenant_id=tenant_id,
        source_path=str(source_file),
        threshold=threshold,
        db_path=db_path,
        on_event=on_event,
        on_parse_error=on_parse_error,
    )
499
+
500
+
103
501
  def signatures_for_events(events: Iterable[Event]) -> List[Signature]:
104
- return [fingerprint_event(event) for event in events]
502
+ signatures = []
503
+ for event in events:
504
+ signature = fingerprint_event(event)
505
+ source_raw_envelope_id = getattr(event, "raw_envelope_id", None)
506
+ if source_raw_envelope_id is not None:
507
+ signature.metadata = dict(signature.metadata)
508
+ signature.metadata["source_raw_envelope_id"] = int(source_raw_envelope_id)
509
+ signature.metadata["source_raw_envelope_ids"] = [int(source_raw_envelope_id)]
510
+ signatures.append(signature)
511
+ return signatures
105
512
 
106
513
 
107
514
  def events_as_dicts(events: Iterable[Event]) -> List[dict]:
@@ -1,9 +1,29 @@
1
1
  from __future__ import annotations
2
2
 
3
- from typing import Iterable, List, Dict, Any
3
+ from typing import Any, Dict, Iterable, List
4
4
 
5
5
  from .models import Candidate
6
6
 
7
# Display label for each known score-breakdown signal name.
ATTN_SIGNAL_LABELS = {
    "recurrence": "recurrence",
    "recovery": "recovery",
    "spread": "spread",
    "novelty": "novelty",
    "impact": "human-impact",
    "precursor": "precursor",
    "memory_weight": "memory",
}

# One-line rationale shown alongside each known signal.
ATTN_SIGNAL_RATIONALES = {
    "recurrence": "recurrence indicates repeated observation",
    "recovery": "recovery suggests a pattern that often resets",
    "spread": "spread shows similar behavior across context",
    "novelty": "novelty indicates non-routine pattern shape",
    "impact": "impact shows likely operator visibility value",
    "precursor": "precursor score indicates early warning behavior",
    "memory_weight": "memory_weight reflects previous recurrence context",
}
26
+
7
27
 
8
28
  def _attention_band(decision_band: str) -> str:
9
29
  mapping = {
@@ -16,10 +36,42 @@ def _attention_band(decision_band: str) -> str:
16
36
  return mapping.get(decision_band, "watch")
17
37
 
18
38
 
39
+ def _dominant_attention_signals(score_breakdown: Dict[str, float], *, limit: int = 3) -> List[Dict[str, Any]]:
40
+ ordered = sorted(score_breakdown.items(), key=lambda item: (float(item[1]), item[0]), reverse=True)
41
+ dominant = ordered[:limit]
42
+ return [
43
+ {
44
+ "signal": name,
45
+ "value": round(float(value), 3),
46
+ "label": ATTN_SIGNAL_LABELS.get(name, name.replace("_", "-")),
47
+ "rationale": ATTN_SIGNAL_RATIONALES.get(name, "prototype attention component"),
48
+ }
49
+ for name, value in dominant
50
+ if float(value) > 0
51
+ ]
52
+
53
+
54
def _attention_explanation(candidate: Candidate) -> Dict[str, Any]:
    """Summarise why ``candidate`` sits in its attention band.

    Returns a dict with the attention band, the dominant signals and a
    one-sentence summary naming those signals (or stating that none are
    available).
    """
    band = _attention_band(candidate.decision_band)
    signals = _dominant_attention_signals(candidate.score_breakdown)
    rendered_signals = [f"{entry['label']}:{entry['value']}" for entry in signals]

    if rendered_signals:
        sentence = f"{band} attention is driven by {', '.join(rendered_signals)}."
    else:
        sentence = f"{band} attention is currently low; no dominant attention signals are available."

    return {
        "attention_band": band,
        "dominant_signals": signals,
        "summary": sentence,
    }
67
+
68
+
19
69
  def _why_it_matters(candidate: Candidate) -> str:
20
70
  count = int((candidate.metadata or {}).get("count") or 0)
21
71
  service = str((candidate.metadata or {}).get("service") or "").strip()
22
72
  family = candidate.candidate_type.replace("_", " ")
73
+ attention_explanation = _attention_explanation(candidate)
74
+ top_signals = ", ".join(item["label"] for item in attention_explanation["dominant_signals"])
23
75
  pieces = []
24
76
  if count:
25
77
  pieces.append(f"observed {count} times")
@@ -37,6 +89,8 @@ def _why_it_matters(candidate: Candidate) -> str:
37
89
  else:
38
90
  level = "is low-attention noise"
39
91
  detail = ", ".join(pieces) if pieces else family
92
+ if top_signals:
93
+ detail = f"{detail} ({top_signals})" if detail else top_signals
40
94
  return f"{detail}; {level}."
41
95
 
42
96
 
@@ -55,6 +109,7 @@ def interesting_items(candidates: Iterable[Candidate], *, limit: int = 5) -> Lis
55
109
  "score_total": candidate.score_total,
56
110
  "confidence": candidate.confidence,
57
111
  "why_it_matters": _why_it_matters(candidate),
112
+ "attention_explanation": _attention_explanation(candidate),
58
113
  "signals": dict(candidate.score_breakdown),
59
114
  "metadata": dict(candidate.metadata),
60
115
  }