@simbimbo/brainstem 0.0.1 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. package/CHANGELOG.md +87 -0
  2. package/README.md +99 -3
  3. package/brainstem/__init__.py +3 -0
  4. package/brainstem/api.py +257 -0
  5. package/brainstem/connectors/__init__.py +1 -0
  6. package/brainstem/connectors/logicmonitor.py +26 -0
  7. package/brainstem/connectors/types.py +16 -0
  8. package/brainstem/demo.py +64 -0
  9. package/brainstem/fingerprint.py +44 -0
  10. package/brainstem/ingest.py +108 -0
  11. package/brainstem/instrumentation.py +38 -0
  12. package/brainstem/interesting.py +62 -0
  13. package/brainstem/models.py +80 -0
  14. package/brainstem/recurrence.py +112 -0
  15. package/brainstem/scoring.py +38 -0
  16. package/brainstem/storage.py +428 -0
  17. package/docs/adapters.md +435 -0
  18. package/docs/api.md +380 -0
  19. package/docs/architecture.md +333 -0
  20. package/docs/connectors.md +66 -0
  21. package/docs/data-model.md +290 -0
  22. package/docs/design-governance.md +595 -0
  23. package/docs/mvp-flow.md +109 -0
  24. package/docs/roadmap.md +87 -0
  25. package/docs/scoring.md +424 -0
  26. package/docs/v0.0.1.md +277 -0
  27. package/docs/vision.md +85 -0
  28. package/package.json +6 -14
  29. package/pyproject.toml +18 -0
  30. package/tests/fixtures/sample_syslog.log +6 -0
  31. package/tests/test_api.py +319 -0
  32. package/tests/test_canonicalization.py +28 -0
  33. package/tests/test_demo.py +25 -0
  34. package/tests/test_fingerprint.py +22 -0
  35. package/tests/test_ingest.py +15 -0
  36. package/tests/test_instrumentation.py +16 -0
  37. package/tests/test_interesting.py +36 -0
  38. package/tests/test_logicmonitor.py +22 -0
  39. package/tests/test_recurrence.py +16 -0
  40. package/tests/test_scoring.py +21 -0
  41. package/tests/test_storage.py +294 -0
@@ -0,0 +1,108 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import asdict
4
+ from datetime import datetime
5
+ from pathlib import Path
6
+ from typing import Iterable, List
7
+
8
+ from .fingerprint import fingerprint_event, normalize_message
9
+ from .models import CanonicalEvent, Event, RawInputEnvelope, Signature
10
+
11
+
12
def parse_syslog_line(line: str, *, tenant_id: str, source_path: str = "") -> CanonicalEvent:
    """Parse a single syslog line and return it in canonical event form."""
    envelope = parse_syslog_envelope(line, tenant_id=tenant_id, source_path=source_path)
    return canonicalize_raw_input_envelope(envelope)
16
+
17
+
18
def parse_syslog_envelope(line: str, *, tenant_id: str, source_path: str = "") -> RawInputEnvelope:
    """Wrap a raw syslog line in a :class:`RawInputEnvelope`.

    Best-effort parse of the classic BSD layout
    ``MMM DD HH:MM:SS host service: message``: token 3 is taken as the
    host and everything after it as ``service: message``.  Lines that do
    not fit that shape keep the whole text as the message.  NOTE: the
    envelope timestamp is the ingest time (UTC), not the timestamp
    embedded in the line itself.
    """
    from datetime import timezone  # local import keeps the module's import surface unchanged

    text = (line or "").rstrip("\n")
    # Same output as the deprecated `datetime.utcnow().isoformat() + "Z"`,
    # but built from an aware clock (utcnow() is deprecated since 3.12).
    timestamp = datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z"
    host = ""
    service = ""
    message = text

    parts = text.split()
    if len(parts) >= 5:
        host = parts[3]
        rest = " ".join(parts[4:])
        if ":" in rest:
            svc, _, msg = rest.partition(":")
            service = svc.strip()
            # Fall back to the whole remainder when the text after ":" is empty.
            message = msg.strip() or rest.strip()
        else:
            message = rest.strip()

    return RawInputEnvelope(
        tenant_id=tenant_id,
        source_type="syslog",
        timestamp=timestamp,
        message_raw=message,
        host=host,
        service=service,
        source_path=source_path,
        metadata={"raw_line": text},
    )
46
+
47
+
48
def parse_syslog_envelopes(lines: Iterable[str], *, tenant_id: str, source_path: str = "") -> List[RawInputEnvelope]:
    """Parse every non-blank line into an envelope, preserving input order."""
    envelopes = []
    for raw_line in lines:
        if not str(raw_line).strip():
            continue  # skip blank / whitespace-only lines
        envelopes.append(parse_syslog_envelope(raw_line, tenant_id=tenant_id, source_path=source_path))
    return envelopes
50
+
51
+
52
def canonicalize_raw_input_envelope(raw: RawInputEnvelope) -> CanonicalEvent:
    """Promote a :class:`RawInputEnvelope` to a :class:`CanonicalEvent`.

    Copies the envelope's descriptive fields through unchanged, computes
    the normalized message / signature input, and records ingest metadata
    (canonicalization time plus the envelope's own metadata).

    Raises:
        ValueError: if the envelope metadata carries a ``parse_error`` or
            the raw message is empty / whitespace-only.
    """
    from datetime import timezone  # local import keeps the module's import surface unchanged

    parse_error = (raw.metadata or {}).get("parse_error")
    if parse_error:
        raise ValueError(f"parse_error: {parse_error}")

    if not (raw.message_raw or "").strip():
        raise ValueError("message_raw is empty and cannot be canonicalized")

    message_normalized = normalize_message(raw.message_raw)
    metadata = dict(raw.metadata or {})
    metadata.setdefault("canonicalization_source", raw.source_type)
    metadata["raw_input_seen"] = True
    # Same output as the deprecated `datetime.utcnow().isoformat() + "Z"`,
    # but built from an aware clock (utcnow() is deprecated since 3.12).
    canonicalized_at = datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z"
    return CanonicalEvent(
        tenant_id=raw.tenant_id,
        source_type=raw.source_type,
        timestamp=raw.timestamp,
        host=raw.host,
        service=raw.service,
        severity=raw.severity,
        asset_id=raw.asset_id,
        source_path=raw.source_path,
        facility=raw.facility,
        message_raw=raw.message_raw,
        structured_fields=dict(raw.structured_fields),
        correlation_keys=dict(raw.correlation_keys),
        message_normalized=message_normalized,
        signature_input=message_normalized,
        # Envelope metadata is spread last so it can override the two
        # bookkeeping keys (matches the original merge order).
        ingest_metadata={
            "canonicalized_at": canonicalized_at,
            "source_timestamp": raw.timestamp,
            **metadata,
        },
    )
85
+
86
+
87
def canonicalize_raw_input_envelopes(events: Iterable[RawInputEnvelope]) -> List[CanonicalEvent]:
    """Canonicalize each envelope, in order."""
    return [canonicalize_raw_input_envelope(envelope) for envelope in events]
89
+
90
+
91
def ingest_syslog_lines(lines: Iterable[str], *, tenant_id: str, source_path: str = "") -> List[CanonicalEvent]:
    """Parse, then canonicalize, a batch of syslog lines."""
    envelopes = parse_syslog_envelopes(lines, tenant_id=tenant_id, source_path=source_path)
    return canonicalize_raw_input_envelopes(envelopes)
95
+
96
+
97
def ingest_syslog_file(path: str, *, tenant_id: str) -> List[Event]:
    """Read a syslog file (undecodable bytes are dropped) and ingest its lines."""
    file_path = Path(path)
    content = file_path.read_text(encoding="utf-8", errors="ignore")
    return ingest_syslog_lines(content.splitlines(), tenant_id=tenant_id, source_path=str(file_path))
101
+
102
+
103
def signatures_for_events(events: Iterable[Event]) -> List[Signature]:
    """Fingerprint each event into its signature."""
    return list(map(fingerprint_event, events))
105
+
106
+
107
def events_as_dicts(events: Iterable[Event]) -> List[dict]:
    """Serialize dataclass events into plain, JSON-friendly dicts."""
    return list(map(asdict, events))
@@ -0,0 +1,38 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import sys
5
+ import time
6
+ from contextlib import contextmanager
7
+ from typing import Any, Dict, Iterator
8
+
9
+
10
def emit(event: str, **fields: Any) -> None:
    """Write one structured JSON log record to stderr."""
    record: Dict[str, Any] = {
        "event": event,
        "timestamp": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
    }
    # Caller-supplied fields are merged last so they may override the
    # bookkeeping keys, exactly like the original `{..., **fields}` spread.
    record.update(fields)
    print(json.dumps(record, ensure_ascii=False), file=sys.stderr)
17
+
18
+
19
@contextmanager
def span(event: str, **fields: Any) -> Iterator[None]:
    """Emit `<event>_start` then `<event>_complete` (or `<event>_failed`)
    around the wrapped region, with elapsed wall time in milliseconds.

    Exceptions are reported and re-raised, never swallowed.
    """
    t0 = time.perf_counter()
    emit(f"{event}_start", **fields)
    try:
        yield
    except Exception as exc:
        emit(
            f"{event}_failed",
            error_type=type(exc).__name__,
            error=str(exc),
            elapsed_ms=round((time.perf_counter() - t0) * 1000, 3),
            **fields,
        )
        raise
    else:
        # Success path only: the except branch above always re-raises.
        emit(
            f"{event}_complete",
            elapsed_ms=round((time.perf_counter() - t0) * 1000, 3),
            **fields,
        )
@@ -0,0 +1,62 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Iterable, List, Dict, Any
4
+
5
+ from .models import Candidate
6
+
7
+
8
+ def _attention_band(decision_band: str) -> str:
9
+ mapping = {
10
+ "ignore": "ignore_fast",
11
+ "watch": "background",
12
+ "review": "watch",
13
+ "urgent_human_review": "investigate",
14
+ "promote_to_incident_memory": "promote",
15
+ }
16
+ return mapping.get(decision_band, "watch")
17
+
18
+
19
def _why_it_matters(candidate: Candidate) -> str:
    """Build a one-line human explanation of a candidate's relevance."""
    meta = candidate.metadata or {}
    count = int(meta.get("count") or 0)
    service = str(meta.get("service") or "").strip()
    family = candidate.candidate_type.replace("_", " ")

    pieces = []
    if count:
        pieces.append(f"observed {count} times")
    if service:
        pieces.append(f"around {service}")

    attention_levels = {
        "promote": "has earned high operator attention",
        "investigate": "has earned active operator attention",
        "watch": "has earned watch-level attention",
        "background": "is worth keeping in the background",
    }
    band = _attention_band(candidate.decision_band)
    level = attention_levels.get(band, "is low-attention noise")
    # Fall back to the candidate family when no count/service detail exists.
    detail = ", ".join(pieces) if pieces else family
    return f"{detail}; {level}."
41
+
42
+
43
def interesting_items(candidates: Iterable[Candidate], *, limit: int = 5) -> List[Dict[str, Any]]:
    """Rank candidates by (score, confidence) and render the top few as dicts."""
    ranked = sorted(candidates, key=lambda c: (c.score_total, c.confidence), reverse=True)
    results: List[Dict[str, Any]] = []
    # max(limit, 1): always surface at least one item, even when limit <= 0.
    for cand in ranked[: max(limit, 1)]:
        results.append(
            {
                "title": cand.title,
                "summary": cand.summary,
                "decision_band": cand.decision_band,
                "attention_band": _attention_band(cand.decision_band),
                "attention_score": cand.score_total,
                "score_total": cand.score_total,
                "confidence": cand.confidence,
                "why_it_matters": _why_it_matters(cand),
                # Shallow copies so callers cannot mutate the candidate.
                "signals": dict(cand.score_breakdown),
                "metadata": dict(cand.metadata),
            }
        )
    return results
@@ -0,0 +1,80 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass, field
4
+ from typing import Any, Dict, List
5
+
6
+
7
@dataclass
class RawInputEnvelope:
    """Raw, source-shaped record as produced by a connector/parser,
    before canonicalization.

    Only the first four fields are required; everything else defaults to
    empty and is filled in when the source supplies it.
    """

    tenant_id: str  # owning tenant
    source_type: str  # e.g. "syslog" (set by parse_syslog_envelope)
    timestamp: str  # ISO-8601 string assigned at parse time
    message_raw: str  # unnormalized message text
    source_id: str = ""
    source_name: str = ""
    host: str = ""
    service: str = ""
    severity: str = "info"  # default when the source provides no severity
    asset_id: str = ""
    source_path: str = ""
    facility: str = ""
    structured_fields: Dict[str, Any] = field(default_factory=dict)
    correlation_keys: Dict[str, Any] = field(default_factory=dict)
    # Free-form; canonicalization rejects envelopes carrying "parse_error".
    metadata: Dict[str, Any] = field(default_factory=dict)
24
+
25
+
26
@dataclass
class CanonicalEvent:
    """Normalized event produced from a RawInputEnvelope by
    canonicalize_raw_input_envelope.
    """

    tenant_id: str
    source_type: str
    timestamp: str  # source timestamp carried over from the envelope
    message_raw: str
    host: str = ""
    service: str = ""
    severity: str = "info"
    asset_id: str = ""
    source_path: str = ""
    facility: str = ""
    structured_fields: Dict[str, Any] = field(default_factory=dict)
    correlation_keys: Dict[str, Any] = field(default_factory=dict)
    message_normalized: str = ""  # output of fingerprint.normalize_message
    signature_input: str = ""  # currently identical to message_normalized
    # Canonicalization bookkeeping: canonicalized_at, source_timestamp,
    # plus the envelope's own metadata.
    ingest_metadata: Dict[str, Any] = field(default_factory=dict)
43
+
44
+
45
# Backward-compatible alias: existing code refers to canonical events
# simply as `Event`.
Event = CanonicalEvent
47
+
48
+
49
@dataclass
class Signature:
    """Stable fingerprint of a normalized event pattern."""

    signature_key: str  # dedup/count key used by recurrence analysis
    event_family: str  # e.g. "failure", "auth", "service_lifecycle", "generic"
    normalized_pattern: str
    service: str = ""
    metadata: Dict[str, Any] = field(default_factory=dict)
56
+
57
+
58
@dataclass
class Candidate:
    """Scored weak-signal candidate derived from recurring events."""

    candidate_type: str  # e.g. "recurrence"
    title: str
    summary: str
    score_total: float  # mean of the score_breakdown components
    score_breakdown: Dict[str, float]  # per-signal scores (recurrence, impact, ...)
    decision_band: str  # bucket assigned by scoring.decision_band
    source_signature_ids: List[str] = field(default_factory=list)
    source_event_ids: List[str] = field(default_factory=list)
    confidence: float = 0.0
    # e.g. {"count": <occurrences>, "service": <service name>}
    metadata: Dict[str, Any] = field(default_factory=dict)
70
+
71
+
72
@dataclass
class IncidentMemory:
    """Long-lived record of a pattern promoted out of candidate review."""

    title: str
    summary: str
    incident_type: str
    source_candidate_ids: List[str] = field(default_factory=list)
    recurrence_count: int = 0  # times this memory has been reinforced
    confidence: float = 0.0
    metadata: Dict[str, Any] = field(default_factory=dict)
@@ -0,0 +1,112 @@
1
+ from __future__ import annotations
2
+
3
+ from collections import Counter
4
+ from dataclasses import asdict
5
+ from typing import Iterable, List
6
+
7
+ from .models import Candidate, Event, Signature
8
+ from .scoring import score_candidate
9
+
10
+
11
# Fallback candidate titles keyed by signature event family, used when no
# more specific title rule matches in _candidate_title.
FAMILY_TITLES = {
    "failure": "Recurring failure pattern",
    "auth": "Recurring authentication failure pattern",
    "service_lifecycle": "Recurring service lifecycle instability",
    "generic": "Recurring operational pattern",
}
17
+
18
+
19
+ def _candidate_title(signature: Signature) -> str:
20
+ normalized = signature.normalized_pattern
21
+ service = (signature.service or "").strip()
22
+ if "vpn" in normalized or service == "charon":
23
+ return "Recurring VPN tunnel instability"
24
+ if "failed password" in normalized or "auth" in normalized or service == "sshd":
25
+ return "Recurring SSH authentication failures"
26
+ if signature.event_family == "service_lifecycle" and service:
27
+ return f"Recurring {service} service instability"
28
+ return FAMILY_TITLES.get(signature.event_family, "Recurring operational pattern")
29
+
30
+
31
+ def _candidate_summary(signature: Signature, count: int) -> str:
32
+ normalized = signature.normalized_pattern
33
+ service = (signature.service or "").strip()
34
+ if "vpn" in normalized or service == "charon":
35
+ return f"VPN tunnel instability was observed {count} times and may deserve more operator attention if it continues."
36
+ if "failed password" in normalized or "auth" in normalized or service == "sshd":
37
+ return f"SSH authentication failures were observed {count} times and are worth background attention if the pattern continues."
38
+ if service:
39
+ return f"A recurring {service} pattern was observed {count} times in the current event stream."
40
+ return f"A recurring operational pattern was observed {count} times in the current event stream."
41
+
42
+
43
def signature_counts(signatures: Iterable[Signature]) -> Counter:
    """Count occurrences of each signature key."""
    keys = (signature.signature_key for signature in signatures)
    return Counter(keys)
45
+
46
+
47
def build_recurrence_candidates(events: List[Event], signatures: List[Signature], *, threshold: int = 2) -> List[Candidate]:
    """Turn repeated signatures into scored recurrence candidates.

    Emits one candidate per distinct signature key whose occurrence count
    reaches ``threshold``, in order of first appearance.  ``events`` is
    currently unused but kept for interface stability.

    The original implementation built a duplicate candidate for every
    occurrence (re-scanning all signatures each time for event ids) and
    deduped afterwards — O(n^2).  This version groups event ids in one
    pass and emits each key at most once; output is identical.
    """
    counts = signature_counts(signatures)

    # One pass: event ids per signature key (list positions double as ids).
    ids_by_key = {}
    for index, sig in enumerate(signatures):
        ids_by_key.setdefault(sig.signature_key, []).append(str(index))

    candidates: List[Candidate] = []
    emitted = set()
    for signature in signatures:
        key = signature.signature_key
        if key in emitted:
            continue
        count = counts[key]
        if count < threshold:
            continue
        emitted.add(key)
        candidate = score_candidate(
            recurrence=min(count / 10.0, 1.0),  # saturates at 10 occurrences
            recovery=0.4,
            spread=0.2,
            novelty=0.3,
            # Failure/auth families carry higher assumed impact.
            impact=0.5 if signature.event_family in {"failure", "auth"} else 0.2,
            precursor=0.3,
            memory_weight=0.4,
        )
        candidate.title = _candidate_title(signature)
        candidate.summary = _candidate_summary(signature, count)
        candidate.source_signature_ids = [key]
        candidate.source_event_ids = ids_by_key[key]
        candidate.metadata = {"count": count, "service": signature.service}
        candidates.append(candidate)
    return candidates
86
+
87
+
88
+ def _attention_band(decision_band: str) -> str:
89
+ mapping = {
90
+ "ignore": "ignore_fast",
91
+ "watch": "background",
92
+ "review": "watch",
93
+ "urgent_human_review": "investigate",
94
+ "promote_to_incident_memory": "promote",
95
+ }
96
+ return mapping.get(decision_band, "watch")
97
+
98
+
99
def digest_items(candidates: Iterable[Candidate]) -> List[dict]:
    """Render candidates as plain dicts for digest output."""
    items: List[dict] = []
    for candidate in candidates:
        entry = {
            "title": candidate.title,
            "summary": candidate.summary,
            "decision_band": candidate.decision_band,
            "attention_band": _attention_band(candidate.decision_band),
            "attention_score": candidate.score_total,
            "score_total": candidate.score_total,
            # Shared by reference, matching the original behavior.
            "score_breakdown": candidate.score_breakdown,
            "metadata": candidate.metadata,
        }
        items.append(entry)
    return items
@@ -0,0 +1,38 @@
1
+ from __future__ import annotations
2
+
3
+ from .models import Candidate
4
+
5
+
6
+ def decision_band(score_total: float) -> str:
7
+ if score_total >= 0.85:
8
+ return "promote_to_incident_memory"
9
+ if score_total >= 0.65:
10
+ return "urgent_human_review"
11
+ if score_total >= 0.45:
12
+ return "review"
13
+ if score_total >= 0.25:
14
+ return "watch"
15
+ return "ignore"
16
+
17
+
18
def score_candidate(*, recurrence: float, recovery: float, spread: float, novelty: float, impact: float, precursor: float, memory_weight: float) -> Candidate:
    """Average the seven signal scores into a scored recurrence Candidate."""
    breakdown = {
        "recurrence": recurrence,
        "recovery": recovery,
        "spread": spread,
        "novelty": novelty,
        "impact": impact,
        "precursor": precursor,
        "memory_weight": memory_weight,
    }
    # Unweighted mean of the seven components.
    mean_score = sum(breakdown.values()) / len(breakdown)
    rounded = round(mean_score, 3)
    return Candidate(
        candidate_type="recurrence",
        title="Derived operational candidate",
        summary="A weak-signal candidate derived from recurring events.",
        score_total=rounded,
        score_breakdown=breakdown,
        # Band is computed on the unrounded mean, as in the original.
        decision_band=decision_band(mean_score),
        confidence=rounded,
    )