@simbimbo/brainstem 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. package/CHANGELOG.md +63 -0
  2. package/README.md +99 -3
  3. package/brainstem/__init__.py +3 -0
  4. package/brainstem/api.py +131 -0
  5. package/brainstem/connectors/__init__.py +1 -0
  6. package/brainstem/connectors/logicmonitor.py +26 -0
  7. package/brainstem/connectors/types.py +16 -0
  8. package/brainstem/demo.py +64 -0
  9. package/brainstem/fingerprint.py +44 -0
  10. package/brainstem/ingest.py +101 -0
  11. package/brainstem/instrumentation.py +38 -0
  12. package/brainstem/interesting.py +62 -0
  13. package/brainstem/models.py +78 -0
  14. package/brainstem/recurrence.py +112 -0
  15. package/brainstem/scoring.py +38 -0
  16. package/brainstem/storage.py +182 -0
  17. package/docs/adapters.md +435 -0
  18. package/docs/api.md +380 -0
  19. package/docs/architecture.md +333 -0
  20. package/docs/connectors.md +66 -0
  21. package/docs/data-model.md +290 -0
  22. package/docs/design-governance.md +595 -0
  23. package/docs/mvp-flow.md +109 -0
  24. package/docs/roadmap.md +87 -0
  25. package/docs/scoring.md +424 -0
  26. package/docs/v0.0.1.md +277 -0
  27. package/docs/vision.md +85 -0
  28. package/package.json +6 -14
  29. package/pyproject.toml +18 -0
  30. package/tests/fixtures/sample_syslog.log +6 -0
  31. package/tests/test_api.py +72 -0
  32. package/tests/test_canonicalization.py +28 -0
  33. package/tests/test_demo.py +25 -0
  34. package/tests/test_fingerprint.py +22 -0
  35. package/tests/test_ingest.py +15 -0
  36. package/tests/test_instrumentation.py +16 -0
  37. package/tests/test_interesting.py +36 -0
  38. package/tests/test_logicmonitor.py +22 -0
  39. package/tests/test_recurrence.py +16 -0
  40. package/tests/test_scoring.py +21 -0
  41. package/tests/test_storage.py +26 -0
@@ -0,0 +1,78 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass, field
4
+ from typing import Any, Dict, List
5
+
6
+
7
@dataclass
class RawInputEnvelope:
    """Event payload as handed over by a connector, prior to canonicalization.

    Only ``tenant_id``, ``source_type``, ``timestamp`` and ``message_raw`` are
    required; everything else defaults to empty/neutral values.
    """

    tenant_id: str
    source_type: str
    timestamp: str
    message_raw: str
    host: str = ""
    service: str = ""
    severity: str = "info"
    asset_id: str = ""
    source_path: str = ""
    facility: str = ""
    # Source-specific parsed fields (e.g. key/value pairs from structured logs).
    structured_fields: Dict[str, Any] = field(default_factory=dict)
    # Keys used to correlate related events across sources.
    correlation_keys: Dict[str, Any] = field(default_factory=dict)
    # Free-form connector/transport metadata.
    metadata: Dict[str, Any] = field(default_factory=dict)
22
+
23
+
24
@dataclass
class CanonicalEvent:
    """Normalized event produced from a RawInputEnvelope by the ingest step.

    Mirrors the envelope's fields and adds the normalization outputs
    (``message_normalized``, ``signature_input``) plus ingest bookkeeping.
    """

    tenant_id: str
    source_type: str
    timestamp: str
    message_raw: str
    host: str = ""
    service: str = ""
    severity: str = "info"
    asset_id: str = ""
    source_path: str = ""
    facility: str = ""
    structured_fields: Dict[str, Any] = field(default_factory=dict)
    correlation_keys: Dict[str, Any] = field(default_factory=dict)
    # Message with variable tokens stripped/templated — TODO confirm against ingest.py.
    message_normalized: str = ""
    # Text fed into the fingerprinting step to derive a signature key.
    signature_input: str = ""
    ingest_metadata: Dict[str, Any] = field(default_factory=dict)
41
+
42
+
43
# Backward-compatible alias: existing modules refer to ``Event``; new code
# should prefer the explicit ``CanonicalEvent`` name.
Event = CanonicalEvent
45
+
46
+
47
@dataclass
class Signature:
    """Fingerprint identifying a family of similar events.

    ``signature_key`` is the deduplication key; ``normalized_pattern`` is the
    human-readable template the key was derived from.
    """

    signature_key: str
    event_family: str
    normalized_pattern: str
    service: str = ""
    metadata: Dict[str, Any] = field(default_factory=dict)
54
+
55
+
56
@dataclass
class Candidate:
    """A scored weak-signal finding derived from recurring signatures/events."""

    candidate_type: str
    title: str
    summary: str
    # Composite score in [0, 1]; produced by scoring.score_candidate.
    score_total: float
    # Per-dimension scores that were combined into score_total.
    score_breakdown: Dict[str, float]
    # Named band derived from score_total (see scoring.decision_band).
    decision_band: str
    source_signature_ids: List[str] = field(default_factory=list)
    source_event_ids: List[str] = field(default_factory=list)
    confidence: float = 0.0
    metadata: Dict[str, Any] = field(default_factory=dict)
68
+
69
+
70
@dataclass
class IncidentMemory:
    """Long-lived record of a candidate promoted to incident memory.

    ``recurrence_count`` tracks how often the underlying pattern has been
    seen since promotion — TODO confirm update semantics against callers.
    """

    title: str
    summary: str
    incident_type: str
    source_candidate_ids: List[str] = field(default_factory=list)
    recurrence_count: int = 0
    confidence: float = 0.0
    metadata: Dict[str, Any] = field(default_factory=dict)
@@ -0,0 +1,112 @@
1
+ from __future__ import annotations
2
+
3
+ from collections import Counter
4
+ from dataclasses import asdict
5
+ from typing import Iterable, List
6
+
7
+ from .models import Candidate, Event, Signature
8
+ from .scoring import score_candidate
9
+
10
+
11
# Fallback candidate titles keyed by Signature.event_family. Used only when
# none of the more specific patterns in _candidate_title (VPN, SSH auth,
# per-service lifecycle) matched.
FAMILY_TITLES = {
    "failure": "Recurring failure pattern",
    "auth": "Recurring authentication failure pattern",
    "service_lifecycle": "Recurring service lifecycle instability",
    "generic": "Recurring operational pattern",
}
17
+
18
+
19
+ def _candidate_title(signature: Signature) -> str:
20
+ normalized = signature.normalized_pattern
21
+ service = (signature.service or "").strip()
22
+ if "vpn" in normalized or service == "charon":
23
+ return "Recurring VPN tunnel instability"
24
+ if "failed password" in normalized or "auth" in normalized or service == "sshd":
25
+ return "Recurring SSH authentication failures"
26
+ if signature.event_family == "service_lifecycle" and service:
27
+ return f"Recurring {service} service instability"
28
+ return FAMILY_TITLES.get(signature.event_family, "Recurring operational pattern")
29
+
30
+
31
+ def _candidate_summary(signature: Signature, count: int) -> str:
32
+ normalized = signature.normalized_pattern
33
+ service = (signature.service or "").strip()
34
+ if "vpn" in normalized or service == "charon":
35
+ return f"VPN tunnel instability was observed {count} times and may deserve more operator attention if it continues."
36
+ if "failed password" in normalized or "auth" in normalized or service == "sshd":
37
+ return f"SSH authentication failures were observed {count} times and are worth background attention if the pattern continues."
38
+ if service:
39
+ return f"A recurring {service} pattern was observed {count} times in the current event stream."
40
+ return f"A recurring operational pattern was observed {count} times in the current event stream."
41
+
42
+
43
def signature_counts(signatures: Iterable[Signature]) -> Counter:
    """Count how many times each signature_key appears in *signatures*."""
    counts: Counter = Counter()
    for sig in signatures:
        counts[sig.signature_key] += 1
    return counts
45
+
46
+
47
def build_recurrence_candidates(events: List[Event], signatures: List[Signature], *, threshold: int = 2) -> List[Candidate]:
    """Build one scored Candidate per signature key recurring >= *threshold* times.

    Parameters
    ----------
    events:
        Canonical events; currently unused, kept for interface stability
        (callers pass the event list alongside its derived signatures).
    signatures:
        Signatures derived from the event stream, in event order.
    threshold:
        Minimum occurrence count for a signature key to produce a candidate.

    Returns
    -------
    Candidates in order of each key's first qualifying occurrence.
    """
    counts = signature_counts(signatures)
    seen_keys: set = set()
    candidates: List[Candidate] = []
    for signature in signatures:
        key = signature.signature_key
        # Only the first occurrence of each key yields a candidate. Previously a
        # fully-scored duplicate (plus an O(n) event-id scan) was built for every
        # repeat and then discarded by a post-pass dedupe; skipping here produces
        # the identical result without the wasted work.
        if key in seen_keys:
            continue
        count = counts[key]
        if count < threshold:
            continue
        seen_keys.add(key)
        # Heuristic scoring inputs: recurrence scales with count (capped at 10
        # occurrences); the remaining dimensions are fixed priors, with impact
        # boosted for failure/auth families.
        candidate = score_candidate(
            recurrence=min(count / 10.0, 1.0),
            recovery=0.4,
            spread=0.2,
            novelty=0.3,
            impact=0.5 if signature.event_family in {"failure", "auth"} else 0.2,
            precursor=0.3,
            memory_weight=0.4,
        )
        candidate.title = _candidate_title(signature)
        candidate.summary = _candidate_summary(signature, count)
        candidate.source_signature_ids = [key]
        # NOTE(review): these are indices into *signatures*, not event ids; they
        # line up with events only if signatures parallel events 1:1 — confirm.
        candidate.source_event_ids = [str(i) for i, sig in enumerate(signatures) if sig.signature_key == key]
        candidate.metadata = {"count": count, "service": signature.service}
        candidates.append(candidate)
    return candidates
86
+
87
+
88
+ def _attention_band(decision_band: str) -> str:
89
+ mapping = {
90
+ "ignore": "ignore_fast",
91
+ "watch": "background",
92
+ "review": "watch",
93
+ "urgent_human_review": "investigate",
94
+ "promote_to_incident_memory": "promote",
95
+ }
96
+ return mapping.get(decision_band, "watch")
97
+
98
+
99
def digest_items(candidates: Iterable[Candidate]) -> List[dict]:
    """Render candidates as plain dicts suitable for an operator digest."""
    items: List[dict] = []
    for candidate in candidates:
        item = {
            "title": candidate.title,
            "summary": candidate.summary,
            "decision_band": candidate.decision_band,
            "attention_band": _attention_band(candidate.decision_band),
            # attention_score mirrors score_total for digest consumers.
            "attention_score": candidate.score_total,
            "score_total": candidate.score_total,
            "score_breakdown": candidate.score_breakdown,
            "metadata": candidate.metadata,
        }
        items.append(item)
    return items
@@ -0,0 +1,38 @@
1
+ from __future__ import annotations
2
+
3
+ from .models import Candidate
4
+
5
+
6
def decision_band(score_total: float) -> str:
    """Map a composite score in [0, 1] onto a named decision band.

    Thresholds are inclusive lower bounds, checked highest first.
    """
    thresholds = (
        (0.85, "promote_to_incident_memory"),
        (0.65, "urgent_human_review"),
        (0.45, "review"),
        (0.25, "watch"),
    )
    for cutoff, band in thresholds:
        if score_total >= cutoff:
            return band
    return "ignore"
16
+
17
+
18
def score_candidate(*, recurrence: float, recovery: float, spread: float, novelty: float, impact: float, precursor: float, memory_weight: float) -> Candidate:
    """Combine the seven scoring dimensions into a scored Candidate.

    The composite score is the unweighted mean of the dimensions. The decision
    band is derived from the unrounded mean; score_total and confidence are
    rounded to three decimals.
    """
    breakdown = {
        "recurrence": recurrence,
        "recovery": recovery,
        "spread": spread,
        "novelty": novelty,
        "impact": impact,
        "precursor": precursor,
        "memory_weight": memory_weight,
    }
    mean_score = sum(breakdown.values()) / len(breakdown)
    rounded = round(mean_score, 3)
    return Candidate(
        candidate_type="recurrence",
        title="Derived operational candidate",
        summary="A weak-signal candidate derived from recurring events.",
        score_total=rounded,
        score_breakdown=breakdown,
        decision_band=decision_band(mean_score),
        confidence=rounded,
    )
@@ -0,0 +1,182 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import sqlite3
5
+ from dataclasses import asdict
6
+ from pathlib import Path
7
+ from typing import Iterable, List
8
+
9
+ from .models import Candidate, Event, Signature
10
+
11
+
12
def default_db_path() -> Path:
    """Location of the on-disk state database, relative to the working directory."""
    state_dir = Path('.brainstem-state')
    return state_dir / 'brainstem.sqlite3'
14
+
15
+
16
def connect(db_path: str | None = None) -> sqlite3.Connection:
    """Open a SQLite connection, creating parent directories if needed.

    Falls back to default_db_path() when *db_path* is None or empty. Rows are
    returned as sqlite3.Row so columns can be accessed by name.
    """
    if db_path:
        target = Path(db_path)
    else:
        target = default_db_path()
    target.parent.mkdir(parents=True, exist_ok=True)
    connection = sqlite3.connect(target)
    connection.row_factory = sqlite3.Row
    return connection
22
+
23
+
24
def init_db(db_path: str | None = None) -> None:
    """Create the events, signatures and candidates tables if they do not exist.

    Idempotent: every statement uses CREATE TABLE IF NOT EXISTS, so this is
    safe to call on every startup. Dict-valued model fields are stored as
    JSON text in the *_json columns.
    """
    conn = connect(db_path)
    try:
        conn.executescript(
            '''
            CREATE TABLE IF NOT EXISTS events (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                tenant_id TEXT NOT NULL,
                source_type TEXT NOT NULL,
                timestamp TEXT NOT NULL,
                host TEXT,
                service TEXT,
                severity TEXT,
                asset_id TEXT,
                source_path TEXT,
                facility TEXT,
                message_raw TEXT NOT NULL,
                structured_fields_json TEXT NOT NULL,
                correlation_keys_json TEXT NOT NULL
            );

            CREATE TABLE IF NOT EXISTS signatures (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                signature_key TEXT NOT NULL UNIQUE,
                event_family TEXT NOT NULL,
                normalized_pattern TEXT NOT NULL,
                service TEXT,
                metadata_json TEXT NOT NULL,
                occurrence_count INTEGER NOT NULL DEFAULT 0
            );

            CREATE TABLE IF NOT EXISTS candidates (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                candidate_type TEXT NOT NULL,
                title TEXT NOT NULL,
                summary TEXT NOT NULL,
                score_total REAL NOT NULL,
                score_breakdown_json TEXT NOT NULL,
                decision_band TEXT NOT NULL,
                source_signature_ids_json TEXT NOT NULL,
                source_event_ids_json TEXT NOT NULL,
                confidence REAL NOT NULL,
                metadata_json TEXT NOT NULL
            );
            '''
        )
        conn.commit()
    finally:
        conn.close()
73
+
74
+
75
def store_events(events: Iterable[Event], db_path: str | None = None) -> int:
    """Insert canonical events into the events table.

    All rows are committed in one transaction; dict-valued fields are
    serialized to JSON text. Returns the number of rows inserted.
    """
    conn = connect(db_path)
    try:
        rows = [
            (
                event.tenant_id,
                event.source_type,
                event.timestamp,
                event.host,
                event.service,
                event.severity,
                event.asset_id,
                event.source_path,
                event.facility,
                event.message_raw,
                json.dumps(event.structured_fields, ensure_ascii=False),
                json.dumps(event.correlation_keys, ensure_ascii=False),
            )
            for event in events
        ]
        conn.executemany(
            '''
            INSERT INTO events (
                tenant_id, source_type, timestamp, host, service, severity,
                asset_id, source_path, facility, message_raw,
                structured_fields_json, correlation_keys_json
            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            ''',
            rows,
        )
        conn.commit()
        return len(rows)
    finally:
        conn.close()
108
+
109
+
110
def store_signatures(signatures: Iterable[Signature], db_path: str | None = None) -> int:
    """Upsert signatures into the signatures table.

    New keys are inserted with occurrence_count = 1; repeated keys increment
    the count and refresh service/metadata. Returns the number processed.
    """
    upsert_sql = '''
        INSERT INTO signatures (
            signature_key, event_family, normalized_pattern, service, metadata_json, occurrence_count
        ) VALUES (?, ?, ?, ?, ?, 1)
        ON CONFLICT(signature_key) DO UPDATE SET
            occurrence_count = occurrence_count + 1,
            metadata_json = excluded.metadata_json,
            service = excluded.service
        '''
    conn = connect(db_path)
    try:
        rows = [
            (
                sig.signature_key,
                sig.event_family,
                sig.normalized_pattern,
                sig.service,
                json.dumps(sig.metadata, ensure_ascii=False),
            )
            for sig in signatures
        ]
        conn.executemany(upsert_sql, rows)
        conn.commit()
        return len(rows)
    finally:
        conn.close()
138
+
139
+
140
def store_candidates(candidates: Iterable[Candidate], db_path: str | None = None) -> int:
    """Insert scored candidates into the candidates table.

    List- and dict-valued fields are serialized to JSON text; all rows are
    committed in one transaction. Returns the number of rows inserted.
    """
    conn = connect(db_path)
    try:
        rows = [
            (
                candidate.candidate_type,
                candidate.title,
                candidate.summary,
                candidate.score_total,
                json.dumps(candidate.score_breakdown, ensure_ascii=False),
                candidate.decision_band,
                json.dumps(candidate.source_signature_ids, ensure_ascii=False),
                json.dumps(candidate.source_event_ids, ensure_ascii=False),
                candidate.confidence,
                json.dumps(candidate.metadata, ensure_ascii=False),
            )
            for candidate in candidates
        ]
        conn.executemany(
            '''
            INSERT INTO candidates (
                candidate_type, title, summary, score_total, score_breakdown_json,
                decision_band, source_signature_ids_json, source_event_ids_json,
                confidence, metadata_json
            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            ''',
            rows,
        )
        conn.commit()
        return len(rows)
    finally:
        conn.close()
171
+
172
+
173
def list_candidates(db_path: str | None = None, limit: int = 20) -> List[sqlite3.Row]:
    """Return up to *limit* candidates, highest score first (newest breaks ties).

    A limit below 1 is clamped to 1.
    """
    query = 'SELECT * FROM candidates ORDER BY score_total DESC, id DESC LIMIT ?'
    conn = connect(db_path)
    try:
        cursor = conn.execute(query, (max(1, limit),))
        return cursor.fetchall()
    finally:
        conn.close()