@simbimbo/brainstem 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +63 -0
- package/README.md +99 -3
- package/brainstem/__init__.py +3 -0
- package/brainstem/api.py +131 -0
- package/brainstem/connectors/__init__.py +1 -0
- package/brainstem/connectors/logicmonitor.py +26 -0
- package/brainstem/connectors/types.py +16 -0
- package/brainstem/demo.py +64 -0
- package/brainstem/fingerprint.py +44 -0
- package/brainstem/ingest.py +101 -0
- package/brainstem/instrumentation.py +38 -0
- package/brainstem/interesting.py +62 -0
- package/brainstem/models.py +78 -0
- package/brainstem/recurrence.py +112 -0
- package/brainstem/scoring.py +38 -0
- package/brainstem/storage.py +182 -0
- package/docs/adapters.md +435 -0
- package/docs/api.md +380 -0
- package/docs/architecture.md +333 -0
- package/docs/connectors.md +66 -0
- package/docs/data-model.md +290 -0
- package/docs/design-governance.md +595 -0
- package/docs/mvp-flow.md +109 -0
- package/docs/roadmap.md +87 -0
- package/docs/scoring.md +424 -0
- package/docs/v0.0.1.md +277 -0
- package/docs/vision.md +85 -0
- package/package.json +6 -14
- package/pyproject.toml +18 -0
- package/tests/fixtures/sample_syslog.log +6 -0
- package/tests/test_api.py +72 -0
- package/tests/test_canonicalization.py +28 -0
- package/tests/test_demo.py +25 -0
- package/tests/test_fingerprint.py +22 -0
- package/tests/test_ingest.py +15 -0
- package/tests/test_instrumentation.py +16 -0
- package/tests/test_interesting.py +36 -0
- package/tests/test_logicmonitor.py +22 -0
- package/tests/test_recurrence.py +16 -0
- package/tests/test_scoring.py +21 -0
- package/tests/test_storage.py +26 -0
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
from typing import Any, Dict, List
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
@dataclass
class RawInputEnvelope:
    """A raw input record as delivered by a connector, before canonicalization.

    The field set mirrors CanonicalEvent minus the normalization outputs;
    ingestion is expected to turn envelopes into CanonicalEvent instances.
    """

    tenant_id: str    # owning tenant (multi-tenant isolation key)
    source_type: str  # connector/source identifier
    timestamp: str    # event time as a string; format set by the source
    message_raw: str  # original, unmodified message text
    host: str = ""        # originating host, if known
    service: str = ""     # originating service/process, if known
    severity: str = "info"  # severity label; defaults to "info"
    asset_id: str = ""      # asset identifier, if known
    source_path: str = ""   # path/stream the record came from
    facility: str = ""      # presumably a syslog-style facility — TODO confirm
    structured_fields: Dict[str, Any] = field(default_factory=dict)  # parsed key/value payload
    correlation_keys: Dict[str, Any] = field(default_factory=dict)   # keys used to relate events
    metadata: Dict[str, Any] = field(default_factory=dict)           # connector-specific extras
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@dataclass
class CanonicalEvent:
    """The canonical, normalized event form used throughout the pipeline.

    Extends the raw-envelope field set with normalization outputs
    (message_normalized, signature_input) and ingest bookkeeping.
    """

    tenant_id: str    # owning tenant (multi-tenant isolation key)
    source_type: str  # connector/source identifier
    timestamp: str    # event time as a string; format set by the source
    message_raw: str  # original, unmodified message text
    host: str = ""        # originating host, if known
    service: str = ""     # originating service/process, if known
    severity: str = "info"  # severity label; defaults to "info"
    asset_id: str = ""      # asset identifier, if known
    source_path: str = ""   # path/stream the record came from
    facility: str = ""      # presumably a syslog-style facility — TODO confirm
    structured_fields: Dict[str, Any] = field(default_factory=dict)  # parsed key/value payload
    correlation_keys: Dict[str, Any] = field(default_factory=dict)   # keys used to relate events
    message_normalized: str = ""  # normalized form of message_raw (filled by ingestion)
    signature_input: str = ""     # text fed to fingerprinting (filled by ingestion)
    ingest_metadata: Dict[str, Any] = field(default_factory=dict)    # ingestion bookkeeping
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
# Backward-compatible name used throughout existing code.
# Alias so callers importing `Event` keep working after the rename to CanonicalEvent.
Event = CanonicalEvent
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
@dataclass
class Signature:
    """A normalized fingerprint identifying a family of similar events."""

    signature_key: str       # stable key for the pattern (UNIQUE in storage)
    event_family: str        # coarse family, e.g. "failure", "auth", "service_lifecycle", "generic"
    normalized_pattern: str  # the normalized message pattern this signature represents
    service: str = ""        # service the pattern was observed on, if any
    metadata: Dict[str, Any] = field(default_factory=dict)  # extra signature attributes
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
@dataclass
class Candidate:
    """A scored weak-signal finding derived from recurring events/signatures."""

    candidate_type: str  # e.g. "recurrence" (set by scoring.score_candidate)
    title: str           # human-readable headline
    summary: str         # one-sentence description
    score_total: float   # aggregate score (mean of the breakdown dimensions)
    score_breakdown: Dict[str, float]  # per-dimension scores
    decision_band: str   # band derived from score_total (see scoring.decision_band)
    source_signature_ids: List[str] = field(default_factory=list)  # signature keys this came from
    source_event_ids: List[str] = field(default_factory=list)      # ids/indices of contributing events
    confidence: float = 0.0  # confidence estimate; currently mirrors score_total
    metadata: Dict[str, Any] = field(default_factory=dict)         # extra context (e.g. count, service)
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
@dataclass
class IncidentMemory:
    """A remembered incident, promoted from one or more candidates."""

    title: str          # human-readable headline
    summary: str        # one-sentence description
    incident_type: str  # classification of the incident — semantics set by callers
    source_candidate_ids: List[str] = field(default_factory=list)  # candidates folded into this memory
    recurrence_count: int = 0  # presumably times this incident has recurred — TODO confirm maintainer
    confidence: float = 0.0    # confidence estimate
    metadata: Dict[str, Any] = field(default_factory=dict)  # extra context
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections import Counter
|
|
4
|
+
from dataclasses import asdict
|
|
5
|
+
from typing import Iterable, List
|
|
6
|
+
|
|
7
|
+
from .models import Candidate, Event, Signature
|
|
8
|
+
from .scoring import score_candidate
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
# Fallback candidate titles keyed by event family; used when no specific
# pattern heuristic matches in _candidate_title.
FAMILY_TITLES = dict(
    failure="Recurring failure pattern",
    auth="Recurring authentication failure pattern",
    service_lifecycle="Recurring service lifecycle instability",
    generic="Recurring operational pattern",
)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _candidate_title(signature: Signature) -> str:
    """Pick a human-readable title for a recurrence candidate.

    Specific heuristics (VPN, SSH auth, service lifecycle) win over the
    generic per-family titles in FAMILY_TITLES.
    """
    pattern = signature.normalized_pattern
    svc = (signature.service or "").strip()

    vpn_related = svc == "charon" or "vpn" in pattern
    if vpn_related:
        return "Recurring VPN tunnel instability"

    auth_related = svc == "sshd" or "failed password" in pattern or "auth" in pattern
    if auth_related:
        return "Recurring SSH authentication failures"

    if svc and signature.event_family == "service_lifecycle":
        return f"Recurring {svc} service instability"

    return FAMILY_TITLES.get(signature.event_family, "Recurring operational pattern")
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _candidate_summary(signature: Signature, count: int) -> str:
    """Compose a one-sentence summary for a recurrence candidate.

    Mirrors the heuristics in _candidate_title, with *count* interpolated
    into the sentence.
    """
    pattern = signature.normalized_pattern
    svc = (signature.service or "").strip()

    if svc == "charon" or "vpn" in pattern:
        return f"VPN tunnel instability was observed {count} times and may deserve more operator attention if it continues."

    if svc == "sshd" or "failed password" in pattern or "auth" in pattern:
        return f"SSH authentication failures were observed {count} times and are worth background attention if the pattern continues."

    if svc:
        return f"A recurring {svc} pattern was observed {count} times in the current event stream."

    return f"A recurring operational pattern was observed {count} times in the current event stream."
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def signature_counts(signatures: Iterable[Signature]) -> Counter:
    """Tally how often each signature_key appears in *signatures*."""
    tally: Counter = Counter()
    for item in signatures:
        tally[item.signature_key] += 1
    return tally
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def build_recurrence_candidates(events: List[Event], signatures: List[Signature], *, threshold: int = 2) -> List[Candidate]:
    """Turn repeated signatures into scored recurrence candidates.

    Emits one candidate per distinct signature_key whose occurrence count
    is at least *threshold*, in first-appearance order.

    Parameters:
        events: canonical events (currently unused; kept for interface
            stability — event linkage is derived from signature positions).
        signatures: one Signature per event, in event order.
        threshold: minimum occurrence count for a key to become a candidate.

    Returns:
        Unique candidates, one per qualifying signature key.
    """
    counts = signature_counts(signatures)
    candidates: List[Candidate] = []
    seen_keys: set = set()
    for signature in signatures:
        key = signature.signature_key
        # Process each signature_key once (first occurrence wins). This replaces
        # the previous build-duplicates-then-dedupe pass, which constructed a
        # throwaway candidate (plus an O(n) event-id scan) for every repeat.
        if key in seen_keys:
            continue
        seen_keys.add(key)
        count = counts[key]
        if count < threshold:
            continue
        # Fixed heuristic weights for every dimension except recurrence/impact.
        recurrence = min(count / 10.0, 1.0)  # saturates at 10 occurrences
        impact = 0.5 if signature.event_family in {"failure", "auth"} else 0.2
        candidate = score_candidate(
            recurrence=recurrence,
            recovery=0.4,
            spread=0.2,
            novelty=0.3,
            impact=impact,
            precursor=0.3,
            memory_weight=0.4,
        )
        candidate.title = _candidate_title(signature)
        candidate.summary = _candidate_summary(signature, count)
        candidate.source_signature_ids = [key]
        # NOTE: these are indices into *signatures*, assumed positionally
        # aligned with *events* — TODO confirm with the ingestion caller.
        candidate.source_event_ids = [str(i) for i, sig in enumerate(signatures) if sig.signature_key == key]
        candidate.metadata = {"count": count, "service": signature.service}
        candidates.append(candidate)
    return candidates
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def _attention_band(decision_band: str) -> str:
    """Translate an internal decision band into a digest attention band.

    Unknown bands fall back to "watch".
    """
    translations = (
        ("ignore", "ignore_fast"),
        ("watch", "background"),
        ("review", "watch"),
        ("urgent_human_review", "investigate"),
        ("promote_to_incident_memory", "promote"),
    )
    for band, attention in translations:
        if band == decision_band:
            return attention
    return "watch"
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def digest_items(candidates: Iterable[Candidate]) -> List[dict]:
    """Render candidates as plain dicts suitable for a digest payload."""
    items: List[dict] = []
    for candidate in candidates:
        band = candidate.decision_band
        score = candidate.score_total
        items.append(
            {
                "title": candidate.title,
                "summary": candidate.summary,
                "decision_band": band,
                "attention_band": _attention_band(band),
                "attention_score": score,
                "score_total": score,
                "score_breakdown": candidate.score_breakdown,
                "metadata": candidate.metadata,
            }
        )
    return items
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from .models import Candidate
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def decision_band(score_total: float) -> str:
    """Map a total score onto a decision band label.

    Thresholds are inclusive lower bounds, checked from highest to lowest;
    anything below 0.25 is "ignore".
    """
    bands = (
        (0.85, "promote_to_incident_memory"),
        (0.65, "urgent_human_review"),
        (0.45, "review"),
        (0.25, "watch"),
    )
    for cutoff, label in bands:
        if score_total >= cutoff:
            return label
    return "ignore"
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def score_candidate(*, recurrence: float, recovery: float, spread: float, novelty: float, impact: float, precursor: float, memory_weight: float) -> Candidate:
    """Build a recurrence Candidate from the seven scoring dimensions.

    The total is the unweighted mean of the inputs. The decision band is
    derived from the unrounded mean; the stored score_total and confidence
    are rounded to three decimal places.
    """
    names = ("recurrence", "recovery", "spread", "novelty", "impact", "precursor", "memory_weight")
    values = (recurrence, recovery, spread, novelty, impact, precursor, memory_weight)
    score_breakdown = dict(zip(names, values))
    mean_score = sum(values) / len(values)
    rounded = round(mean_score, 3)
    return Candidate(
        candidate_type="recurrence",
        title="Derived operational candidate",
        summary="A weak-signal candidate derived from recurring events.",
        score_total=rounded,
        score_breakdown=score_breakdown,
        decision_band=decision_band(mean_score),
        confidence=rounded,
    )
|
|
@@ -0,0 +1,182 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import sqlite3
|
|
5
|
+
from dataclasses import asdict
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Iterable, List
|
|
8
|
+
|
|
9
|
+
from .models import Candidate, Event, Signature
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def default_db_path() -> Path:
    """Location of the on-disk SQLite database, relative to the CWD."""
    state_dir = Path('.brainstem-state')
    return state_dir / 'brainstem.sqlite3'
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def connect(db_path: str | None = None) -> sqlite3.Connection:
    """Open a SQLite connection, creating parent directories as needed.

    Falls back to default_db_path() when *db_path* is None or empty.
    Rows are returned as sqlite3.Row for name-based access.
    """
    if db_path:
        target = Path(db_path)
    else:
        target = default_db_path()
    target.parent.mkdir(parents=True, exist_ok=True)
    connection = sqlite3.connect(target)
    connection.row_factory = sqlite3.Row
    return connection
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def init_db(db_path: str | None = None) -> None:
    """Create the events/signatures/candidates tables if they do not exist."""
    schema = '''
    CREATE TABLE IF NOT EXISTS events (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        tenant_id TEXT NOT NULL,
        source_type TEXT NOT NULL,
        timestamp TEXT NOT NULL,
        host TEXT,
        service TEXT,
        severity TEXT,
        asset_id TEXT,
        source_path TEXT,
        facility TEXT,
        message_raw TEXT NOT NULL,
        structured_fields_json TEXT NOT NULL,
        correlation_keys_json TEXT NOT NULL
    );

    CREATE TABLE IF NOT EXISTS signatures (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        signature_key TEXT NOT NULL UNIQUE,
        event_family TEXT NOT NULL,
        normalized_pattern TEXT NOT NULL,
        service TEXT,
        metadata_json TEXT NOT NULL,
        occurrence_count INTEGER NOT NULL DEFAULT 0
    );

    CREATE TABLE IF NOT EXISTS candidates (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        candidate_type TEXT NOT NULL,
        title TEXT NOT NULL,
        summary TEXT NOT NULL,
        score_total REAL NOT NULL,
        score_breakdown_json TEXT NOT NULL,
        decision_band TEXT NOT NULL,
        source_signature_ids_json TEXT NOT NULL,
        source_event_ids_json TEXT NOT NULL,
        confidence REAL NOT NULL,
        metadata_json TEXT NOT NULL
    );
    '''
    conn = connect(db_path)
    try:
        conn.executescript(schema)
        conn.commit()
    finally:
        conn.close()
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def store_events(events: Iterable[Event], db_path: str | None = None) -> int:
    """Insert canonical events into the events table.

    Dict-valued fields are serialized to JSON. Returns the number of rows
    inserted; the connection is always closed.
    """
    conn = connect(db_path)
    try:
        rows = [
            (
                event.tenant_id,
                event.source_type,
                event.timestamp,
                event.host,
                event.service,
                event.severity,
                event.asset_id,
                event.source_path,
                event.facility,
                event.message_raw,
                json.dumps(event.structured_fields, ensure_ascii=False),
                json.dumps(event.correlation_keys, ensure_ascii=False),
            )
            for event in events
        ]
        conn.executemany(
            '''
            INSERT INTO events (
                tenant_id, source_type, timestamp, host, service, severity,
                asset_id, source_path, facility, message_raw,
                structured_fields_json, correlation_keys_json
            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            ''',
            rows,
        )
        conn.commit()
        return len(rows)
    finally:
        conn.close()
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def store_signatures(signatures: Iterable[Signature], db_path: str | None = None) -> int:
    """Upsert signatures into the signatures table.

    New keys start with occurrence_count 1; existing keys have their count
    bumped and their metadata/service refreshed. Returns the number of
    signatures written.
    """
    conn = connect(db_path)
    try:
        rows = [
            (
                sig.signature_key,
                sig.event_family,
                sig.normalized_pattern,
                sig.service,
                json.dumps(sig.metadata, ensure_ascii=False),
            )
            for sig in signatures
        ]
        conn.executemany(
            '''
            INSERT INTO signatures (
                signature_key, event_family, normalized_pattern, service, metadata_json, occurrence_count
            ) VALUES (?, ?, ?, ?, ?, 1)
            ON CONFLICT(signature_key) DO UPDATE SET
                occurrence_count = occurrence_count + 1,
                metadata_json = excluded.metadata_json,
                service = excluded.service
            ''',
            rows,
        )
        conn.commit()
        return len(rows)
    finally:
        conn.close()
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def store_candidates(candidates: Iterable[Candidate], db_path: str | None = None) -> int:
    """Insert candidates into the candidates table.

    List/dict-valued fields are serialized to JSON. Returns the number of
    rows inserted; the connection is always closed.
    """
    conn = connect(db_path)
    try:
        rows = [
            (
                candidate.candidate_type,
                candidate.title,
                candidate.summary,
                candidate.score_total,
                json.dumps(candidate.score_breakdown, ensure_ascii=False),
                candidate.decision_band,
                json.dumps(candidate.source_signature_ids, ensure_ascii=False),
                json.dumps(candidate.source_event_ids, ensure_ascii=False),
                candidate.confidence,
                json.dumps(candidate.metadata, ensure_ascii=False),
            )
            for candidate in candidates
        ]
        conn.executemany(
            '''
            INSERT INTO candidates (
                candidate_type, title, summary, score_total, score_breakdown_json,
                decision_band, source_signature_ids_json, source_event_ids_json,
                confidence, metadata_json
            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            ''',
            rows,
        )
        conn.commit()
        return len(rows)
    finally:
        conn.close()
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
def list_candidates(db_path: str | None = None, limit: int = 20) -> List[sqlite3.Row]:
    """Return up to *limit* candidates, best score first (ties: newest first).

    A non-positive *limit* is clamped to 1.
    """
    query = 'SELECT * FROM candidates ORDER BY score_total DESC, id DESC LIMIT ?'
    conn = connect(db_path)
    try:
        cursor = conn.execute(query, (max(limit, 1),))
        return cursor.fetchall()
    finally:
        conn.close()
|