@simbimbo/brainstem 0.0.3 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +26 -0
- package/README.md +26 -0
- package/brainstem/__init__.py +1 -1
- package/brainstem/adapters.py +120 -0
- package/brainstem/api.py +468 -57
- package/brainstem/config.py +136 -0
- package/brainstem/connectors/logicmonitor.py +57 -0
- package/brainstem/demo.py +16 -2
- package/brainstem/fingerprint.py +54 -0
- package/brainstem/ingest.py +440 -33
- package/brainstem/interesting.py +56 -1
- package/brainstem/listener.py +181 -0
- package/brainstem/models.py +1 -0
- package/brainstem/recurrence.py +63 -9
- package/brainstem/scoring.py +6 -4
- package/brainstem/source_drivers.py +179 -0
- package/brainstem/storage.py +389 -12
- package/docs/README.md +103 -0
- package/docs/api.md +260 -280
- package/package.json +1 -1
- package/pyproject.toml +1 -1
- package/tests/test_adapters.py +95 -0
- package/tests/test_api.py +812 -0
- package/tests/test_canonicalization.py +8 -0
- package/tests/test_config.py +39 -0
- package/tests/test_file_ingest.py +77 -0
- package/tests/test_fingerprint.py +51 -1
- package/tests/test_interesting.py +10 -0
- package/tests/test_listener.py +253 -0
- package/tests/test_logicmonitor.py +54 -1
- package/tests/test_recurrence.py +16 -0
- package/tests/test_source_drivers.py +95 -0
- package/tests/test_storage.py +178 -1
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
from dataclasses import asdict, dataclass
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Any, Dict
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def resolve_default_db_path() -> str:
    """Return the default SQLite database path as a string.

    A non-blank ``BRAINSTEM_DB_PATH`` environment variable takes precedence
    (surrounding whitespace is stripped). Otherwise the relative path
    ``.brainstem-state/brainstem.sqlite3`` is returned.
    """
    override = os.getenv("BRAINSTEM_DB_PATH", "").strip()
    fallback = Path(".brainstem-state") / "brainstem.sqlite3"
    return override if override else str(fallback)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def _read_env_int(env_name: str, default: int) -> int:
    """Read an integer setting from the environment.

    Args:
        env_name: Name of the environment variable to read.
        default: Value returned when the variable is unset, blank, or not
            parseable by ``int``.

    Returns:
        The parsed integer, or ``default``.
    """
    raw = os.getenv(env_name, "").strip()
    try:
        # A blank value short-circuits to the default without parsing.
        parsed = int(raw) if raw else default
    except ValueError:
        parsed = default
    return parsed
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def _read_env_float(env_name: str, default: float) -> float:
    """Read a float setting from the environment.

    Args:
        env_name: Name of the environment variable to read.
        default: Value returned when the variable is unset, blank, not
            parseable by ``float``, or parses to NaN.

    Returns:
        The parsed float, or ``default``.
    """
    import math

    raw = os.getenv(env_name, "").strip()
    if not raw:
        return default
    try:
        parsed = float(raw)
    except ValueError:
        return default
    # float() accepts "nan"; every ordering comparison against NaN is False,
    # so a NaN weight or threshold would silently break downstream checks.
    # Treat it like any other unusable value.
    if math.isnan(parsed):
        return default
    return parsed
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@dataclass(frozen=True)
class ListenerConfig:
    """Immutable group of listener settings with built-in defaults.

    NOTE(review): field meanings are inferred from their names only; confirm
    against the listener module before relying on them.
    """

    # Syslog endpoint defaults.
    syslog_host: str = "127.0.0.1"
    syslog_port: int = 5514
    syslog_source_path: str = "/dev/udp"
    syslog_socket_timeout: float = 0.5

    # Same default value as RuntimeDefaults.ingest_threshold.
    ingest_threshold: int = 2
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
@dataclass(frozen=True)
class RuntimeDefaults:
    """Immutable integer thresholds and limits used as runtime defaults.

    ``get_runtime_config`` rebuilds this object with each field read from a
    matching ``BRAINSTEM_*`` environment variable, using the values below as
    fallbacks.
    """

    # Thresholds.
    ingest_threshold: int = 2
    recurrence_threshold: int = 2
    batch_threshold: int = 2

    # Limits.
    interesting_limit: int = 5
    failure_limit: int = 20
    ingest_recent_limit: int = 20
    sources_limit: int = 10
    sources_status_limit: int = 20

    replay_threshold: int = 2
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
@dataclass(frozen=True)
class RuntimeLimits:
    """Immutable fixed limits; nothing in this module reads them from the environment.

    NOTE(review): how callers enforce these values is not visible here.
    """

    replay_raw_max_ids: int = 32
    status_filter_limit: int = 20
    # A tuple keeps the default immutable and the instance hashable.
    replay_allowed_statuses: tuple[str, ...] = ("received", "parse_failed")
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
@dataclass(frozen=True)
class CandidateAttentionProfile:
    """Immutable weights and decision-band cut-offs for candidate attention.

    ``get_runtime_config`` rebuilds this object from ``BRAINSTEM_CANDIDATE_*``
    and ``BRAINSTEM_DECISION_BAND_*`` environment variables, using the values
    below as fallbacks.

    NOTE(review): how the scoring code combines these values is not visible
    here; confirm before tuning.
    """

    recurrence_count_normalizer: int = 10

    # Signal weights.
    recovery_signal_weight: float = 0.4
    spread_signal_weight: float = 0.2
    novelty_signal_weight: float = 0.3
    impact_high_weight: float = 0.5
    impact_default_weight: float = 0.2
    precursor_weight: float = 0.3
    memory_weight: float = 0.4

    # Decision bands; the defaults are strictly descending.
    decision_band_promote: float = 0.85
    decision_band_urgent_human_review: float = 0.65
    decision_band_review: float = 0.45
    decision_band_watch: float = 0.25
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
@dataclass(frozen=True)
class DBConfig:
    """Immutable database settings."""

    # Required: there is no class-level default, so callers must supply it.
    default_path: str
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
@dataclass(frozen=True)
class RuntimeConfig:
    """Top-level immutable bundle of brainstem runtime settings.

    NOTE(review): the ``db`` default is evaluated once, when this class is
    defined. A later change to ``BRAINSTEM_DB_PATH`` is therefore only seen
    when a ``DBConfig`` is passed in explicitly.
    """

    api_token_env_var: str = "BRAINSTEM_API_TOKEN"
    listener: ListenerConfig = ListenerConfig()
    defaults: RuntimeDefaults = RuntimeDefaults()
    candidate_attention: CandidateAttentionProfile = CandidateAttentionProfile()
    limits: RuntimeLimits = RuntimeLimits()
    db: DBConfig = DBConfig(default_path=resolve_default_db_path())

    def as_dict(self) -> Dict[str, Any]:
        """Return the configuration as a dict with each section converted via ``asdict``."""
        sections = ("listener", "defaults", "candidate_attention", "limits", "db")
        payload: Dict[str, Any] = {"api_token_env_var": self.api_token_env_var}
        for section in sections:
            payload[section] = asdict(getattr(self, section))
        return payload
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def get_runtime_config() -> RuntimeConfig:
    """Build a ``RuntimeConfig`` from the current environment.

    Each ``RuntimeDefaults`` and ``CandidateAttentionProfile`` field is read
    from its ``BRAINSTEM_*`` variable; unset, blank, or unparseable values
    fall back to the literal listed beside the variable name. The database
    path is resolved at call time. ``listener``, ``limits`` and
    ``api_token_env_var`` keep their class defaults.
    """
    # (field name, environment variable, fallback) for the integer settings.
    int_settings = (
        ("ingest_threshold", "BRAINSTEM_INGEST_THRESHOLD", 2),
        ("recurrence_threshold", "BRAINSTEM_RECURRENCE_THRESHOLD", 2),
        ("batch_threshold", "BRAINSTEM_BATCH_THRESHOLD", 2),
        ("interesting_limit", "BRAINSTEM_INTERESTING_LIMIT", 5),
        ("failure_limit", "BRAINSTEM_FAILURE_LIMIT", 20),
        ("ingest_recent_limit", "BRAINSTEM_INGEST_RECENT_LIMIT", 20),
        ("sources_limit", "BRAINSTEM_SOURCES_LIMIT", 10),
        ("sources_status_limit", "BRAINSTEM_SOURCES_STATUS_LIMIT", 20),
        ("replay_threshold", "BRAINSTEM_REPLAY_THRESHOLD", 2),
    )
    defaults = RuntimeDefaults(
        **{
            field_name: _read_env_int(env_name, fallback)
            for field_name, env_name, fallback in int_settings
        }
    )

    # (field name, environment variable, fallback) for the float settings.
    float_settings = (
        ("recovery_signal_weight", "BRAINSTEM_CANDIDATE_RECOVERY", 0.4),
        ("spread_signal_weight", "BRAINSTEM_CANDIDATE_SPREAD", 0.2),
        ("novelty_signal_weight", "BRAINSTEM_CANDIDATE_NOVELTY", 0.3),
        ("impact_high_weight", "BRAINSTEM_CANDIDATE_IMPACT_HIGH", 0.5),
        ("impact_default_weight", "BRAINSTEM_CANDIDATE_IMPACT_DEFAULT", 0.2),
        ("precursor_weight", "BRAINSTEM_CANDIDATE_PRECURSOR", 0.3),
        ("memory_weight", "BRAINSTEM_CANDIDATE_MEMORY_WEIGHT", 0.4),
        ("decision_band_promote", "BRAINSTEM_DECISION_BAND_PROMOTE", 0.85),
        (
            "decision_band_urgent_human_review",
            "BRAINSTEM_DECISION_BAND_URGENT_HUMAN_REVIEW",
            0.65,
        ),
        ("decision_band_review", "BRAINSTEM_DECISION_BAND_REVIEW", 0.45),
        ("decision_band_watch", "BRAINSTEM_DECISION_BAND_WATCH", 0.25),
    )
    candidate_attention = CandidateAttentionProfile(
        # The only integer field in the profile.
        recurrence_count_normalizer=_read_env_int(
            "BRAINSTEM_CANDIDATE_RECURRENCE_NORMALIZER", 10
        ),
        **{
            field_name: _read_env_float(env_name, fallback)
            for field_name, env_name, fallback in float_settings
        },
    )

    db = DBConfig(default_path=resolve_default_db_path())
    return RuntimeConfig(
        defaults=defaults,
        candidate_attention=candidate_attention,
        db=db,
    )
|
|
@@ -1,8 +1,19 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
import json
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
|
|
6
|
+
from ..adapters import RawInputAdapter, register_raw_input_adapter
|
|
7
|
+
from ..models import RawInputEnvelope
|
|
3
8
|
from .types import ConnectorEvent
|
|
4
9
|
|
|
5
10
|
|
|
11
|
+
def _coerce_mapping(payload: object) -> dict:
    """Return ``payload`` unchanged when it is a ``dict``.

    Raises:
        ValueError: If ``payload`` is anything other than a ``dict``.
    """
    if isinstance(payload, dict):
        return payload
    raise ValueError("logicmonitor payload must be an object")
|
|
15
|
+
|
|
16
|
+
|
|
6
17
|
def map_logicmonitor_event(payload: dict, *, tenant_id: str) -> ConnectorEvent:
|
|
7
18
|
metadata = payload.get("metadata") or {}
|
|
8
19
|
host = payload.get("host") or payload.get("resource_name") or ""
|
|
@@ -24,3 +35,49 @@ def map_logicmonitor_event(payload: dict, *, tenant_id: str) -> ConnectorEvent:
|
|
|
24
35
|
"cleared_at": metadata.get("cleared_at"),
|
|
25
36
|
},
|
|
26
37
|
)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def map_logicmonitor_payload_to_raw_envelope(payload: object, *, tenant_id: str, source_path: str = "") -> RawInputEnvelope:
    """Convert a LogicMonitor payload object into a ``RawInputEnvelope``.

    The payload must be a ``dict``. It is first mapped with
    ``map_logicmonitor_event``, and the resulting event's fields are then
    copied onto the envelope.

    Args:
        payload: Decoded LogicMonitor payload.
        tenant_id: Tenant identifier forwarded to ``map_logicmonitor_event``.
        source_path: Stored on the envelope as-is.

    Raises:
        ValueError: If ``payload`` is not a ``dict``.
    """
    event = map_logicmonitor_event(_coerce_mapping(payload), tenant_id=tenant_id)
    copied_fields = (
        "tenant_id",
        "source_type",
        "timestamp",
        "message_raw",
        "host",
        "service",
        "severity",
    )
    envelope_kwargs = {name: getattr(event, name) for name in copied_fields}
    return RawInputEnvelope(
        source_path=source_path,
        # Shallow copy so the envelope does not share the event's metadata dict.
        metadata=dict(event.metadata),
        **envelope_kwargs,
    )
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
@dataclass(frozen=True)
class LogicMonitorRawInputAdapter:
    """Adapter for LogicMonitor-shaped event payload objects."""

    source_type: str = "logicmonitor"

    def parse_raw_input(self, payload, *, tenant_id: str, source_path: str = "") -> RawInputEnvelope:
        """Parse a LogicMonitor payload into a ``RawInputEnvelope``.

        Accepted inputs are a ``dict``, UTF-8 JSON in ``bytes``/``bytearray``
        (undecodable bytes are replaced), or a ``str`` that, once stripped,
        starts with ``{`` and ends with ``}``.

        Raises:
            ValueError: For unsupported input types, strings that do not look
                like a JSON object, invalid JSON (``json.JSONDecodeError`` is
                a ``ValueError`` subclass), or decoded JSON that is not an
                object.
        """
        if isinstance(payload, dict):
            decoded = payload
        elif isinstance(payload, (bytes, bytearray)):
            decoded = json.loads(payload.decode("utf-8", errors="replace"))
        else:
            # Anything that is not a string is treated like an empty one and rejected.
            text = payload.strip() if isinstance(payload, str) else ""
            if not (text.startswith("{") and text.endswith("}")):
                raise ValueError("logicmonitor payload must be a mapping or JSON object string")
            decoded = json.loads(text)

        return map_logicmonitor_payload_to_raw_envelope(
            decoded,
            tenant_id=tenant_id,
            source_path=source_path,
        )
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
# Import-time side effect: importing this module registers one adapter instance.
register_raw_input_adapter(LogicMonitorRawInputAdapter())
|
package/brainstem/demo.py
CHANGED
|
@@ -10,9 +10,18 @@ from .instrumentation import emit, span
|
|
|
10
10
|
from .interesting import interesting_items
|
|
11
11
|
from .recurrence import build_recurrence_candidates, digest_items
|
|
12
12
|
from .storage import init_db, store_candidates, store_events, store_signatures
|
|
13
|
+
from .config import get_runtime_config
|
|
13
14
|
|
|
14
15
|
|
|
15
|
-
def run_syslog_demo(
|
|
16
|
+
def run_syslog_demo(
|
|
17
|
+
path: str,
|
|
18
|
+
tenant_id: str,
|
|
19
|
+
threshold: int | None = None,
|
|
20
|
+
db_path: str | None = None,
|
|
21
|
+
) -> Dict[str, Any]:
|
|
22
|
+
if threshold is None:
|
|
23
|
+
threshold = get_runtime_config().defaults.recurrence_threshold
|
|
24
|
+
|
|
16
25
|
with span("syslog_demo", path=path, tenant_id=tenant_id, threshold=threshold):
|
|
17
26
|
init_db(db_path)
|
|
18
27
|
events = ingest_syslog_file(path, tenant_id=tenant_id)
|
|
@@ -52,7 +61,12 @@ def main() -> int:
|
|
|
52
61
|
parser = argparse.ArgumentParser(description="Run the brAInstem syslog weak-signal demo.")
|
|
53
62
|
parser.add_argument("path", help="Path to a syslog-like input file")
|
|
54
63
|
parser.add_argument("--tenant", default="demo-tenant", help="Tenant/environment identifier")
|
|
55
|
-
parser.add_argument(
|
|
64
|
+
parser.add_argument(
|
|
65
|
+
"--threshold",
|
|
66
|
+
type=int,
|
|
67
|
+
default=get_runtime_config().defaults.recurrence_threshold,
|
|
68
|
+
help="Minimum recurrence count for candidate emission",
|
|
69
|
+
)
|
|
56
70
|
parser.add_argument("--db-path", default=None, help="Optional SQLite path for persistent state")
|
|
57
71
|
args = parser.parse_args()
|
|
58
72
|
payload = run_syslog_demo(args.path, tenant_id=args.tenant, threshold=args.threshold, db_path=args.db_path)
|
package/brainstem/fingerprint.py
CHANGED
|
@@ -9,6 +9,54 @@ _WHITESPACE_RE = re.compile(r"\s+")
|
|
|
9
9
|
_IPV4_RE = re.compile(r"\b(?:\d{1,3}\.){3}\d{1,3}\b")
|
|
10
10
|
_NUMBER_RE = re.compile(r"\b\d+\b")
|
|
11
11
|
|
|
12
|
+
# Substrings checked against the lower-cased service name.
_CONNECTIVITY_SERVICE_HINTS = {"bgp", "charon", "ipsec", "openvpn", "strongswan", "wireguard"}

# Substrings checked against the normalized message. An anchor only counts
# when a state hint is present in the same message.
_CONNECTIVITY_ANCHORS = {"handshake", "ipsec", "peer", "rekey", "tunnel", "vpn"}
_CONNECTIVITY_STATE_HINTS = {"down", "dropped", "flapped", "recovered", "timeout", "unreachable", "up"}

# Substrings in the normalized message that mark a resource event.
_RESOURCE_HINTS = {
    "cpu",
    "disk",
    "filesystem",
    "inode",
    "memory",
    "out of space",
    "pressure",
    "storage",
    "swap",
}
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def _has_any(text: str, values: set[str]) -> bool:
    """Return True when at least one entry of ``values`` is a substring of ``text``.

    Matching is plain substring containment, not whole-word matching.
    """
    for needle in values:
        if needle in text:
            return True
    return False
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def _has_connectivity_context(message: str, service: str) -> bool:
    """Decide whether an event looks connectivity-related.

    This is true when the service name contains a connectivity service hint,
    or when the message contains both a connectivity anchor and a state hint.
    """
    # A recognised service is sufficient on its own.
    if service and _has_any(service, _CONNECTIVITY_SERVICE_HINTS):
        return True
    # Otherwise the message needs an anchor term together with a state term.
    return _has_any(message, _CONNECTIVITY_ANCHORS) and _has_any(message, _CONNECTIVITY_STATE_HINTS)
|
|
59
|
+
|
|
12
60
|
|
|
13
61
|
def normalize_message(message: str) -> str:
|
|
14
62
|
text = (message or "").strip().lower()
|
|
@@ -21,10 +69,16 @@ def normalize_message(message: str) -> str:
|
|
|
21
69
|
def event_family_for(event: Event) -> str:
    """Classify an event into a coarse family name.

    Checks run in a fixed order and the first match wins: ``failure``,
    ``service_lifecycle``, ``connectivity``, ``resource``, ``auth``, and
    finally ``generic``. The normalized message is taken from
    ``event.message_normalized`` when present and truthy, otherwise it is
    derived from ``event.message_raw``.
    """
    text = getattr(event, "message_normalized", None) or normalize_message(event.message_raw)
    service_name = (event.service or "").strip().lower()

    if any(marker in text for marker in ("fail", "error")):
        return "failure"
    if any(marker in text for marker in ("restart", "stopped", "started")):
        return "service_lifecycle"
    if _has_connectivity_context(text, service_name):
        return "connectivity"
    if _has_any(text, _RESOURCE_HINTS):
        return "resource"
    if any(marker in text for marker in ("auth", "login")):
        return "auth"
    return "generic"
|