@simbimbo/brainstem 0.0.3 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,175 @@
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import json
5
+ import socket
6
+ from dataclasses import asdict
7
+ from typing import Any, Callable, List, Optional
8
+
9
+ from .ingest import IngestionResult, run_ingest_source_payload
10
+ from .ingest import canonicalize_raw_input_envelope
11
+ from .config import get_runtime_config
12
+ from .source_drivers import parse_source_payloads
13
+ from .models import CanonicalEvent
14
+ from .models import RawInputEnvelope
15
+
16
# Callback invoked with each canonical event produced by the listener.
EventHandler = Callable[[CanonicalEvent], None]

# Callback invoked with the exception and the raw text that failed to parse.
ErrorHandler = Callable[[Exception, str], None]

# Listener section of the runtime configuration. It is read once, at import
# time, and supplies the default argument values used throughout this module.
LISTENER_CONFIG = get_runtime_config().listener
19
+
20
+
21
def parse_syslog_datagram(
    payload: bytes,
    *,
    tenant_id: str,
    source_path: str = LISTENER_CONFIG.syslog_source_path,
    on_parse_error: Optional[ErrorHandler] = None,
) -> List[CanonicalEvent]:
    """Parse one syslog datagram into canonical events.

    The payload is first turned into raw envelopes by the ``"syslog"`` source
    driver, then each envelope is canonicalized individually.

    Args:
        payload: Datagram content to parse.
        tenant_id: Tenant identifier forwarded to the source driver.
        source_path: Source path forwarded to the source driver.
        on_parse_error: Optional callback receiving ``(exception, raw_text)``
            for every envelope that fails to canonicalize. It is also
            forwarded to the source driver.

    Returns:
        The canonical events that were produced successfully, in order.

    Raises:
        Exception: Whatever canonicalization raises, when ``on_parse_error``
            is ``None``.
    """
    envelopes = parse_source_payloads(
        "syslog",
        payload,
        tenant_id=tenant_id,
        source_path=source_path,
        on_parse_error=on_parse_error,
    )

    canonical: List[CanonicalEvent] = []
    for envelope in envelopes:
        try:
            converted = canonicalize_raw_input_envelope(envelope)
        except Exception as exc:
            if on_parse_error is None:
                raise
            # Prefer the original line recorded in the metadata; fall back to
            # the envelope's raw message when it is absent.
            offending_text = envelope.metadata.get("raw_line", envelope.message_raw)
            on_parse_error(exc, offending_text)
        else:
            canonical.append(converted)
    return canonical
44
+
45
+
46
def parse_syslog_raw_datagram(
    payload: bytes,
    *,
    tenant_id: str,
    source_path: str = LISTENER_CONFIG.syslog_source_path,
    on_parse_error: Optional[ErrorHandler] = None,
) -> List[RawInputEnvelope]:
    """Parse one syslog datagram into raw envelopes without canonicalizing.

    This is a thin wrapper that delegates to ``parse_source_payloads`` with
    the source type fixed to ``"syslog"``.

    Args:
        payload: Datagram content to parse.
        tenant_id: Tenant identifier forwarded to the source driver.
        source_path: Source path forwarded to the source driver.
        on_parse_error: Optional ``(exception, raw_text)`` callback forwarded
            to the source driver.

    Returns:
        The raw envelopes returned by the source driver.
    """
    raw_envelopes = parse_source_payloads(
        "syslog",
        payload,
        tenant_id=tenant_id,
        source_path=source_path,
        on_parse_error=on_parse_error,
    )
    return raw_envelopes
60
+
61
+
62
def run_ingest_syslog_datagram(
    payload: bytes,
    *,
    tenant_id: str,
    source_path: str = LISTENER_CONFIG.syslog_source_path,
    threshold: int = LISTENER_CONFIG.ingest_threshold,
    db_path: Optional[str] = None,
    on_event: Optional[EventHandler] = None,
    on_parse_error: Optional[ErrorHandler] = None,
) -> IngestionResult:
    """Run the ingest pipeline for a single syslog datagram.

    This is a thin wrapper that delegates to ``run_ingest_source_payload``
    with the source type fixed to ``"syslog"``; every other argument is passed
    through unchanged.

    Args:
        payload: Datagram content to ingest.
        tenant_id: Tenant identifier forwarded to the pipeline.
        source_path: Source path forwarded to the pipeline.
        threshold: Ingest threshold forwarded to the pipeline.
        db_path: Optional database path forwarded to the pipeline.
        on_event: Optional per-event callback forwarded to the pipeline.
        on_parse_error: Optional ``(exception, raw_text)`` callback forwarded
            to the pipeline.

    Returns:
        The ``IngestionResult`` produced by ``run_ingest_source_payload``.
    """
    ingestion = run_ingest_source_payload(
        "syslog",
        payload,
        tenant_id=tenant_id,
        source_path=source_path,
        threshold=threshold,
        db_path=db_path,
        on_event=on_event,
        on_parse_error=on_parse_error,
    )
    return ingestion
82
+
83
+
84
def run_udp_syslog_listener(
    tenant_id: str,
    *,
    host: str = LISTENER_CONFIG.syslog_host,
    port: int = LISTENER_CONFIG.syslog_port,
    source_path: Optional[str] = None,
    on_event: Optional[EventHandler] = None,
    on_parse_error: Optional[ErrorHandler] = None,
    db_path: Optional[str] = None,
    threshold: int = LISTENER_CONFIG.ingest_threshold,
    max_datagrams: Optional[int] = None,
    socket_timeout: float = LISTENER_CONFIG.syslog_socket_timeout,
    socket_obj: Optional[socket.socket] = None,
) -> int:
    """Receive UDP syslog datagrams and dispatch the resulting events.

    Args:
        tenant_id: Tenant identifier attached to every parsed event.
        host: Address to bind to when the socket is not already bound.
        port: UDP port to bind to when the socket is not already bound.
        source_path: Source path recorded on envelopes. When ``None`` it
            defaults to ``udp://<bound_host>:<bound_port>``.
        on_event: Optional callback invoked once per produced event.
        on_parse_error: Optional ``(exception, raw_text)`` callback for
            parse failures; without it parse failures propagate.
        db_path: When given, each datagram goes through
            ``run_ingest_syslog_datagram``; otherwise it is only parsed
            with ``parse_syslog_datagram``.
        threshold: Ingest threshold used on the ``db_path`` route.
        max_datagrams: Stop after this many datagrams have been received.
            ``None`` means loop until an exception interrupts the listener.
        socket_timeout: Timeout applied to the socket; a timeout while
            waiting simply retries.
        socket_obj: Optional caller-provided socket. It is bound here only if
            it is not bound yet, and it is never closed by this function.

    Returns:
        The number of events produced across all processed datagrams.
    """
    sock = socket_obj
    owns_socket = sock is None
    if sock is None:
        sock = socket.socket(family=socket.AF_INET, type=socket.SOCK_DGRAM)
    try:
        try:
            # Slice to two items: IPv6 sockets report a 4-tuple address.
            bound_host, bound_port = sock.getsockname()[:2]
        except OSError:
            # Some platforms (notably Windows) refuse getsockname() on a
            # socket that was never bound; treat that as "not bound yet".
            bound_host, bound_port = host, 0
        if bound_port == 0:
            sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
            sock.bind((host, port))
            bound_host, bound_port = sock.getsockname()[:2]
        if source_path is None:
            source_path = f"udp://{bound_host}:{bound_port}"

        received_events = 0
        sock.settimeout(socket_timeout)

        processed_datagrams = 0
        while True:
            if max_datagrams is not None and processed_datagrams >= max_datagrams:
                break
            try:
                payload, _ = sock.recvfrom(65535)
            except socket.timeout:
                # Nothing arrived within the timeout window; keep waiting.
                continue
            processed_datagrams += 1
            if db_path is None:
                events = parse_syslog_datagram(
                    payload,
                    tenant_id=tenant_id,
                    source_path=source_path,
                    on_parse_error=on_parse_error,
                )
            else:
                # on_event is deliberately not forwarded here: the events are
                # dispatched below so both routes share a single code path.
                result = run_ingest_syslog_datagram(
                    payload,
                    tenant_id=tenant_id,
                    source_path=source_path,
                    threshold=threshold,
                    db_path=db_path,
                    on_event=None,
                    on_parse_error=on_parse_error,
                )
                events = result.events
            for event in events:
                received_events += 1
                if on_event is not None:
                    on_event(event)
        return received_events
    finally:
        # Only close sockets created here; injected sockets belong to the caller.
        if owns_socket:
            sock.close()
149
+
150
+
151
def main(argv: Optional[list[str]] = None) -> int:
    """Command-line entry point for the UDP syslog listener.

    Parses ``--host``, ``--port``, ``--tenant`` and ``--source-path``, then
    runs the listener, printing every event as one JSON document per line.

    Args:
        argv: Argument list to parse; ``None`` lets ``argparse`` read the
            process arguments.

    Returns:
        ``0`` once the listener returns. The listener is started without a
        datagram limit, so in practice it runs until interrupted.
    """
    cli = argparse.ArgumentParser(description="Run the brAInstem UDP syslog listener.")
    cli.add_argument("--host", default=LISTENER_CONFIG.syslog_host, help="Listen host")
    cli.add_argument("--port", type=int, default=LISTENER_CONFIG.syslog_port, help="UDP port")
    cli.add_argument("--tenant", default="demo-tenant", help="Tenant identifier")
    cli.add_argument("--source-path", default="", help="Override source_path metadata on parsed envelopes")
    options = cli.parse_args(argv)

    # An empty --source-path means "derive it from the requested host and port".
    if options.source_path:
        effective_source_path = options.source_path
    else:
        effective_source_path = f"udp://{options.host}:{options.port}"

    def _emit(event: CanonicalEvent) -> None:
        serialized = json.dumps(asdict(event), default=str)
        print(serialized)

    run_udp_syslog_listener(
        options.tenant,
        host=options.host,
        port=options.port,
        source_path=effective_source_path,
        on_event=_emit,
    )
    return 0
172
+
173
+
174
# Script entry point: propagate main()'s return value as the process exit code.
if __name__ == "__main__":
    raise SystemExit(main())
@@ -29,6 +29,7 @@ class CanonicalEvent:
29
29
  source_type: str
30
30
  timestamp: str
31
31
  message_raw: str
32
+ raw_envelope_id: int | None = None
32
33
  host: str = ""
33
34
  service: str = ""
34
35
  severity: str = "info"
@@ -5,6 +5,7 @@ from dataclasses import asdict
5
5
  from typing import Iterable, List
6
6
 
7
7
  from .models import Candidate, Event, Signature
8
+ from .interesting import _attention_explanation
8
9
  from .scoring import score_candidate
9
10
 
10
11
 
@@ -44,9 +45,40 @@ def signature_counts(signatures: Iterable[Signature]) -> Counter:
44
45
  return Counter(sig.signature_key for sig in signatures)
45
46
 
46
47
 
48
def _coerce_raw_envelope_id(value: object) -> int | None:
    """Best-effort conversion of a raw envelope id to ``int``.

    Args:
        value: Candidate id; only ``int`` and ``str`` values are considered.

    Returns:
        The integer id, or ``None`` when ``value`` is a bool, is a string
        that is not made up solely of decimal digits (after stripping
        surrounding whitespace), or is of any other type.
    """
    # bool is a subclass of int; True/False must never be treated as an id.
    if isinstance(value, bool):
        return None
    if isinstance(value, int):
        return value
    if isinstance(value, str):
        text = value.strip()
        # isdecimal() rather than isdigit(): isdigit() also accepts characters
        # such as "²" that int() cannot parse, which would raise ValueError.
        if not text.isdecimal():
            return None
        try:
            return int(text)
        except ValueError:
            # int() may still reject the text (e.g. the interpreter's limit on
            # very long digit strings); treat that as "no usable id".
            return None
    return None
59
+
60
+
61
def _signature_lineage_index(
    events: List[Event],
    signatures: List[Signature],
) -> dict[str, list[int]]:
    """Map each signature key to the sorted raw envelope ids behind it.

    Events and signatures are paired positionally. Events whose
    ``raw_envelope_id`` is missing or cannot be coerced to an integer are
    ignored, and duplicate ids under the same key are collapsed.

    Args:
        events: Events, aligned by position with ``signatures``.
        signatures: Signatures, aligned by position with ``events``.

    Returns:
        A dict from signature key to an ascending list of distinct raw
        envelope ids. Keys with no usable id are absent.
    """
    ids_by_key: dict[str, set[int]] = {}
    for event, signature in zip(events, signatures):
        envelope_id = _coerce_raw_envelope_id(getattr(event, "raw_envelope_id", None))
        if envelope_id is not None:
            ids_by_key.setdefault(signature.signature_key, set()).add(envelope_id)
    return {key: sorted(ids) for key, ids in ids_by_key.items()}
76
+
77
+
47
78
  def build_recurrence_candidates(events: List[Event], signatures: List[Signature], *, threshold: int = 2) -> List[Candidate]:
48
79
  counts = signature_counts(signatures)
49
80
  candidates: List[Candidate] = []
81
+ signature_raw_envelope_ids = _signature_lineage_index(events, signatures)
50
82
  for signature in signatures:
51
83
  count = counts[signature.signature_key]
52
84
  if count < threshold:
@@ -71,7 +103,11 @@ def build_recurrence_candidates(events: List[Event], signatures: List[Signature]
71
103
  candidate.summary = _candidate_summary(signature, count)
72
104
  candidate.source_signature_ids = [signature.signature_key]
73
105
  candidate.source_event_ids = [str(i) for i, sig in enumerate(signatures) if sig.signature_key == signature.signature_key]
74
- candidate.metadata = {"count": count, "service": signature.service}
106
+ candidate.metadata = {
107
+ "count": count,
108
+ "service": signature.service,
109
+ "source_raw_envelope_ids": signature_raw_envelope_ids.get(signature.signature_key, []),
110
+ }
75
111
  candidates.append(candidate)
76
112
  # dedupe by signature key/title
77
113
  seen = set()
@@ -105,6 +141,7 @@ def digest_items(candidates: Iterable[Candidate]) -> List[dict]:
105
141
  "attention_band": _attention_band(c.decision_band),
106
142
  "attention_score": c.score_total,
107
143
  "score_total": c.score_total,
144
+ "attention_explanation": _attention_explanation(c),
108
145
  "score_breakdown": c.score_breakdown,
109
146
  "metadata": c.metadata,
110
147
  }
@@ -0,0 +1,150 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ from typing import Any, Callable, Iterable, List, Protocol, runtime_checkable
5
+
6
+ from .adapters import get_raw_input_adapter
7
+ from .models import RawInputEnvelope
8
+
9
+ ErrorHandler = Callable[[Exception, str], None]
10
+
11
+
12
def iter_syslog_lines(payload: bytes | str) -> list[str]:
    """Split a syslog payload into its non-blank lines.

    Bytes-like payloads are decoded as UTF-8 with undecodable bytes replaced;
    anything else is converted with ``str()``. Lines that are empty or contain
    only whitespace are dropped; kept lines retain their surrounding spaces.

    Args:
        payload: Raw payload, either bytes-like or text.

    Returns:
        The non-blank lines, in order.
    """
    if isinstance(payload, (bytes, bytearray)):
        text = payload.decode("utf-8", errors="replace")
    else:
        text = str(payload)

    kept: list[str] = []
    for candidate in text.splitlines():
        if not candidate.strip():
            continue
        kept.append(candidate.rstrip("\r"))
    return kept
15
+
16
+
17
@runtime_checkable
class SourceDriver(Protocol):
    """Structural interface for drivers that turn payloads into raw envelopes.

    Any object exposing a ``source_type`` attribute and a ``parse_payload``
    method satisfies this protocol, and can be checked with ``isinstance``.
    """

    # Key under which the driver is stored in the driver registry.
    source_type: str

    def parse_payload(
        self,
        payload: Any,
        *,
        tenant_id: str,
        source_path: str = "",
        on_parse_error: ErrorHandler | None = None,
    ) -> List[RawInputEnvelope]:
        """Convert a single input payload into zero or more raw envelopes."""
        ...
32
+
33
+
34
# Module-level registry of source drivers, keyed by each driver's source_type.
_SOURCE_DRIVER_REGISTRY: dict[str, SourceDriver] = {}
35
+
36
+
37
def register_source_driver(driver: SourceDriver) -> None:
    """Store ``driver`` in the registry under its ``source_type``.

    A driver already registered for the same source type is replaced.
    """
    registry_key = driver.source_type
    _SOURCE_DRIVER_REGISTRY[registry_key] = driver
39
+
40
+
41
def get_source_driver(source_type: str) -> SourceDriver:
    """Look up the driver registered for ``source_type``.

    Raises:
        ValueError: If no driver is registered for ``source_type``; the
            underlying ``KeyError`` is attached as the cause.
    """
    try:
        driver = _SOURCE_DRIVER_REGISTRY[source_type]
    except KeyError as exc:
        raise ValueError(f"unsupported source_type: {source_type}") from exc
    return driver
46
+
47
+
48
def list_source_driver_types() -> list[str]:
    """Return the registered source types in sorted order."""
    return sorted(_SOURCE_DRIVER_REGISTRY)
50
+
51
+
52
def _iter_payload_items(payload: Any) -> Iterable[Any]:
    """Yield the individual payloads contained in ``payload``.

    ``None``, bytes-like values, strings and non-iterable objects are treated
    as a single payload and yielded as-is. Any other iterable is expanded
    into its items.
    """
    single_item = payload is None or isinstance(payload, (bytes, bytearray, str))
    if not single_item:
        try:
            # Probe only: this checks whether the object supports iteration.
            iter(payload)
        except TypeError:
            single_item = True

    if single_item:
        yield payload
    else:
        yield from payload
62
+
63
+
64
def parse_source_payloads(
    source_type: str,
    payloads: Any,
    *,
    tenant_id: str,
    source_path: str = "",
    on_parse_error: ErrorHandler | None = None,
) -> List[RawInputEnvelope]:
    """Parse one payload, or an iterable of payloads, with a registered driver.

    Args:
        source_type: Registry key selecting the driver.
        payloads: A single payload (``None``, bytes-like, string or
            non-iterable object) or an iterable of payloads.
        tenant_id: Tenant identifier forwarded to the driver.
        source_path: Source path forwarded to the driver.
        on_parse_error: Optional ``(exception, raw_text)`` callback forwarded
            to the driver.

    Returns:
        All raw envelopes produced by the driver, concatenated in payload order.

    Raises:
        ValueError: If no driver is registered for ``source_type``.
    """
    driver = get_source_driver(source_type)
    return [
        envelope
        for item in _iter_payload_items(payloads)
        for envelope in driver.parse_payload(
            item,
            tenant_id=tenant_id,
            source_path=source_path,
            on_parse_error=on_parse_error,
        )
    ]
84
+
85
+
86
@dataclass(frozen=True)
class FileSourceDriver:
    """Driver for file-line payloads."""

    # Registry key; also selects the raw input adapter used for parsing.
    source_type: str = "file"

    def parse_payload(
        self,
        payload: Any,
        *,
        tenant_id: str,
        source_path: str = "",
        on_parse_error: ErrorHandler | None = None,
    ) -> List[RawInputEnvelope]:
        """Parse a single file line into at most one raw envelope.

        Args:
            payload: The line to parse. ``None`` is treated as an empty line,
                bytes-like values are decoded as UTF-8 (undecodable bytes
                replaced), and anything else is converted with ``str()``.
            tenant_id: Tenant identifier forwarded to the adapter.
            source_path: Source path forwarded to the adapter.
            on_parse_error: Optional ``(exception, raw_text)`` callback. When
                given, adapter failures are reported to it and an empty list
                is returned instead of raising.

        Returns:
            A one-element list with the parsed envelope, or an empty list when
            parsing failed and the failure was reported to ``on_parse_error``.

        Raises:
            Exception: Whatever the adapter raises, when ``on_parse_error`` is
                ``None``.
        """
        adapter = get_raw_input_adapter(self.source_type)
        if payload is None:
            text = ""
        elif isinstance(payload, (bytes, bytearray)):
            # str(b"...") would yield the "b'...'" repr, not the line's text.
            text = payload.decode("utf-8", errors="replace")
        else:
            text = str(payload)
        try:
            return [adapter.parse_raw_input(text, tenant_id=tenant_id, source_path=source_path)]
        except Exception as exc:
            if on_parse_error is None:
                raise
            on_parse_error(exc, text)
            return []
109
+
110
+
111
@dataclass(frozen=True)
class SyslogSourceDriver:
    """Driver for syslog payload chunks and line-oriented payloads."""

    # Registry key; also selects the raw input adapter used for parsing.
    source_type: str = "syslog"

    def parse_payload(
        self,
        payload: Any,
        *,
        tenant_id: str,
        source_path: str = "",
        on_parse_error: ErrorHandler | None = None,
    ) -> List[RawInputEnvelope]:
        """Parse a syslog payload into raw envelopes.

        Bytes-like payloads are split into non-blank lines and each line is
        parsed separately. Any other payload is parsed as one message
        (``None`` becomes an empty string, other values go through ``str()``).

        Args:
            payload: Bytes-like chunk or a single message.
            tenant_id: Tenant identifier forwarded to the adapter.
            source_path: Source path forwarded to the adapter.
            on_parse_error: Optional ``(exception, raw_text)`` callback. When
                given, a failing message is reported and skipped.

        Returns:
            The envelopes that parsed successfully, in input order.

        Raises:
            Exception: Whatever the adapter raises, when ``on_parse_error`` is
                ``None``.
        """
        adapter = get_raw_input_adapter(self.source_type)

        if isinstance(payload, (bytes, bytearray)):
            messages = iter_syslog_lines(payload)
        elif payload is None:
            messages = [""]
        else:
            messages = [str(payload)]

        parsed: List[RawInputEnvelope] = []
        for message in messages:
            try:
                envelope = adapter.parse_raw_input(message, tenant_id=tenant_id, source_path=source_path)
            except Exception as exc:
                if on_parse_error is None:
                    raise
                on_parse_error(exc, message)
            else:
                parsed.append(envelope)
        return parsed
147
+
148
+
149
# Register the built-in drivers at import time so they are available by default.
register_source_driver(FileSourceDriver())
register_source_driver(SyslogSourceDriver())