@simbimbo/brainstem 0.0.2 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +37 -0
- package/README.md +25 -0
- package/brainstem/__init__.py +1 -1
- package/brainstem/adapters.py +120 -0
- package/brainstem/api.py +483 -23
- package/brainstem/config.py +70 -0
- package/brainstem/ingest.py +418 -33
- package/brainstem/interesting.py +56 -1
- package/brainstem/listener.py +175 -0
- package/brainstem/models.py +3 -0
- package/brainstem/recurrence.py +38 -1
- package/brainstem/source_drivers.py +150 -0
- package/brainstem/storage.py +547 -8
- package/docs/README.md +94 -0
- package/docs/adapters.md +97 -401
- package/docs/api.md +223 -278
- package/package.json +1 -1
- package/pyproject.toml +1 -1
- package/tests/test_adapters.py +94 -0
- package/tests/test_api.py +973 -0
- package/tests/test_canonicalization.py +8 -0
- package/tests/test_config.py +24 -0
- package/tests/test_file_ingest.py +77 -0
- package/tests/test_interesting.py +10 -0
- package/tests/test_listener.py +253 -0
- package/tests/test_recurrence.py +2 -0
- package/tests/test_source_drivers.py +95 -0
- package/tests/test_storage.py +370 -2
|
@@ -14,6 +14,7 @@ def test_canonicalization_from_raw_envelope_is_explicit() -> None:
|
|
|
14
14
|
raw = parse_syslog_envelope("Mar 22 00:00:01 fw-01 charon: VPN tunnel dropped and recovered", tenant_id="client-a", source_path="/var/log/syslog")
|
|
15
15
|
canonical = canonicalize_raw_input_envelope(raw)
|
|
16
16
|
assert isinstance(canonical, CanonicalEvent)
|
|
17
|
+
assert canonical.raw_envelope_id is None
|
|
17
18
|
assert canonical.message_normalized == "vpn tunnel dropped and recovered"
|
|
18
19
|
assert canonical.signature_input == canonical.message_normalized
|
|
19
20
|
assert canonical.ingest_metadata["canonicalization_source"] == "syslog"
|
|
@@ -21,6 +22,13 @@ def test_canonicalization_from_raw_envelope_is_explicit() -> None:
|
|
|
21
22
|
assert canonical.ingest_metadata["raw_line"] == "Mar 22 00:00:01 fw-01 charon: VPN tunnel dropped and recovered"
|
|
22
23
|
|
|
23
24
|
|
|
25
|
+
def test_canonicalization_accepts_and_carries_raw_envelope_id() -> None:
    """An explicit ``raw_envelope_id`` shows up on the event and in its ingest metadata."""
    envelope = parse_syslog_envelope(
        "Mar 22 00:00:01 fw-01 charon: VPN tunnel dropped and recovered",
        tenant_id="client-a",
        source_path="/var/log/syslog",
    )

    event = canonicalize_raw_input_envelope(envelope, raw_envelope_id=99)

    # The id must be visible both as an attribute and inside the metadata dict.
    assert event.raw_envelope_id == 99
    assert event.ingest_metadata["raw_envelope_id"] == 99
|
|
30
|
+
|
|
31
|
+
|
|
24
32
|
def test_parse_syslog_line_still_returns_canonical_event() -> None:
|
|
25
33
|
event = parse_syslog_line("Mar 22 00:00:01 fw-01 charon: VPN tunnel dropped and recovered", tenant_id="client-a", source_path="/var/log/syslog")
|
|
26
34
|
assert isinstance(event, CanonicalEvent)
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
|
|
5
|
+
from brainstem.config import get_runtime_config, resolve_default_db_path
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def test_runtime_config_exposes_shared_defaults() -> None:
    """Pin the default values returned by ``get_runtime_config()``."""
    config = get_runtime_config()

    assert config.api_token_env_var == "BRAINSTEM_API_TOKEN"

    # Syslog listener defaults.
    listener = config.listener
    assert listener.syslog_host == "127.0.0.1"
    assert listener.syslog_port == 5514
    assert listener.syslog_source_path == "/dev/udp"

    # Ingest / ranking defaults.
    defaults = config.defaults
    assert defaults.ingest_threshold == 2
    assert defaults.interesting_limit == 5

    # Request limits.
    assert config.limits.replay_raw_max_ids == 32
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def test_resolve_default_db_path_uses_env_override(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
    """``BRAINSTEM_DB_PATH`` is honoured when set; without it the relative default is returned."""
    override = str(tmp_path / "override.sqlite3")

    # With the variable set, the resolver must echo it back unchanged.
    monkeypatch.setenv("BRAINSTEM_DB_PATH", override)
    assert resolve_default_db_path() == override

    # With the variable removed, the resolver falls back to its default path.
    monkeypatch.delenv("BRAINSTEM_DB_PATH", raising=False)
    assert resolve_default_db_path() == ".brainstem-state/brainstem.sqlite3"
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
from brainstem.adapters import FileRawInputAdapter, get_raw_input_adapter, register_raw_input_adapter
|
|
4
|
+
from brainstem.ingest import run_ingest_file, run_ingest_file_lines
|
|
5
|
+
from brainstem.models import RawInputEnvelope
|
|
6
|
+
from brainstem.storage import get_ingest_stats, list_recent_raw_envelopes
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def test_run_ingest_file_persists_accounting_and_generates_candidates(tmp_path: Path) -> None:
    """Ingesting a two-line log file records accounting rows and produces candidates."""
    log_file = tmp_path / "file.log"
    log_file.write_text("vpn tunnel dropped and recovered\nvpn tunnel dropped and recovered\n", encoding="utf-8")
    database = str(tmp_path / "brainstem.sqlite3")

    outcome = run_ingest_file(str(log_file), tenant_id="client-a", threshold=2, db_path=database)

    # In-memory result of the run.
    assert len(outcome.events) == 2
    assert outcome.parse_failed == 0
    assert len(outcome.signatures) >= 1
    assert len(outcome.candidates) >= 1

    # Counters read back from the database.
    accounting = get_ingest_stats(database)
    assert accounting["received"] == 2
    assert accounting["canonicalized"] == 2
    assert accounting["parse_failed"] == 0
    assert accounting["candidates_generated"] >= 1

    # Stored raw envelopes keep their origin and canonicalization status.
    envelopes = list_recent_raw_envelopes(database, limit=10)
    newest = envelopes[0]
    assert newest["source_type"] == "file"
    assert newest["source_path"] == str(log_file)
    assert all(row["canonicalization_status"] == "canonicalized" for row in envelopes)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def test_run_ingest_file_lines_records_parse_failures_without_breaking_pipeline(tmp_path: Path) -> None:
    """One line yielding an empty-message envelope is expected to count as a parse failure.

    The registered "file" adapter is temporarily swapped for one that returns an
    envelope with ``message_raw=""`` for the literal line ``"BROKEN"``; the other
    two lines must still be ingested.
    """
    database = str(tmp_path / "brainstem.sqlite3")
    previous_adapter = get_raw_input_adapter("file")

    class BrokenFileAdapter:
        source_type = "file"
        calls = 0

        def parse_raw_input(self, payload, *, tenant_id: str, source_path: str = "") -> RawInputEnvelope:
            BrokenFileAdapter.calls += 1
            if payload != "BROKEN":
                # Everything else is delegated to the stock file adapter.
                return FileRawInputAdapter().parse_raw_input(payload, tenant_id=tenant_id, source_path=source_path)
            return RawInputEnvelope(
                tenant_id=tenant_id,
                source_type="file",
                timestamp="2026-03-22T00:00:00Z",
                message_raw="",
                source_path=source_path,
                metadata={"raw_line": payload},
            )

    lines = ["vpn tunnel dropped and recovered", "BROKEN", "vpn tunnel dropped and recovered"]

    register_raw_input_adapter(BrokenFileAdapter())
    try:
        outcome = run_ingest_file_lines(
            lines,
            tenant_id="client-a",
            source_path="/tmp/test.log",
            threshold=2,
            db_path=database,
        )
    finally:
        # Always put the previously registered adapter back.
        register_raw_input_adapter(previous_adapter)

    assert outcome.parse_failed == 1
    assert len(outcome.events) == 2
    assert outcome.raw_envelope_ids

    accounting = get_ingest_stats(database)
    assert accounting["received"] == 3
    assert accounting["canonicalized"] == 2
    assert accounting["parse_failed"] == 1
    assert accounting["candidates_generated"] >= 1

    rejected = list_recent_raw_envelopes(database, status="parse_failed", limit=10)
    assert len(rejected) == 1
|
|
@@ -18,7 +18,17 @@ def test_interesting_items_returns_ranked_observations() -> None:
|
|
|
18
18
|
assert items
|
|
19
19
|
assert items[0]['title']
|
|
20
20
|
assert 'why_it_matters' in items[0]
|
|
21
|
+
assert 'attention_explanation' in items[0]
|
|
21
22
|
assert items[0]['signals']
|
|
23
|
+
explanation = items[0]["attention_explanation"]
|
|
24
|
+
assert explanation["attention_band"] in {"ignore_fast", "background", "watch", "investigate", "promote"}
|
|
25
|
+
assert isinstance(explanation["dominant_signals"], list)
|
|
26
|
+
assert explanation["dominant_signals"]
|
|
27
|
+
first_signal = explanation["dominant_signals"][0]
|
|
28
|
+
assert {"signal", "value", "label", "rationale"} <= set(first_signal.keys())
|
|
29
|
+
values = [signal["value"] for signal in explanation["dominant_signals"]]
|
|
30
|
+
assert values == sorted(values, reverse=True)
|
|
31
|
+
assert explanation["dominant_signals"][0]["label"] in items[0]["why_it_matters"]
|
|
22
32
|
|
|
23
33
|
|
|
24
34
|
def test_interesting_items_respects_limit() -> None:
|
|
@@ -0,0 +1,253 @@
|
|
|
1
|
+
import socket
|
|
2
|
+
import threading
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
import brainstem.listener
|
|
6
|
+
from brainstem.listener import parse_syslog_datagram, run_ingest_syslog_datagram, run_udp_syslog_listener
|
|
7
|
+
from brainstem.adapters import get_raw_input_adapter, register_raw_input_adapter
|
|
8
|
+
from brainstem.models import RawInputEnvelope
|
|
9
|
+
from brainstem.storage import get_ingest_stats, list_candidates, list_recent_raw_envelopes
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def test_parse_syslog_datagram_pushes_through_canonicalization() -> None:
    """A two-line datagram yields two events carrying the parsed syslog fields."""
    datagram = b"".join(
        [
            b"Mar 22 00:00:01 fw-01 charon: VPN tunnel dropped and recovered\n",
            b"Mar 22 00:00:02 fw-01 charon: IPsec SA rekey succeeded\n",
        ]
    )

    parsed = parse_syslog_datagram(datagram, tenant_id="client-a", source_path="/var/log/syslog")

    assert len(parsed) == 2

    # Field-by-field check of the first line.
    first = parsed[0]
    assert first.tenant_id == "client-a"
    assert first.source_path == "/var/log/syslog"
    assert first.host == "fw-01"
    assert first.service == "charon"
    assert first.message_normalized == "vpn tunnel dropped and recovered"
    assert first.ingest_metadata["canonicalization_source"] == "syslog"

    # The second event must keep its own raw line.
    assert parsed[1].ingest_metadata["raw_line"].startswith("Mar 22 00:00:02")
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class _FakeDatagramSocket:
    """In-memory stand-in for a UDP socket that replays a fixed list of datagrams.

    Only ``getsockname``, ``settimeout``, ``recvfrom`` and ``close`` are
    provided. Once every queued datagram has been handed out, ``recvfrom``
    raises ``socket.timeout``.
    """

    def __init__(self, datagrams: list[bytes]) -> None:
        """Queue a copy of *datagrams* for later ``recvfrom`` calls."""
        # Copy so that popping from the queue never mutates the caller's list.
        self.datagrams = list(datagrams)
        self.closed = False
        # Defined up front so the attribute can be read before settimeout() runs.
        self.timeout = None

    def getsockname(self) -> tuple[str, int]:
        """Return the fixed fake bind address."""
        return ("127.0.0.1", 5514)

    def settimeout(self, timeout: float) -> None:
        """Record the requested timeout; nothing ever blocks on it."""
        self.timeout = timeout

    def recvfrom(self, size: int) -> tuple[bytes, tuple[str, int]]:
        """Return the next queued datagram with a fixed fake sender address.

        *size* is accepted for signature compatibility and is otherwise ignored.

        Raises:
            socket.timeout: when no queued datagrams remain.
        """
        if not self.datagrams:
            raise socket.timeout("no more datagrams")
        return self.datagrams.pop(0), ("127.0.0.1", 514)

    def close(self) -> None:
        """Mark the fake socket as closed."""
        self.closed = True
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def test_udp_listener_processes_local_datagrams_without_external_network() -> None:
    """Two single-line datagrams served by the fake socket reach ``on_event`` in order."""
    stub_socket = _FakeDatagramSocket(
        [
            b"Mar 22 00:00:01 fw-01 charon: VPN tunnel dropped and recovered",
            b"Mar 22 00:00:02 fw-01 charon: IPsec SA rekey succeeded",
        ]
    )
    received_events = []
    parse_errors = []
    processed = threading.Event()

    def collect_event(event) -> None:
        received_events.append(event)
        if len(received_events) >= 2:
            processed.set()

    def collect_parse_error(exc, line: str) -> None:
        parse_errors.append((exc, line))

    listener_kwargs = dict(
        tenant_id="client-a",
        host="127.0.0.1",
        port=5514,
        source_path="/var/log/syslog",
        socket_obj=stub_socket,
        max_datagrams=2,
        on_event=collect_event,
        on_parse_error=collect_parse_error,
    )
    worker = threading.Thread(target=run_udp_syslog_listener, kwargs=listener_kwargs, daemon=True)
    worker.start()

    # Both waits are bounded to one second each.
    processed.wait(1.0)
    worker.join(timeout=1.0)

    assert len(received_events) == 2
    normalized = [event.message_normalized for event in received_events]
    assert normalized == [
        "vpn tunnel dropped and recovered",
        "ipsec sa rekey succeeded",
    ]
    assert parse_errors == []
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def test_udp_listener_can_persist_datagrams_and_update_accounting(tmp_path: Path) -> None:
    """With ``db_path`` set, the listener stores raw envelopes and links events to them."""
    # A single datagram that carries two newline-separated syslog lines.
    two_line_datagram = (
        b"Mar 22 00:00:01 fw-01 charon: VPN tunnel dropped and recovered\n"
        b"Mar 22 00:00:02 fw-01 charon: VPN tunnel dropped and recovered"
    )
    database = str(tmp_path / "brainstem.sqlite3")
    stub_socket = _FakeDatagramSocket([two_line_datagram])
    received_events = []
    processed = threading.Event()

    def collect_event(event) -> None:
        received_events.append(event)
        if len(received_events) >= 2:
            processed.set()

    listener_kwargs = dict(
        tenant_id="client-a",
        host="127.0.0.1",
        port=5514,
        source_path="/var/log/syslog",
        socket_obj=stub_socket,
        max_datagrams=1,
        on_event=collect_event,
        db_path=database,
        threshold=2,
    )
    worker = threading.Thread(target=run_udp_syslog_listener, kwargs=listener_kwargs, daemon=True)
    worker.start()

    processed.wait(1.0)
    worker.join(timeout=1.0)

    assert len(received_events) == 2

    accounting = get_ingest_stats(database)
    assert accounting["received"] == 2
    assert accounting["canonicalized"] == 2
    assert accounting["parse_failed"] == 0
    assert accounting["candidates_generated"] >= 1

    # Every delivered event must point at one of the stored raw envelopes.
    stored = list_recent_raw_envelopes(database, limit=10)
    stored_ids = [row["id"] for row in stored[:2]]
    assert len(stored_ids) == 2
    assert all(event.raw_envelope_id is not None for event in received_events)
    assert all(event.raw_envelope_id in stored_ids for event in received_events)
    assert len(stored) == 2
    assert stored[0]["canonicalization_status"] == "canonicalized"
    assert stored[1]["canonicalization_status"] == "canonicalized"
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def test_udp_listener_marks_parse_failures_via_pipeline(tmp_path: Path) -> None:
    """A line yielding an empty-message envelope is reported via ``on_parse_error`` and counted.

    The registered "syslog" adapter is temporarily replaced by one that returns
    an envelope with ``message_raw=""`` for payloads starting with ``"BROKEN"``.
    """
    # A single datagram: one well-formed line followed by one "BROKEN" line.
    mixed_datagram = (
        b"Mar 22 00:00:01 fw-01 charon: VPN tunnel dropped and recovered\n"
        b"BROKEN syslog payload with empty parsed body"
    )
    database = str(tmp_path / "brainstem.sqlite3")
    original_adapter = get_raw_input_adapter("syslog")

    class BrokenSyslogAdapter:
        source_type = "syslog"

        def parse_raw_input(self, payload, *, tenant_id: str, source_path: str = "") -> RawInputEnvelope:
            if not str(payload).startswith("BROKEN"):
                # Anything else goes through the adapter that was registered before.
                return original_adapter.parse_raw_input(payload, tenant_id=tenant_id, source_path=source_path)
            return RawInputEnvelope(
                tenant_id=tenant_id,
                source_type="syslog",
                timestamp="2026-03-22T00:00:00Z",
                message_raw="",
                source_path=source_path,
                metadata={"raw_line": str(payload)},
            )

    register_raw_input_adapter(BrokenSyslogAdapter())
    try:
        stub_socket = _FakeDatagramSocket([mixed_datagram])
        received_events = []
        parse_errors = []
        processed = threading.Event()

        def collect_event(event) -> None:
            received_events.append(event)
            if len(received_events) >= 1:
                processed.set()

        def collect_parse_error(exc, line: str) -> None:
            parse_errors.append((exc, line))

        listener_kwargs = dict(
            tenant_id="client-a",
            host="127.0.0.1",
            port=5514,
            source_path="/var/log/syslog",
            socket_obj=stub_socket,
            max_datagrams=1,
            on_event=collect_event,
            on_parse_error=collect_parse_error,
            db_path=database,
            threshold=2,
        )
        worker = threading.Thread(target=run_udp_syslog_listener, kwargs=listener_kwargs, daemon=True)
        worker.start()

        processed.wait(1.0)
        worker.join(timeout=1.0)

        assert len(received_events) == 1
        assert parse_errors
        assert parse_errors[0][1].startswith("BROKEN")

        accounting = get_ingest_stats(database)
        assert accounting["received"] == 2
        assert accounting["canonicalized"] == 1
        assert accounting["parse_failed"] == 1
        assert accounting["candidates_generated"] == 0

        rejected = list_recent_raw_envelopes(database, status="parse_failed", limit=10)
        assert len(rejected) == 1
    finally:
        # Put the previously registered adapter back whether or not the asserts passed.
        register_raw_input_adapter(original_adapter)
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
def test_listener_ingest_helper_persists_canonical_events_and_candidates(tmp_path: Path) -> None:
    """``run_ingest_syslog_datagram`` returns linked events and stores candidates."""
    datagram = b"".join(
        [
            b"Mar 22 00:00:01 fw-01 charon: VPN tunnel dropped and recovered\n",
            b"Mar 22 00:00:02 fw-01 charon: VPN tunnel dropped and recovered\n",
        ]
    )
    database = str(tmp_path / "brainstem.sqlite3")

    outcome = run_ingest_syslog_datagram(
        datagram,
        tenant_id="client-a",
        source_path="/var/log/syslog",
        db_path=database,
        threshold=2,
    )

    assert len(outcome.events) == 2
    # Events and raw envelope ids are compared pairwise, in order.
    paired = zip(outcome.events, outcome.raw_envelope_ids)
    assert all(event.raw_envelope_id == expected for event, expected in paired)
    assert len(outcome.signatures) >= 1
    assert len(outcome.candidates) >= 1
    assert outcome.parse_failed == 0

    accounting = get_ingest_stats(database)
    assert accounting["received"] == 2
    assert accounting["canonicalized"] == 2
    assert accounting["candidates_generated"] >= 1

    stored_candidates = list_candidates(database, limit=10)
    assert stored_candidates
|
package/tests/test_recurrence.py
CHANGED
|
@@ -13,4 +13,6 @@ def test_build_recurrence_candidates_emits_candidate_for_repeated_signature() ->
|
|
|
13
13
|
candidates = build_recurrence_candidates(events, signatures, threshold=2)
|
|
14
14
|
assert candidates
|
|
15
15
|
digest = digest_items(candidates)
|
|
16
|
+
assert digest[0]["attention_explanation"]["dominant_signals"]
|
|
17
|
+
assert "summary" in digest[0]["attention_explanation"]
|
|
16
18
|
assert digest[0]['decision_band'] in {'watch', 'review', 'urgent_human_review', 'promote_to_incident_memory'}
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
|
|
5
|
+
from brainstem.adapters import get_raw_input_adapter, register_raw_input_adapter
|
|
6
|
+
from brainstem.ingest import run_ingest_source_payload
|
|
7
|
+
from brainstem.models import RawInputEnvelope
|
|
8
|
+
from brainstem.storage import get_ingest_stats
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@pytest.mark.parametrize(
    "source_type,payload,source_path",
    [
        (
            "file",
            ["vpn tunnel dropped and recovered", "vpn tunnel dropped and recovered"],
            "/tmp/sample.log",
        ),
        (
            "syslog",
            b"Mar 22 00:00:01 fw-01 charon: VPN tunnel dropped and recovered\nMar 22 00:00:02 fw-01 charon: VPN tunnel dropped and recovered\n",
            "/var/log/syslog",
        ),
    ],
)
def test_run_ingest_source_payload_routes_file_and_syslog_payloads_through_same_pipeline(
    tmp_path: Path,
    source_type: str,
    payload,
    source_path: str,
) -> None:
    """File and syslog payloads produce the same result shape and the same accounting."""
    database = str(tmp_path / f"{source_type}.sqlite3")

    outcome = run_ingest_source_payload(
        source_type,
        payload,
        tenant_id="client-a",
        source_path=source_path,
        threshold=2,
        db_path=database,
    )

    assert len(outcome.events) == 2
    assert outcome.parse_failed == 0
    assert len(outcome.signatures) >= 1
    assert len(outcome.candidates) >= 1

    accounting = get_ingest_stats(database)
    assert accounting["received"] == 2
    assert accounting["canonicalized"] == 2
    assert accounting["parse_failed"] == 0
    assert accounting["candidates_generated"] >= 1
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
@pytest.mark.parametrize(
    "source_type,payload,source_path",
    [
        (
            "file",
            ["vpn tunnel dropped and recovered", "BROKEN syslog payload with empty parsed body", "vpn tunnel dropped and recovered"],
            "/tmp/sample.log",
        ),
        (
            "syslog",
            b"Mar 22 00:00:01 fw-01 charon: VPN tunnel dropped and recovered\nBROKEN syslog payload with empty parsed body\nMar 22 00:00:02 fw-01 charon: VPN tunnel dropped and recovered\n",
            "/var/log/syslog",
        ),
    ],
)
def test_run_ingest_source_payload_preserves_parse_failure_contract(
    source_type: str,
    payload,
    source_path: str,
    tmp_path: Path,
) -> None:
    """For both source types, one "BROKEN" line is expected to be counted as a parse failure.

    The adapter registered for ``source_type`` is temporarily replaced by one
    that returns an envelope with ``message_raw=""`` for payloads starting with
    ``"BROKEN"`` and delegates everything else to the previous adapter.
    """
    original_adapter = get_raw_input_adapter(source_type)
    database = str(tmp_path / f"{source_type}-failures.sqlite3")

    class BrokenAdapter:
        def __init__(self, source_type: str) -> None:
            self.source_type = source_type

        def parse_raw_input(self, payload, *, tenant_id: str, source_path: str = "") -> RawInputEnvelope:
            if not str(payload).startswith("BROKEN"):
                return original_adapter.parse_raw_input(payload, tenant_id=tenant_id, source_path=source_path)
            return RawInputEnvelope(
                tenant_id=tenant_id,
                source_type=source_type,
                timestamp="2026-03-22T00:00:00Z",
                message_raw="",
                source_path=source_path,
                metadata={"raw_line": str(payload)},
            )

    register_raw_input_adapter(BrokenAdapter(source_type))
    try:
        outcome = run_ingest_source_payload(
            source_type,
            payload,
            tenant_id="client-a",
            source_path=source_path,
            threshold=2,
            db_path=database,
        )
    finally:
        # Always put the previously registered adapter back.
        register_raw_input_adapter(original_adapter)

    assert outcome.parse_failed == 1
    assert len(outcome.events) == 2

    accounting = get_ingest_stats(database)
    assert accounting["received"] == 3
    assert accounting["canonicalized"] == 2
    assert accounting["parse_failed"] == 1
|