@simbimbo/brainstem 0.0.1 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. package/CHANGELOG.md +87 -0
  2. package/README.md +99 -3
  3. package/brainstem/__init__.py +3 -0
  4. package/brainstem/api.py +257 -0
  5. package/brainstem/connectors/__init__.py +1 -0
  6. package/brainstem/connectors/logicmonitor.py +26 -0
  7. package/brainstem/connectors/types.py +16 -0
  8. package/brainstem/demo.py +64 -0
  9. package/brainstem/fingerprint.py +44 -0
  10. package/brainstem/ingest.py +108 -0
  11. package/brainstem/instrumentation.py +38 -0
  12. package/brainstem/interesting.py +62 -0
  13. package/brainstem/models.py +80 -0
  14. package/brainstem/recurrence.py +112 -0
  15. package/brainstem/scoring.py +38 -0
  16. package/brainstem/storage.py +428 -0
  17. package/docs/adapters.md +435 -0
  18. package/docs/api.md +380 -0
  19. package/docs/architecture.md +333 -0
  20. package/docs/connectors.md +66 -0
  21. package/docs/data-model.md +290 -0
  22. package/docs/design-governance.md +595 -0
  23. package/docs/mvp-flow.md +109 -0
  24. package/docs/roadmap.md +87 -0
  25. package/docs/scoring.md +424 -0
  26. package/docs/v0.0.1.md +277 -0
  27. package/docs/vision.md +85 -0
  28. package/package.json +6 -14
  29. package/pyproject.toml +18 -0
  30. package/tests/fixtures/sample_syslog.log +6 -0
  31. package/tests/test_api.py +319 -0
  32. package/tests/test_canonicalization.py +28 -0
  33. package/tests/test_demo.py +25 -0
  34. package/tests/test_fingerprint.py +22 -0
  35. package/tests/test_ingest.py +15 -0
  36. package/tests/test_instrumentation.py +16 -0
  37. package/tests/test_interesting.py +36 -0
  38. package/tests/test_logicmonitor.py +22 -0
  39. package/tests/test_recurrence.py +16 -0
  40. package/tests/test_scoring.py +21 -0
  41. package/tests/test_storage.py +294 -0
@@ -0,0 +1,294 @@
1
+ import sqlite3
2
+ from pathlib import Path
3
+
4
+ from brainstem.ingest import ingest_syslog_lines, signatures_for_events
5
+ from brainstem.models import RawInputEnvelope
6
+ from brainstem.recurrence import build_recurrence_candidates
7
+ from brainstem.storage import (
8
+ get_raw_envelope_by_id,
9
+ get_ingest_stats,
10
+ init_db,
11
+ list_candidates,
12
+ get_source_dimension_summaries,
13
+ store_candidates,
14
+ store_events,
15
+ list_recent_failed_raw_envelopes,
16
+ list_recent_raw_envelopes,
17
+ store_raw_envelopes,
18
+ set_raw_envelope_status,
19
+ store_signatures,
20
+ )
21
+
22
+
23
def test_storage_round_trip(tmp_path: Path) -> None:
    """Ingest three identical syslog lines and store the derived records.

    Verifies the counts reported by store_events / store_signatures /
    store_candidates and that list_candidates returns at least one row
    with a non-empty title.
    """
    database = str(tmp_path / 'brainstem.sqlite3')
    init_db(database)

    # Same message repeated at seconds 01, 03 and 05.
    syslog_lines = [
        f'Mar 22 00:00:0{second} fw-01 charon: VPN tunnel dropped and recovered'
        for second in (1, 3, 5)
    ]
    events = ingest_syslog_lines(
        syslog_lines,
        tenant_id='client-a',
        source_path='/var/log/syslog',
    )
    signatures = signatures_for_events(events)
    candidates = build_recurrence_candidates(events, signatures, threshold=2)

    assert store_events(events, database) == 3
    assert store_signatures(signatures, database) == 3
    assert store_candidates(candidates, database) >= 1

    listed = list_candidates(database, limit=10)
    assert listed
    assert listed[0]['title']
42
+
43
+
44
def test_raw_envelope_records_are_persisted(tmp_path: Path) -> None:
    """Store two raw envelopes and read them back with plain SQL.

    Checks the ids returned by store_raw_envelopes and the tenant, source
    type, raw message and canonicalization status found in the
    raw_envelopes table.
    """
    database_file = tmp_path / 'brainstem.sqlite3'
    database = str(database_file)
    init_db(database)

    envelopes = [
        RawInputEnvelope(
            tenant_id="client-a",
            source_type="syslog",
            timestamp=stamp,
            message_raw=text,
            host="fw-01",
            service="charon",
        )
        for stamp, text in (
            ("2026-03-22T00:00:01Z", "VPN tunnel dropped and recovered"),
            ("2026-03-22T00:00:02Z", "IPsec SA rekey succeeded"),
        )
    ]
    assert store_raw_envelopes(envelopes, database) == [1, 2]

    query = "SELECT tenant_id, source_type, message_raw, canonicalization_status FROM raw_envelopes ORDER BY id ASC"
    connection = sqlite3.connect(database_file)
    try:
        persisted = connection.execute(query).fetchall()
    finally:
        connection.close()

    assert len(persisted) == 2
    tenant, source_type, message, status = persisted[0]
    assert tenant == "client-a"
    assert source_type == "syslog"
    assert message == "VPN tunnel dropped and recovered"
    assert status == "received"
    assert persisted[1][3] == "received"
81
+
82
+
83
def test_ingest_stats_from_raw_envelopes(tmp_path: Path) -> None:
    """Check the counters returned by get_ingest_stats.

    Two raw envelope rows (one 'canonicalized', one 'parse_failed') and one
    candidate row are inserted directly with SQL; the test then compares the
    'received', 'canonicalized', 'parse_failed' and 'candidates_generated'
    entries of the stats mapping against those rows.
    """
    db_path = tmp_path / 'brainstem.sqlite3'
    init_db(str(db_path))

    # One parameterized statement serves both fixture rows, so the column
    # list is written once and the per-row differences are visible as data.
    insert_raw_envelope = """
        INSERT INTO raw_envelopes (
            tenant_id, source_type, timestamp, host, service, severity,
            asset_id, source_path, facility, message_raw,
            structured_fields_json, correlation_keys_json, metadata_json,
            canonicalization_status, failure_reason
        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    """
    # tenant_id .. facility are identical for both rows.
    shared_prefix = (
        'client-a', 'syslog', '2026-03-22T00:00:00Z',
        'fw-01', 'charon', 'info', '', '', '',
    )
    # structured_fields_json, correlation_keys_json, metadata_json.
    empty_json = ('{}', '{}', '{}')
    raw_envelope_rows = [
        # message_raw, <json columns>, canonicalization_status, failure_reason
        shared_prefix + ('ok',) + empty_json + ('canonicalized', None),
        shared_prefix + ('bad',) + empty_json + ('parse_failed', 'message empty'),
    ]

    insert_candidate = (
        "INSERT INTO candidates ("
        "candidate_type, title, summary, score_total, score_breakdown_json, "
        "decision_band, source_signature_ids_json, source_event_ids_json, "
        "confidence, metadata_json"
        ") VALUES ('recurrence', 'x', 'y', 1.0, '{}', 'medium', '[]', '[]', 0.1, '{}')"
    )

    conn = sqlite3.connect(db_path)
    try:
        conn.executemany(insert_raw_envelope, raw_envelope_rows)
        conn.execute(insert_candidate)
        conn.commit()
    finally:
        # Close explicitly so the stats query below opens the file fresh.
        conn.close()

    stats = get_ingest_stats(str(db_path))
    assert stats["received"] == 2
    assert stats["canonicalized"] == 1
    assert stats["parse_failed"] == 1
    assert stats["candidates_generated"] == 1
128
+
129
+
130
def test_source_dimension_summaries(tmp_path: Path) -> None:
    """Store two syslog envelopes and one logicmonitor envelope, then summarize.

    Checks the order and counts of the 'source_type' entries and the
    value-to-count mapping of the 'source_path' and 'source_id' entries
    returned by get_source_dimension_summaries.
    """
    database = str(tmp_path / 'brainstem.sqlite3')
    init_db(database)

    # The two syslog envelopes differ only in timestamp and message.
    syslog_envelopes = [
        RawInputEnvelope(
            tenant_id='client-a',
            source_type='syslog',
            source_id='fw-01',
            source_name='edge-fw-01',
            timestamp=stamp,
            message_raw=text,
            source_path='/var/log/syslog',
            host='fw-01',
            service='charon',
        )
        for stamp, text in (
            ('2026-03-22T00:00:01Z', 'VPN tunnel dropped and recovered'),
            ('2026-03-22T00:00:02Z', 'IPsec SA rekey succeeded'),
        )
    ]
    logicmonitor_envelope = RawInputEnvelope(
        tenant_id='client-a',
        source_type='logicmonitor',
        source_id='lm-1',
        source_name='edge-lm-01',
        timestamp='2026-03-22T00:00:03Z',
        message_raw='CPU usage high',
        source_path='/alerts',
        host='lm-01',
        service='logicmonitor',
    )
    store_raw_envelopes(
        [*syslog_envelopes, logicmonitor_envelope],
        db_path=database,
    )

    summary = get_source_dimension_summaries(database, limit=10)

    type_entries = summary['source_type']
    assert type_entries[0]['value'] == "syslog"
    assert type_entries[0]['count'] == 2
    assert type_entries[1]['value'] == "logicmonitor"
    assert type_entries[1]['count'] == 1

    path_counts = {entry['value']: entry['count'] for entry in summary['source_path']}
    assert path_counts == {
        '/alerts': 1,
        '/var/log/syslog': 2,
    }

    id_counts = {entry['value']: entry['count'] for entry in summary['source_id']}
    assert id_counts == {
        'fw-01': 2,
        'lm-1': 1,
    }
185
+
186
+
187
def test_list_recent_raw_envelopes_supports_status_filtering(tmp_path: Path) -> None:
    """Store three envelopes, give each a different status, then list them.

    Without a filter the ids are expected in reverse insertion order; with
    status='parse_failed' only the first stored envelope is expected.
    """
    database = str(tmp_path / 'brainstem.sqlite3')
    init_db(database)

    envelopes = [
        RawInputEnvelope(
            tenant_id='client-a',
            source_type='syslog',
            timestamp=stamp,
            message_raw=text,
            host='fw-01',
            service='sshd',
        )
        for stamp, text in (
            ('2026-03-22T00:00:01Z', 'first'),
            ('2026-03-22T00:00:02Z', 'second'),
            ('2026-03-22T00:00:03Z', 'third'),
        )
    ]
    raw_ids = store_raw_envelopes(envelopes, db_path=database)

    set_raw_envelope_status(
        raw_ids[0], 'parse_failed', db_path=database, failure_reason='empty message'
    )
    set_raw_envelope_status(raw_ids[1], 'canonicalized', db_path=database)
    set_raw_envelope_status(
        raw_ids[2], 'unsupported', db_path=database, failure_reason='unsupported source'
    )

    unfiltered = list_recent_raw_envelopes(database, limit=10)
    unfiltered_ids = [row['id'] for row in unfiltered]
    assert unfiltered_ids == [raw_ids[2], raw_ids[1], raw_ids[0]]

    parse_failed_rows = list_recent_raw_envelopes(database, status='parse_failed', limit=10)
    parse_failed_ids = [row['id'] for row in parse_failed_rows]
    assert parse_failed_ids == [raw_ids[0]]
228
+
229
+
230
def test_query_recent_failed_raw_envelopes_with_status_filter(tmp_path: Path) -> None:
    """Store three envelopes with different statuses and list the failures.

    Without a filter the 'unsupported' and 'parse_failed' envelopes are
    expected (in that order) and the 'canonicalized' one is not; with
    status="parse_failed" only the first stored envelope is expected.
    """
    database = str(tmp_path / 'brainstem.sqlite3')
    init_db(database)

    envelopes = [
        RawInputEnvelope(
            tenant_id="client-a",
            source_type="syslog",
            timestamp=stamp,
            message_raw=text,
            host="fw-01",
            service="sshd",
        )
        for stamp, text in (
            ("2026-03-22T00:00:01Z", "first"),
            ("2026-03-22T00:00:02Z", "second"),
            ("2026-03-22T00:00:03Z", "third"),
        )
    ]
    raw_ids = store_raw_envelopes(envelopes, database)

    set_raw_envelope_status(
        raw_ids[0], "parse_failed", db_path=database, failure_reason="empty message"
    )
    set_raw_envelope_status(raw_ids[1], "canonicalized", db_path=database)
    set_raw_envelope_status(
        raw_ids[2], "unsupported", db_path=database, failure_reason="unsupported source"
    )

    failures = list_recent_failed_raw_envelopes(database, limit=10)
    failure_ids = [row["id"] for row in failures]
    assert failure_ids == [raw_ids[2], raw_ids[0]]
    # The id check above guarantees exactly two rows here.
    latest_failure, earliest_failure = failures
    assert latest_failure["canonicalization_status"] == "unsupported"
    assert earliest_failure["canonicalization_status"] == "parse_failed"

    parse_failed_rows = list_recent_failed_raw_envelopes(
        database, status="parse_failed", limit=10
    )
    assert len(parse_failed_rows) == 1
    assert parse_failed_rows[0]["id"] == raw_ids[0]
272
+
273
+
274
def test_get_raw_envelope_by_id(tmp_path: Path) -> None:
    """Store one envelope, mark it parse_failed, and fetch it by id.

    Checks that the fetched row exists and carries the same id, the
    updated canonicalization status and the recorded failure reason.
    """
    database = str(tmp_path / 'brainstem.sqlite3')
    init_db(database)

    envelope = RawInputEnvelope(
        tenant_id="client-a",
        source_type="syslog",
        timestamp="2026-03-22T00:00:01Z",
        message_raw="single",
        host="fw-01",
        service="charon",
    )
    # Exactly one id is expected back for the single stored envelope.
    [raw_id] = store_raw_envelopes([envelope], database)
    set_raw_envelope_status(
        raw_id,
        "parse_failed",
        db_path=database,
        failure_reason="empty message",
    )

    fetched = get_raw_envelope_by_id(raw_id, db_path=database)
    assert fetched is not None
    assert fetched["id"] == raw_id
    assert fetched["canonicalization_status"] == "parse_failed"
    assert fetched["failure_reason"] == "empty message"