@simbimbo/brainstem 0.0.1 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +87 -0
- package/README.md +99 -3
- package/brainstem/__init__.py +3 -0
- package/brainstem/api.py +257 -0
- package/brainstem/connectors/__init__.py +1 -0
- package/brainstem/connectors/logicmonitor.py +26 -0
- package/brainstem/connectors/types.py +16 -0
- package/brainstem/demo.py +64 -0
- package/brainstem/fingerprint.py +44 -0
- package/brainstem/ingest.py +108 -0
- package/brainstem/instrumentation.py +38 -0
- package/brainstem/interesting.py +62 -0
- package/brainstem/models.py +80 -0
- package/brainstem/recurrence.py +112 -0
- package/brainstem/scoring.py +38 -0
- package/brainstem/storage.py +428 -0
- package/docs/adapters.md +435 -0
- package/docs/api.md +380 -0
- package/docs/architecture.md +333 -0
- package/docs/connectors.md +66 -0
- package/docs/data-model.md +290 -0
- package/docs/design-governance.md +595 -0
- package/docs/mvp-flow.md +109 -0
- package/docs/roadmap.md +87 -0
- package/docs/scoring.md +424 -0
- package/docs/v0.0.1.md +277 -0
- package/docs/vision.md +85 -0
- package/package.json +6 -14
- package/pyproject.toml +18 -0
- package/tests/fixtures/sample_syslog.log +6 -0
- package/tests/test_api.py +319 -0
- package/tests/test_canonicalization.py +28 -0
- package/tests/test_demo.py +25 -0
- package/tests/test_fingerprint.py +22 -0
- package/tests/test_ingest.py +15 -0
- package/tests/test_instrumentation.py +16 -0
- package/tests/test_interesting.py +36 -0
- package/tests/test_logicmonitor.py +22 -0
- package/tests/test_recurrence.py +16 -0
- package/tests/test_scoring.py +21 -0
- package/tests/test_storage.py +294 -0
|
@@ -0,0 +1,428 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import sqlite3
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Any, Iterable, List
|
|
7
|
+
|
|
8
|
+
from .models import Candidate, Event, RawInputEnvelope, Signature
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def default_db_path() -> Path:
    """Return the database path used when the caller does not supply one.

    The result is the relative path ``.brainstem-state/brainstem.sqlite3``.
    """
    return Path('.brainstem-state', 'brainstem.sqlite3')
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
# Statuses selected by the failures-only filter when listing raw envelopes.
RAW_ENVELOPE_FAILURE_STATUSES = ("parse_failed", "unsupported")
# Every value accepted by _validate_canonicalization_status.
RAW_ENVELOPE_STATUSES = ("received", "canonicalized", *RAW_ENVELOPE_FAILURE_STATUSES)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _validate_canonicalization_status(status: str) -> None:
    """Reject any status that is not listed in ``RAW_ENVELOPE_STATUSES``.

    Args:
        status: Candidate value for ``canonicalization_status``.

    Raises:
        ValueError: If ``status`` is not one of the known statuses.
    """
    if status in RAW_ENVELOPE_STATUSES:
        return
    raise ValueError(f"unsupported canonicalization_status: {status}")
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def connect(db_path: str | None = None) -> sqlite3.Connection:
|
|
25
|
+
path = Path(db_path) if db_path else default_db_path()
|
|
26
|
+
path.parent.mkdir(parents=True, exist_ok=True)
|
|
27
|
+
conn = sqlite3.connect(path)
|
|
28
|
+
conn.row_factory = sqlite3.Row
|
|
29
|
+
return conn
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def init_db(db_path: str | None = None) -> None:
    """Create the events, raw_envelopes, signatures and candidates tables.

    Every statement is ``CREATE TABLE IF NOT EXISTS``, so tables that already
    exist are left as they are.

    Args:
        db_path: Database location, passed through to ``connect``.
    """
    schema = '''
        CREATE TABLE IF NOT EXISTS events (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            tenant_id TEXT NOT NULL,
            source_type TEXT NOT NULL,
            timestamp TEXT NOT NULL,
            host TEXT,
            service TEXT,
            severity TEXT,
            asset_id TEXT,
            source_path TEXT,
            facility TEXT,
            message_raw TEXT NOT NULL,
            structured_fields_json TEXT NOT NULL,
            correlation_keys_json TEXT NOT NULL
        );

        CREATE TABLE IF NOT EXISTS raw_envelopes (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            tenant_id TEXT NOT NULL,
            source_type TEXT NOT NULL,
            source_id TEXT,
            source_name TEXT,
            timestamp TEXT NOT NULL,
            host TEXT,
            service TEXT,
            severity TEXT,
            asset_id TEXT,
            source_path TEXT,
            facility TEXT,
            message_raw TEXT NOT NULL,
            structured_fields_json TEXT NOT NULL,
            correlation_keys_json TEXT NOT NULL,
            metadata_json TEXT NOT NULL,
            canonicalization_status TEXT NOT NULL DEFAULT 'received',
            failure_reason TEXT
        );

        CREATE TABLE IF NOT EXISTS signatures (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            signature_key TEXT NOT NULL UNIQUE,
            event_family TEXT NOT NULL,
            normalized_pattern TEXT NOT NULL,
            service TEXT,
            metadata_json TEXT NOT NULL,
            occurrence_count INTEGER NOT NULL DEFAULT 0
        );

        CREATE TABLE IF NOT EXISTS candidates (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            candidate_type TEXT NOT NULL,
            title TEXT NOT NULL,
            summary TEXT NOT NULL,
            score_total REAL NOT NULL,
            score_breakdown_json TEXT NOT NULL,
            decision_band TEXT NOT NULL,
            source_signature_ids_json TEXT NOT NULL,
            source_event_ids_json TEXT NOT NULL,
            confidence REAL NOT NULL,
            metadata_json TEXT NOT NULL
        );
    '''
    db = connect(db_path)
    try:
        db.executescript(schema)
        db.commit()
    finally:
        db.close()
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def store_raw_envelopes(raw_envelopes: Iterable[RawInputEnvelope], db_path: str | None = None) -> List[int]:
    """Insert raw envelopes and return the new row ids in input order.

    Each row is written with ``canonicalization_status`` set to ``"received"``
    and ``failure_reason`` set to NULL. The commit happens once, after all
    inserts; if an insert raises, the connection is closed without committing.

    Args:
        raw_envelopes: Envelopes to persist.
        db_path: Database location, passed through to ``connect``.

    Returns:
        The ``lastrowid`` of each insert, one entry per envelope.
    """
    insert_sql = '''
        INSERT INTO raw_envelopes (
            tenant_id, source_type, source_id, source_name, timestamp, host, service, severity,
            asset_id, source_path, facility, message_raw,
            structured_fields_json, correlation_keys_json, metadata_json,
            canonicalization_status, failure_reason
        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    '''

    def as_row(envelope: RawInputEnvelope) -> tuple:
        # Values are listed in the same order as the column list above.
        return (
            envelope.tenant_id,
            envelope.source_type,
            envelope.source_id,
            envelope.source_name,
            envelope.timestamp,
            envelope.host,
            envelope.service,
            envelope.severity,
            envelope.asset_id,
            envelope.source_path,
            envelope.facility,
            envelope.message_raw,
            json.dumps(envelope.structured_fields, ensure_ascii=False),
            json.dumps(envelope.correlation_keys, ensure_ascii=False),
            json.dumps(envelope.metadata, ensure_ascii=False),
            "received",
            None,
        )

    db = connect(db_path)
    try:
        new_ids: List[int] = [
            int(db.execute(insert_sql, as_row(envelope)).lastrowid)
            for envelope in raw_envelopes
        ]
        db.commit()
        return new_ids
    finally:
        db.close()
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def set_raw_envelope_status(
    raw_envelope_id: int,
    status: str,
    db_path: str | None = None,
    *,
    failure_reason: str | None = None,
) -> None:
    """Overwrite the canonicalization status and failure reason of one envelope.

    ``failure_reason`` is always written, so leaving it at ``None`` stores NULL
    over any earlier value. No check is made that a row with the given id
    exists.

    Args:
        raw_envelope_id: ``id`` of the ``raw_envelopes`` row to update.
        status: New status; must be one of ``RAW_ENVELOPE_STATUSES``.
        db_path: Database location, passed through to ``connect``.
        failure_reason: Text stored in ``failure_reason``.

    Raises:
        ValueError: If ``status`` is not a known status. This is checked
            before the database is opened.
    """
    _validate_canonicalization_status(status)
    update_sql = '''
        UPDATE raw_envelopes
        SET canonicalization_status = ?, failure_reason = ?
        WHERE id = ?
    '''
    bind_values = (status, failure_reason, raw_envelope_id)
    db = connect(db_path)
    try:
        db.execute(update_sql, bind_values)
        db.commit()
    finally:
        db.close()
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def get_raw_envelope_by_id(raw_envelope_id: int, db_path: str | None = None) -> sqlite3.Row | None:
    """Fetch a single ``raw_envelopes`` row by primary key.

    Args:
        raw_envelope_id: ``id`` of the row to load.
        db_path: Database location, passed through to ``connect``.

    Returns:
        The matching row, or ``None`` when no row has that id.
    """
    db = connect(db_path)
    try:
        cursor = db.execute(
            "SELECT * FROM raw_envelopes WHERE id = ?",
            (raw_envelope_id,),
        )
        row = cursor.fetchone()
    finally:
        db.close()
    return row
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
def _recent_raw_envelopes_query(
    canonicalization_status: str | None,
    *,
    failures_only: bool,
) -> tuple[str, tuple[str, ...], bool]:
    """Build the WHERE clause and bind values for listing raw envelopes.

    Args:
        canonicalization_status: Exact status to match, or ``None`` for no
            exact-status filter.
        failures_only: When no exact status is given, restrict the result to
            ``RAW_ENVELOPE_FAILURE_STATUSES``. Ignored when
            ``canonicalization_status`` is provided.

    Returns:
        A ``(where_clause, bind_values, failure_filter_applied)`` tuple.
        ``where_clause`` is an empty string when no filter applies.

    Raises:
        ValueError: If ``canonicalization_status`` is given but is not one of
            the known statuses.
    """
    if canonicalization_status is not None:
        _validate_canonicalization_status(canonicalization_status)
        return "WHERE canonicalization_status = ?", (canonicalization_status,), False
    if failures_only:
        # Emit one placeholder per failure status so the clause and its bind
        # values cannot drift apart if the status tuple ever changes length.
        placeholders = ", ".join("?" for _ in RAW_ENVELOPE_FAILURE_STATUSES)
        return (
            f"WHERE canonicalization_status IN ({placeholders})",
            tuple(RAW_ENVELOPE_FAILURE_STATUSES),
            True,
        )
    return "", (), False
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def list_recent_raw_envelopes(
    db_path: str | None = None,
    status: str | None = None,
    limit: int = 20,
    *,
    failures_only: bool = False,
) -> List[sqlite3.Row]:
    """Return the newest raw envelopes, optionally filtered by status.

    Rows are ordered by ``id`` descending.

    Args:
        db_path: Database location, passed through to ``connect``.
        status: Exact ``canonicalization_status`` to match, or ``None``.
        limit: Maximum number of rows; values below 1 are raised to 1.
        failures_only: With ``status`` left as ``None``, keep only rows whose
            status is one of ``RAW_ENVELOPE_FAILURE_STATUSES``.

    Returns:
        The matching rows.

    Raises:
        ValueError: If ``status`` is given but is not a known status.
    """
    db = connect(db_path)
    try:
        where_clause, status_args, _ = _recent_raw_envelopes_query(status, failures_only=failures_only)
        filter_sql = (where_clause + " ") if where_clause else ""
        query = f"""
            SELECT * FROM raw_envelopes
            {filter_sql}
            ORDER BY id DESC
            LIMIT ?
        """
        bind_values = (*status_args, max(1, limit))
        return db.execute(query, bind_values).fetchall()
    finally:
        db.close()
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
def list_recent_failed_raw_envelopes(
    db_path: str | None = None,
    *,
    status: str | None = None,
    limit: int = 20,
) -> List[sqlite3.Row]:
    """Return recent raw envelopes in a failure status.

    With ``status`` left as ``None`` the failures-only filter is requested.
    With an explicit ``status`` the rows are filtered by exactly that status
    instead; the value is passed straight to ``list_recent_raw_envelopes``,
    so any status that function accepts is accepted here as well.

    Args:
        db_path: Database location.
        status: Exact status to match, or ``None`` for all failure statuses.
        limit: Maximum number of rows to return.

    Returns:
        The rows produced by ``list_recent_raw_envelopes``.
    """
    want_all_failures = status is None
    return list_recent_raw_envelopes(
        db_path=db_path,
        status=status,
        limit=limit,
        failures_only=want_all_failures,
    )
|
|
229
|
+
|
|
230
|
+
|
|
231
|
+
def store_events(events: Iterable[Event], db_path: str | None = None) -> int:
    """Insert events into the ``events`` table and return how many were written.

    ``structured_fields`` and ``correlation_keys`` are stored as JSON text.
    The commit happens once, after all inserts; if an insert raises, the
    connection is closed without committing.

    Args:
        events: Events to persist.
        db_path: Database location, passed through to ``connect``.

    Returns:
        The number of events inserted.
    """
    insert_sql = '''
        INSERT INTO events (
            tenant_id, source_type, timestamp, host, service, severity,
            asset_id, source_path, facility, message_raw,
            structured_fields_json, correlation_keys_json
        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    '''
    db = connect(db_path)
    stored = 0
    try:
        # enumerate(start=1) leaves ``stored`` equal to the number of rows
        # handled; it stays 0 when ``events`` is empty.
        for stored, item in enumerate(events, start=1):
            row = (
                item.tenant_id,
                item.source_type,
                item.timestamp,
                item.host,
                item.service,
                item.severity,
                item.asset_id,
                item.source_path,
                item.facility,
                item.message_raw,
                json.dumps(item.structured_fields, ensure_ascii=False),
                json.dumps(item.correlation_keys, ensure_ascii=False),
            )
            db.execute(insert_sql, row)
        db.commit()
        return stored
    finally:
        db.close()
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
# Columns of raw_envelopes that may be summarized. This tuple is also the
# whitelist checked before a column name is interpolated into SQL text.
SOURCE_SUMMARY_DIMENSIONS = (
    "source_type", "source_path", "source_id", "source_name",
    "host", "service",
)
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
def _summarize_raw_envelopes_by_dimension(
    conn: sqlite3.Connection,
    dimension: str,
    limit: int = 20,
) -> List[dict[str, int | str]]:
    """Count raw envelopes per distinct value of one whitelisted column.

    Rows whose column is NULL or blank after trimming are skipped. Results are
    ordered by count descending, then by value ascending.

    Args:
        conn: Open connection; rows must support lookup by column name, as
            they do with the ``row_factory`` set by ``connect``.
        dimension: Column to group by; must be in
            ``SOURCE_SUMMARY_DIMENSIONS``.
        limit: Maximum number of entries; values below 1 are raised to 1.

    Returns:
        A list of ``{"value": ..., "count": ...}`` dictionaries.

    Raises:
        ValueError: If ``dimension`` is not a whitelisted column.
    """
    if dimension not in SOURCE_SUMMARY_DIMENSIONS:
        raise ValueError(f"unsupported dimension: {dimension}")
    # Interpolating the column name is safe only because it was checked
    # against the fixed whitelist above.
    query = f"""
        SELECT {dimension} AS value, COUNT(*) AS count
        FROM raw_envelopes
        WHERE COALESCE(TRIM({dimension}), '') <> ''
        GROUP BY {dimension}
        ORDER BY COUNT(*) DESC, value ASC
        LIMIT ?
    """
    summary: List[dict[str, int | str]] = []
    for row in conn.execute(query, (max(1, limit),)).fetchall():
        summary.append({"value": row["value"], "count": int(row["count"])})
    return summary
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
def get_source_dimension_summaries(
    db_path: str | None = None,
    *,
    limit: int = 20,
) -> dict[str, List[dict[str, int | str]]]:
    """Return per-dimension value counts for stored raw envelopes.

    ``init_db`` is called first, so the tables exist before they are queried.

    Args:
        db_path: Database location.
        limit: Maximum number of entries per dimension.

    Returns:
        The mapping built by ``_get_source_dimension_summaries_from_conn``.
    """
    init_db(db_path)
    db = connect(db_path)
    try:
        summaries = _get_source_dimension_summaries_from_conn(db, limit=limit)
    finally:
        db.close()
    return summaries
|
|
310
|
+
|
|
311
|
+
|
|
312
|
+
def _get_source_dimension_summaries_from_conn(
    conn: sqlite3.Connection,
    *,
    limit: int = 20,
) -> dict[str, List[dict[str, int | str]]]:
    """Summarize raw envelopes for every entry in ``SOURCE_SUMMARY_DIMENSIONS``.

    Args:
        conn: Open connection, reused for every per-dimension query.
        limit: Maximum number of entries per dimension.

    Returns:
        A dictionary keyed by dimension name, in the order of
        ``SOURCE_SUMMARY_DIMENSIONS``; each value is the list returned by
        ``_summarize_raw_envelopes_by_dimension`` for that dimension.
    """
    # Iterate the shared whitelist rather than repeating its members here, so
    # the reported dimensions cannot drift from the set the summarizer accepts.
    return {
        dimension: _summarize_raw_envelopes_by_dimension(conn, dimension, limit=limit)
        for dimension in SOURCE_SUMMARY_DIMENSIONS
    }
|
|
325
|
+
|
|
326
|
+
|
|
327
|
+
def _query_count(conn: sqlite3.Connection, query: str) -> int:
    """Run ``query`` and return the first column of its first row as an int.

    The query is executed without bind parameters.

    Args:
        conn: Open connection to run the query on.
        query: SQL text whose first result row starts with a count.

    Returns:
        The first column of the first row, converted with ``int``.
    """
    first_row = conn.execute(query).fetchone()
    return int(first_row[0])
|
|
329
|
+
|
|
330
|
+
|
|
331
|
+
def get_ingest_stats(db_path: str | None = None) -> dict[str, Any]:
    """Collect ingest counters and source summaries from the database.

    ``init_db`` is called first, so the tables exist before they are queried.

    Args:
        db_path: Database location.

    Returns:
        A dictionary with these keys, in this order: ``received`` (count of
        all ``raw_envelopes`` rows, whatever their status), ``canonicalized``,
        ``parse_failed`` and ``unsupported`` (rows with that
        ``canonicalization_status``), ``candidates_generated`` (count of
        ``candidates`` rows) and ``source_summaries``.
    """
    init_db(db_path)
    db = connect(db_path)
    try:
        stats: dict[str, Any] = {
            "received": _query_count(db, "SELECT COUNT(*) FROM raw_envelopes"),
        }
        # Only these fixed literals are interpolated into the SQL text.
        for status in ("canonicalized", "parse_failed", "unsupported"):
            stats[status] = _query_count(
                db,
                f"SELECT COUNT(*) FROM raw_envelopes WHERE canonicalization_status = '{status}'",
            )
        stats["candidates_generated"] = _query_count(db, "SELECT COUNT(*) FROM candidates")
        stats["source_summaries"] = _get_source_dimension_summaries_from_conn(db)
        return stats
    finally:
        db.close()
|
|
354
|
+
|
|
355
|
+
|
|
356
|
+
def store_signatures(signatures: Iterable[Signature], db_path: str | None = None) -> int:
    """Upsert signatures keyed by ``signature_key`` and return how many were processed.

    A new key is inserted with ``occurrence_count`` 1. For an existing key the
    stored ``occurrence_count`` is incremented and ``metadata_json`` and
    ``service`` are replaced; the upsert does not touch ``event_family`` or
    ``normalized_pattern``.

    Args:
        signatures: Signatures to persist.
        db_path: Database location, passed through to ``connect``.

    Returns:
        The number of signatures processed, counting inserts and updates.
    """
    upsert_sql = '''
        INSERT INTO signatures (
            signature_key, event_family, normalized_pattern, service, metadata_json, occurrence_count
        ) VALUES (?, ?, ?, ?, ?, 1)
        ON CONFLICT(signature_key) DO UPDATE SET
            occurrence_count = occurrence_count + 1,
            metadata_json = excluded.metadata_json,
            service = excluded.service
    '''
    db = connect(db_path)
    processed = 0
    try:
        # enumerate(start=1) leaves ``processed`` equal to the number handled.
        for processed, item in enumerate(signatures, start=1):
            row = (
                item.signature_key,
                item.event_family,
                item.normalized_pattern,
                item.service,
                json.dumps(item.metadata, ensure_ascii=False),
            )
            db.execute(upsert_sql, row)
        db.commit()
        return processed
    finally:
        db.close()
|
|
384
|
+
|
|
385
|
+
|
|
386
|
+
def store_candidates(candidates: Iterable[Candidate], db_path: str | None = None) -> int:
    """Insert candidates into the ``candidates`` table and return how many were written.

    The score breakdown, source signature ids, source event ids and metadata
    are stored as JSON text. The commit happens once, after all inserts.

    Args:
        candidates: Candidates to persist.
        db_path: Database location, passed through to ``connect``.

    Returns:
        The number of candidates inserted.
    """
    insert_sql = '''
        INSERT INTO candidates (
            candidate_type, title, summary, score_total, score_breakdown_json,
            decision_band, source_signature_ids_json, source_event_ids_json,
            confidence, metadata_json
        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    '''
    db = connect(db_path)
    stored = 0
    try:
        # enumerate(start=1) leaves ``stored`` equal to the number inserted.
        for stored, item in enumerate(candidates, start=1):
            row = (
                item.candidate_type,
                item.title,
                item.summary,
                item.score_total,
                json.dumps(item.score_breakdown, ensure_ascii=False),
                item.decision_band,
                json.dumps(item.source_signature_ids, ensure_ascii=False),
                json.dumps(item.source_event_ids, ensure_ascii=False),
                item.confidence,
                json.dumps(item.metadata, ensure_ascii=False),
            )
            db.execute(insert_sql, row)
        db.commit()
        return stored
    finally:
        db.close()
|
|
417
|
+
|
|
418
|
+
|
|
419
|
+
def list_candidates(db_path: str | None = None, limit: int = 20) -> List[sqlite3.Row]:
    """Return stored candidates, highest ``score_total`` first.

    Ties on score are broken by ``id`` descending.

    Args:
        db_path: Database location, passed through to ``connect``.
        limit: Maximum number of rows; values below 1 are raised to 1.

    Returns:
        The matching ``candidates`` rows.
    """
    row_cap = max(1, limit)
    db = connect(db_path)
    try:
        cursor = db.execute(
            'SELECT * FROM candidates ORDER BY score_total DESC, id DESC LIMIT ?',
            (row_cap,),
        )
        return cursor.fetchall()
    finally:
        db.close()
|