@simbimbo/brainstem 0.0.1 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. package/CHANGELOG.md +87 -0
  2. package/README.md +99 -3
  3. package/brainstem/__init__.py +3 -0
  4. package/brainstem/api.py +257 -0
  5. package/brainstem/connectors/__init__.py +1 -0
  6. package/brainstem/connectors/logicmonitor.py +26 -0
  7. package/brainstem/connectors/types.py +16 -0
  8. package/brainstem/demo.py +64 -0
  9. package/brainstem/fingerprint.py +44 -0
  10. package/brainstem/ingest.py +108 -0
  11. package/brainstem/instrumentation.py +38 -0
  12. package/brainstem/interesting.py +62 -0
  13. package/brainstem/models.py +80 -0
  14. package/brainstem/recurrence.py +112 -0
  15. package/brainstem/scoring.py +38 -0
  16. package/brainstem/storage.py +428 -0
  17. package/docs/adapters.md +435 -0
  18. package/docs/api.md +380 -0
  19. package/docs/architecture.md +333 -0
  20. package/docs/connectors.md +66 -0
  21. package/docs/data-model.md +290 -0
  22. package/docs/design-governance.md +595 -0
  23. package/docs/mvp-flow.md +109 -0
  24. package/docs/roadmap.md +87 -0
  25. package/docs/scoring.md +424 -0
  26. package/docs/v0.0.1.md +277 -0
  27. package/docs/vision.md +85 -0
  28. package/package.json +6 -14
  29. package/pyproject.toml +18 -0
  30. package/tests/fixtures/sample_syslog.log +6 -0
  31. package/tests/test_api.py +319 -0
  32. package/tests/test_canonicalization.py +28 -0
  33. package/tests/test_demo.py +25 -0
  34. package/tests/test_fingerprint.py +22 -0
  35. package/tests/test_ingest.py +15 -0
  36. package/tests/test_instrumentation.py +16 -0
  37. package/tests/test_interesting.py +36 -0
  38. package/tests/test_logicmonitor.py +22 -0
  39. package/tests/test_recurrence.py +16 -0
  40. package/tests/test_scoring.py +21 -0
  41. package/tests/test_storage.py +294 -0
@@ -0,0 +1,428 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import sqlite3
5
+ from pathlib import Path
6
+ from typing import Any, Iterable, List
7
+
8
+ from .models import Candidate, Event, RawInputEnvelope, Signature
9
+
10
+
11
def default_db_path() -> Path:
    """Return the default location of the SQLite database file.

    The result is the relative path ``.brainstem-state/brainstem.sqlite3``;
    no directory or file is created here.
    """
    state_dir = Path('.brainstem-state')
    return state_dir / 'brainstem.sqlite3'
13
+
14
+
15
# Every value accepted for raw_envelopes.canonicalization_status.
RAW_ENVELOPE_STATUSES = (
    "received",
    "canonicalized",
    "parse_failed",
    "unsupported",
)
# The statuses selected when a listing is restricted to failures.
RAW_ENVELOPE_FAILURE_STATUSES = (
    "parse_failed",
    "unsupported",
)
17
+
18
+
19
def _validate_canonicalization_status(status: str) -> None:
    """Ensure ``status`` is one of ``RAW_ENVELOPE_STATUSES``.

    Raises:
        ValueError: If ``status`` is not a known canonicalization status.
    """
    if status in RAW_ENVELOPE_STATUSES:
        return
    raise ValueError(f"unsupported canonicalization_status: {status}")
22
+
23
+
24
def connect(db_path: str | None = None) -> sqlite3.Connection:
    """Open a SQLite connection, creating the parent directory when missing.

    Args:
        db_path: Location of the database file. Any falsy value (``None`` or
            an empty string) selects ``default_db_path()``.

    Returns:
        A connection whose rows are ``sqlite3.Row`` objects, so columns can be
        read by name. The caller is responsible for closing it.
    """
    target = default_db_path() if not db_path else Path(db_path)
    # sqlite3 creates the file itself but not the directories above it.
    target.parent.mkdir(parents=True, exist_ok=True)
    connection = sqlite3.connect(target)
    connection.row_factory = sqlite3.Row
    return connection
30
+
31
+
32
def init_db(db_path: str | None = None) -> None:
    """Create the storage tables if they do not exist yet.

    The schema covers four tables: ``events``, ``raw_envelopes``,
    ``signatures`` and ``candidates``. Each statement uses
    ``CREATE TABLE IF NOT EXISTS``, so calling this repeatedly is harmless.

    Args:
        db_path: Database file location, forwarded to ``connect``.
    """
    schema = '''
        CREATE TABLE IF NOT EXISTS events (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            tenant_id TEXT NOT NULL,
            source_type TEXT NOT NULL,
            timestamp TEXT NOT NULL,
            host TEXT,
            service TEXT,
            severity TEXT,
            asset_id TEXT,
            source_path TEXT,
            facility TEXT,
            message_raw TEXT NOT NULL,
            structured_fields_json TEXT NOT NULL,
            correlation_keys_json TEXT NOT NULL
        );

        CREATE TABLE IF NOT EXISTS raw_envelopes (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            tenant_id TEXT NOT NULL,
            source_type TEXT NOT NULL,
            source_id TEXT,
            source_name TEXT,
            timestamp TEXT NOT NULL,
            host TEXT,
            service TEXT,
            severity TEXT,
            asset_id TEXT,
            source_path TEXT,
            facility TEXT,
            message_raw TEXT NOT NULL,
            structured_fields_json TEXT NOT NULL,
            correlation_keys_json TEXT NOT NULL,
            metadata_json TEXT NOT NULL,
            canonicalization_status TEXT NOT NULL DEFAULT 'received',
            failure_reason TEXT
        );

        CREATE TABLE IF NOT EXISTS signatures (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            signature_key TEXT NOT NULL UNIQUE,
            event_family TEXT NOT NULL,
            normalized_pattern TEXT NOT NULL,
            service TEXT,
            metadata_json TEXT NOT NULL,
            occurrence_count INTEGER NOT NULL DEFAULT 0
        );

        CREATE TABLE IF NOT EXISTS candidates (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            candidate_type TEXT NOT NULL,
            title TEXT NOT NULL,
            summary TEXT NOT NULL,
            score_total REAL NOT NULL,
            score_breakdown_json TEXT NOT NULL,
            decision_band TEXT NOT NULL,
            source_signature_ids_json TEXT NOT NULL,
            source_event_ids_json TEXT NOT NULL,
            confidence REAL NOT NULL,
            metadata_json TEXT NOT NULL
        );
        '''
    connection = connect(db_path)
    try:
        connection.executescript(schema)
        connection.commit()
    finally:
        connection.close()
102
+
103
+
104
def store_raw_envelopes(raw_envelopes: Iterable[RawInputEnvelope], db_path: str | None = None) -> List[int]:
    """Insert raw envelopes and return the row ids assigned to them.

    Every row is written with ``canonicalization_status`` set to
    ``"received"`` and a NULL ``failure_reason``. The structured fields,
    correlation keys and metadata are serialized to JSON text.

    Args:
        raw_envelopes: Envelopes to persist, in insertion order.
        db_path: Database file location, forwarded to ``connect``.

    Returns:
        The ``raw_envelopes.id`` of each inserted row, in input order.
    """
    insert_sql = '''
        INSERT INTO raw_envelopes (
            tenant_id, source_type, source_id, source_name, timestamp, host, service, severity,
            asset_id, source_path, facility, message_raw,
            structured_fields_json, correlation_keys_json, metadata_json,
            canonicalization_status, failure_reason
        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        '''
    connection = connect(db_path)
    try:
        inserted_ids: List[int] = []
        for envelope in raw_envelopes:
            params = (
                envelope.tenant_id,
                envelope.source_type,
                envelope.source_id,
                envelope.source_name,
                envelope.timestamp,
                envelope.host,
                envelope.service,
                envelope.severity,
                envelope.asset_id,
                envelope.source_path,
                envelope.facility,
                envelope.message_raw,
                json.dumps(envelope.structured_fields, ensure_ascii=False),
                json.dumps(envelope.correlation_keys, ensure_ascii=False),
                json.dumps(envelope.metadata, ensure_ascii=False),
                "received",
                None,
            )
            inserted = connection.execute(insert_sql, params)
            inserted_ids.append(int(inserted.lastrowid))
        # One commit for the whole batch: a failure part-way through leaves nothing stored.
        connection.commit()
        return inserted_ids
    finally:
        connection.close()
143
+
144
+
145
def set_raw_envelope_status(
    raw_envelope_id: int,
    status: str,
    db_path: str | None = None,
    *,
    failure_reason: str | None = None,
) -> None:
    """Update the canonicalization status and failure reason of one raw envelope.

    ``failure_reason`` is always written, so leaving it at ``None`` clears any
    reason stored earlier. No error is raised when ``raw_envelope_id`` matches
    no row.

    Args:
        raw_envelope_id: ``raw_envelopes.id`` of the row to update.
        status: New status; must be one of ``RAW_ENVELOPE_STATUSES``.
        db_path: Database file location, forwarded to ``connect``.
        failure_reason: Text stored alongside the status, or ``None``.

    Raises:
        ValueError: If ``status`` is not a known canonicalization status.
    """
    # Validate before opening the database so a bad status touches nothing.
    _validate_canonicalization_status(status)
    update_sql = '''
        UPDATE raw_envelopes
        SET canonicalization_status = ?, failure_reason = ?
        WHERE id = ?
        '''
    connection = connect(db_path)
    try:
        connection.execute(update_sql, (status, failure_reason, raw_envelope_id))
        connection.commit()
    finally:
        connection.close()
166
+
167
+
168
def get_raw_envelope_by_id(raw_envelope_id: int, db_path: str | None = None) -> sqlite3.Row | None:
    """Fetch a single raw envelope row by primary key.

    Args:
        raw_envelope_id: ``raw_envelopes.id`` to look up.
        db_path: Database file location, forwarded to ``connect``.

    Returns:
        The matching row, or ``None`` when no row has that id.
    """
    connection = connect(db_path)
    try:
        lookup = connection.execute(
            "SELECT * FROM raw_envelopes WHERE id = ?",
            (raw_envelope_id,),
        )
        return lookup.fetchone()
    finally:
        connection.close()
177
+
178
+
179
def _recent_raw_envelopes_query(
    canonicalization_status: str | None,
    *,
    failures_only: bool,
) -> tuple[str, tuple[str, ...], bool]:
    """Build the WHERE clause used to filter a raw-envelope listing.

    An explicit status takes precedence: when one is given, ``failures_only``
    is ignored.

    Args:
        canonicalization_status: Exact status to match, or ``None`` for no
            exact-status filter.
        failures_only: With no explicit status, restrict the listing to
            ``RAW_ENVELOPE_FAILURE_STATUSES``.

    Returns:
        A ``(where_clause, parameters, failure_filter_applied)`` triple. The
        clause is an empty string when no filtering is requested, and the flag
        is ``True`` only for the failures-only filter.

    Raises:
        ValueError: If an explicit status is not a known status.
    """
    if canonicalization_status is not None:
        _validate_canonicalization_status(canonicalization_status)
        return "WHERE canonicalization_status = ?", (canonicalization_status,), False
    if failures_only:
        return "WHERE canonicalization_status IN (?, ?)", RAW_ENVELOPE_FAILURE_STATUSES, True
    return "", (), False
190
+
191
+
192
def list_recent_raw_envelopes(
    db_path: str | None = None,
    status: str | None = None,
    limit: int = 20,
    *,
    failures_only: bool = False,
) -> List[sqlite3.Row]:
    """Return the most recently inserted raw envelopes, newest first.

    Args:
        db_path: Database file location, forwarded to ``connect``.
        status: Exact canonicalization status to match, or ``None``.
        limit: Maximum number of rows; values below 1 are raised to 1.
        failures_only: With no explicit ``status``, return only rows whose
            status is a failure status.

    Returns:
        Matching rows ordered by descending ``id``.

    Raises:
        ValueError: If ``status`` is given but is not a known status.
    """
    connection = connect(db_path)
    try:
        where_sql, filter_args, _failure_filter = _recent_raw_envelopes_query(
            status,
            failures_only=failures_only,
        )
        clause = f"{where_sql} " if where_sql else ""
        query = f"""
            SELECT * FROM raw_envelopes
            {clause}
            ORDER BY id DESC
            LIMIT ?
            """
        # Filter parameters come first, then the row cap.
        params = (*filter_args, max(1, limit))
        return connection.execute(query, params).fetchall()
    finally:
        connection.close()
215
+
216
+
217
def list_recent_failed_raw_envelopes(
    db_path: str | None = None,
    *,
    status: str | None = None,
    limit: int = 20,
) -> List[sqlite3.Row]:
    """Return recent raw envelopes that failed canonicalization, newest first.

    With ``status`` left at ``None`` the listing covers every failure status.
    When ``status`` is given it is used as an exact match and the failure
    filter is switched off, so a non-failure status is passed through
    unchanged rather than rejected.

    Args:
        db_path: Database file location, forwarded to ``connect``.
        status: Exact status to match, or ``None`` for all failure statuses.
        limit: Maximum number of rows; values below 1 are raised to 1.

    Returns:
        Matching rows ordered by descending ``id``.
    """
    only_failures = status is None
    return list_recent_raw_envelopes(
        db_path=db_path,
        status=status,
        limit=limit,
        failures_only=only_failures,
    )
229
+
230
+
231
def store_events(events: Iterable[Event], db_path: str | None = None) -> int:
    """Insert events into the ``events`` table.

    Structured fields and correlation keys are serialized to JSON text.

    Args:
        events: Events to persist.
        db_path: Database file location, forwarded to ``connect``.

    Returns:
        The number of events inserted.
    """
    insert_sql = '''
        INSERT INTO events (
            tenant_id, source_type, timestamp, host, service, severity,
            asset_id, source_path, facility, message_raw,
            structured_fields_json, correlation_keys_json
        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        '''
    connection = connect(db_path)
    inserted = 0
    try:
        for inserted, item in enumerate(events, start=1):
            params = (
                item.tenant_id,
                item.source_type,
                item.timestamp,
                item.host,
                item.service,
                item.severity,
                item.asset_id,
                item.source_path,
                item.facility,
                item.message_raw,
                json.dumps(item.structured_fields, ensure_ascii=False),
                json.dumps(item.correlation_keys, ensure_ascii=False),
            )
            connection.execute(insert_sql, params)
        # Single commit for the batch.
        connection.commit()
        return inserted
    finally:
        connection.close()
264
+
265
+
266
# raw_envelopes columns that may be used as a grouping dimension in source summaries.
# Names are interpolated into SQL, so this tuple doubles as the whitelist.
SOURCE_SUMMARY_DIMENSIONS = ("source_type", "source_path", "source_id", "source_name", "host", "service")
274
+
275
+
276
def _summarize_raw_envelopes_by_dimension(
    conn: sqlite3.Connection,
    dimension: str,
    limit: int = 20,
) -> List[dict[str, int | str]]:
    """Count raw envelopes per distinct value of one column.

    Rows whose value is NULL, empty or whitespace-only are excluded.

    Args:
        conn: Open connection to query.
        dimension: Column to group by; must be in ``SOURCE_SUMMARY_DIMENSIONS``.
        limit: Maximum number of groups; values below 1 are raised to 1.

    Returns:
        ``{"value": ..., "count": ...}`` dicts, ordered by descending count and
        then ascending value.

    Raises:
        ValueError: If ``dimension`` is not an allowed column name.
    """
    # The column name is interpolated into the SQL text, so only whitelisted names get through.
    if dimension not in SOURCE_SUMMARY_DIMENSIONS:
        raise ValueError(f"unsupported dimension: {dimension}")
    query = f"""
        SELECT {dimension} AS value, COUNT(*) AS count
        FROM raw_envelopes
        WHERE COALESCE(TRIM({dimension}), '') <> ''
        GROUP BY {dimension}
        ORDER BY COUNT(*) DESC, value ASC
        LIMIT ?
        """
    summary: List[dict[str, int | str]] = []
    for group in conn.execute(query, (max(1, limit),)).fetchall():
        summary.append({"value": group["value"], "count": int(group["count"])})
    return summary
297
+
298
+
299
def get_source_dimension_summaries(
    db_path: str | None = None,
    *,
    limit: int = 20,
) -> dict[str, List[dict[str, int | str]]]:
    """Return per-dimension value counts for the stored raw envelopes.

    The schema is initialized first, so this also works on a database that has
    not been set up yet.

    Args:
        db_path: Database file location, forwarded to ``init_db`` and ``connect``.
        limit: Maximum number of values reported per dimension.

    Returns:
        A mapping from dimension name to its list of value/count entries.
    """
    init_db(db_path)
    connection = connect(db_path)
    try:
        summaries = _get_source_dimension_summaries_from_conn(connection, limit=limit)
        return summaries
    finally:
        connection.close()
310
+
311
+
312
def _get_source_dimension_summaries_from_conn(
    conn: sqlite3.Connection,
    *,
    limit: int = 20,
) -> dict[str, List[dict[str, int | str]]]:
    """Summarize raw envelopes along every supported source dimension.

    The dimensions are taken from ``SOURCE_SUMMARY_DIMENSIONS`` so that the
    keys reported here and the whitelist enforced by
    ``_summarize_raw_envelopes_by_dimension`` cannot drift apart.

    Args:
        conn: Open connection to query; it is not closed here.
        limit: Maximum number of values reported per dimension.

    Returns:
        A mapping from dimension name to its list of value/count entries, with
        keys in the order of ``SOURCE_SUMMARY_DIMENSIONS``.
    """
    return {
        dimension: _summarize_raw_envelopes_by_dimension(conn, dimension, limit=limit)
        for dimension in SOURCE_SUMMARY_DIMENSIONS
    }
325
+
326
+
327
def _query_count(conn: sqlite3.Connection, query: str) -> int:
    """Run ``query`` and return the first column of its first row as an int.

    Args:
        conn: Open connection to query.
        query: SQL text executed without parameters; it must yield at least
            one row (for example a ``SELECT COUNT(*)``).
    """
    first_row = conn.execute(query).fetchone()
    return int(first_row[0])
329
+
330
+
331
def get_ingest_stats(db_path: str | None = None) -> dict[str, Any]:
    """Collect ingest counters and source summaries from the database.

    The schema is initialized first, so this also works on a database that has
    not been set up yet.

    Args:
        db_path: Database file location, forwarded to ``init_db`` and ``connect``.

    Returns:
        A dict with these keys: ``received`` (all raw envelopes, whatever their
        status), ``canonicalized``, ``parse_failed`` and ``unsupported`` (raw
        envelopes in that status), ``candidates_generated`` (rows in
        ``candidates``) and ``source_summaries`` (per-dimension value counts).
    """
    init_db(db_path)
    connection = connect(db_path)
    try:
        stats: dict[str, Any] = {}
        stats["received"] = _query_count(connection, "SELECT COUNT(*) FROM raw_envelopes")
        stats["canonicalized"] = _query_count(
            connection,
            "SELECT COUNT(*) FROM raw_envelopes WHERE canonicalization_status = 'canonicalized'",
        )
        stats["parse_failed"] = _query_count(
            connection,
            "SELECT COUNT(*) FROM raw_envelopes WHERE canonicalization_status = 'parse_failed'",
        )
        stats["unsupported"] = _query_count(
            connection,
            "SELECT COUNT(*) FROM raw_envelopes WHERE canonicalization_status = 'unsupported'",
        )
        stats["candidates_generated"] = _query_count(connection, "SELECT COUNT(*) FROM candidates")
        stats["source_summaries"] = _get_source_dimension_summaries_from_conn(connection)
        return stats
    finally:
        connection.close()
354
+
355
+
356
def store_signatures(signatures: Iterable[Signature], db_path: str | None = None) -> int:
    """Upsert signatures keyed by ``signature_key``.

    A new key is inserted with ``occurrence_count`` 1. For a key that already
    exists, the stored count is incremented and ``metadata_json`` and
    ``service`` are overwritten with the incoming values; ``event_family`` and
    ``normalized_pattern`` keep their stored values.

    Args:
        signatures: Signatures to persist.
        db_path: Database file location, forwarded to ``connect``.

    Returns:
        The number of signatures processed (inserts and updates together).
    """
    upsert_sql = '''
        INSERT INTO signatures (
            signature_key, event_family, normalized_pattern, service, metadata_json, occurrence_count
        ) VALUES (?, ?, ?, ?, ?, 1)
        ON CONFLICT(signature_key) DO UPDATE SET
            occurrence_count = occurrence_count + 1,
            metadata_json = excluded.metadata_json,
            service = excluded.service
        '''
    connection = connect(db_path)
    processed = 0
    try:
        for processed, item in enumerate(signatures, start=1):
            params = (
                item.signature_key,
                item.event_family,
                item.normalized_pattern,
                item.service,
                json.dumps(item.metadata, ensure_ascii=False),
            )
            connection.execute(upsert_sql, params)
        # Single commit for the batch.
        connection.commit()
        return processed
    finally:
        connection.close()
384
+
385
+
386
def store_candidates(candidates: Iterable[Candidate], db_path: str | None = None) -> int:
    """Insert candidates into the ``candidates`` table.

    The score breakdown, source signature ids, source event ids and metadata
    are serialized to JSON text.

    Args:
        candidates: Candidates to persist.
        db_path: Database file location, forwarded to ``connect``.

    Returns:
        The number of candidates inserted.
    """
    insert_sql = '''
        INSERT INTO candidates (
            candidate_type, title, summary, score_total, score_breakdown_json,
            decision_band, source_signature_ids_json, source_event_ids_json,
            confidence, metadata_json
        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        '''
    connection = connect(db_path)
    inserted = 0
    try:
        for inserted, item in enumerate(candidates, start=1):
            params = (
                item.candidate_type,
                item.title,
                item.summary,
                item.score_total,
                json.dumps(item.score_breakdown, ensure_ascii=False),
                item.decision_band,
                json.dumps(item.source_signature_ids, ensure_ascii=False),
                json.dumps(item.source_event_ids, ensure_ascii=False),
                item.confidence,
                json.dumps(item.metadata, ensure_ascii=False),
            )
            connection.execute(insert_sql, params)
        # Single commit for the batch.
        connection.commit()
        return inserted
    finally:
        connection.close()
417
+
418
+
419
def list_candidates(db_path: str | None = None, limit: int = 20) -> List[sqlite3.Row]:
    """Return the highest-scoring candidates.

    Args:
        db_path: Database file location, forwarded to ``connect``.
        limit: Maximum number of rows; values below 1 are raised to 1.

    Returns:
        Candidate rows ordered by descending ``score_total``, with ties broken
        by descending ``id``.
    """
    row_cap = max(1, limit)
    connection = connect(db_path)
    try:
        ranked = connection.execute(
            'SELECT * FROM candidates ORDER BY score_total DESC, id DESC LIMIT ?',
            (row_cap,),
        )
        return ranked.fetchall()
    finally:
        connection.close()