@simbimbo/brainstem 0.0.3 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,16 +6,54 @@ from pathlib import Path
6
6
  from typing import Any, Iterable, List
7
7
 
8
8
  from .models import Candidate, Event, RawInputEnvelope, Signature
9
+ from .config import resolve_default_db_path
9
10
 
10
11
 
11
12
  def default_db_path() -> Path:
12
- return Path('.brainstem-state') / 'brainstem.sqlite3'
13
+ return Path(resolve_default_db_path())
13
14
 
14
15
 
15
16
  RAW_ENVELOPE_STATUSES = ("received", "canonicalized", "parse_failed", "unsupported")
16
17
  RAW_ENVELOPE_FAILURE_STATUSES = ("parse_failed", "unsupported")
17
18
 
18
19
 
20
+ def _coerce_raw_envelope_id(value: Any) -> int | None:
21
+ if isinstance(value, bool):
22
+ return None
23
+ if isinstance(value, int):
24
+ return value
25
+ if isinstance(value, str):
26
+ value = value.strip()
27
+ if not value.isdigit():
28
+ return None
29
+ return int(value)
30
+ return None
31
+
32
+
33
def _coerce_raw_envelope_id_list(raw_value: Any) -> List[int]:
    """Coerce a list or tuple of candidate ids into a list of integer ids.

    Each element is passed through ``_coerce_raw_envelope_id``; elements that
    cannot be coerced are dropped. Any input that is not a ``list`` or
    ``tuple`` (including ``None``) produces an empty list.
    """
    if not isinstance(raw_value, (list, tuple)):
        return []
    coerced = map(_coerce_raw_envelope_id, raw_value)
    return [envelope_id for envelope_id in coerced if envelope_id is not None]
43
+
44
+
45
def extract_source_raw_envelope_ids(metadata_json: str | None) -> List[int]:
    """Read the ``source_raw_envelope_ids`` entry out of a JSON metadata string.

    Returns an empty list when the string is empty/``None``, is not valid
    JSON, or does not decode to a JSON object. Otherwise the value stored
    under ``"source_raw_envelope_ids"`` is coerced with
    ``_coerce_raw_envelope_id_list``.
    """
    if metadata_json:
        try:
            parsed = json.loads(metadata_json)
        except json.JSONDecodeError:
            parsed = None
        if isinstance(parsed, dict):
            return _coerce_raw_envelope_id_list(parsed.get("source_raw_envelope_ids"))
    return []
55
+
56
+
19
57
  def _validate_canonicalization_status(status: str) -> None:
20
58
  if status not in RAW_ENVELOPE_STATUSES:
21
59
  raise ValueError(f"unsupported canonicalization_status: {status}")
@@ -176,17 +214,62 @@ def get_raw_envelope_by_id(raw_envelope_id: int, db_path: str | None = None) ->
176
214
  conn.close()
177
215
 
178
216
 
217
def get_raw_envelopes_by_ids(
    raw_envelope_ids: Iterable[int | str | object],
    db_path: str | None = None,
) -> List[sqlite3.Row]:
    """Fetch the ``raw_envelopes`` rows whose ``id`` is one of *raw_envelope_ids*.

    Args:
        raw_envelope_ids: Candidate ids. Entries are coerced with
            ``_coerce_raw_envelope_id_list``; entries that cannot be coerced
            are ignored and duplicates are queried once. Any iterable is
            accepted (list, tuple, set, generator, ...). Strings, bytes and
            dicts are not treated as id collections and yield no rows.
        db_path: Database location handed to ``connect``.

    Returns:
        The matching rows. No ``ORDER BY`` is applied, so row order is
        whatever SQLite produces. An empty list is returned, without opening
        a connection, when no usable id remains.
    """
    # The coercion helper only understands lists and tuples, so materialize
    # other iterables first; otherwise a set or generator (both valid for the
    # Iterable annotation) would silently match nothing.
    if raw_envelope_ids is not None and not isinstance(
        raw_envelope_ids, (list, tuple, str, bytes, bytearray, dict)
    ):
        try:
            raw_envelope_ids = list(raw_envelope_ids)
        except TypeError:
            # Not iterable at all: nothing to look up.
            return []

    # SQLite INTEGER columns hold signed 64-bit values; binding anything larger
    # raises OverflowError, and such an id could never match a row anyway.
    int64_min, int64_max = -(2**63), 2**63 - 1
    ids = [
        envelope_id
        for envelope_id in dict.fromkeys(_coerce_raw_envelope_id_list(raw_envelope_ids))
        if int64_min <= envelope_id <= int64_max
    ]
    if not ids:
        return []

    # SQLite caps the number of bound parameters per statement
    # (SQLITE_MAX_VARIABLE_NUMBER, 999 by default before SQLite 3.32.0), so
    # query in chunks that stay below that limit.
    chunk_size = 900

    conn = connect(db_path)
    try:
        rows: List[sqlite3.Row] = []
        for start in range(0, len(ids), chunk_size):
            chunk = ids[start:start + chunk_size]
            placeholders = ",".join(["?"] * len(chunk))
            rows.extend(
                conn.execute(
                    f"SELECT * FROM raw_envelopes WHERE id IN ({placeholders})",
                    chunk,
                ).fetchall()
            )
        return rows
    finally:
        conn.close()
234
+
235
+
179
236
  def _recent_raw_envelopes_query(
180
237
  canonicalization_status: str | None,
181
238
  *,
182
239
  failures_only: bool,
183
- ) -> tuple[str, tuple[str, ...], bool]:
240
+ tenant_id: str | None = None,
241
+ source_type: str | None = None,
242
+ source_id: str | None = None,
243
+ source_path: str | None = None,
244
+ ) -> tuple[str, tuple[str, ...]]:
245
+ where_clauses: list[str] = []
246
+ args: list[str] = []
247
+
184
248
  if canonicalization_status is None and failures_only:
185
- return "WHERE canonicalization_status IN (?, ?)", RAW_ENVELOPE_FAILURE_STATUSES, True
186
- if canonicalization_status is None and not failures_only:
187
- return "", (), False
188
- _validate_canonicalization_status(canonicalization_status)
189
- return "WHERE canonicalization_status = ?", (canonicalization_status,), False
249
+ where_clauses.append("canonicalization_status IN (?, ?)")
250
+ args.extend(RAW_ENVELOPE_FAILURE_STATUSES)
251
+ elif canonicalization_status is None and not failures_only:
252
+ pass
253
+ elif canonicalization_status is not None:
254
+ _validate_canonicalization_status(canonicalization_status)
255
+ where_clauses.append("canonicalization_status = ?")
256
+ args.append(canonicalization_status)
257
+
258
+ if tenant_id is not None:
259
+ where_clauses.append("tenant_id = ?")
260
+ args.append(tenant_id)
261
+ if source_type is not None:
262
+ where_clauses.append("source_type = ?")
263
+ args.append(source_type)
264
+ if source_id is not None:
265
+ where_clauses.append("source_id = ?")
266
+ args.append(source_id)
267
+ if source_path is not None:
268
+ where_clauses.append("source_path = ?")
269
+ args.append(source_path)
270
+
271
+ where_clause = f"WHERE {' AND '.join(where_clauses)}" if where_clauses else ""
272
+ return where_clause, tuple(args)
190
273
 
191
274
 
192
275
  def list_recent_raw_envelopes(
@@ -195,10 +278,21 @@ def list_recent_raw_envelopes(
195
278
  limit: int = 20,
196
279
  *,
197
280
  failures_only: bool = False,
281
+ tenant_id: str | None = None,
282
+ source_type: str | None = None,
283
+ source_id: str | None = None,
284
+ source_path: str | None = None,
198
285
  ) -> List[sqlite3.Row]:
199
286
  conn = connect(db_path)
200
287
  try:
201
- where_clause, status_args, _ = _recent_raw_envelopes_query(status, failures_only=failures_only)
288
+ where_clause, status_args = _recent_raw_envelopes_query(
289
+ status,
290
+ failures_only=failures_only,
291
+ tenant_id=tenant_id,
292
+ source_type=source_type,
293
+ source_id=source_id,
294
+ source_path=source_path,
295
+ )
202
296
  prefix = f"{where_clause} " if where_clause else ""
203
297
  rows = conn.execute(
204
298
  f"""
@@ -214,6 +308,51 @@ def list_recent_raw_envelopes(
214
308
  conn.close()
215
309
 
216
310
 
311
def list_canonical_events(
    db_path: str | None = None,
    limit: int = 20,
    *,
    tenant_id: str | None = None,
    source_type: str | None = None,
    host: str | None = None,
    service: str | None = None,
    severity: str | None = None,
) -> List[sqlite3.Row]:
    """List ``raw_envelopes`` rows whose status is ``canonicalized``, newest id first.

    Args:
        db_path: Database location handed to ``connect``.
        limit: Maximum number of rows; values below 1 are raised to 1.
        tenant_id, source_type, host, service, severity: Optional equality
            filters on the columns of the same name; ``None`` disables a filter.

    Returns:
        The matching rows ordered by ``id`` descending.
    """
    conn = connect(db_path)
    try:
        conditions = ["canonicalization_status = ?"]
        params: List[str] = ["canonicalized"]

        optional_filters = (
            ("tenant_id = ?", tenant_id),
            ("source_type = ?", source_type),
            ("host = ?", host),
            ("service = ?", service),
            ("severity = ?", severity),
        )
        for condition, wanted in optional_filters:
            if wanted is not None:
                conditions.append(condition)
                params.append(wanted)

        where_clause = " WHERE " + " AND ".join(conditions)
        cursor = conn.execute(
            f"""
            SELECT * FROM raw_envelopes
            {where_clause}
            ORDER BY id DESC
            LIMIT ?
            """,
            (*params, max(1, limit)),
        )
        return cursor.fetchall()
    finally:
        conn.close()
354
+
355
+
217
356
  def list_recent_failed_raw_envelopes(
218
357
  db_path: str | None = None,
219
358
  *,
@@ -309,6 +448,72 @@ def get_source_dimension_summaries(
309
448
  conn.close()
310
449
 
311
450
 
451
def get_source_status_summaries(
    db_path: str | None = None,
    *,
    limit: int = 20,
    tenant_id: str | None = None,
    source_type: str | None = None,
    source_id: str | None = None,
    source_path: str | None = None,
) -> List[dict[str, Any]]:
    """Summarize raw-envelope counts per (tenant, source type, source id, source path).

    ``init_db(db_path)`` is called before connecting (presumably so the schema
    exists -- confirm against ``init_db``).

    Args:
        db_path: Database location handed to ``init_db`` and ``connect``.
        limit: Maximum number of groups; values below 1 are raised to 1.
        tenant_id, source_type, source_id, source_path: Optional equality
            filters; ``None`` disables a filter.

    Returns:
        One dict per group, ordered by most recent ``timestamp`` and then by
        row count, both descending. Each dict carries the grouping columns
        (empty string in place of a missing ``source_type`` / ``source_id`` /
        ``source_path``), ``raw_count``, the ``canonicalized_count`` /
        ``parse_failed_count`` / ``unsupported_count`` tallies, and the
        ``first_seen_at`` / ``last_seen_at`` timestamps.
    """

    def _summarize(row: Any) -> dict[str, Any]:
        # Turn one aggregate row into the plain-dict shape returned to callers.
        return {
            "tenant_id": row["tenant_id"],
            "source_type": row["source_type"] or "",
            "source_id": row["source_id"] or "",
            "source_path": row["source_path"] or "",
            "raw_count": int(row["raw_count"]),
            "canonicalized_count": int(row["canonicalized_count"] or 0),
            "parse_failed_count": int(row["parse_failed_count"] or 0),
            "unsupported_count": int(row["unsupported_count"] or 0),
            "first_seen_at": row["first_seen_at"],
            "last_seen_at": row["last_seen_at"],
        }

    init_db(db_path)
    conn = connect(db_path)
    try:
        # "WHERE 1 = 1" lets every optional filter be appended as " AND ...".
        sql_parts = [
            """
            SELECT
                tenant_id,
                source_type,
                source_id,
                source_path,
                COUNT(*) AS raw_count,
                SUM(CASE WHEN canonicalization_status = 'canonicalized' THEN 1 ELSE 0 END) AS canonicalized_count,
                SUM(CASE WHEN canonicalization_status = 'parse_failed' THEN 1 ELSE 0 END) AS parse_failed_count,
                SUM(CASE WHEN canonicalization_status = 'unsupported' THEN 1 ELSE 0 END) AS unsupported_count,
                MIN(timestamp) AS first_seen_at,
                MAX(timestamp) AS last_seen_at
            FROM raw_envelopes
            WHERE 1 = 1
        """
        ]
        params: list[Any] = []

        optional_filters = (
            (" AND tenant_id = ?", tenant_id),
            (" AND source_type = ?", source_type),
            (" AND source_id = ?", source_id),
            (" AND source_path = ?", source_path),
        )
        for fragment, wanted in optional_filters:
            if wanted is not None:
                sql_parts.append(fragment)
                params.append(wanted)

        sql_parts.append(
            """
            GROUP BY tenant_id, source_type, source_id, source_path
            ORDER BY last_seen_at DESC, raw_count DESC
            LIMIT ?
        """
        )
        params.append(max(1, limit))

        cursor = conn.execute("".join(sql_parts), params)
        return [_summarize(row) for row in cursor.fetchall()]
    finally:
        conn.close()
515
+
516
+
312
517
  def _get_source_dimension_summaries_from_conn(
313
518
  conn: sqlite3.Connection,
314
519
  *,
@@ -358,6 +563,27 @@ def store_signatures(signatures: Iterable[Signature], db_path: str | None = None
358
563
  count = 0
359
564
  try:
360
565
  for signature in signatures:
566
+ row = conn.execute(
567
+ "SELECT metadata_json FROM signatures WHERE signature_key = ?",
568
+ (signature.signature_key,),
569
+ ).fetchone()
570
+
571
+ metadata = dict(signature.metadata)
572
+ raw_ids = _coerce_raw_envelope_id_list(metadata.get("source_raw_envelope_ids"))
573
+ if not raw_ids:
574
+ raw_id = _coerce_raw_envelope_id(metadata.get("source_raw_envelope_id"))
575
+ if raw_id is not None:
576
+ raw_ids = [raw_id]
577
+ metadata.pop("source_raw_envelope_id", None)
578
+
579
+ if row is not None:
580
+ existing_metadata = json.loads(row["metadata_json"] or "{}")
581
+ if not isinstance(existing_metadata, dict):
582
+ existing_metadata = {}
583
+ existing_raw_ids = _coerce_raw_envelope_id_list(existing_metadata.get("source_raw_envelope_ids"))
584
+ metadata = dict(existing_metadata) | dict(metadata)
585
+ metadata["source_raw_envelope_ids"] = sorted(set(existing_raw_ids + raw_ids))
586
+
361
587
  conn.execute(
362
588
  '''
363
589
  INSERT INTO signatures (
@@ -373,7 +599,7 @@ def store_signatures(signatures: Iterable[Signature], db_path: str | None = None
373
599
  signature.event_family,
374
600
  signature.normalized_pattern,
375
601
  signature.service,
376
- json.dumps(signature.metadata, ensure_ascii=False),
602
+ json.dumps(metadata, ensure_ascii=False),
377
603
  ),
378
604
  )
379
605
  count += 1
@@ -416,12 +642,79 @@ def store_candidates(candidates: Iterable[Candidate], db_path: str | None = None
416
642
  conn.close()
417
643
 
418
644
 
419
def list_candidates(
    db_path: str | None = None,
    limit: int = 20,
    *,
    candidate_type: str | None = None,
    decision_band: str | None = None,
    min_score_total: float | None = None,
) -> List[sqlite3.Row]:
    """List stored candidates, highest ``score_total`` first (ties: newest id first).

    Args:
        db_path: Database location handed to ``connect``.
        limit: Maximum number of rows; values below 1 are raised to 1.
        candidate_type: Keep only rows with this ``candidate_type``.
        decision_band: Keep only rows with this ``decision_band``.
        min_score_total: Keep only rows whose ``score_total`` is at least this.

    Returns:
        The matching ``candidates`` rows.
    """
    conn = connect(db_path)
    try:
        optional_filters = (
            ("candidate_type = ?", candidate_type),
            ("decision_band = ?", decision_band),
            ("score_total >= ?", min_score_total),
        )
        active = [(condition, wanted) for condition, wanted in optional_filters if wanted is not None]

        where_clause = ""
        if active:
            where_clause = " WHERE " + " AND ".join(condition for condition, _ in active)
        params: List[Any] = [wanted for _, wanted in active]

        cursor = conn.execute(
            f'SELECT * FROM candidates{where_clause} ORDER BY score_total DESC, id DESC LIMIT ?',
            (*params, max(1, limit)),
        )
        return cursor.fetchall()
    finally:
        conn.close()
679
+
680
+
681
+ def list_signatures(
682
+ db_path: str | None = None,
683
+ limit: int = 20,
684
+ *,
685
+ event_family: str | None = None,
686
+ service: str | None = None,
687
+ min_occurrence_count: int | None = None,
688
+ ) -> List[sqlite3.Row]:
689
+ conn = connect(db_path)
690
+ try:
691
+ where_clauses: List[str] = []
692
+ args: List[Any] = []
693
+
694
+ if event_family is not None:
695
+ where_clauses.append("event_family = ?")
696
+ args.append(event_family)
697
+ if service is not None:
698
+ where_clauses.append("service = ?")
699
+ args.append(service)
700
+ if min_occurrence_count is not None:
701
+ where_clauses.append("occurrence_count >= ?")
702
+ args.append(min_occurrence_count)
703
+
704
+ where_clause = ""
705
+ if where_clauses:
706
+ where_clause = " WHERE " + " AND ".join(where_clauses)
707
+
708
+ rows = conn.execute(
709
+ f"""
710
+ SELECT
711
+ id, signature_key, event_family, normalized_pattern, service,
712
+ metadata_json, occurrence_count
713
+ FROM signatures{where_clause}
714
+ ORDER BY occurrence_count DESC, id DESC
715
+ LIMIT ?
716
+ """,
717
+ (*args, max(1, limit)),
425
718
  ).fetchall()
426
719
  return rows
427
720
  finally:
package/docs/README.md ADDED
@@ -0,0 +1,94 @@
1
+ # Runtime Examples
2
+
3
+ Use these compact examples to exercise the implemented runtime API, listener, and file-ingest paths.
4
+
5
+ ## 0) Shared runtime settings
6
+
7
+ ```bash
8
+ export BRAINSTEM_API_TOKEN=my-local-token # optional: set only if you want auth required
9
+ export BRAINSTEM_DB_PATH=/tmp/brainstem.sqlite3
10
+ ```
11
+
12
+ `BRAINSTEM_API_TOKEN` is optional. If you do not set it, omit all `X-API-Token` headers in the API examples.
13
+
14
+ ## 1) API entry point
15
+
16
+ ```bash
17
+ # Starts the runtime API
18
+ python -m uvicorn brainstem.api:app --host 127.0.0.1 --port 8000
19
+ ```
20
+
21
+ ```bash
22
+ curl -s http://127.0.0.1:8000/healthz
23
+ ```
24
+
25
+ ## 2) UDP listener entry point
26
+
27
+ ```bash
28
+ # Prints canonicalized events for each received datagram
29
+ python -m brainstem.listener --tenant demo-tenant --host 127.0.0.1 --port 5514 --source-path /var/log/syslog
30
+ ```
31
+
32
+ ```bash
33
+ printf 'Mar 22 03:10:00 fw-01 charon: IPsec SA rekey succeeded\n' | nc -u 127.0.0.1 5514
34
+ ```
35
+
36
+ ## 3) API ingest (syslog payload style)
37
+
38
+ ```bash
39
+ curl -s -X POST http://127.0.0.1:8000/ingest/event \
40
+ -H "Content-Type: application/json" \
41
+ -H "X-API-Token: $BRAINSTEM_API_TOKEN" \
42
+ -d '{"tenant_id":"demo-tenant","source_type":"syslog","source_path":"/var/log/syslog","message_raw":"Mar 22 03:11:00 fw-01 charon: child SA rekey started"}'
43
+ ```
44
+
45
+ ## 4) API ingest for file source events
46
+
47
+ ```bash
48
+ curl -s -X POST http://127.0.0.1:8000/ingest/batch \
49
+ -H "Content-Type: application/json" \
50
+ -H "X-API-Token: $BRAINSTEM_API_TOKEN" \
51
+ -d '{"threshold":2,"db_path":"/tmp/brainstem.sqlite3","events":[{"tenant_id":"demo-tenant","source_type":"file","source_path":"/tmp/manual.log","message_raw":"vpn tunnel dropped and recovered"}]}'
52
+ ```
53
+
54
+ ## 5) Runtime inspection endpoints (same db path)
55
+
56
+ ```bash
57
+ curl -s "http://127.0.0.1:8000/ingest/recent?db_path=/tmp/brainstem.sqlite3&limit=5" \
58
+ -H "X-API-Token: $BRAINSTEM_API_TOKEN"
59
+ curl -s "http://127.0.0.1:8000/candidates?db_path=/tmp/brainstem.sqlite3&limit=5" \
60
+ -H "X-API-Token: $BRAINSTEM_API_TOKEN"
61
+ curl -s "http://127.0.0.1:8000/signatures?db_path=/tmp/brainstem.sqlite3&limit=5" \
62
+ -H "X-API-Token: $BRAINSTEM_API_TOKEN"
63
+ curl -s "http://127.0.0.1:8000/raw_envelopes?db_path=/tmp/brainstem.sqlite3&limit=5" \
64
+ -H "X-API-Token: $BRAINSTEM_API_TOKEN"
65
+ curl -s "http://127.0.0.1:8000/stats?db_path=/tmp/brainstem.sqlite3" \
66
+ -H "X-API-Token: $BRAINSTEM_API_TOKEN"
67
+ curl -s "http://127.0.0.1:8000/failures?db_path=/tmp/brainstem.sqlite3&limit=5" \
68
+ -H "X-API-Token: $BRAINSTEM_API_TOKEN"
69
+ curl -s "http://127.0.0.1:8000/sources?db_path=/tmp/brainstem.sqlite3&limit=5" \
70
+ -H "X-API-Token: $BRAINSTEM_API_TOKEN"
71
+ curl -s "http://127.0.0.1:8000/sources/status?db_path=/tmp/brainstem.sqlite3&limit=5" \
72
+ -H "X-API-Token: $BRAINSTEM_API_TOKEN"
73
+ ```
74
+
75
+ ## 6) Direct file ingest helper path
76
+
77
+ ```bash
78
+ python - <<'PY'
79
+ from brainstem.ingest import run_ingest_file
80
+
81
+ result = run_ingest_file(
82
+ "tests/fixtures/sample_syslog.log",
83
+ tenant_id="demo-tenant",
84
+ threshold=2,
85
+ db_path="/tmp/brainstem.sqlite3",
86
+ )
87
+ print({
88
+ "events": len(result.events),
89
+ "signatures": len(result.signatures),
90
+ "candidates": len(result.candidates),
91
+ "parse_failed": result.parse_failed,
92
+ })
93
+ PY
94
+ ```