@simbimbo/brainstem 0.0.3 → 0.0.5

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -1,21 +1,27 @@
1
1
  import sqlite3
2
2
  from pathlib import Path
3
3
 
4
- from brainstem.ingest import ingest_syslog_lines, signatures_for_events
4
+ from brainstem.ingest import ingest_syslog_lines, parse_syslog_envelopes, run_ingest_pipeline, signatures_for_events
5
5
  from brainstem.models import RawInputEnvelope
6
6
  from brainstem.recurrence import build_recurrence_candidates
7
7
  from brainstem.storage import (
8
+ extract_source_raw_envelope_ids,
9
+ create_sqlite_snapshot,
10
+ clear_storage_tables,
8
11
  get_raw_envelope_by_id,
12
+ get_storage_counts,
9
13
  get_ingest_stats,
10
14
  init_db,
11
15
  list_candidates,
12
16
  get_source_dimension_summaries,
17
+ get_source_status_summaries,
13
18
  store_candidates,
14
19
  store_events,
15
20
  list_recent_failed_raw_envelopes,
16
21
  list_recent_raw_envelopes,
17
22
  store_raw_envelopes,
18
23
  set_raw_envelope_status,
24
+ resolve_maintenance_tables,
19
25
  store_signatures,
20
26
  )
21
27
 
@@ -41,6 +47,39 @@ def test_storage_round_trip(tmp_path: Path) -> None:
41
47
  assert rows[0]['title']
42
48
 
43
49
 
50
+ def test_raw_envelope_lineage_stored_in_signature_and_candidate_metadata(tmp_path: Path) -> None:
51
+ db_path = tmp_path / 'brainstem.sqlite3'
52
+ raw_envelopes = parse_syslog_envelopes(
53
+ [
54
+ "Mar 22 00:00:01 fw-01 charon: VPN tunnel dropped and recovered",
55
+ "Mar 22 00:00:03 fw-01 charon: VPN tunnel dropped and recovered",
56
+ "Mar 22 00:00:05 fw-01 charon: VPN tunnel dropped and recovered",
57
+ ],
58
+ tenant_id="client-a",
59
+ source_path="/var/log/syslog",
60
+ )
61
+ result = run_ingest_pipeline(raw_envelopes, threshold=2, db_path=str(db_path))
62
+
63
+ assert result.raw_envelope_ids == [1, 2, 3]
64
+ assert all(event.raw_envelope_id is not None for event in result.events)
65
+
66
+ signatures = sqlite3.connect(db_path)
67
+ try:
68
+ signature_rows = signatures.execute(
69
+ "SELECT metadata_json FROM signatures ORDER BY id ASC"
70
+ ).fetchall()
71
+ finally:
72
+ signatures.close()
73
+ assert signature_rows
74
+ signature_metadata = extract_source_raw_envelope_ids(signature_rows[0][0])
75
+ assert set(signature_metadata) == set(result.raw_envelope_ids)
76
+
77
+ candidate_rows = list_candidates(str(db_path), limit=10)
78
+ assert candidate_rows
79
+ candidate_metadata = extract_source_raw_envelope_ids(candidate_rows[0]["metadata_json"])
80
+ assert set(candidate_metadata) == set(result.raw_envelope_ids)
81
+
82
+
44
83
  def test_raw_envelope_records_are_persisted(tmp_path: Path) -> None:
45
84
  db_path = tmp_path / 'brainstem.sqlite3'
46
85
  init_db(str(db_path))
@@ -184,6 +223,71 @@ def test_source_dimension_summaries(tmp_path: Path) -> None:
184
223
  }
185
224
 
186
225
 
226
+ def test_source_status_summaries_from_raw_envelope_history(tmp_path: Path) -> None:
227
+ db_path = tmp_path / 'brainstem.sqlite3'
228
+ init_db(str(db_path))
229
+ raw_ids = store_raw_envelopes(
230
+ [
231
+ RawInputEnvelope(
232
+ tenant_id='client-a',
233
+ source_type='syslog',
234
+ source_id='fw-01',
235
+ source_name='edge-fw-01',
236
+ timestamp='2026-03-22T00:00:01Z',
237
+ message_raw='event 1',
238
+ source_path='/var/log/syslog',
239
+ host='fw-01',
240
+ service='charon',
241
+ ),
242
+ RawInputEnvelope(
243
+ tenant_id='client-a',
244
+ source_type='syslog',
245
+ source_id='fw-01',
246
+ source_name='edge-fw-01',
247
+ timestamp='2026-03-22T00:00:02Z',
248
+ message_raw='event 2',
249
+ source_path='/var/log/syslog',
250
+ host='fw-01',
251
+ service='charon',
252
+ ),
253
+ RawInputEnvelope(
254
+ tenant_id='client-a',
255
+ source_type='syslog',
256
+ source_id='fw-02',
257
+ source_name='edge-fw-02',
258
+ timestamp='2026-03-22T00:00:03Z',
259
+ message_raw='event 3',
260
+ source_path='/var/log/auth.log',
261
+ host='fw-02',
262
+ service='sshd',
263
+ ),
264
+ ],
265
+ db_path=str(db_path),
266
+ )
267
+ set_raw_envelope_status(raw_ids[0], 'parse_failed', db_path=str(db_path), failure_reason='empty event')
268
+ set_raw_envelope_status(raw_ids[1], 'canonicalized', db_path=str(db_path))
269
+ set_raw_envelope_status(raw_ids[2], 'unsupported', db_path=str(db_path), failure_reason='bad source payload')
270
+
271
+ summaries = get_source_status_summaries(str(db_path), limit=10)
272
+ assert len(summaries) == 2
273
+ fw01 = next(item for item in summaries if item['source_id'] == 'fw-01' and item['source_path'] == '/var/log/syslog')
274
+ fw02 = next(item for item in summaries if item['source_id'] == 'fw-02')
275
+ assert fw01['raw_count'] == 2
276
+ assert fw01['canonicalized_count'] == 1
277
+ assert fw01['parse_failed_count'] == 1
278
+ assert fw01['unsupported_count'] == 0
279
+ assert fw01['first_seen_at'] == '2026-03-22T00:00:01Z'
280
+ assert fw01['last_seen_at'] == '2026-03-22T00:00:02Z'
281
+ assert fw02['raw_count'] == 1
282
+ assert fw02['canonicalized_count'] == 0
283
+ assert fw02['parse_failed_count'] == 0
284
+ assert fw02['unsupported_count'] == 1
285
+
286
+ filtered = get_source_status_summaries(str(db_path), source_type='syslog', source_id='fw-01', source_path='/var/log/syslog')
287
+ assert len(filtered) == 1
288
+ assert filtered[0]['source_id'] == 'fw-01'
289
+
290
+
187
291
  def test_list_recent_raw_envelopes_supports_status_filtering(tmp_path: Path) -> None:
188
292
  db_path = tmp_path / 'brainstem.sqlite3'
189
293
  init_db(str(db_path))
@@ -292,3 +396,76 @@ def test_get_raw_envelope_by_id(tmp_path: Path) -> None:
292
396
  assert row["id"] == raw_id
293
397
  assert row["canonicalization_status"] == "parse_failed"
294
398
  assert row["failure_reason"] == "empty message"
399
+
400
+
401
+ def test_storage_counts_report_expected_rows_for_ingested_payload(tmp_path: Path) -> None:
402
+ db_path = tmp_path / 'brainstem.sqlite3'
403
+ raw_events = parse_syslog_envelopes(
404
+ [
405
+ "Mar 22 00:00:01 fw-01 charon: VPN tunnel dropped and recovered",
406
+ "Mar 22 00:00:02 fw-01 charon: VPN tunnel dropped and recovered",
407
+ ],
408
+ tenant_id="client-a",
409
+ source_path="/var/log/syslog",
410
+ )
411
+ result = run_ingest_pipeline(raw_events, threshold=2, db_path=str(db_path))
412
+ assert result.raw_envelope_ids
413
+ assert result.events
414
+ assert result.signatures
415
+ assert result.candidates
416
+
417
+ counts = get_storage_counts(str(db_path))
418
+ expected_signature_rows = len({sig.signature_key for sig in result.signatures})
419
+ assert counts["raw_envelopes"] == len(result.raw_envelope_ids)
420
+ assert counts["events"] == len(result.events)
421
+ assert counts["signatures"] == expected_signature_rows
422
+ assert counts["candidates"] == len(result.candidates)
423
+
424
+
425
+ def test_resolve_and_clear_storage_tables_are_explicit() -> None:
426
+ assert resolve_maintenance_tables(["all"]) == ["events", "raw_envelopes", "signatures", "candidates"]
427
+ assert resolve_maintenance_tables(["raw_envelopes", "events"]) == ["raw_envelopes", "events"]
428
+ assert resolve_maintenance_tables(None) == ["events", "raw_envelopes", "signatures", "candidates"]
429
+
430
+
431
+ def test_clear_storage_tables_only_clears_requested_tables(tmp_path: Path) -> None:
432
+ db_path = tmp_path / 'brainstem.sqlite3'
433
+ raw_events = parse_syslog_envelopes(
434
+ [
435
+ "Mar 22 00:00:01 fw-01 charon: VPN tunnel dropped and recovered",
436
+ "Mar 22 00:00:02 fw-01 charon: VPN tunnel dropped and recovered",
437
+ ],
438
+ tenant_id="client-a",
439
+ source_path="/var/log/syslog",
440
+ )
441
+ result = run_ingest_pipeline(raw_events, threshold=2, db_path=str(db_path))
442
+ removed = clear_storage_tables(str(db_path), tables=["raw_envelopes", "events"])
443
+ expected_signature_rows = len({sig.signature_key for sig in result.signatures})
444
+ assert removed["raw_envelopes"] == len(result.raw_envelope_ids)
445
+ assert removed["events"] == len(result.events)
446
+
447
+ counts_after = get_storage_counts(str(db_path))
448
+ assert counts_after["raw_envelopes"] == 0
449
+ assert counts_after["events"] == 0
450
+ assert counts_after["signatures"] == expected_signature_rows
451
+ assert counts_after["candidates"] == len(result.candidates)
452
+
453
+
454
+ def test_create_sqlite_snapshot_returns_metadata_and_copies_file(tmp_path: Path) -> None:
455
+ db_path = tmp_path / "snapshot.sqlite3"
456
+ init_db(str(db_path))
457
+
458
+ first = create_sqlite_snapshot(str(db_path))
459
+ first_path = Path(first["snapshot_path"])
460
+ assert first["source_path"] == str(db_path)
461
+ assert first_path.exists()
462
+ assert first_path.is_file()
463
+ assert first_path != db_path
464
+ assert first["size"] == first_path.stat().st_size
465
+ assert first["size"] > 0
466
+ assert first["created_at"].endswith("Z")
467
+
468
+ second = create_sqlite_snapshot(str(db_path))
469
+ second_path = Path(second["snapshot_path"])
470
+ assert second_path.exists()
471
+ assert second["snapshot_path"] != first["snapshot_path"]