@simbimbo/brainstem 0.0.3 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/tests/test_api.py CHANGED
@@ -1,11 +1,15 @@
1
1
  from pathlib import Path
2
2
 
3
+ import pytest
3
4
  from fastapi.testclient import TestClient
4
5
 
6
+ from brainstem import __version__
5
7
  from brainstem.api import app
8
+ from brainstem.fingerprint import normalize_message
6
9
  from brainstem.models import RawInputEnvelope
7
10
  from brainstem.storage import (
8
11
  init_db,
12
+ get_raw_envelope_by_id,
9
13
  set_raw_envelope_status,
10
14
  store_raw_envelopes,
11
15
  )
@@ -71,6 +75,308 @@ def test_ingest_batch_and_interesting(tmp_path: Path) -> None:
71
75
  assert interesting_payload["items"]
72
76
 
73
77
 
78
def test_ingest_batch_mixed_success_and_failure_returns_per_item_accounting(tmp_path: Path) -> None:
    """Verify ``/ingest/batch`` reports one outcome per submitted item.

    The batch holds two events with a message and one whose ``message_raw``
    is empty. The response is expected to count two events and one parse
    failure, and to expose an ``item_results`` entry for each input index.
    """
    client = TestClient(app)
    db_path = tmp_path / "brainstem_batch_accounting.sqlite3"
    payload = {
        "threshold": 2,
        "db_path": str(db_path),
        "events": [
            {
                "tenant_id": "client-a",
                "source_type": "syslog",
                "message_raw": "Failed password for admin from 10.1.2.3",
                "host": "fw-01",
                "service": "sshd",
            },
            {
                "tenant_id": "client-b",
                "source_type": "syslog",
                "message_raw": "VPN tunnel dropped and recovered",
                "host": "fw-02",
                "service": "charon",
            },
            {
                # Empty message: this is the item expected to fail parsing.
                "tenant_id": "client-a",
                "source_type": "syslog",
                "message_raw": "",
                "host": "fw-01",
                "service": "sshd",
            },
        ],
    }
    response = client.post("/ingest/batch", json=payload)
    assert response.status_code == 200
    batch_payload = response.json()

    assert batch_payload["ok"] is True
    assert batch_payload["item_count"] == 3
    assert batch_payload["event_count"] == 2
    assert batch_payload["parse_failed"] == 1
    assert "item_results" in batch_payload
    assert len(batch_payload["item_results"]) == 3

    # Key the results by their reported index so no assertion below depends
    # on the order in which the API happens to list them.
    item_by_index = {item["index"]: item for item in batch_payload["item_results"]}
    assert set(item_by_index) == {0, 1, 2}
    assert item_by_index[0]["status"] == "canonicalized"
    assert item_by_index[1]["status"] == "canonicalized"
    assert item_by_index[2]["status"] == "parse_failed"
    # Look the failed item up through the index map (not by list position),
    # consistent with the surrounding assertions.
    assert item_by_index[2]["raw_envelope_id"] is not None
    assert item_by_index[2]["failure_reason"] == "message_raw is empty and cannot be canonicalized"

    for item in item_by_index.values():
        assert item["tenant_id"] in {"client-a", "client-b"}
        assert item["source_type"] == "syslog"
        assert isinstance(item["index"], int)
        assert "raw_envelope_id" in item
        assert "failure_reason" in item
133
+
134
+
135
def test_candidates_endpoint_returns_candidate_inspection_payload_and_supports_filtering(tmp_path: Path) -> None:
    """Ingest a repeated event, then inspect ``/candidates`` with and without filters.

    Covers the shape of a single candidate item, filtering by
    ``decision_band``, and filtering by ``candidate_type`` with ``limit=1``.
    """
    client = TestClient(app)
    db_path = tmp_path / "brainstem_candidates.sqlite3"

    # The same event is submitted twice with threshold 2.
    repeated_event = {
        "tenant_id": "client-a",
        "source_type": "syslog",
        "message_raw": "Failed password for admin from 10.1.2.3",
        "host": "fw-01",
        "service": "sshd",
    }
    batch_body = {
        "threshold": 2,
        "db_path": str(db_path),
        "events": [dict(repeated_event), dict(repeated_event)],
    }
    ingest_response = client.post("/ingest/batch", json=batch_body)
    assert ingest_response.status_code == 200

    listing = client.get(f"/candidates?db_path={db_path}&limit=10")
    assert listing.status_code == 200
    listing_body = listing.json()
    assert listing_body["ok"] is True
    assert listing_body["count"] >= 1
    assert len(listing_body["items"]) >= 1

    # Inspect the first candidate's fields.
    known_decision_bands = {"watch", "review", "urgent_human_review", "promote_to_incident_memory", "ignore"}
    known_attention_bands = {"ignore_fast", "background", "watch", "investigate", "promote"}
    candidate = listing_body["items"][0]
    assert candidate["title"]
    assert candidate["summary"]
    assert candidate["decision_band"] in known_decision_bands
    assert candidate["attention_band"] in known_attention_bands
    assert candidate["attention_score"] >= 0
    assert candidate["score_total"] == candidate["attention_score"]
    assert isinstance(candidate["score_breakdown"], dict)
    assert candidate["raw_envelope_ids"]
    assert isinstance(candidate["raw_envelopes"], list)
    embedded_ids = [envelope["id"] for envelope in candidate["raw_envelopes"]]
    assert embedded_ids == candidate["raw_envelope_ids"]

    # Filter by the decision band of the candidate just inspected.
    band = candidate["decision_band"]
    by_band = client.get(f"/candidates?db_path={db_path}&decision_band={band}&limit=10")
    assert by_band.status_code == 200
    by_band_body = by_band.json()
    assert by_band_body["count"] >= 1
    assert all(entry["decision_band"] == band for entry in by_band_body["items"])

    # Filter by candidate type and cap the result at one item.
    by_type = client.get(f"/candidates?db_path={db_path}&candidate_type={candidate['candidate_type']}&limit=1")
    assert by_type.status_code == 200
    by_type_body = by_type.json()
    assert by_type_body["count"] == 1
    assert len(by_type_body["items"]) <= 1
194
+
195
+
196
def test_signatures_endpoint_returns_signature_payload_and_supports_filtering(tmp_path: Path) -> None:
    """Ingest three events and exercise ``/signatures`` and its query filters.

    Two events are identical and one differs. The test checks the shape of
    the first listed signature, then the ``event_family``, ``service``,
    ``min_occurrence_count`` and ``limit`` parameters.
    """
    client = TestClient(app)
    db_path = tmp_path / "brainstem_signatures.sqlite3"

    failed_login = {
        "tenant_id": "client-a",
        "source_type": "syslog",
        "message_raw": "Failed password for admin from 10.1.2.3",
        "host": "fw-01",
        "service": "sshd",
    }
    unrelated = {
        "tenant_id": "client-a",
        "source_type": "syslog",
        "message_raw": "Different event in another family",
        "host": "fw-01",
        "service": "systemd",
    }
    ingest_response = client.post(
        "/ingest/batch",
        json={
            "threshold": 2,
            "db_path": str(db_path),
            "events": [dict(failed_login), dict(failed_login), unrelated],
        },
    )
    assert ingest_response.status_code == 200

    listing = client.get(f"/signatures?db_path={db_path}&limit=10")
    assert listing.status_code == 200
    listing_body = listing.json()
    assert listing_body["ok"] is True
    assert listing_body["count"] >= 2
    assert len(listing_body["items"]) >= 2

    # Shape of the first signature in the listing.
    top = listing_body["items"][0]
    assert top["signature_key"]
    assert top["event_family"]
    assert top["normalized_pattern"]
    assert isinstance(top["occurrence_count"], int)
    assert top["occurrence_count"] >= 2
    assert isinstance(top["raw_envelope_ids"], list)
    assert top["raw_envelope_count"] == len(top["raw_envelope_ids"])
    assert isinstance(top["recurrence"], dict)
    assert top["recurrence"]["signature_id"] > 0
    assert top["raw_envelope_count"] >= 1

    # event_family filter
    family = top["event_family"]
    by_family = client.get(f"/signatures?db_path={db_path}&event_family={family}&limit=10")
    assert by_family.status_code == 200
    by_family_body = by_family.json()
    assert by_family_body["count"] >= 1
    assert all(entry["event_family"] == family for entry in by_family_body["items"])

    # service filter
    by_service = client.get(f"/signatures?db_path={db_path}&service=sshd&limit=10")
    assert by_service.status_code == 200
    by_service_body = by_service.json()
    assert by_service_body["count"] >= 1
    assert all(entry["service"] == "sshd" for entry in by_service_body["items"])

    # min_occurrence_count filter
    by_min_count = client.get(f"/signatures?db_path={db_path}&min_occurrence_count=2&limit=10")
    assert by_min_count.status_code == 200
    by_min_count_body = by_min_count.json()
    assert by_min_count_body["count"] >= 1
    assert all(entry["occurrence_count"] >= 2 for entry in by_min_count_body["items"])

    # limit caps the listing
    capped = client.get(f"/signatures?db_path={db_path}&limit=1")
    assert capped.status_code == 200
    capped_body = capped.json()
    assert capped_body["count"] == 1
    assert len(capped_body["items"]) <= 1
279
+
280
+
281
def test_canonical_events_endpoint_returns_normalized_fields_and_supports_filters(tmp_path: Path) -> None:
    """Ingest four events and query ``/canonical_events`` with each filter.

    Three ``client-a`` events carry a message; one ``client-b`` event has an
    empty ``message_raw``. The test checks the fields of a returned item and
    the ``limit``, ``host``, ``source``, ``service`` and ``severity``
    parameters, always scoped to ``tenant_id=client-a``.
    """

    def make_event(tenant_id, source_type, message_raw, host, service, severity):
        # Build one ingest event; key order mirrors the request body layout.
        return {
            "tenant_id": tenant_id,
            "source_type": source_type,
            "message_raw": message_raw,
            "host": host,
            "service": service,
            "severity": severity,
        }

    client = TestClient(app)
    db_path = tmp_path / "brainstem_canonical_events.sqlite3"
    events = [
        make_event("client-a", "syslog", "IPsec SA rekey succeeded on host 10.1.2.3", "fw-01", "charon", "info"),
        make_event("client-a", "syslog", "Service restart detected on node 2", "fw-01", "systemd", "warning"),
        make_event("client-a", "file", "Configuration drift detected for node 3", "fw-02", "charon", "critical"),
        make_event("client-b", "file", "", "fw-02", "sshd", "info"),
    ]
    ingest_response = client.post(
        "/ingest/batch",
        json={"threshold": 1, "db_path": str(db_path), "events": events},
    )
    assert ingest_response.status_code == 200

    base_query = f"/canonical_events?db_path={db_path}"

    by_tenant = client.get(f"{base_query}&tenant_id=client-a&limit=10")
    assert by_tenant.status_code == 200
    by_tenant_body = by_tenant.json()
    assert by_tenant_body["ok"] is True
    assert by_tenant_body["count"] == 3
    assert by_tenant_body["items"][0]["tenant_id"] == "client-a"

    # Normalized forms of the three client-a messages.
    expected_normalized = {
        normalize_message(message)
        for message in (
            "IPsec SA rekey succeeded on host 10.1.2.3",
            "Service restart detected on node 2",
            "Configuration drift detected for node 3",
        )
    }
    head = by_tenant_body["items"][0]
    assert head["raw_envelope_id"] > 0
    assert head["tenant_id"]
    assert head["source"] in {"syslog", "file"}
    assert head["host"]
    assert head["service"]
    assert head["severity"] in {"info", "warning", "critical"}
    assert head["message_raw"]
    assert head["message_normalized"] == normalize_message(head["message_raw"])
    assert {entry["message_normalized"] for entry in by_tenant_body["items"]} == expected_normalized

    capped = client.get(f"{base_query}&tenant_id=client-a&limit=1")
    assert capped.status_code == 200
    capped_body = capped.json()
    assert capped_body["count"] == 1
    assert len(capped_body["items"]) <= 1

    by_host = client.get(f"{base_query}&tenant_id=client-a&host=fw-01")
    assert by_host.status_code == 200
    by_host_body = by_host.json()
    assert by_host_body["count"] == 2
    assert all(entry["host"] == "fw-01" for entry in by_host_body["items"])

    by_source = client.get(f"{base_query}&tenant_id=client-a&source=file")
    assert by_source.status_code == 200
    by_source_body = by_source.json()
    assert by_source_body["count"] == 1
    assert by_source_body["items"][0]["source"] == "file"

    by_service = client.get(f"{base_query}&tenant_id=client-a&service=charon")
    assert by_service.status_code == 200
    by_service_body = by_service.json()
    assert by_service_body["count"] == 2
    assert all(entry["service"] == "charon" for entry in by_service_body["items"])

    by_severity = client.get(f"{base_query}&severity=warning&tenant_id=client-a")
    assert by_severity.status_code == 200
    by_severity_body = by_severity.json()
    assert by_severity_body["count"] == 1
    assert by_severity_body["items"][0]["severity"] == "warning"
378
+
379
+
74
380
  def test_stats_after_successful_and_failed_ingest(tmp_path: Path) -> None:
75
381
  client = TestClient(app)
76
382
  db_path = tmp_path / "brainstem.sqlite3"
@@ -127,6 +433,157 @@ def test_healthz_is_ready() -> None:
127
433
  assert response.json()["ok"] is True
128
434
 
129
435
 
436
def test_healthz_reports_api_token_status(monkeypatch: pytest.MonkeyPatch) -> None:
    """Check that ``/healthz`` reflects whether ``BRAINSTEM_API_TOKEN`` is set."""
    client = TestClient(app)

    # Variable removed: both flags are expected to be False.
    monkeypatch.delenv("BRAINSTEM_API_TOKEN", raising=False)
    without_token = client.get("/healthz")
    assert without_token.status_code == 200
    body = without_token.json()
    assert body["api_token_enabled"] is False
    assert body["runtime"]["auth_state"]["api_token_configured"] is False

    # Variable set: both flags are expected to flip to True.
    monkeypatch.setenv("BRAINSTEM_API_TOKEN", "local-token")
    with_token = client.get("/healthz")
    assert with_token.status_code == 200
    body = with_token.json()
    assert body["api_token_enabled"] is True
    assert body["runtime"]["auth_state"]["api_token_configured"] is True
451
+
452
+
453
def test_healthz_reports_runtime_summary() -> None:
    """Check selected fields of the ``runtime`` section returned by ``/healthz``."""
    reply = TestClient(app).get("/healthz")
    assert reply.status_code == 200

    summary = reply.json()["runtime"]
    assert summary["version"] == __version__
    assert summary["capability_flags"]["ingest_endpoints"]["single_event"] is True
    assert summary["defaults"]["interesting_limit"] == 5
    assert summary["limits"]["replay_raw_max_ids"] == 32
463
+
464
+
465
def test_status_endpoint_reports_operator_summary() -> None:
    """Check the top-level and ``runtime`` fields returned by ``/status``."""
    reply = TestClient(app).get("/status")
    assert reply.status_code == 200

    body = reply.json()
    assert body["ok"] is True
    assert body["status"] == "ok"

    summary = body["runtime"]
    # The top-level flag must agree with the nested auth state.
    assert body["api_token_enabled"] == summary["auth_state"]["api_token_configured"]
    assert summary["capability_flags"]["inspection_endpoints"]["raw_envelopes"] is True
    assert summary["runtime"]["api_token_env"] == "BRAINSTEM_API_TOKEN"
475
+
476
+
477
def test_status_and_healthz_are_coherent() -> None:
    """Check that ``/status`` and ``/healthz`` return identical JSON bodies."""
    client = TestClient(app)
    status_reply = client.get("/status")
    healthz_reply = client.get("/healthz")

    for reply in (status_reply, healthz_reply):
        assert reply.status_code == 200
    assert status_reply.json() == healthz_reply.json()
484
+
485
+
486
def test_runtime_endpoint_reports_config_object(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
    """Check the ``config`` object exposed under ``/runtime``.

    ``BRAINSTEM_DB_PATH`` is pointed at a temporary file before the client
    is created; the reported default DB path is expected to match it.
    """
    custom_db = tmp_path / "runtime.sqlite3"
    monkeypatch.setenv("BRAINSTEM_DB_PATH", str(custom_db))
    client = TestClient(app)

    reply = client.get("/runtime")
    assert reply.status_code == 200
    summary = reply.json()["runtime"]

    config = summary["runtime"]["config"]
    assert config["api_token_env_var"] == "BRAINSTEM_API_TOKEN"

    listener = config["listener"]
    assert listener["syslog_host"] == "127.0.0.1"
    assert listener["syslog_port"] == 5514
    assert listener["syslog_source_path"] == "/dev/udp"

    assert config["defaults"]["ingest_threshold"] == 2
    assert config["db"]["default_path"] == str(custom_db)
    # The summary-level defaults must mirror the config-level defaults.
    assert summary["defaults"] == config["defaults"]
503
+
504
+
505
def test_runtime_endpoint_provides_same_summary(monkeypatch: pytest.MonkeyPatch) -> None:
    """Check ``/runtime`` summary fields with ``BRAINSTEM_API_TOKEN`` set."""
    client = TestClient(app)
    monkeypatch.setenv("BRAINSTEM_API_TOKEN", "runtime-token")

    reply = client.get("/runtime")
    assert reply.status_code == 200
    body = reply.json()
    assert body["ok"] is True

    summary = body["runtime"]
    assert summary["auth_state"]["api_token_configured"] is True
    assert summary["runtime"]["api_token_env"] == "BRAINSTEM_API_TOKEN"
    assert summary["limits"]["replay_raw_max_ids"] == 32
516
+
517
+
518
def test_unprotected_routes_remain_open_when_api_token_not_configured(
    monkeypatch: pytest.MonkeyPatch,
    tmp_path: Path,
) -> None:
    """Check that ingest and ``/interesting`` work without credentials.

    ``BRAINSTEM_API_TOKEN`` is removed from the environment first; both
    requests are sent with no auth headers and are expected to return 200.
    """
    monkeypatch.delenv("BRAINSTEM_API_TOKEN", raising=False)
    client = TestClient(app)
    db_path = tmp_path / "open.sqlite3"

    restart_event = {
        "tenant_id": "client-a",
        "source_type": "syslog",
        "message_raw": "Service restarted",
        "host": "fw-01",
        "service": "systemd",
    }
    ingest_response = client.post(f"/ingest/event?threshold=1&db_path={db_path}", json=restart_event)
    assert ingest_response.status_code == 200

    interesting_response = client.get(f"/interesting?db_path={db_path}&limit=10")
    assert interesting_response.status_code == 200
    assert interesting_response.json()["ok"] is True
541
+
542
+
543
def test_api_token_is_required_for_write_and_inspection_routes_when_enabled(
    monkeypatch: pytest.MonkeyPatch,
    tmp_path: Path,
) -> None:
    """Check token enforcement on ingest and ``/interesting`` when a token is set.

    With ``BRAINSTEM_API_TOKEN`` set to ``valid-token``: a request without
    credentials and one with a wrong ``X-API-Token`` are expected to get 401,
    while a bearer ``Authorization`` header and a correct ``X-API-Token``
    header are both expected to be accepted.
    """
    monkeypatch.setenv("BRAINSTEM_API_TOKEN", "valid-token")
    client = TestClient(app)
    db_path = tmp_path / "auth.sqlite3"

    ingest_url = f"/ingest/event?threshold=1&db_path={db_path}"
    interesting_url = f"/interesting?db_path={db_path}&limit=10"
    restart_event = {
        "tenant_id": "client-a",
        "source_type": "syslog",
        "message_raw": "Service restarted",
        "host": "fw-01",
        "service": "systemd",
    }

    # No credentials at all on a write route.
    no_credentials = client.post(ingest_url, json=dict(restart_event))
    assert no_credentials.status_code == 401

    # Wrong token on an inspection route.
    bad_token = client.get(interesting_url, headers={"X-API-Token": "wrong"})
    assert bad_token.status_code == 401

    # Correct token via the Authorization bearer header.
    bearer_write = client.post(
        ingest_url,
        headers={"Authorization": "Bearer valid-token"},
        json=dict(restart_event),
    )
    assert bearer_write.status_code == 200

    # Correct token via the X-API-Token header.
    header_read = client.get(interesting_url, headers={"X-API-Token": "valid-token"})
    assert header_read.status_code == 200
    assert header_read.json()["ok"] is True
585
+
586
+
130
587
  def test_failures_endpoint_lists_recent_parse_failures(tmp_path: Path) -> None:
131
588
  client = TestClient(app)
132
589
  db_path = tmp_path / "brainstem.sqlite3"
@@ -213,6 +670,94 @@ def test_failures_endpoint_filters_by_status_and_fetches_single_record(tmp_path:
213
670
  assert invalid.status_code == 422
214
671
 
215
672
 
673
def test_raw_envelopes_endpoint_supports_status_and_source_filters(tmp_path: Path) -> None:
    """Seed four raw envelopes and query ``/raw_envelopes`` with each filter.

    Envelopes are stored directly through the storage layer; the second is
    marked ``parse_failed`` and the third ``unsupported``. The expected id
    lists assert that results come back with the most recently stored first.
    """
    client = TestClient(app)
    db_path = tmp_path / "brainstem.sqlite3"
    db = str(db_path)
    init_db(db)

    seeded = [
        RawInputEnvelope(
            tenant_id="tenant-a",
            source_type="syslog",
            source_id="fw-01",
            source_path="/var/log/syslog",
            timestamp="2026-03-22T00:00:01Z",
            message_raw="VPN tunnel recovered",
        ),
        RawInputEnvelope(
            tenant_id="tenant-a",
            source_type="syslog",
            source_id="fw-01",
            source_path="/var/log/auth.log",
            timestamp="2026-03-22T00:00:02Z",
            message_raw="",
        ),
        RawInputEnvelope(
            tenant_id="tenant-b",
            source_type="file",
            source_id="agent-01",
            source_path="/tmp/agent.log",
            timestamp="2026-03-22T00:00:03Z",
            message_raw="backup finished",
        ),
        RawInputEnvelope(
            tenant_id="tenant-a",
            source_type="file",
            source_id="fw-01",
            source_path="/var/log/syslog",
            timestamp="2026-03-22T00:00:04Z",
            message_raw="disk pressure warning",
        ),
    ]
    raw_ids = store_raw_envelopes(seeded, db_path=db)
    set_raw_envelope_status(raw_ids[1], "parse_failed", db_path=db, failure_reason="seeded parse failure")
    set_raw_envelope_status(raw_ids[2], "unsupported", db_path=db, failure_reason="seeded unsupported")

    def listed_ids(body):
        # Collect the ids of a listing in the order the API returned them.
        return [entry["id"] for entry in body["items"]]

    everything = client.get(f"/raw_envelopes?db_path={db_path}&limit=10")
    assert everything.status_code == 200
    everything_body = everything.json()
    assert everything_body["ok"] is True
    assert everything_body["count"] == 4
    assert listed_ids(everything_body) == [raw_ids[3], raw_ids[2], raw_ids[1], raw_ids[0]]

    failed_only = client.get(f"/raw_envelopes?db_path={db_path}&status=parse_failed&limit=10")
    assert failed_only.status_code == 200
    failed_body = failed_only.json()
    assert failed_body["count"] == 1
    assert failed_body["items"][0]["id"] == raw_ids[1]
    assert failed_body["items"][0]["canonicalization_status"] == "parse_failed"

    by_source_type = client.get(f"/raw_envelopes?db_path={db_path}&source_type=syslog&limit=10")
    assert by_source_type.status_code == 200
    assert listed_ids(by_source_type.json()) == [raw_ids[1], raw_ids[0]]

    by_source_id = client.get(f"/raw_envelopes?db_path={db_path}&source_id=fw-01&limit=10")
    assert by_source_id.status_code == 200
    assert listed_ids(by_source_id.json()) == [raw_ids[3], raw_ids[1], raw_ids[0]]

    by_source_path = client.get(f"/raw_envelopes?db_path={db_path}&source_path=/var/log/syslog&limit=10")
    assert by_source_path.status_code == 200
    assert listed_ids(by_source_path.json()) == [raw_ids[3], raw_ids[0]]

    # Combined tenant + source type + source path filter.
    combined = client.get(
        f"/raw_envelopes?db_path={db_path}&tenant_id=tenant-a&source_type=file&source_path=/var/log/syslog&limit=10"
    )
    assert combined.status_code == 200
    assert listed_ids(combined.json()) == [raw_ids[3]]
752
+
753
+
754
def test_raw_envelopes_endpoint_rejects_invalid_status_filter(tmp_path: Path) -> None:
    """Check that an unknown ``status`` value on ``/raw_envelopes`` yields HTTP 422."""
    db_path = tmp_path / "brainstem.sqlite3"
    rejected = TestClient(app).get(f"/raw_envelopes?db_path={db_path}&status=bogus")
    assert rejected.status_code == 422
759
+
760
+
216
761
  def test_sources_endpoint_summarizes_ingest_dimensions(tmp_path: Path) -> None:
217
762
  client = TestClient(app)
218
763
  db_path = tmp_path / "brainstem.sqlite3"
@@ -274,6 +819,73 @@ def test_sources_endpoint_summarizes_ingest_dimensions(tmp_path: Path) -> None:
274
819
  }
275
820
 
276
821
 
822
def test_sources_status_endpoint_returns_source_health_like_summary(tmp_path: Path) -> None:
    """Ingest events from two sources and check ``/sources/status`` counters.

    Source ``fw-01`` sends one good event and one with an empty message;
    source ``lm-01`` sends one good event. The test checks the per-source
    counts for ``fw-01`` and the ``source_type``/``source_id``/``source_path``
    filters.
    """

    def make_event(source_type, source_id, source_name, source_path, message_raw, host, service):
        # Build one client-a ingest event; key order mirrors the request body layout.
        return {
            "tenant_id": "client-a",
            "source_type": source_type,
            "source_id": source_id,
            "source_name": source_name,
            "source_path": source_path,
            "message_raw": message_raw,
            "host": host,
            "service": service,
        }

    client = TestClient(app)
    db_path = tmp_path / "brainstem.sqlite3"
    events = [
        make_event("syslog", "fw-01", "edge-fw-01", "/var/log/syslog", "Service restarted", "fw-01", "systemd"),
        make_event("syslog", "fw-01", "edge-fw-01", "/var/log/syslog", "", "fw-01", "systemd"),
        make_event("logicmonitor", "lm-01", "edge-lm-01", "/alerts", "Disk space low", "lm-01", "logicmonitor"),
    ]
    ingest_response = client.post(
        "/ingest/batch",
        json={"threshold": 1, "db_path": str(db_path), "events": events},
    )
    assert ingest_response.status_code == 200

    overview = client.get(f"/sources/status?db_path={db_path}&limit=10")
    assert overview.status_code == 200
    overview_body = overview.json()
    assert overview_body["ok"] is True
    assert overview_body["count"] == 2

    # Pick out the syslog/fw-01 row from the summary.
    fw01 = next(
        entry
        for entry in overview_body["items"]
        if entry["source_type"] == "syslog" and entry["source_id"] == "fw-01"
    )
    assert fw01["raw_count"] == 2
    assert fw01["canonicalized_count"] == 1
    assert fw01["parse_failed_count"] == 1
    assert fw01["unsupported_count"] == 0
    assert fw01["source_path"] == "/var/log/syslog"
    assert fw01["first_seen_at"] <= fw01["last_seen_at"]

    narrowed = client.get(
        f"/sources/status?db_path={db_path}"
        "&source_type=syslog&source_id=fw-01&source_path=/var/log/syslog&limit=10"
    )
    assert narrowed.status_code == 200
    narrowed_body = narrowed.json()
    assert narrowed_body["count"] == 1
    only_row = narrowed_body["items"][0]
    assert only_row["source_id"] == "fw-01"
    assert only_row["source_path"] == "/var/log/syslog"
887
+
888
+
277
889
  def test_ingest_recent_endpoint_returns_recent_intake_and_allows_status_filter(tmp_path: Path) -> None:
278
890
  client = TestClient(app)
279
891
  db_path = tmp_path / "brainstem.sqlite3"
@@ -317,3 +929,117 @@ def test_ingest_recent_endpoint_returns_recent_intake_and_allows_status_filter(t
317
929
  failed_payload = failed.json()
318
930
  assert failed_payload["count"] == 1
319
931
  assert failed_payload["items"][0]["canonicalization_status"] == "parse_failed"
932
+
933
+
934
def test_replay_raw_endpoint_replays_parse_failed_and_received_records(tmp_path: Path) -> None:
    """Replay three stored envelopes through ``/replay/raw`` and check the outcome.

    The first and third envelopes are seeded as ``parse_failed``; the second
    keeps whatever status storage assigned. After replay the two envelopes
    with a message are expected to be ``canonicalized`` and the empty one to
    remain ``parse_failed`` with the empty-message failure reason.
    """
    client = TestClient(app)
    db_path = tmp_path / "brainstem.sqlite3"
    db = str(db_path)
    init_db(db)

    seeded = [
        RawInputEnvelope(
            tenant_id="client-a",
            source_type="syslog",
            timestamp="2026-03-22T00:00:01Z",
            message_raw="can canonicalize first",
            host="fw-01",
            service="sshd",
        ),
        RawInputEnvelope(
            tenant_id="client-a",
            source_type="syslog",
            timestamp="2026-03-22T00:00:02Z",
            message_raw="can canonicalize second",
            host="fw-01",
            service="sshd",
        ),
        RawInputEnvelope(
            tenant_id="client-a",
            source_type="syslog",
            timestamp="2026-03-22T00:00:03Z",
            message_raw="",
            host="fw-01",
            service="sshd",
        ),
    ]
    raw_envelope_ids = store_raw_envelopes(seeded, db_path=db)
    for position in (0, 2):
        set_raw_envelope_status(
            raw_envelope_ids[position],
            "parse_failed",
            db_path=db,
            failure_reason="seeded parse failure",
        )

    replay = client.post(
        "/replay/raw",
        json={"db_path": db, "raw_envelope_ids": raw_envelope_ids, "threshold": 1},
    )
    assert replay.status_code == 200
    replay_body = replay.json()
    assert replay_body["ok"] is True
    assert replay_body["attempted_raw_envelope_ids"] == raw_envelope_ids
    assert replay_body["event_count"] == 2
    assert replay_body["parse_failed"] == 1

    # Previously parse_failed, has a message.
    recovered = get_raw_envelope_by_id(raw_envelope_ids[0], db_path=db)
    assert recovered is not None
    assert recovered["canonicalization_status"] == "canonicalized"

    # Never re-marked after storage, has a message.
    untouched = get_raw_envelope_by_id(raw_envelope_ids[1], db_path=db)
    assert untouched is not None
    assert untouched["canonicalization_status"] == "canonicalized"

    # Empty message: stays failed, with the reason rewritten by the replay.
    unrecoverable = get_raw_envelope_by_id(raw_envelope_ids[2], db_path=db)
    assert unrecoverable is not None
    assert unrecoverable["canonicalization_status"] == "parse_failed"
    assert unrecoverable["failure_reason"] == "message_raw is empty and cannot be canonicalized"
997
+
998
+
999
def test_replay_raw_endpoint_skips_non_replayable_statuses_without_force(tmp_path: Path) -> None:
    """Check that ``/replay/raw`` skips a canonicalized envelope unless forced.

    A single envelope is stored and marked ``canonicalized``. A plain replay
    is expected to skip it with reason ``not_replayable``; a replay with
    ``force`` and ``allowed_statuses=["canonicalized"]`` is expected to
    attempt it and produce one event.
    """
    client = TestClient(app)
    db_path = tmp_path / "brainstem.sqlite3"
    db = str(db_path)
    init_db(db)

    envelope = RawInputEnvelope(
        tenant_id="client-a",
        source_type="syslog",
        timestamp="2026-03-22T00:00:01Z",
        message_raw="already canonicalized",
        host="fw-01",
        service="sshd",
    )
    # Exactly one id is expected back for the single stored envelope.
    [canonicalized_id] = store_raw_envelopes([envelope], db_path=db)
    set_raw_envelope_status(canonicalized_id, "canonicalized", db_path=db)

    plain_request = {
        "db_path": db,
        "raw_envelope_ids": [canonicalized_id],
        "threshold": 1,
    }
    plain = client.post("/replay/raw", json=plain_request)
    assert plain.status_code == 200
    plain_body = plain.json()
    assert plain_body["attempted_raw_envelope_ids"] == []
    assert plain_body["event_count"] == 0
    assert plain_body["skipped"][0]["reason"] == "not_replayable"

    forced_request = {
        "db_path": db,
        "raw_envelope_ids": [canonicalized_id],
        "threshold": 1,
        "force": True,
        "allowed_statuses": ["canonicalized"],
    }
    forced = client.post("/replay/raw", json=forced_request)
    assert forced.status_code == 200
    forced_body = forced.json()
    assert forced_body["attempted_raw_envelope_ids"] == [canonicalized_id]
    assert forced_body["event_count"] == 1