npm - @simbimbo/brainstem - Versions diffs - 0.0.2 → 0.0.4 - Mend

@simbimbo/brainstem 0.0.2 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28)

package/CHANGELOG.md +37 -0
package/README.md +25 -0
package/brainstem/__init__.py +1 -1
package/brainstem/adapters.py +120 -0
package/brainstem/api.py +483 -23
package/brainstem/config.py +70 -0
package/brainstem/ingest.py +418 -33
package/brainstem/interesting.py +56 -1
package/brainstem/listener.py +175 -0
package/brainstem/models.py +3 -0
package/brainstem/recurrence.py +38 -1
package/brainstem/source_drivers.py +150 -0
package/brainstem/storage.py +547 -8
package/docs/README.md +94 -0
package/docs/adapters.md +97 -401
package/docs/api.md +223 -278
package/package.json +1 -1
package/pyproject.toml +1 -1
package/tests/test_adapters.py +94 -0
package/tests/test_api.py +973 -0
package/tests/test_canonicalization.py +8 -0
package/tests/test_config.py +24 -0
package/tests/test_file_ingest.py +77 -0
package/tests/test_interesting.py +10 -0
package/tests/test_listener.py +253 -0
package/tests/test_recurrence.py +2 -0
package/tests/test_source_drivers.py +95 -0
package/tests/test_storage.py +370 -2

package/tests/test_api.py CHANGED

@@ -1,8 +1,18 @@
 from pathlib import Path
+import pytest
 from fastapi.testclient import TestClient
+from brainstem import __version__
 from brainstem.api import app
+from brainstem.fingerprint import normalize_message
+from brainstem.models import RawInputEnvelope
+from brainstem.storage import (
+    init_db,
+    get_raw_envelope_by_id,
+    set_raw_envelope_status,
+    store_raw_envelopes,
+)
 def test_ingest_event_endpoint_round_trip(tmp_path: Path) -> None:
@@ -65,8 +75,971 @@ def test_ingest_batch_and_interesting(tmp_path: Path) -> None:
     assert interesting_payload["items"]
+def test_ingest_batch_mixed_success_and_failure_returns_per_item_accounting(tmp_path: Path) -> None:
+    """Batch ingest reports one outcome per submitted event.
+    Two events with a message and one with an empty ``message_raw`` are
+    posted to ``/ingest/batch``. The response is expected to count two
+    canonicalized events and one parse failure, and to carry one
+    ``item_results`` entry for each input index.
+    """
+    client = TestClient(app)
+    db_path = tmp_path / "brainstem_batch_accounting.sqlite3"
+    payload = {
+        "threshold": 2,
+        "db_path": str(db_path),
+        "events": [
+            {
+                "tenant_id": "client-a",
+                "source_type": "syslog",
+                "message_raw": "Failed password for admin from 10.1.2.3",
+                "host": "fw-01",
+                "service": "sshd",
+            },
+            {
+                "tenant_id": "client-b",
+                "source_type": "syslog",
+                "message_raw": "VPN tunnel dropped and recovered",
+                "host": "fw-02",
+                "service": "charon",
+            },
+            {
+                "tenant_id": "client-a",
+                "source_type": "syslog",
+                "message_raw": "",
+                "host": "fw-01",
+                "service": "sshd",
+            },
+        ],
+    }
+    response = client.post("/ingest/batch", json=payload)
+    assert response.status_code == 200
+    batch_payload = response.json()
+    assert batch_payload["ok"] is True
+    assert batch_payload["item_count"] == 3
+    assert batch_payload["event_count"] == 2
+    assert batch_payload["parse_failed"] == 1
+    assert "item_results" in batch_payload
+    assert len(batch_payload["item_results"]) == 3
+    item_by_index = {item["index"]: item for item in batch_payload["item_results"]}
+    assert set(item_by_index) == {0, 1, 2}
+    assert item_by_index[0]["status"] == "canonicalized"
+    assert item_by_index[1]["status"] == "canonicalized"
+    assert item_by_index[2]["status"] == "parse_failed"
+    # Look the failed item up by its reported index rather than by list
+    # position, so this check does not depend on the order of item_results.
+    assert item_by_index[2]["raw_envelope_id"] is not None
+    assert item_by_index[2]["failure_reason"] == "message_raw is empty and cannot be canonicalized"
+    for item in item_by_index.values():
+        assert item["tenant_id"] in {"client-a", "client-b"}
+        assert item["source_type"] == "syslog"
+        assert isinstance(item["index"], int)
+        assert "raw_envelope_id" in item
+        assert "failure_reason" in item
+def test_candidates_endpoint_returns_candidate_inspection_payload_and_supports_filtering(tmp_path: Path) -> None:
+    """Exercise ``/candidates``: payload fields plus the decision-band and candidate-type filters."""
+    client = TestClient(app)
+    db_path = tmp_path / "brainstem_candidates.sqlite3"
+    # The same sshd event is submitted twice with threshold 2.
+    failed_login = {
+        "tenant_id": "client-a",
+        "source_type": "syslog",
+        "message_raw": "Failed password for admin from 10.1.2.3",
+        "host": "fw-01",
+        "service": "sshd",
+    }
+    seed_request = {
+        "threshold": 2,
+        "db_path": str(db_path),
+        "events": [dict(failed_login), dict(failed_login)],
+    }
+    seeded = client.post("/ingest/batch", json=seed_request)
+    assert seeded.status_code == 200
+    listing = client.get(f"/candidates?db_path={db_path}&limit=10")
+    assert listing.status_code == 200
+    listing_body = listing.json()
+    assert listing_body["ok"] is True
+    assert listing_body["count"] >= 1
+    assert len(listing_body["items"]) >= 1
+    candidate = listing_body["items"][0]
+    assert candidate["title"]
+    assert candidate["summary"]
+    assert candidate["decision_band"] in {"watch", "review", "urgent_human_review", "promote_to_incident_memory", "ignore"}
+    assert candidate["attention_band"] in {"ignore_fast", "background", "watch", "investigate", "promote"}
+    assert candidate["attention_score"] >= 0
+    assert candidate["score_total"] == candidate["attention_score"]
+    assert isinstance(candidate["score_breakdown"], dict)
+    assert candidate["raw_envelope_ids"]
+    assert isinstance(candidate["raw_envelopes"], list)
+    embedded_ids = [envelope["id"] for envelope in candidate["raw_envelopes"]]
+    assert embedded_ids == candidate["raw_envelope_ids"]
+    # Filter by the decision band of the first candidate.
+    band = candidate["decision_band"]
+    by_band = client.get(f"/candidates?db_path={db_path}&decision_band={band}&limit=10")
+    assert by_band.status_code == 200
+    by_band_body = by_band.json()
+    assert by_band_body["count"] >= 1
+    assert all(entry["decision_band"] == band for entry in by_band_body["items"])
+    # Filter by candidate type and cap the result at one row.
+    kind = candidate["candidate_type"]
+    by_kind = client.get(f"/candidates?db_path={db_path}&candidate_type={kind}&limit=1")
+    assert by_kind.status_code == 200
+    by_kind_body = by_kind.json()
+    assert by_kind_body["count"] == 1
+    assert len(by_kind_body["items"]) <= 1
+def test_signatures_endpoint_returns_signature_payload_and_supports_filtering(tmp_path: Path) -> None:
+    """Exercise ``/signatures``: payload fields and the family, service, occurrence and limit filters."""
+    client = TestClient(app)
+    db_path = tmp_path / "brainstem_signatures.sqlite3"
+    def fetch_signatures(query: str) -> dict:
+        """GET ``/signatures`` for this test's database, require 200, return the JSON body."""
+        reply = client.get(f"/signatures?db_path={db_path}&{query}")
+        assert reply.status_code == 200
+        return reply.json()
+    sshd_failure = {
+        "tenant_id": "client-a",
+        "source_type": "syslog",
+        "message_raw": "Failed password for admin from 10.1.2.3",
+        "host": "fw-01",
+        "service": "sshd",
+    }
+    unrelated_event = {
+        "tenant_id": "client-a",
+        "source_type": "syslog",
+        "message_raw": "Different event in another family",
+        "host": "fw-01",
+        "service": "systemd",
+    }
+    seeded = client.post(
+        "/ingest/batch",
+        json={
+            "threshold": 2,
+            "db_path": str(db_path),
+            "events": [dict(sshd_failure), dict(sshd_failure), unrelated_event],
+        },
+    )
+    assert seeded.status_code == 200
+    everything = fetch_signatures("limit=10")
+    assert everything["ok"] is True
+    assert everything["count"] >= 2
+    assert len(everything["items"]) >= 2
+    head = everything["items"][0]
+    assert head["signature_key"]
+    assert head["event_family"]
+    assert head["normalized_pattern"]
+    assert isinstance(head["occurrence_count"], int)
+    assert head["occurrence_count"] >= 2
+    assert isinstance(head["raw_envelope_ids"], list)
+    assert head["raw_envelope_count"] == len(head["raw_envelope_ids"])
+    assert isinstance(head["recurrence"], dict)
+    assert head["recurrence"]["signature_id"] > 0
+    assert head["raw_envelope_count"] >= 1
+    # Filter by the event family of the first signature.
+    family = head["event_family"]
+    same_family = fetch_signatures(f"event_family={family}&limit=10")
+    assert same_family["count"] >= 1
+    assert all(entry["event_family"] == family for entry in same_family["items"])
+    # Filter by service.
+    sshd_only = fetch_signatures("service=sshd&limit=10")
+    assert sshd_only["count"] >= 1
+    assert all(entry["service"] == "sshd" for entry in sshd_only["items"])
+    # Filter by minimum occurrence count.
+    recurring = fetch_signatures("min_occurrence_count=2&limit=10")
+    assert recurring["count"] >= 1
+    assert all(entry["occurrence_count"] >= 2 for entry in recurring["items"])
+    # Cap the result at one row.
+    capped = fetch_signatures("limit=1")
+    assert capped["count"] == 1
+    assert len(capped["items"]) <= 1
+def test_canonical_events_endpoint_returns_normalized_fields_and_supports_filters(tmp_path: Path) -> None:
+    """Exercise ``/canonical_events``: normalized fields and the tenant, limit, host, source, service and severity filters.
+    Three events with a message are ingested for ``client-a`` and one event
+    with an empty message for ``client-b``.
+    """
+    client = TestClient(app)
+    db_path = tmp_path / "brainstem_canonical_events.sqlite3"
+    def fetch_events(query: str) -> dict:
+        """GET ``/canonical_events`` for this test's database, require 200, return the JSON body."""
+        reply = client.get(f"/canonical_events?db_path={db_path}&{query}")
+        assert reply.status_code == 200
+        return reply.json()
+    events = [
+        {
+            "tenant_id": "client-a",
+            "source_type": "syslog",
+            "message_raw": "IPsec SA rekey succeeded on host 10.1.2.3",
+            "host": "fw-01",
+            "service": "charon",
+            "severity": "info",
+        },
+        {
+            "tenant_id": "client-a",
+            "source_type": "syslog",
+            "message_raw": "Service restart detected on node 2",
+            "host": "fw-01",
+            "service": "systemd",
+            "severity": "warning",
+        },
+        {
+            "tenant_id": "client-a",
+            "source_type": "file",
+            "message_raw": "Configuration drift detected for node 3",
+            "host": "fw-02",
+            "service": "charon",
+            "severity": "critical",
+        },
+        {
+            "tenant_id": "client-b",
+            "source_type": "file",
+            "message_raw": "",
+            "host": "fw-02",
+            "service": "sshd",
+            "severity": "info",
+        },
+    ]
+    seeded = client.post(
+        "/ingest/batch",
+        json={"threshold": 1, "db_path": str(db_path), "events": events},
+    )
+    assert seeded.status_code == 200
+    for_tenant = fetch_events("tenant_id=client-a&limit=10")
+    assert for_tenant["ok"] is True
+    assert for_tenant["count"] == 3
+    assert for_tenant["items"][0]["tenant_id"] == "client-a"
+    wanted_normalized = {
+        normalize_message("IPsec SA rekey succeeded on host 10.1.2.3"),
+        normalize_message("Service restart detected on node 2"),
+        normalize_message("Configuration drift detected for node 3"),
+    }
+    head = for_tenant["items"][0]
+    assert head["raw_envelope_id"] > 0
+    assert head["tenant_id"]
+    assert head["source"] in {"syslog", "file"}
+    assert head["host"]
+    assert head["service"]
+    assert head["severity"] in {"info", "warning", "critical"}
+    assert head["message_raw"]
+    assert head["message_normalized"] == normalize_message(head["message_raw"])
+    assert {entry["message_normalized"] for entry in for_tenant["items"]} == wanted_normalized
+    # Cap the result at one row.
+    capped = fetch_events("tenant_id=client-a&limit=1")
+    assert capped["count"] == 1
+    assert len(capped["items"]) <= 1
+    # Filter by host.
+    on_fw01 = fetch_events("tenant_id=client-a&host=fw-01")
+    assert on_fw01["count"] == 2
+    assert all(entry["host"] == "fw-01" for entry in on_fw01["items"])
+    # Filter by source.
+    from_file = fetch_events("tenant_id=client-a&source=file")
+    assert from_file["count"] == 1
+    assert from_file["items"][0]["source"] == "file"
+    # Filter by service.
+    charon_only = fetch_events("tenant_id=client-a&service=charon")
+    assert charon_only["count"] == 2
+    assert all(entry["service"] == "charon" for entry in charon_only["items"])
+    # Filter by severity.
+    warnings_only = fetch_events("severity=warning&tenant_id=client-a")
+    assert warnings_only["count"] == 1
+    assert warnings_only["items"][0]["severity"] == "warning"
+def test_stats_after_successful_and_failed_ingest(tmp_path: Path) -> None:
+    """``/stats`` counts received, canonicalized and parse-failed items after a mixed batch."""
+    client = TestClient(app)
+    db_path = tmp_path / "brainstem.sqlite3"
+    sshd_failure = {
+        "tenant_id": "client-a",
+        "source_type": "syslog",
+        "message_raw": "Failed password for admin from 10.1.2.3",
+        "host": "fw-01",
+        "service": "sshd",
+    }
+    # Same envelope with an empty message.
+    empty_message = {**sshd_failure, "message_raw": ""}
+    ingest = client.post(
+        "/ingest/batch",
+        json={
+            "threshold": 2,
+            "db_path": str(db_path),
+            "events": [dict(sshd_failure), dict(sshd_failure), empty_message],
+        },
+    )
+    assert ingest.status_code == 200
+    ingest_body = ingest.json()
+    assert ingest_body["ok"] is True
+    assert ingest_body["event_count"] == 2
+    assert ingest_body["parse_failed"] == 1
+    stats = client.get(f"/stats?db_path={db_path}")
+    assert stats.status_code == 200
+    totals = stats.json()
+    assert totals["ok"] is True
+    assert totals["received"] == 3
+    assert totals["canonicalized"] == 2
+    assert totals["parse_failed"] == 1
+    assert totals["candidates_generated"] >= 1
 def test_healthz_is_ready() -> None:
     """``GET /healthz`` answers 200 with ``ok`` set to ``True``."""
     health = TestClient(app).get("/healthz")
     assert health.status_code == 200
     assert health.json()["ok"] is True
+def test_healthz_reports_api_token_status(monkeypatch: pytest.MonkeyPatch) -> None:
+    """``/healthz`` reports whether ``BRAINSTEM_API_TOKEN`` is set, both at top level and under ``runtime``."""
+    client = TestClient(app)
+    def healthz_body() -> dict:
+        """GET ``/healthz``, require 200, return the JSON body."""
+        reply = client.get("/healthz")
+        assert reply.status_code == 200
+        return reply.json()
+    # Variable removed from the environment.
+    monkeypatch.delenv("BRAINSTEM_API_TOKEN", raising=False)
+    without_token = healthz_body()
+    assert without_token["api_token_enabled"] is False
+    assert without_token["runtime"]["auth_state"]["api_token_configured"] is False
+    # Variable set for the second request.
+    monkeypatch.setenv("BRAINSTEM_API_TOKEN", "local-token")
+    with_token = healthz_body()
+    assert with_token["api_token_enabled"] is True
+    assert with_token["runtime"]["auth_state"]["api_token_configured"] is True
+def test_healthz_reports_runtime_summary() -> None:
+    """``/healthz`` carries a ``runtime`` section with version, capability flags, defaults and limits."""
+    health = TestClient(app).get("/healthz")
+    assert health.status_code == 200
+    summary = health.json()["runtime"]
+    assert summary["version"] == __version__
+    assert summary["capability_flags"]["ingest_endpoints"]["single_event"] is True
+    assert summary["defaults"]["interesting_limit"] == 5
+    assert summary["limits"]["replay_raw_max_ids"] == 32
+def test_status_endpoint_reports_operator_summary() -> None:
+    """``/status`` returns ``ok``, a status string, the token flag and the runtime summary."""
+    status = TestClient(app).get("/status")
+    assert status.status_code == 200
+    body = status.json()
+    assert body["ok"] is True
+    assert body["status"] == "ok"
+    # The top-level flag must agree with the nested auth state.
+    assert body["api_token_enabled"] == body["runtime"]["auth_state"]["api_token_configured"]
+    assert body["runtime"]["capability_flags"]["inspection_endpoints"]["raw_envelopes"] is True
+    assert body["runtime"]["runtime"]["api_token_env"] == "BRAINSTEM_API_TOKEN"
+def test_status_and_healthz_are_coherent() -> None:
+    """``/status`` and ``/healthz`` both answer 200 and return identical JSON bodies."""
+    client = TestClient(app)
+    replies = {route: client.get(route) for route in ("/status", "/healthz")}
+    assert replies["/status"].status_code == 200
+    assert replies["/healthz"].status_code == 200
+    assert replies["/status"].json() == replies["/healthz"].json()
+def test_runtime_endpoint_reports_config_object(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
+    """``/runtime`` exposes a config object that reflects ``BRAINSTEM_DB_PATH`` and the listener defaults."""
+    db_override = tmp_path / "runtime.sqlite3"
+    # Set the override before the client is created, as the original test does.
+    monkeypatch.setenv("BRAINSTEM_DB_PATH", str(db_override))
+    reply = TestClient(app).get("/runtime")
+    assert reply.status_code == 200
+    summary = reply.json()["runtime"]
+    settings = summary["runtime"]["config"]
+    listener = settings["listener"]
+    assert settings["api_token_env_var"] == "BRAINSTEM_API_TOKEN"
+    assert listener["syslog_host"] == "127.0.0.1"
+    assert listener["syslog_port"] == 5514
+    assert listener["syslog_source_path"] == "/dev/udp"
+    assert settings["defaults"]["ingest_threshold"] == 2
+    assert settings["db"]["default_path"] == str(db_override)
+    assert summary["defaults"] == settings["defaults"]
+def test_runtime_endpoint_provides_same_summary(monkeypatch: pytest.MonkeyPatch) -> None:
+    """``/runtime`` reports token configuration, the token variable name and the replay limit."""
+    client = TestClient(app)
+    monkeypatch.setenv("BRAINSTEM_API_TOKEN", "runtime-token")
+    reply = client.get("/runtime")
+    assert reply.status_code == 200
+    body = reply.json()
+    assert body["ok"] is True
+    summary = body["runtime"]
+    assert summary["auth_state"]["api_token_configured"] is True
+    assert summary["runtime"]["api_token_env"] == "BRAINSTEM_API_TOKEN"
+    assert summary["limits"]["replay_raw_max_ids"] == 32
+def test_unprotected_routes_remain_open_when_api_token_not_configured(
+    monkeypatch: pytest.MonkeyPatch,
+    tmp_path: Path,
+) -> None:
+    """Without ``BRAINSTEM_API_TOKEN``, ingest and ``/interesting`` accept requests that carry no credentials."""
+    monkeypatch.delenv("BRAINSTEM_API_TOKEN", raising=False)
+    client = TestClient(app)
+    db_path = tmp_path / "open.sqlite3"
+    ingest_response = client.post(
+        f"/ingest/event?threshold=1&db_path={db_path}",
+        json={
+            "tenant_id": "client-a",
+            "source_type": "syslog",
+            "message_raw": "Service restarted",
+            "host": "fw-01",
+            "service": "systemd",
+        },
+    )
+    assert ingest_response.status_code == 200
+    # This request goes to /interesting, so the variable is named for that
+    # route (it was previously called healthz_response).
+    interesting_response = client.get(f"/interesting?db_path={db_path}&limit=10")
+    assert interesting_response.status_code == 200
+    assert interesting_response.json()["ok"] is True
+def test_api_token_is_required_for_write_and_inspection_routes_when_enabled(
+    monkeypatch: pytest.MonkeyPatch,
+    tmp_path: Path,
+) -> None:
+    """With ``BRAINSTEM_API_TOKEN`` set, ingest and ``/interesting`` answer 401 unless the right token is sent."""
+    monkeypatch.setenv("BRAINSTEM_API_TOKEN", "valid-token")
+    client = TestClient(app)
+    db_path = tmp_path / "auth.sqlite3"
+    ingest_url = f"/ingest/event?threshold=1&db_path={db_path}"
+    interesting_url = f"/interesting?db_path={db_path}&limit=10"
+    restart_event = {
+        "tenant_id": "client-a",
+        "source_type": "syslog",
+        "message_raw": "Service restarted",
+        "host": "fw-01",
+        "service": "systemd",
+    }
+    # Write with no credentials at all.
+    anonymous_write = client.post(ingest_url, json=restart_event)
+    assert anonymous_write.status_code == 401
+    # Read with a wrong X-API-Token value.
+    bad_token_read = client.get(interesting_url, headers={"X-API-Token": "wrong"})
+    assert bad_token_read.status_code == 401
+    # Write with the token sent as a bearer credential.
+    bearer_write = client.post(
+        ingest_url,
+        headers={"Authorization": "Bearer valid-token"},
+        json=restart_event,
+    )
+    assert bearer_write.status_code == 200
+    # Read with the token sent in the X-API-Token header.
+    header_read = client.get(interesting_url, headers={"X-API-Token": "valid-token"})
+    assert header_read.status_code == 200
+    assert header_read.json()["ok"] is True
+def test_failures_endpoint_lists_recent_parse_failures(tmp_path: Path) -> None:
+    """``/failures`` lists the one item of a mixed batch that failed to parse."""
+    client = TestClient(app)
+    db_path = tmp_path / "brainstem.sqlite3"
+    ingest_response = client.post(
+        "/ingest/batch",
+        json={
+            "threshold": 2,
+            "db_path": str(db_path),
+            "events": [
+                {
+                    "tenant_id": "client-a",
+                    "source_type": "syslog",
+                    "message_raw": "",
+                    "host": "fw-01",
+                    "service": "sshd",
+                },
+                {
+                    "tenant_id": "client-a",
+                    "source_type": "syslog",
+                    "message_raw": "VPN tunnel dropped and recovered",
+                    "host": "fw-01",
+                    "service": "charon",
+                },
+            ],
+        },
+    )
+    # Fail here if seeding did not succeed, instead of surfacing later as a
+    # confusing count mismatch on /failures.
+    assert ingest_response.status_code == 200
+    response = client.get(f"/failures?db_path={db_path}&limit=10")
+    assert response.status_code == 200
+    payload = response.json()
+    assert payload["ok"] is True
+    assert payload["count"] == 1
+    assert payload["items"][0]["canonicalization_status"] == "parse_failed"
+def test_failures_endpoint_filters_by_status_and_fetches_single_record(tmp_path: Path) -> None:
+    """``/failures`` filters by status, serves a single record by id, and answers 422 for an unknown status."""
+    client = TestClient(app)
+    db_path = tmp_path / "brainstem.sqlite3"
+    db_file = str(db_path)
+    init_db(db_file)
+    # Seed two envelopes directly through the storage layer.
+    seeds = (
+        ("2026-03-22T00:00:01Z", "first"),
+        ("2026-03-22T00:00:02Z", "second"),
+    )
+    envelopes = [
+        RawInputEnvelope(
+            tenant_id="client-a",
+            source_type="syslog",
+            timestamp=stamp,
+            message_raw=text,
+            host="fw-01",
+            service="sshd",
+        )
+        for stamp, text in seeds
+    ]
+    raw_ids = store_raw_envelopes(envelopes, db_path=db_file)
+    set_raw_envelope_status(raw_ids[0], "parse_failed", db_path=db_file, failure_reason="bad parse")
+    set_raw_envelope_status(raw_ids[1], "unsupported", db_path=db_file, failure_reason="unsupported source")
+    def failures_with_status(status: str) -> dict:
+        """GET ``/failures`` filtered by *status*, require 200, return the JSON body."""
+        reply = client.get(f"/failures?db_path={db_path}&status={status}&limit=10")
+        assert reply.status_code == 200
+        return reply.json()
+    parse_failures = failures_with_status("parse_failed")
+    assert parse_failures["count"] == 1
+    assert parse_failures["items"][0]["id"] == raw_ids[0]
+    unsupported_rows = failures_with_status("unsupported")
+    assert unsupported_rows["count"] == 1
+    assert unsupported_rows["items"][0]["id"] == raw_ids[1]
+    # Fetch one record by id.
+    detail = client.get(f"/failures/{raw_ids[1]}?db_path={db_path}")
+    assert detail.status_code == 200
+    detail_body = detail.json()
+    assert detail_body["ok"] is True
+    assert detail_body["item"]["id"] == raw_ids[1]
+    assert detail_body["item"]["failure_reason"] == "unsupported source"
+    # Unknown status value.
+    rejected = client.get(f"/failures?db_path={db_path}&status=bogus")
+    assert rejected.status_code == 422
+def test_raw_envelopes_endpoint_supports_status_and_source_filters(tmp_path: Path) -> None:
+    """``/raw_envelopes`` filters by status, source type, source id, source path and combinations of these.
+    Four envelopes are stored directly; the unfiltered listing is expected
+    to return their ids in reverse of the order they were stored.
+    """
+    client = TestClient(app)
+    db_path = tmp_path / "brainstem.sqlite3"
+    db_file = str(db_path)
+    init_db(db_file)
+    # Columns: tenant_id, source_type, source_id, source_path, timestamp, message_raw
+    seed_rows = (
+        ("tenant-a", "syslog", "fw-01", "/var/log/syslog", "2026-03-22T00:00:01Z", "VPN tunnel recovered"),
+        ("tenant-a", "syslog", "fw-01", "/var/log/auth.log", "2026-03-22T00:00:02Z", ""),
+        ("tenant-b", "file", "agent-01", "/tmp/agent.log", "2026-03-22T00:00:03Z", "backup finished"),
+        ("tenant-a", "file", "fw-01", "/var/log/syslog", "2026-03-22T00:00:04Z", "disk pressure warning"),
+    )
+    envelopes = [
+        RawInputEnvelope(
+            tenant_id=tenant,
+            source_type=kind,
+            source_id=origin,
+            source_path=path,
+            timestamp=stamp,
+            message_raw=text,
+        )
+        for tenant, kind, origin, path, stamp, text in seed_rows
+    ]
+    raw_ids = store_raw_envelopes(envelopes, db_path=db_file)
+    set_raw_envelope_status(raw_ids[1], "parse_failed", db_path=db_file, failure_reason="seeded parse failure")
+    set_raw_envelope_status(raw_ids[2], "unsupported", db_path=db_file, failure_reason="seeded unsupported")
+    def fetch_envelopes(query: str) -> dict:
+        """GET ``/raw_envelopes`` for this test's database, require 200, return the JSON body."""
+        reply = client.get(f"/raw_envelopes?db_path={db_path}&{query}")
+        assert reply.status_code == 200
+        return reply.json()
+    def ids_of(body: dict) -> list:
+        """Return the ``id`` of every item in *body*, in listing order."""
+        return [entry["id"] for entry in body["items"]]
+    unfiltered = fetch_envelopes("limit=10")
+    assert unfiltered["ok"] is True
+    assert unfiltered["count"] == 4
+    assert ids_of(unfiltered) == [raw_ids[3], raw_ids[2], raw_ids[1], raw_ids[0]]
+    # Filter by status.
+    failed = fetch_envelopes("status=parse_failed&limit=10")
+    assert failed["count"] == 1
+    assert failed["items"][0]["id"] == raw_ids[1]
+    assert failed["items"][0]["canonicalization_status"] == "parse_failed"
+    # Filter by source type.
+    assert ids_of(fetch_envelopes("source_type=syslog&limit=10")) == [raw_ids[1], raw_ids[0]]
+    # Filter by source id.
+    assert ids_of(fetch_envelopes("source_id=fw-01&limit=10")) == [raw_ids[3], raw_ids[1], raw_ids[0]]
+    # Filter by source path.
+    assert ids_of(fetch_envelopes("source_path=/var/log/syslog&limit=10")) == [raw_ids[3], raw_ids[0]]
+    # Tenant, source type and source path combined.
+    combined = fetch_envelopes("tenant_id=tenant-a&source_type=file&source_path=/var/log/syslog&limit=10")
+    assert ids_of(combined) == [raw_ids[3]]
+def test_raw_envelopes_endpoint_rejects_invalid_status_filter(tmp_path: Path) -> None:
+    """An unknown ``status`` value on ``/raw_envelopes`` is answered with HTTP 422."""
+    db_path = tmp_path / "brainstem.sqlite3"
+    rejected = TestClient(app).get(f"/raw_envelopes?db_path={db_path}&status=bogus")
+    assert rejected.status_code == 422
+def test_sources_endpoint_summarizes_ingest_dimensions(tmp_path: Path) -> None:
+    """``/sources`` reports per-value counts for ``source_type`` and ``source_name`` after a batch ingest."""
+    client = TestClient(app)
+    db_path = tmp_path / "brainstem.sqlite3"
+    firewall_event = {
+        "tenant_id": "client-a",
+        "source_type": "syslog",
+        "source_id": "fw-01",
+        "source_name": "edge-fw-01",
+        "source_path": "/var/log/syslog",
+        "message_raw": "Failed password for admin from 10.1.2.3",
+        "host": "fw-01",
+        "service": "sshd",
+        "severity": "info",
+    }
+    monitor_event = {
+        "tenant_id": "client-a",
+        "source_type": "logicmonitor",
+        "source_id": "lm-01",
+        "source_name": "edge-lm-01",
+        "source_path": "/alerts",
+        "message_raw": "Disk space low",
+        "host": "lm-01",
+        "service": "logicmonitor",
+        "severity": "warning",
+    }
+    # The firewall event is sent twice, the monitor event once.
+    seeded = client.post(
+        "/ingest/batch",
+        json={
+            "threshold": 1,
+            "db_path": str(db_path),
+            "events": [dict(firewall_event), dict(firewall_event), monitor_event],
+        },
+    )
+    assert seeded.status_code == 200
+    summary = client.get(f"/sources?db_path={db_path}&limit=10")
+    assert summary.status_code == 200
+    body = summary.json()
+    assert body["ok"] is True
+    assert body["items"]["source_type"] == [
+        {"value": "syslog", "count": 2},
+        {"value": "logicmonitor", "count": 1},
+    ]
+    counts_by_name = {entry["value"]: entry["count"] for entry in body["items"]["source_name"]}
+    assert counts_by_name == {
+        "edge-fw-01": 2,
+        "edge-lm-01": 1,
+    }
+def test_sources_status_endpoint_returns_source_health_like_summary(tmp_path: Path) -> None:
+    client = TestClient(app)
+    db_path = tmp_path / "brainstem.sqlite3"
+    ingest_response = client.post(
+        "/ingest/batch",
+        json={
+            "threshold": 1,
+            "db_path": str(db_path),
+            "events": [
+                {
+                    "tenant_id": "client-a",
+                    "source_type": "syslog",
+                    "source_id": "fw-01",
+                    "source_name": "edge-fw-01",
+                    "source_path": "/var/log/syslog",
+                    "message_raw": "Service restarted",
+                    "host": "fw-01",
+                    "service": "systemd",
+                },
+                {
+                    "tenant_id": "client-a",
+                    "source_type": "syslog",
+                    "source_id": "fw-01",
+                    "source_name": "edge-fw-01",
+                    "source_path": "/var/log/syslog",
+                    "message_raw": "",
+                    "host": "fw-01",
+                    "service": "systemd",
+                },
+                {
+                    "tenant_id": "client-a",
+                    "source_type": "logicmonitor",
+                    "source_id": "lm-01",
+                    "source_name": "edge-lm-01",
+                    "source_path": "/alerts",
+                    "message_raw": "Disk space low",
+                    "host": "lm-01",
+                    "service": "logicmonitor",
+                },
+            ],
+        },
+    )
+    assert ingest_response.status_code == 200
+    response = client.get(f"/sources/status?db_path={db_path}&limit=10")
+    assert response.status_code == 200
+    payload = response.json()
+    assert payload["ok"] is True
+    assert payload["count"] == 2
+    fw01 = next(item for item in payload["items"] if item["source_type"] == "syslog" and item["source_id"] == "fw-01")
+    assert fw01["raw_count"] == 2
+    assert fw01["canonicalized_count"] == 1
+    assert fw01["parse_failed_count"] == 1
+    assert fw01["unsupported_count"] == 0
+    assert fw01["source_path"] == "/var/log/syslog"
+    assert fw01["first_seen_at"] <= fw01["last_seen_at"]
+    filtered = client.get(
+        f"/sources/status?db_path={db_path}&source_type=syslog&source_id=fw-01&source_path=/var/log/syslog&limit=10"
+    )
+    assert filtered.status_code == 200
+    filtered_payload = filtered.json()
+    assert filtered_payload["count"] == 1
+    assert filtered_payload["items"][0]["source_id"] == "fw-01"
+    assert filtered_payload["items"][0]["source_path"] == "/var/log/syslog"
+def test_ingest_recent_endpoint_returns_recent_intake_and_allows_status_filter(tmp_path: Path) -> None:
+    client = TestClient(app)
+    db_path = tmp_path / "brainstem.sqlite3"
+    client.post(
+        "/ingest/batch",
+        json={
+            "threshold": 1,
+            "db_path": str(db_path),
+            "events": [
+                {
+                    "tenant_id": "client-a",
+                    "source_type": "syslog",
+                    "source_id": "fw-01",
+                    "source_name": "edge-fw-01",
+                    "message_raw": "service restarted",
+                    "host": "fw-01",
+                    "service": "systemd",
+                },
+                {
+                    "tenant_id": "client-a",
+                    "source_type": "syslog",
+                    "source_id": "fw-01",
+                    "source_name": "edge-fw-01",
+                    "message_raw": "",
+                    "host": "fw-01",
+                    "service": "systemd",
+                },
+            ],
+        },
+    )
+    response = client.get(f"/ingest/recent?db_path={db_path}&limit=10")
+    assert response.status_code == 200
+    payload = response.json()
+    assert payload["ok"] is True
+    assert payload["count"] == 2
+    assert len({item["canonicalization_status"] for item in payload["items"]}) == 2
+    failed = client.get(f"/ingest/recent?db_path={db_path}&status=parse_failed&limit=10")
+    assert failed.status_code == 200
+    failed_payload = failed.json()
+    assert failed_payload["count"] == 1
+    assert failed_payload["items"][0]["canonicalization_status"] == "parse_failed"
+def test_replay_raw_endpoint_replays_parse_failed_and_received_records(tmp_path: Path) -> None:
+    client = TestClient(app)
+    db_path = tmp_path / "brainstem.sqlite3"
+    init_db(str(db_path))
+    raw_envelope_ids = store_raw_envelopes(
+        [
+            RawInputEnvelope(
+                tenant_id="client-a",
+                source_type="syslog",
+                timestamp="2026-03-22T00:00:01Z",
+                message_raw="can canonicalize first",
+                host="fw-01",
+                service="sshd",
+            ),
+            RawInputEnvelope(
+                tenant_id="client-a",
+                source_type="syslog",
+                timestamp="2026-03-22T00:00:02Z",
+                message_raw="can canonicalize second",
+                host="fw-01",
+                service="sshd",
+            ),
+            RawInputEnvelope(
+                tenant_id="client-a",
+                source_type="syslog",
+                timestamp="2026-03-22T00:00:03Z",
+                message_raw="",
+                host="fw-01",
+                service="sshd",
+            ),
+        ],
+        db_path=str(db_path),
+    )
+    set_raw_envelope_status(raw_envelope_ids[0], "parse_failed", db_path=str(db_path), failure_reason="seeded parse failure")
+    set_raw_envelope_status(raw_envelope_ids[2], "parse_failed", db_path=str(db_path), failure_reason="seeded parse failure")
+    response = client.post(
+        "/replay/raw",
+        json={
+            "db_path": str(db_path),
+            "raw_envelope_ids": raw_envelope_ids,
+            "threshold": 1,
+        },
+    )
+    assert response.status_code == 200
+    payload = response.json()
+    assert payload["ok"] is True
+    assert payload["attempted_raw_envelope_ids"] == raw_envelope_ids
+    assert payload["event_count"] == 2
+    assert payload["parse_failed"] == 1
+    parse_failed_row = get_raw_envelope_by_id(raw_envelope_ids[0], db_path=str(db_path))
+    assert parse_failed_row is not None
+    assert parse_failed_row["canonicalization_status"] == "canonicalized"
+    received_row = get_raw_envelope_by_id(raw_envelope_ids[1], db_path=str(db_path))
+    assert received_row is not None
+    assert received_row["canonicalization_status"] == "canonicalized"
+    still_failed_row = get_raw_envelope_by_id(raw_envelope_ids[2], db_path=str(db_path))
+    assert still_failed_row is not None
+    assert still_failed_row["canonicalization_status"] == "parse_failed"
+    assert still_failed_row["failure_reason"] == "message_raw is empty and cannot be canonicalized"
+def test_replay_raw_endpoint_skips_non_replayable_statuses_without_force(tmp_path: Path) -> None:
+    """Check that ``POST /replay/raw`` skips a canonicalized envelope unless forced.
+
+    Stores one raw envelope, marks it ``canonicalized``, and replays it twice:
+    once with default options (expected to be skipped as ``not_replayable``)
+    and once with ``force`` plus ``allowed_statuses=["canonicalized"]``
+    (expected to be attempted and to yield one event).
+    """
+    client = TestClient(app)
+    db_path = tmp_path / "brainstem.sqlite3"
+    init_db(str(db_path))
+    # Exactly one id is expected back; the tuple unpacking enforces that.
+    (canonicalized_id,) = store_raw_envelopes(
+        [
+            RawInputEnvelope(
+                tenant_id="client-a",
+                source_type="syslog",
+                timestamp="2026-03-22T00:00:01Z",
+                message_raw="already canonicalized",
+                host="fw-01",
+                service="sshd",
+            )
+        ],
+        db_path=str(db_path),
+    )
+    set_raw_envelope_status(canonicalized_id, "canonicalized", db_path=str(db_path))
+    # First attempt: no force flag, so the canonicalized envelope should be skipped.
+    skip = client.post(
+        "/replay/raw",
+        json={
+            "db_path": str(db_path),
+            "raw_envelope_ids": [canonicalized_id],
+            "threshold": 1,
+        },
+    )
+    assert skip.status_code == 200
+    skipped_payload = skip.json()
+    assert skipped_payload["attempted_raw_envelope_ids"] == []
+    assert skipped_payload["event_count"] == 0
+    assert skipped_payload["skipped"][0]["reason"] == "not_replayable"
+    # Second attempt: force the replay and explicitly allow the canonicalized status.
+    force = client.post(
+        "/replay/raw",
+        json={
+            "db_path": str(db_path),
+            "raw_envelope_ids": [canonicalized_id],
+            "threshold": 1,
+            "force": True,
+            "allowed_statuses": ["canonicalized"],
+        },
+    )
+    assert force.status_code == 200
+    force_payload = force.json()
+    assert force_payload["attempted_raw_envelope_ids"] == [canonicalized_id]
+    assert force_payload["event_count"] == 1