npm - @simbimbo/brainstem - Versions diffs - 0.0.3 → 0.0.4 - Mend

@simbimbo/brainstem 0.0.3 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28)

package/CHANGELOG.md +13 -0
package/README.md +25 -0
package/brainstem/__init__.py +1 -1
package/brainstem/adapters.py +120 -0
package/brainstem/api.py +391 -57
package/brainstem/config.py +70 -0
package/brainstem/ingest.py +411 -33
package/brainstem/interesting.py +56 -1
package/brainstem/listener.py +175 -0
package/brainstem/models.py +1 -0
package/brainstem/recurrence.py +38 -1
package/brainstem/source_drivers.py +150 -0
package/brainstem/storage.py +305 -12
package/docs/README.md +94 -0
package/docs/adapters.md +97 -401
package/docs/api.md +223 -278
package/package.json +1 -1
package/pyproject.toml +1 -1
package/tests/test_adapters.py +94 -0
package/tests/test_api.py +726 -0
package/tests/test_canonicalization.py +8 -0
package/tests/test_config.py +24 -0
package/tests/test_file_ingest.py +77 -0
package/tests/test_interesting.py +10 -0
package/tests/test_listener.py +253 -0
package/tests/test_recurrence.py +2 -0
package/tests/test_source_drivers.py +95 -0
package/tests/test_storage.py +101 -1

package/brainstem/storage.py CHANGED

@@ -6,16 +6,54 @@ from pathlib import Path
 from typing import Any, Iterable, List
 from .models import Candidate, Event, RawInputEnvelope, Signature
+from .config import resolve_default_db_path
 def default_db_path() -> Path:
-    return Path('.brainstem-state') / 'brainstem.sqlite3'
+    return Path(resolve_default_db_path())
 RAW_ENVELOPE_STATUSES = ("received", "canonicalized", "parse_failed", "unsupported")
 RAW_ENVELOPE_FAILURE_STATUSES = ("parse_failed", "unsupported")
+def _coerce_raw_envelope_id(value: Any) -> int | None:
+    if isinstance(value, bool):
+        return None
+    if isinstance(value, int):
+        return value
+    if isinstance(value, str):
+        value = value.strip()
+        if not value.isdigit():
+            return None
+        return int(value)
+    return None
+def _coerce_raw_envelope_id_list(raw_value: Any) -> List[int]:
+    if raw_value is None:
+        return []
+    if isinstance(raw_value, list):
+        ids = [_coerce_raw_envelope_id(item) for item in raw_value]
+        return [item for item in ids if item is not None]
+    if isinstance(raw_value, tuple):
+        ids = [_coerce_raw_envelope_id(item) for item in raw_value]
+        return [item for item in ids if item is not None]
+    return []
+def extract_source_raw_envelope_ids(metadata_json: str | None) -> List[int]:
+    if not metadata_json:
+        return []
+    try:
+        metadata = json.loads(metadata_json)
+    except json.JSONDecodeError:
+        return []
+    if not isinstance(metadata, dict):
+        return []
+    return _coerce_raw_envelope_id_list(metadata.get("source_raw_envelope_ids"))
 def _validate_canonicalization_status(status: str) -> None:
     if status not in RAW_ENVELOPE_STATUSES:
         raise ValueError(f"unsupported canonicalization_status: {status}")
@@ -176,17 +214,62 @@ def get_raw_envelope_by_id(raw_envelope_id: int, db_path: str | None = None) ->
         conn.close()
+def get_raw_envelopes_by_ids(
+    raw_envelope_ids: Iterable[int | str | object],
+    db_path: str | None = None,
+) -> List[sqlite3.Row]:
+    ids = list(dict.fromkeys(_coerce_raw_envelope_id_list(raw_envelope_ids)))
+    if not ids:
+        return []
+    conn = connect(db_path)
+    try:
+        placeholders = ",".join(["?"] * len(ids))
+        return conn.execute(
+            f"SELECT * FROM raw_envelopes WHERE id IN ({placeholders})",
+            ids,
+        ).fetchall()
+    finally:
+        conn.close()
 def _recent_raw_envelopes_query(
     canonicalization_status: str | None,
     *,
     failures_only: bool,
-) -> tuple[str, tuple[str, ...], bool]:
+    tenant_id: str | None = None,
+    source_type: str | None = None,
+    source_id: str | None = None,
+    source_path: str | None = None,
+) -> tuple[str, tuple[str, ...]]:
+    where_clauses: list[str] = []
+    args: list[str] = []
     if canonicalization_status is None and failures_only:
-        return "WHERE canonicalization_status IN (?, ?)", RAW_ENVELOPE_FAILURE_STATUSES, True
-    if canonicalization_status is None and not failures_only:
-        return "", (), False
-    _validate_canonicalization_status(canonicalization_status)
-    return "WHERE canonicalization_status = ?", (canonicalization_status,), False
+        where_clauses.append("canonicalization_status IN (?, ?)")
+        args.extend(RAW_ENVELOPE_FAILURE_STATUSES)
+    elif canonicalization_status is None and not failures_only:
+        pass
+    elif canonicalization_status is not None:
+        _validate_canonicalization_status(canonicalization_status)
+        where_clauses.append("canonicalization_status = ?")
+        args.append(canonicalization_status)
+    if tenant_id is not None:
+        where_clauses.append("tenant_id = ?")
+        args.append(tenant_id)
+    if source_type is not None:
+        where_clauses.append("source_type = ?")
+        args.append(source_type)
+    if source_id is not None:
+        where_clauses.append("source_id = ?")
+        args.append(source_id)
+    if source_path is not None:
+        where_clauses.append("source_path = ?")
+        args.append(source_path)
+    where_clause = f"WHERE {' AND '.join(where_clauses)}" if where_clauses else ""
+    return where_clause, tuple(args)
 def list_recent_raw_envelopes(
@@ -195,10 +278,21 @@ def list_recent_raw_envelopes(
     limit: int = 20,
     *,
     failures_only: bool = False,
+    tenant_id: str | None = None,
+    source_type: str | None = None,
+    source_id: str | None = None,
+    source_path: str | None = None,
 ) -> List[sqlite3.Row]:
     conn = connect(db_path)
     try:
-        where_clause, status_args, _ = _recent_raw_envelopes_query(status, failures_only=failures_only)
+        where_clause, status_args = _recent_raw_envelopes_query(
+            status,
+            failures_only=failures_only,
+            tenant_id=tenant_id,
+            source_type=source_type,
+            source_id=source_id,
+            source_path=source_path,
+        )
         prefix = f"{where_clause} " if where_clause else ""
         rows = conn.execute(
             f"""
@@ -214,6 +308,51 @@ def list_recent_raw_envelopes(
         conn.close()
+def list_canonical_events(
+    db_path: str | None = None,
+    limit: int = 20,
+    *,
+    tenant_id: str | None = None,
+    source_type: str | None = None,
+    host: str | None = None,
+    service: str | None = None,
+    severity: str | None = None,
+) -> List[sqlite3.Row]:
+    conn = connect(db_path)
+    try:
+        where_clauses = ["canonicalization_status = ?"]
+        args: List[str] = ["canonicalized"]
+        if tenant_id is not None:
+            where_clauses.append("tenant_id = ?")
+            args.append(tenant_id)
+        if source_type is not None:
+            where_clauses.append("source_type = ?")
+            args.append(source_type)
+        if host is not None:
+            where_clauses.append("host = ?")
+            args.append(host)
+        if service is not None:
+            where_clauses.append("service = ?")
+            args.append(service)
+        if severity is not None:
+            where_clauses.append("severity = ?")
+            args.append(severity)
+        where_clause = " WHERE " + " AND ".join(where_clauses)
+        return conn.execute(
+            f"""
+            SELECT * FROM raw_envelopes
+            {where_clause}
+            ORDER BY id DESC
+            LIMIT ?
+            """,
+            (*args, max(1, limit)),
+        ).fetchall()
+    finally:
+        conn.close()
 def list_recent_failed_raw_envelopes(
     db_path: str | None = None,
     *,
@@ -309,6 +448,72 @@ def get_source_dimension_summaries(
         conn.close()
+def get_source_status_summaries(
+    db_path: str | None = None,
+    *,
+    limit: int = 20,
+    tenant_id: str | None = None,
+    source_type: str | None = None,
+    source_id: str | None = None,
+    source_path: str | None = None,
+) -> List[dict[str, Any]]:
+    init_db(db_path)
+    conn = connect(db_path)
+    try:
+        query = """
+            SELECT
+                tenant_id,
+                source_type,
+                source_id,
+                source_path,
+                COUNT(*) AS raw_count,
+                SUM(CASE WHEN canonicalization_status = 'canonicalized' THEN 1 ELSE 0 END) AS canonicalized_count,
+                SUM(CASE WHEN canonicalization_status = 'parse_failed' THEN 1 ELSE 0 END) AS parse_failed_count,
+                SUM(CASE WHEN canonicalization_status = 'unsupported' THEN 1 ELSE 0 END) AS unsupported_count,
+                MIN(timestamp) AS first_seen_at,
+                MAX(timestamp) AS last_seen_at
+            FROM raw_envelopes
+            WHERE 1 = 1
+        """
+        args: list[Any] = []
+        if tenant_id is not None:
+            query += " AND tenant_id = ?"
+            args.append(tenant_id)
+        if source_type is not None:
+            query += " AND source_type = ?"
+            args.append(source_type)
+        if source_id is not None:
+            query += " AND source_id = ?"
+            args.append(source_id)
+        if source_path is not None:
+            query += " AND source_path = ?"
+            args.append(source_path)
+        query += """
+            GROUP BY tenant_id, source_type, source_id, source_path
+            ORDER BY last_seen_at DESC, raw_count DESC
+            LIMIT ?
+        """
+        args.append(max(1, limit))
+        return [
+            {
+                "tenant_id": row["tenant_id"],
+                "source_type": row["source_type"] or "",
+                "source_id": row["source_id"] or "",
+                "source_path": row["source_path"] or "",
+                "raw_count": int(row["raw_count"]),
+                "canonicalized_count": int(row["canonicalized_count"] or 0),
+                "parse_failed_count": int(row["parse_failed_count"] or 0),
+                "unsupported_count": int(row["unsupported_count"] or 0),
+                "first_seen_at": row["first_seen_at"],
+                "last_seen_at": row["last_seen_at"],
+            }
+            for row in conn.execute(query, args).fetchall()
+        ]
+    finally:
+        conn.close()
 def _get_source_dimension_summaries_from_conn(
     conn: sqlite3.Connection,
     *,
@@ -358,6 +563,27 @@ def store_signatures(signatures: Iterable[Signature], db_path: str | None = None
     count = 0
     try:
         for signature in signatures:
+            row = conn.execute(
+                "SELECT metadata_json FROM signatures WHERE signature_key = ?",
+                (signature.signature_key,),
+            ).fetchone()
+            metadata = dict(signature.metadata)
+            raw_ids = _coerce_raw_envelope_id_list(metadata.get("source_raw_envelope_ids"))
+            if not raw_ids:
+                raw_id = _coerce_raw_envelope_id(metadata.get("source_raw_envelope_id"))
+                if raw_id is not None:
+                    raw_ids = [raw_id]
+            metadata.pop("source_raw_envelope_id", None)
+            if row is not None:
+                existing_metadata = json.loads(row["metadata_json"] or "{}")
+                if not isinstance(existing_metadata, dict):
+                    existing_metadata = {}
+                existing_raw_ids = _coerce_raw_envelope_id_list(existing_metadata.get("source_raw_envelope_ids"))
+                metadata = dict(existing_metadata) | dict(metadata)
+                metadata["source_raw_envelope_ids"] = sorted(set(existing_raw_ids + raw_ids))
             conn.execute(
                 '''
                 INSERT INTO signatures (
@@ -373,7 +599,7 @@ def store_signatures(signatures: Iterable[Signature], db_path: str | None = None
                     signature.event_family,
                     signature.normalized_pattern,
                     signature.service,
-                    json.dumps(signature.metadata, ensure_ascii=False),
+                    json.dumps(metadata, ensure_ascii=False),
                 ),
             )
             count += 1
@@ -416,12 +642,79 @@ def store_candidates(candidates: Iterable[Candidate], db_path: str | None = None
         conn.close()
-def list_candidates(db_path: str | None = None, limit: int = 20) -> List[sqlite3.Row]:
+def list_candidates(
+    db_path: str | None = None,
+    limit: int = 20,
+    *,
+    candidate_type: str | None = None,
+    decision_band: str | None = None,
+    min_score_total: float | None = None,
+) -> List[sqlite3.Row]:
     conn = connect(db_path)
     try:
+        where_clauses: List[str] = []
+        args: List[Any] = []
+        if candidate_type is not None:
+            where_clauses.append("candidate_type = ?")
+            args.append(candidate_type)
+        if decision_band is not None:
+            where_clauses.append("decision_band = ?")
+            args.append(decision_band)
+        if min_score_total is not None:
+            where_clauses.append("score_total >= ?")
+            args.append(min_score_total)
+        where_clause = ""
+        if where_clauses:
+            where_clause = " WHERE " + " AND ".join(where_clauses)
         rows = conn.execute(
-            'SELECT * FROM candidates ORDER BY score_total DESC, id DESC LIMIT ?',
-            (max(1, limit),),
+            f'SELECT * FROM candidates{where_clause} ORDER BY score_total DESC, id DESC LIMIT ?',
+            (*args, max(1, limit)),
+        ).fetchall()
+        return rows
+    finally:
+        conn.close()
+def list_signatures(
+    db_path: str | None = None,
+    limit: int = 20,
+    *,
+    event_family: str | None = None,
+    service: str | None = None,
+    min_occurrence_count: int | None = None,
+) -> List[sqlite3.Row]:
+    conn = connect(db_path)
+    try:
+        where_clauses: List[str] = []
+        args: List[Any] = []
+        if event_family is not None:
+            where_clauses.append("event_family = ?")
+            args.append(event_family)
+        if service is not None:
+            where_clauses.append("service = ?")
+            args.append(service)
+        if min_occurrence_count is not None:
+            where_clauses.append("occurrence_count >= ?")
+            args.append(min_occurrence_count)
+        where_clause = ""
+        if where_clauses:
+            where_clause = " WHERE " + " AND ".join(where_clauses)
+        rows = conn.execute(
+            f"""
+            SELECT
+                id, signature_key, event_family, normalized_pattern, service,
+                metadata_json, occurrence_count
+            FROM signatures{where_clause}
+            ORDER BY occurrence_count DESC, id DESC
+            LIMIT ?
+            """,
+            (*args, max(1, limit)),
         ).fetchall()
         return rows
     finally:

package/docs/README.md ADDED

@@ -0,0 +1,94 @@
+# Runtime Examples
+Use this compact surface for the implemented runtime API, listener, and file-ingest paths.
+## 0) Shared runtime settings
+```bash
+export BRAINSTEM_API_TOKEN=my-local-token  # optional: set only if you want auth required
+export BRAINSTEM_DB_PATH=/tmp/brainstem.sqlite3
+```
+`BRAINSTEM_API_TOKEN` is optional. If you do not set it, omit all `X-API-Token` headers in the API examples.
+## 1) API entry point
+```bash
+# Starts the runtime API
+python -m uvicorn brainstem.api:app --host 127.0.0.1 --port 8000
+```
+```bash
+curl -s http://127.0.0.1:8000/healthz
+```
+## 2) UDP listener entry point
+```bash
+# Prints canonicalized events for each received datagram
+python -m brainstem.listener --tenant demo-tenant --host 127.0.0.1 --port 5514 --source-path /var/log/syslog
+```
+```bash
+printf 'Mar 22 03:10:00 fw-01 charon: IPsec SA rekey succeeded\n' | nc -u 127.0.0.1 5514
+```
+## 3) API ingest (syslog payload style)
+```bash
+curl -s -X POST http://127.0.0.1:8000/ingest/event \
+  -H "Content-Type: application/json" \
+  -H "X-API-Token: $BRAINSTEM_API_TOKEN" \
+  -d '{"tenant_id":"demo-tenant","source_type":"syslog","source_path":"/var/log/syslog","message_raw":"Mar 22 03:11:00 fw-01 charon: child SA rekey started"}'
+```
+## 4) API ingest for file source events
+```bash
+curl -s -X POST http://127.0.0.1:8000/ingest/batch \
+  -H "Content-Type: application/json" \
+  -H "X-API-Token: $BRAINSTEM_API_TOKEN" \
+  -d '{"threshold":2,"db_path":"/tmp/brainstem.sqlite3","events":[{"tenant_id":"demo-tenant","source_type":"file","source_path":"/tmp/manual.log","message_raw":"vpn tunnel dropped and recovered"}]}'
+```
+## 5) Runtime inspection endpoints (same db path)
+```bash
+curl -s "http://127.0.0.1:8000/ingest/recent?db_path=/tmp/brainstem.sqlite3&limit=5" \
+  -H "X-API-Token: $BRAINSTEM_API_TOKEN"
+curl -s "http://127.0.0.1:8000/candidates?db_path=/tmp/brainstem.sqlite3&limit=5" \
+  -H "X-API-Token: $BRAINSTEM_API_TOKEN"
+curl -s "http://127.0.0.1:8000/signatures?db_path=/tmp/brainstem.sqlite3&limit=5" \
+  -H "X-API-Token: $BRAINSTEM_API_TOKEN"
+curl -s "http://127.0.0.1:8000/raw_envelopes?db_path=/tmp/brainstem.sqlite3&limit=5" \
+  -H "X-API-Token: $BRAINSTEM_API_TOKEN"
+curl -s "http://127.0.0.1:8000/stats?db_path=/tmp/brainstem.sqlite3" \
+  -H "X-API-Token: $BRAINSTEM_API_TOKEN"
+curl -s "http://127.0.0.1:8000/failures?db_path=/tmp/brainstem.sqlite3&limit=5" \
+  -H "X-API-Token: $BRAINSTEM_API_TOKEN"
+curl -s "http://127.0.0.1:8000/sources?db_path=/tmp/brainstem.sqlite3&limit=5" \
+  -H "X-API-Token: $BRAINSTEM_API_TOKEN"
+curl -s "http://127.0.0.1:8000/sources/status?db_path=/tmp/brainstem.sqlite3&limit=5" \
+  -H "X-API-Token: $BRAINSTEM_API_TOKEN"
+```
+## 6) Direct file ingest helper path
+```bash
+python - <<'PY'
+from brainstem.ingest import run_ingest_file
+result = run_ingest_file(
+    "tests/fixtures/sample_syslog.log",
+    tenant_id="demo-tenant",
+    threshold=2,
+    db_path="/tmp/brainstem.sqlite3",
+)
+print({
+    "events": len(result.events),
+    "signatures": len(result.signatures),
+    "candidates": len(result.candidates),
+    "parse_failed": result.parse_failed,
+})
+PY
+```