@simbimbo/brainstem 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +24 -0
- package/brainstem/__init__.py +1 -1
- package/brainstem/api.py +131 -5
- package/brainstem/ingest.py +7 -0
- package/brainstem/models.py +2 -0
- package/brainstem/storage.py +249 -3
- package/package.json +1 -1
- package/pyproject.toml +1 -1
- package/tests/test_api.py +247 -0
- package/tests/test_storage.py +269 -1
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,29 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 0.0.3 — 2026-03-22
|
|
4
|
+
|
|
5
|
+
Intake Foundation follow-up release for **brAInstem**.
|
|
6
|
+
|
|
7
|
+
### Highlights
|
|
8
|
+
- persists `RawInputEnvelope` intake records to SQLite before canonicalization
|
|
9
|
+
- records canonicalization outcomes explicitly (`received`, `canonicalized`, `parse_failed`, `unsupported`)
|
|
10
|
+
- adds ingest accounting for:
|
|
11
|
+
- received
|
|
12
|
+
- canonicalized
|
|
13
|
+
- parse_failed
|
|
14
|
+
- candidates_generated
|
|
15
|
+
- adds runtime inspection endpoints for intake trust and observability:
|
|
16
|
+
- `GET /stats`
|
|
17
|
+
- `GET /failures`
|
|
18
|
+
- `GET /failures/{id}`
|
|
19
|
+
- `GET /ingest/recent`
|
|
20
|
+
- `GET /sources`
|
|
21
|
+
- adds storage/query helpers for recent raw envelopes, recent failures, and per-source summaries
|
|
22
|
+
- expands tests around raw-envelope persistence, failure inspection, source summaries, and stats
|
|
23
|
+
|
|
24
|
+
### Validation
|
|
25
|
+
- local test suite passed (`26 passed`)
|
|
26
|
+
|
|
3
27
|
## 0.0.2 — 2026-03-22
|
|
4
28
|
|
|
5
29
|
First fully aligned public foundation release of **brAInstem**.
|
package/brainstem/__init__.py
CHANGED
package/brainstem/api.py
CHANGED
|
@@ -8,11 +8,25 @@ from fastapi import FastAPI, HTTPException, Query
|
|
|
8
8
|
from fastapi.responses import JSONResponse
|
|
9
9
|
from pydantic import BaseModel, Field
|
|
10
10
|
|
|
11
|
-
from .ingest import
|
|
11
|
+
from .ingest import canonicalize_raw_input_envelope
|
|
12
12
|
from .interesting import interesting_items
|
|
13
13
|
from .models import Candidate, RawInputEnvelope
|
|
14
14
|
from .recurrence import build_recurrence_candidates
|
|
15
|
-
from .storage import
|
|
15
|
+
from .storage import (
|
|
16
|
+
RAW_ENVELOPE_STATUSES,
|
|
17
|
+
get_ingest_stats,
|
|
18
|
+
init_db,
|
|
19
|
+
list_candidates,
|
|
20
|
+
get_raw_envelope_by_id,
|
|
21
|
+
get_source_dimension_summaries,
|
|
22
|
+
list_recent_failed_raw_envelopes,
|
|
23
|
+
list_recent_raw_envelopes,
|
|
24
|
+
set_raw_envelope_status,
|
|
25
|
+
store_candidates,
|
|
26
|
+
store_events,
|
|
27
|
+
store_raw_envelopes,
|
|
28
|
+
store_signatures,
|
|
29
|
+
)
|
|
16
30
|
from .ingest import signatures_for_events
|
|
17
31
|
|
|
18
32
|
|
|
@@ -22,6 +36,8 @@ app = FastAPI(title="brAInstem Runtime")
|
|
|
22
36
|
class RawEnvelopeRequest(BaseModel):
|
|
23
37
|
tenant_id: str
|
|
24
38
|
source_type: str
|
|
39
|
+
source_id: str = ""
|
|
40
|
+
source_name: str = ""
|
|
25
41
|
message_raw: str
|
|
26
42
|
timestamp: Optional[str] = None
|
|
27
43
|
host: str = ""
|
|
@@ -49,6 +65,8 @@ def _raw_envelope_from_request(payload: RawEnvelopeRequest) -> RawInputEnvelope:
|
|
|
49
65
|
return RawInputEnvelope(
|
|
50
66
|
tenant_id=payload.tenant_id,
|
|
51
67
|
source_type=payload.source_type,
|
|
68
|
+
source_id=payload.source_id,
|
|
69
|
+
source_name=payload.source_name,
|
|
52
70
|
timestamp=payload.timestamp or datetime.utcnow().isoformat() + "Z",
|
|
53
71
|
message_raw=payload.message_raw,
|
|
54
72
|
host=payload.host,
|
|
@@ -78,15 +96,68 @@ def _candidate_from_row(row) -> Candidate:
|
|
|
78
96
|
)
|
|
79
97
|
|
|
80
98
|
|
|
99
|
+
def _raw_envelope_from_row(row) -> Dict[str, Any]:
|
|
100
|
+
return {
|
|
101
|
+
"id": row["id"],
|
|
102
|
+
"tenant_id": row["tenant_id"],
|
|
103
|
+
"source_type": row["source_type"],
|
|
104
|
+
"source_id": row["source_id"],
|
|
105
|
+
"source_name": row["source_name"],
|
|
106
|
+
"timestamp": row["timestamp"],
|
|
107
|
+
"host": row["host"],
|
|
108
|
+
"service": row["service"],
|
|
109
|
+
"severity": row["severity"],
|
|
110
|
+
"asset_id": row["asset_id"],
|
|
111
|
+
"source_path": row["source_path"],
|
|
112
|
+
"facility": row["facility"],
|
|
113
|
+
"message_raw": row["message_raw"],
|
|
114
|
+
"structured_fields": json.loads(row["structured_fields_json"] or "{}"),
|
|
115
|
+
"correlation_keys": json.loads(row["correlation_keys_json"] or "{}"),
|
|
116
|
+
"metadata": json.loads(row["metadata_json"] or "{}"),
|
|
117
|
+
"canonicalization_status": row["canonicalization_status"],
|
|
118
|
+
"failure_reason": row["failure_reason"],
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
|
|
81
122
|
def _run_ingest_batch(raw_events: List[RawInputEnvelope], *, threshold: int, db_path: Optional[str]) -> Dict[str, Any]:
|
|
82
|
-
|
|
123
|
+
raw_envelope_ids: List[int] = []
|
|
124
|
+
if db_path:
|
|
125
|
+
init_db(db_path)
|
|
126
|
+
raw_envelope_ids = store_raw_envelopes(raw_events, db_path)
|
|
127
|
+
|
|
128
|
+
events = []
|
|
129
|
+
parse_failed = 0
|
|
130
|
+
for idx, raw_event in enumerate(raw_events):
|
|
131
|
+
raw_envelope_id = raw_envelope_ids[idx] if idx < len(raw_envelope_ids) else None
|
|
132
|
+
try:
|
|
133
|
+
canonical_event = canonicalize_raw_input_envelope(raw_event)
|
|
134
|
+
except Exception as exc:
|
|
135
|
+
parse_failed += 1
|
|
136
|
+
if raw_envelope_id is not None:
|
|
137
|
+
set_raw_envelope_status(
|
|
138
|
+
raw_envelope_id,
|
|
139
|
+
"parse_failed",
|
|
140
|
+
db_path=db_path,
|
|
141
|
+
failure_reason=str(exc),
|
|
142
|
+
)
|
|
143
|
+
continue
|
|
144
|
+
events.append(canonical_event)
|
|
145
|
+
if raw_envelope_id is not None:
|
|
146
|
+
set_raw_envelope_status(raw_envelope_id, "canonicalized", db_path=db_path)
|
|
147
|
+
|
|
83
148
|
if not events:
|
|
84
|
-
return {
|
|
149
|
+
return {
|
|
150
|
+
"ok": True,
|
|
151
|
+
"event_count": 0,
|
|
152
|
+
"signature_count": 0,
|
|
153
|
+
"candidate_count": 0,
|
|
154
|
+
"parse_failed": parse_failed,
|
|
155
|
+
"interesting_items": [],
|
|
156
|
+
}
|
|
85
157
|
|
|
86
158
|
signatures = signatures_for_events(events)
|
|
87
159
|
candidates = build_recurrence_candidates(events, signatures, threshold=threshold)
|
|
88
160
|
if db_path:
|
|
89
|
-
init_db(db_path)
|
|
90
161
|
store_events(events, db_path)
|
|
91
162
|
store_signatures(signatures, db_path)
|
|
92
163
|
store_candidates(candidates, db_path)
|
|
@@ -97,6 +168,7 @@ def _run_ingest_batch(raw_events: List[RawInputEnvelope], *, threshold: int, db_
|
|
|
97
168
|
"event_count": len(events),
|
|
98
169
|
"signature_count": len({sig.signature_key for sig in signatures}),
|
|
99
170
|
"candidate_count": len(candidates),
|
|
171
|
+
"parse_failed": parse_failed,
|
|
100
172
|
"interesting_items": interesting_items(candidates, limit=max(1, 5)),
|
|
101
173
|
}
|
|
102
174
|
|
|
@@ -126,6 +198,60 @@ def get_interesting(
|
|
|
126
198
|
return {"ok": True, "items": interesting_items(candidates, limit=limit)}
|
|
127
199
|
|
|
128
200
|
|
|
201
|
+
@app.get("/stats")
def get_stats(db_path: Optional[str] = None) -> Dict[str, Any]:
    """Return the ingest counters from ``get_ingest_stats`` with an ``ok`` flag.

    Args:
        db_path: Optional database path forwarded to the storage layer.
    """
    payload: Dict[str, Any] = {"ok": True}
    payload.update(get_ingest_stats(db_path))
    return payload
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
@app.get("/failures")
def get_failures(
    limit: int = Query(default=20, ge=1),
    status: Optional[str] = None,
    db_path: Optional[str] = None,
) -> Dict[str, Any]:
    """List recent raw envelopes via ``list_recent_failed_raw_envelopes``.

    Args:
        limit: Maximum number of rows to return (at least 1).
        status: Optional status filter; must be one of ``RAW_ENVELOPE_STATUSES``.
        db_path: Optional database path forwarded to the storage layer.

    Raises:
        HTTPException: 422 when ``status`` is not a known status.
    """
    # NOTE(review): any known status is accepted here, including non-failure
    # ones such as "canonicalized" -- confirm that is intended for /failures.
    status_is_known = status is None or status in RAW_ENVELOPE_STATUSES
    if not status_is_known:
        raise HTTPException(
            status_code=422,
            detail=f"invalid status '{status}'; expected one of: {', '.join(RAW_ENVELOPE_STATUSES)}",
        )

    items = [
        _raw_envelope_from_row(row)
        for row in list_recent_failed_raw_envelopes(db_path=db_path, status=status, limit=limit)
    ]
    return {"ok": True, "items": items, "count": len(items), "status": status}
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
@app.get("/ingest/recent")
def get_ingest_recent(
    limit: int = Query(default=20, ge=1),
    status: Optional[str] = None,
    db_path: Optional[str] = None,
) -> Dict[str, Any]:
    """List the most recent raw envelopes, optionally filtered by status.

    Args:
        limit: Maximum number of rows to return (at least 1).
        status: Optional status filter; must be one of ``RAW_ENVELOPE_STATUSES``.
        db_path: Optional database path forwarded to the storage layer.

    Raises:
        HTTPException: 422 when ``status`` is not a known status.
    """
    status_is_known = status is None or status in RAW_ENVELOPE_STATUSES
    if not status_is_known:
        raise HTTPException(
            status_code=422,
            detail=f"invalid status '{status}'; expected one of: {', '.join(RAW_ENVELOPE_STATUSES)}",
        )

    recent_rows = list_recent_raw_envelopes(
        db_path=db_path,
        status=status,
        limit=limit,
        failures_only=False,
    )
    items = [_raw_envelope_from_row(row) for row in recent_rows]
    return {"ok": True, "items": items, "count": len(items), "status": status}
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
@app.get("/sources")
def get_sources(
    limit: int = Query(default=10, ge=1),
    db_path: Optional[str] = None,
) -> Dict[str, Any]:
    """Return per-dimension source summaries from the storage layer.

    Args:
        limit: Maximum number of entries per dimension (at least 1).
        db_path: Optional database path forwarded to the storage layer.
    """
    summaries = get_source_dimension_summaries(db_path=db_path, limit=limit)
    return {"ok": True, "items": summaries}
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
@app.get("/failures/{raw_envelope_id}")
def get_failure(raw_envelope_id: int, db_path: Optional[str] = None) -> Dict[str, Any]:
    """Fetch a single raw envelope by id.

    The lookup is by id only; the row's canonicalization status is not checked.

    Raises:
        HTTPException: 404 when no row has the given id.
    """
    row = get_raw_envelope_by_id(raw_envelope_id, db_path=db_path)
    if row is not None:
        return {"ok": True, "item": _raw_envelope_from_row(row)}
    raise HTTPException(status_code=404, detail="raw envelope not found")
|
|
253
|
+
|
|
254
|
+
|
|
129
255
|
@app.get("/healthz")
def healthz() -> JSONResponse:
    """Return a fixed JSON payload reporting that the service is up.

    The return annotation names the response class that is actually returned;
    the previous ``Dict[str, str]`` did not match it, and the payload's ``ok``
    value is a boolean rather than a string.
    """
    return JSONResponse(content={"ok": True, "status": "ok"})
|
package/brainstem/ingest.py
CHANGED
|
@@ -50,6 +50,13 @@ def parse_syslog_envelopes(lines: Iterable[str], *, tenant_id: str, source_path:
|
|
|
50
50
|
|
|
51
51
|
|
|
52
52
|
def canonicalize_raw_input_envelope(raw: RawInputEnvelope) -> CanonicalEvent:
|
|
53
|
+
parse_error = (raw.metadata or {}).get("parse_error")
|
|
54
|
+
if parse_error:
|
|
55
|
+
raise ValueError(f"parse_error: {parse_error}")
|
|
56
|
+
|
|
57
|
+
if not (raw.message_raw or "").strip():
|
|
58
|
+
raise ValueError("message_raw is empty and cannot be canonicalized")
|
|
59
|
+
|
|
53
60
|
message_normalized = normalize_message(raw.message_raw)
|
|
54
61
|
metadata = dict(raw.metadata or {})
|
|
55
62
|
metadata.setdefault("canonicalization_source", raw.source_type)
|
package/brainstem/models.py
CHANGED
package/brainstem/storage.py
CHANGED
|
@@ -2,17 +2,25 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import json
|
|
4
4
|
import sqlite3
|
|
5
|
-
from dataclasses import asdict
|
|
6
5
|
from pathlib import Path
|
|
7
|
-
from typing import Iterable, List
|
|
6
|
+
from typing import Any, Iterable, List
|
|
8
7
|
|
|
9
|
-
from .models import Candidate, Event, Signature
|
|
8
|
+
from .models import Candidate, Event, RawInputEnvelope, Signature
|
|
10
9
|
|
|
11
10
|
|
|
12
11
|
def default_db_path() -> Path:
    """Return the default SQLite file location as a relative path."""
    state_dir = Path('.brainstem-state')
    return state_dir / 'brainstem.sqlite3'
|
|
14
13
|
|
|
15
14
|
|
|
15
|
+
RAW_ENVELOPE_STATUSES = ("received", "canonicalized", "parse_failed", "unsupported")
|
|
16
|
+
RAW_ENVELOPE_FAILURE_STATUSES = ("parse_failed", "unsupported")
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _validate_canonicalization_status(status: str) -> None:
|
|
20
|
+
if status not in RAW_ENVELOPE_STATUSES:
|
|
21
|
+
raise ValueError(f"unsupported canonicalization_status: {status}")
|
|
22
|
+
|
|
23
|
+
|
|
16
24
|
def connect(db_path: str | None = None) -> sqlite3.Connection:
|
|
17
25
|
path = Path(db_path) if db_path else default_db_path()
|
|
18
26
|
path.parent.mkdir(parents=True, exist_ok=True)
|
|
@@ -42,6 +50,27 @@ def init_db(db_path: str | None = None) -> None:
|
|
|
42
50
|
correlation_keys_json TEXT NOT NULL
|
|
43
51
|
);
|
|
44
52
|
|
|
53
|
+
CREATE TABLE IF NOT EXISTS raw_envelopes (
|
|
54
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
55
|
+
tenant_id TEXT NOT NULL,
|
|
56
|
+
source_type TEXT NOT NULL,
|
|
57
|
+
source_id TEXT,
|
|
58
|
+
source_name TEXT,
|
|
59
|
+
timestamp TEXT NOT NULL,
|
|
60
|
+
host TEXT,
|
|
61
|
+
service TEXT,
|
|
62
|
+
severity TEXT,
|
|
63
|
+
asset_id TEXT,
|
|
64
|
+
source_path TEXT,
|
|
65
|
+
facility TEXT,
|
|
66
|
+
message_raw TEXT NOT NULL,
|
|
67
|
+
structured_fields_json TEXT NOT NULL,
|
|
68
|
+
correlation_keys_json TEXT NOT NULL,
|
|
69
|
+
metadata_json TEXT NOT NULL,
|
|
70
|
+
canonicalization_status TEXT NOT NULL DEFAULT 'received',
|
|
71
|
+
failure_reason TEXT
|
|
72
|
+
);
|
|
73
|
+
|
|
45
74
|
CREATE TABLE IF NOT EXISTS signatures (
|
|
46
75
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
47
76
|
signature_key TEXT NOT NULL UNIQUE,
|
|
@@ -72,6 +101,133 @@ def init_db(db_path: str | None = None) -> None:
|
|
|
72
101
|
conn.close()
|
|
73
102
|
|
|
74
103
|
|
|
104
|
+
def store_raw_envelopes(raw_envelopes: Iterable[RawInputEnvelope], db_path: str | None = None) -> List[int]:
    """Insert raw envelopes with status ``received`` and return their row ids.

    All rows are written on one connection and committed together after the
    last insert.

    Args:
        raw_envelopes: Envelopes to persist, in order.
        db_path: Optional database path passed to ``connect``.

    Returns:
        The ``lastrowid`` of each inserted row, in input order.
    """
    insert_sql = '''
        INSERT INTO raw_envelopes (
            tenant_id, source_type, source_id, source_name, timestamp, host, service, severity,
            asset_id, source_path, facility, message_raw,
            structured_fields_json, correlation_keys_json, metadata_json,
            canonicalization_status, failure_reason
        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    '''
    conn = connect(db_path)
    inserted_ids: List[int] = []
    try:
        for envelope in raw_envelopes:
            params = (
                envelope.tenant_id,
                envelope.source_type,
                envelope.source_id,
                envelope.source_name,
                envelope.timestamp,
                envelope.host,
                envelope.service,
                envelope.severity,
                envelope.asset_id,
                envelope.source_path,
                envelope.facility,
                envelope.message_raw,
                json.dumps(envelope.structured_fields, ensure_ascii=False),
                json.dumps(envelope.correlation_keys, ensure_ascii=False),
                json.dumps(envelope.metadata, ensure_ascii=False),
                "received",  # initial status for every new row
                None,  # no failure reason yet
            )
            cursor = conn.execute(insert_sql, params)
            inserted_ids.append(int(cursor.lastrowid))
        conn.commit()
        return inserted_ids
    finally:
        conn.close()
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def set_raw_envelope_status(
    raw_envelope_id: int,
    status: str,
    db_path: str | None = None,
    *,
    failure_reason: str | None = None,
) -> None:
    """Overwrite the status and failure reason of one raw envelope row.

    Args:
        raw_envelope_id: Primary key of the row to update.
        status: New status; must be one of ``RAW_ENVELOPE_STATUSES``.
        db_path: Optional database path passed to ``connect``.
        failure_reason: Stored as given; the default ``None`` clears the column.

    Raises:
        ValueError: If ``status`` is not a known status.
    """
    _validate_canonicalization_status(status)
    update_sql = '''
        UPDATE raw_envelopes
        SET canonicalization_status = ?, failure_reason = ?
        WHERE id = ?
    '''
    conn = connect(db_path)
    try:
        conn.execute(update_sql, (status, failure_reason, raw_envelope_id))
        conn.commit()
    finally:
        conn.close()
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def get_raw_envelope_by_id(raw_envelope_id: int, db_path: str | None = None) -> sqlite3.Row | None:
    """Return the raw envelope row with the given id, or ``None`` if absent.

    The schema is initialised first, as the other read helpers
    (``get_ingest_stats``, ``get_source_dimension_summaries``) already do, so
    a lookup against a database that has not been initialised yet reports
    "not found" instead of failing on a missing table.

    Args:
        raw_envelope_id: Primary key to look up.
        db_path: Optional database path passed to ``init_db`` and ``connect``.
    """
    init_db(db_path)
    conn = connect(db_path)
    try:
        return conn.execute(
            "SELECT * FROM raw_envelopes WHERE id = ?",
            (raw_envelope_id,),
        ).fetchone()
    finally:
        conn.close()
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
def _recent_raw_envelopes_query(
|
|
180
|
+
canonicalization_status: str | None,
|
|
181
|
+
*,
|
|
182
|
+
failures_only: bool,
|
|
183
|
+
) -> tuple[str, tuple[str, ...], bool]:
|
|
184
|
+
if canonicalization_status is None and failures_only:
|
|
185
|
+
return "WHERE canonicalization_status IN (?, ?)", RAW_ENVELOPE_FAILURE_STATUSES, True
|
|
186
|
+
if canonicalization_status is None and not failures_only:
|
|
187
|
+
return "", (), False
|
|
188
|
+
_validate_canonicalization_status(canonicalization_status)
|
|
189
|
+
return "WHERE canonicalization_status = ?", (canonicalization_status,), False
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def list_recent_raw_envelopes(
    db_path: str | None = None,
    status: str | None = None,
    limit: int = 20,
    *,
    failures_only: bool = False,
) -> List[sqlite3.Row]:
    """Return the newest raw envelope rows, highest id first.

    The schema is initialised before querying, as the other read helpers
    (``get_ingest_stats``, ``get_source_dimension_summaries``) already do, so
    listing from a database that has not been initialised yet yields an empty
    list instead of failing on a missing table.

    Args:
        db_path: Optional database path passed to ``init_db`` and ``connect``.
        status: Optional exact status filter; takes precedence over
            ``failures_only``.
        limit: Maximum number of rows; values below 1 are treated as 1.
        failures_only: When no status is given, restrict to
            ``RAW_ENVELOPE_FAILURE_STATUSES``.

    Raises:
        ValueError: If ``status`` is not a known status.
    """
    # Build (and validate) the filter first so a bad status fails before any
    # connection is opened.
    where_clause, status_args, _ = _recent_raw_envelopes_query(status, failures_only=failures_only)
    prefix = f"{where_clause} " if where_clause else ""

    init_db(db_path)
    conn = connect(db_path)
    try:
        return conn.execute(
            f"""
            SELECT * FROM raw_envelopes
            {prefix}
            ORDER BY id DESC
            LIMIT ?
            """,
            (*status_args, max(1, limit)),
        ).fetchall()
    finally:
        conn.close()
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
def list_recent_failed_raw_envelopes(
    db_path: str | None = None,
    *,
    status: str | None = None,
    limit: int = 20,
) -> List[sqlite3.Row]:
    """Return recent raw envelopes, restricted to failures when no status is given.

    With ``status=None`` the failures-only filter is applied; with an explicit
    status the rows are filtered by exactly that status.
    """
    # NOTE(review): an explicit status is passed through unchanged, so a
    # non-failure status such as "canonicalized" is honoured -- confirm intent.
    restrict_to_failures = status is None
    return list_recent_raw_envelopes(
        db_path=db_path,
        status=status,
        limit=limit,
        failures_only=restrict_to_failures,
    )
|
|
229
|
+
|
|
230
|
+
|
|
75
231
|
def store_events(events: Iterable[Event], db_path: str | None = None) -> int:
|
|
76
232
|
conn = connect(db_path)
|
|
77
233
|
count = 0
|
|
@@ -107,6 +263,96 @@ def store_events(events: Iterable[Event], db_path: str | None = None) -> int:
|
|
|
107
263
|
conn.close()
|
|
108
264
|
|
|
109
265
|
|
|
266
|
+
SOURCE_SUMMARY_DIMENSIONS = (
|
|
267
|
+
"source_type",
|
|
268
|
+
"source_path",
|
|
269
|
+
"source_id",
|
|
270
|
+
"source_name",
|
|
271
|
+
"host",
|
|
272
|
+
"service",
|
|
273
|
+
)
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
def _summarize_raw_envelopes_by_dimension(
|
|
277
|
+
conn: sqlite3.Connection,
|
|
278
|
+
dimension: str,
|
|
279
|
+
limit: int = 20,
|
|
280
|
+
) -> List[dict[str, int | str]]:
|
|
281
|
+
if dimension not in SOURCE_SUMMARY_DIMENSIONS:
|
|
282
|
+
raise ValueError(f"unsupported dimension: {dimension}")
|
|
283
|
+
return [
|
|
284
|
+
{"value": row["value"], "count": int(row["count"])}
|
|
285
|
+
for row in conn.execute(
|
|
286
|
+
f"""
|
|
287
|
+
SELECT {dimension} AS value, COUNT(*) AS count
|
|
288
|
+
FROM raw_envelopes
|
|
289
|
+
WHERE COALESCE(TRIM({dimension}), '') <> ''
|
|
290
|
+
GROUP BY {dimension}
|
|
291
|
+
ORDER BY COUNT(*) DESC, value ASC
|
|
292
|
+
LIMIT ?
|
|
293
|
+
""",
|
|
294
|
+
(max(1, limit),),
|
|
295
|
+
).fetchall()
|
|
296
|
+
]
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
def get_source_dimension_summaries(
    db_path: str | None = None,
    *,
    limit: int = 20,
) -> dict[str, List[dict[str, int | str]]]:
    """Initialise the schema, then return per-dimension source summaries.

    Args:
        db_path: Optional database path passed to ``init_db`` and ``connect``.
        limit: Maximum number of entries per dimension.
    """
    init_db(db_path)
    conn = connect(db_path)
    try:
        summaries = _get_source_dimension_summaries_from_conn(conn, limit=limit)
    finally:
        conn.close()
    return summaries
|
|
310
|
+
|
|
311
|
+
|
|
312
|
+
def _get_source_dimension_summaries_from_conn(
    conn: sqlite3.Connection,
    *,
    limit: int = 20,
) -> dict[str, List[dict[str, int | str]]]:
    """Summarise raw envelopes for every dimension in ``SOURCE_SUMMARY_DIMENSIONS``.

    The dimensions are taken from the shared constant rather than repeated
    here, so the allow-list and the reported keys cannot drift apart.

    Args:
        conn: Open connection to query.
        limit: Maximum number of entries per dimension.

    Returns:
        A dict mapping each dimension name to its list of
        ``{"value": ..., "count": ...}`` entries, in constant order.
    """
    return {
        dimension: _summarize_raw_envelopes_by_dimension(conn, dimension, limit=limit)
        for dimension in SOURCE_SUMMARY_DIMENSIONS
    }
|
|
325
|
+
|
|
326
|
+
|
|
327
|
+
def _query_count(conn: sqlite3.Connection, query: str) -> int:
|
|
328
|
+
return int(conn.execute(query).fetchone()[0])
|
|
329
|
+
|
|
330
|
+
|
|
331
|
+
def get_ingest_stats(db_path: str | None = None) -> dict[str, Any]:
    """Return ingest counters and source summaries for the database.

    The per-status counts come from a single grouped query instead of one
    ``COUNT`` query per status, so ``received`` and the per-status figures
    are derived from the same result set.

    Args:
        db_path: Optional database path passed to ``init_db`` and ``connect``.

    Returns:
        A dict with ``received`` (all raw envelopes, regardless of status),
        ``canonicalized``, ``parse_failed``, ``unsupported``,
        ``candidates_generated`` and ``source_summaries``.
    """
    init_db(db_path)
    conn = connect(db_path)
    try:
        grouped = conn.execute(
            "SELECT canonicalization_status, COUNT(*) FROM raw_envelopes "
            "GROUP BY canonicalization_status"
        ).fetchall()
        # Positional access works for both plain tuples and sqlite3.Row.
        status_counts = {row[0]: int(row[1]) for row in grouped}
        return {
            "received": sum(status_counts.values()),
            "canonicalized": status_counts.get("canonicalized", 0),
            "parse_failed": status_counts.get("parse_failed", 0),
            "unsupported": status_counts.get("unsupported", 0),
            "candidates_generated": _query_count(conn, "SELECT COUNT(*) FROM candidates"),
            "source_summaries": _get_source_dimension_summaries_from_conn(conn),
        }
    finally:
        conn.close()
|
|
354
|
+
|
|
355
|
+
|
|
110
356
|
def store_signatures(signatures: Iterable[Signature], db_path: str | None = None) -> int:
|
|
111
357
|
conn = connect(db_path)
|
|
112
358
|
count = 0
|
package/package.json
CHANGED
package/pyproject.toml
CHANGED
package/tests/test_api.py
CHANGED
|
@@ -3,6 +3,12 @@ from pathlib import Path
|
|
|
3
3
|
from fastapi.testclient import TestClient
|
|
4
4
|
|
|
5
5
|
from brainstem.api import app
|
|
6
|
+
from brainstem.models import RawInputEnvelope
|
|
7
|
+
from brainstem.storage import (
|
|
8
|
+
init_db,
|
|
9
|
+
set_raw_envelope_status,
|
|
10
|
+
store_raw_envelopes,
|
|
11
|
+
)
|
|
6
12
|
|
|
7
13
|
|
|
8
14
|
def test_ingest_event_endpoint_round_trip(tmp_path: Path) -> None:
|
|
@@ -65,8 +71,249 @@ def test_ingest_batch_and_interesting(tmp_path: Path) -> None:
|
|
|
65
71
|
assert interesting_payload["items"]
|
|
66
72
|
|
|
67
73
|
|
|
74
|
+
def test_stats_after_successful_and_failed_ingest(tmp_path: Path) -> None:
    """A batch with two valid events and one empty message is reflected in /stats."""
    db_path = tmp_path / "brainstem.sqlite3"
    client = TestClient(app)

    def syslog_event(message_raw: str) -> dict:
        # All three events share everything except the message text.
        return {
            "tenant_id": "client-a",
            "source_type": "syslog",
            "message_raw": message_raw,
            "host": "fw-01",
            "service": "sshd",
        }

    failed_login = "Failed password for admin from 10.1.2.3"
    request_body = {
        "threshold": 2,
        "db_path": str(db_path),
        "events": [
            syslog_event(failed_login),
            syslog_event(failed_login),
            syslog_event(""),
        ],
    }
    batch_response = client.post("/ingest/batch", json=request_body)
    assert batch_response.status_code == 200
    batch_payload = batch_response.json()
    assert batch_payload["ok"] is True
    assert batch_payload["event_count"] == 2
    assert batch_payload["parse_failed"] == 1

    stats = client.get(f"/stats?db_path={db_path}")
    assert stats.status_code == 200
    stats_payload = stats.json()
    assert stats_payload["ok"] is True
    assert stats_payload["received"] == 3
    assert stats_payload["canonicalized"] == 2
    assert stats_payload["parse_failed"] == 1
    assert stats_payload["candidates_generated"] >= 1
|
|
121
|
+
|
|
122
|
+
|
|
68
123
|
def test_healthz_is_ready() -> None:
    """The /healthz endpoint answers 200 with ``ok`` set to true."""
    response = TestClient(app).get("/healthz")
    body = response.json()
    assert response.status_code == 200
    assert body["ok"] is True
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def test_failures_endpoint_lists_recent_parse_failures(tmp_path: Path) -> None:
    """Only the empty-message event of a mixed batch is listed by /failures."""
    db_path = tmp_path / "brainstem.sqlite3"
    client = TestClient(app)

    empty_message_event = {
        "tenant_id": "client-a",
        "source_type": "syslog",
        "message_raw": "",
        "host": "fw-01",
        "service": "sshd",
    }
    valid_event = {
        "tenant_id": "client-a",
        "source_type": "syslog",
        "message_raw": "VPN tunnel dropped and recovered",
        "host": "fw-01",
        "service": "charon",
    }
    client.post(
        "/ingest/batch",
        json={
            "threshold": 2,
            "db_path": str(db_path),
            "events": [empty_message_event, valid_event],
        },
    )

    response = client.get(f"/failures?db_path={db_path}&limit=10")
    assert response.status_code == 200
    payload = response.json()
    assert payload["ok"] is True
    assert payload["count"] == 1
    assert payload["items"][0]["canonicalization_status"] == "parse_failed"
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def test_failures_endpoint_filters_by_status_and_fetches_single_record(tmp_path: Path) -> None:
    """/failures filters by status, /failures/{id} returns one row, bad status is 422."""
    client = TestClient(app)
    db_path = tmp_path / "brainstem.sqlite3"
    db_file = str(db_path)
    init_db(db_file)

    # Two envelopes that differ only in timestamp and message.
    envelopes = [
        RawInputEnvelope(
            tenant_id="client-a",
            source_type="syslog",
            timestamp=timestamp,
            message_raw=message_raw,
            host="fw-01",
            service="sshd",
        )
        for timestamp, message_raw in (
            ("2026-03-22T00:00:01Z", "first"),
            ("2026-03-22T00:00:02Z", "second"),
        )
    ]
    raw_ids = store_raw_envelopes(envelopes, db_path=db_file)
    parse_failed_id, unsupported_id = raw_ids[0], raw_ids[1]
    set_raw_envelope_status(parse_failed_id, "parse_failed", db_path=db_file, failure_reason="bad parse")
    set_raw_envelope_status(unsupported_id, "unsupported", db_path=db_file, failure_reason="unsupported source")

    failed_only = client.get(f"/failures?db_path={db_path}&status=parse_failed&limit=10")
    assert failed_only.status_code == 200
    failed_payload = failed_only.json()
    assert failed_payload["count"] == 1
    assert failed_payload["items"][0]["id"] == parse_failed_id

    unsupported = client.get(f"/failures?db_path={db_path}&status=unsupported&limit=10")
    assert unsupported.status_code == 200
    unsupported_payload = unsupported.json()
    assert unsupported_payload["count"] == 1
    assert unsupported_payload["items"][0]["id"] == unsupported_id

    single = client.get(f"/failures/{unsupported_id}?db_path={db_path}")
    assert single.status_code == 200
    single_payload = single.json()
    assert single_payload["ok"] is True
    assert single_payload["item"]["id"] == unsupported_id
    assert single_payload["item"]["failure_reason"] == "unsupported source"

    invalid = client.get(f"/failures?db_path={db_path}&status=bogus")
    assert invalid.status_code == 422
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
def test_sources_endpoint_summarizes_ingest_dimensions(tmp_path: Path) -> None:
    """/sources reports per-dimension counts for a batch from two sources."""
    db_path = tmp_path / "brainstem.sqlite3"
    client = TestClient(app)

    firewall_event = {
        "tenant_id": "client-a",
        "source_type": "syslog",
        "source_id": "fw-01",
        "source_name": "edge-fw-01",
        "source_path": "/var/log/syslog",
        "message_raw": "Failed password for admin from 10.1.2.3",
        "host": "fw-01",
        "service": "sshd",
        "severity": "info",
    }
    monitor_event = {
        "tenant_id": "client-a",
        "source_type": "logicmonitor",
        "source_id": "lm-01",
        "source_name": "edge-lm-01",
        "source_path": "/alerts",
        "message_raw": "Disk space low",
        "host": "lm-01",
        "service": "logicmonitor",
        "severity": "warning",
    }
    batch_response = client.post(
        "/ingest/batch",
        json={
            "threshold": 1,
            "db_path": str(db_path),
            # The firewall event is sent twice on purpose.
            "events": [firewall_event, dict(firewall_event), monitor_event],
        },
    )
    assert batch_response.status_code == 200

    response = client.get(f"/sources?db_path={db_path}&limit=10")
    assert response.status_code == 200
    payload = response.json()
    assert payload["ok"] is True
    assert payload["items"]["source_type"] == [
        {"value": "syslog", "count": 2},
        {"value": "logicmonitor", "count": 1},
    ]
    source_name_counts = {
        entry["value"]: entry["count"] for entry in payload["items"]["source_name"]
    }
    assert source_name_counts == {
        "edge-fw-01": 2,
        "edge-lm-01": 1,
    }
|
|
275
|
+
|
|
276
|
+
|
|
277
|
+
def test_ingest_recent_endpoint_returns_recent_intake_and_allows_status_filter(tmp_path: Path) -> None:
    """/ingest/recent lists all intake rows and honours the status filter."""
    db_path = tmp_path / "brainstem.sqlite3"
    client = TestClient(app)

    def systemd_event(message_raw: str) -> dict:
        # Both events share everything except the message text.
        return {
            "tenant_id": "client-a",
            "source_type": "syslog",
            "source_id": "fw-01",
            "source_name": "edge-fw-01",
            "message_raw": message_raw,
            "host": "fw-01",
            "service": "systemd",
        }

    client.post(
        "/ingest/batch",
        json={
            "threshold": 1,
            "db_path": str(db_path),
            "events": [systemd_event("service restarted"), systemd_event("")],
        },
    )

    response = client.get(f"/ingest/recent?db_path={db_path}&limit=10")
    assert response.status_code == 200
    payload = response.json()
    assert payload["ok"] is True
    assert payload["count"] == 2
    seen_statuses = {item["canonicalization_status"] for item in payload["items"]}
    assert len(seen_statuses) == 2

    failed = client.get(f"/ingest/recent?db_path={db_path}&status=parse_failed&limit=10")
    assert failed.status_code == 200
    failed_payload = failed.json()
    assert failed_payload["count"] == 1
    assert failed_payload["items"][0]["canonicalization_status"] == "parse_failed"
|
package/tests/test_storage.py
CHANGED
|
@@ -1,8 +1,23 @@
|
|
|
1
|
+
import sqlite3
|
|
1
2
|
from pathlib import Path
|
|
2
3
|
|
|
3
4
|
from brainstem.ingest import ingest_syslog_lines, signatures_for_events
|
|
5
|
+
from brainstem.models import RawInputEnvelope
|
|
4
6
|
from brainstem.recurrence import build_recurrence_candidates
|
|
5
|
-
from brainstem.storage import
|
|
7
|
+
from brainstem.storage import (
|
|
8
|
+
get_raw_envelope_by_id,
|
|
9
|
+
get_ingest_stats,
|
|
10
|
+
init_db,
|
|
11
|
+
list_candidates,
|
|
12
|
+
get_source_dimension_summaries,
|
|
13
|
+
store_candidates,
|
|
14
|
+
store_events,
|
|
15
|
+
list_recent_failed_raw_envelopes,
|
|
16
|
+
list_recent_raw_envelopes,
|
|
17
|
+
store_raw_envelopes,
|
|
18
|
+
set_raw_envelope_status,
|
|
19
|
+
store_signatures,
|
|
20
|
+
)
|
|
6
21
|
|
|
7
22
|
|
|
8
23
|
def test_storage_round_trip(tmp_path: Path) -> None:
|
|
@@ -24,3 +39,256 @@ def test_storage_round_trip(tmp_path: Path) -> None:
|
|
|
24
39
|
rows = list_candidates(str(db_path), limit=10)
|
|
25
40
|
assert rows
|
|
26
41
|
assert rows[0]['title']
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def test_raw_envelope_records_are_persisted(tmp_path: Path) -> None:
    """Store two raw envelopes and read them back straight from SQLite.

    Expects ``store_raw_envelopes`` to return ids ``[1, 2]`` and both rows in
    ``raw_envelopes`` to carry the ``received`` canonicalization status.
    """
    database = tmp_path / 'brainstem.sqlite3'
    init_db(str(database))

    first_envelope = RawInputEnvelope(
        tenant_id="client-a",
        source_type="syslog",
        timestamp="2026-03-22T00:00:01Z",
        message_raw="VPN tunnel dropped and recovered",
        host="fw-01",
        service="charon",
    )
    second_envelope = RawInputEnvelope(
        tenant_id="client-a",
        source_type="syslog",
        timestamp="2026-03-22T00:00:02Z",
        message_raw="IPsec SA rekey succeeded",
        host="fw-01",
        service="charon",
    )
    stored_ids = store_raw_envelopes([first_envelope, second_envelope], str(database))
    assert stored_ids == [1, 2]

    # Inspect the table directly instead of going through the storage API.
    connection = sqlite3.connect(database)
    try:
        cursor = connection.execute(
            "SELECT tenant_id, source_type, message_raw, canonicalization_status FROM raw_envelopes ORDER BY id ASC"
        )
        persisted = cursor.fetchall()
    finally:
        connection.close()

    assert len(persisted) == 2
    oldest, newest = persisted[0], persisted[1]
    assert oldest[0] == "client-a"
    assert oldest[1] == "syslog"
    assert oldest[2] == "VPN tunnel dropped and recovered"
    assert oldest[3] == "received"
    assert newest[3] == "received"
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def test_ingest_stats_from_raw_envelopes(tmp_path: Path) -> None:
    """Verify ``get_ingest_stats`` counts rows seeded directly via SQL.

    Seeds one ``canonicalized`` envelope, one ``parse_failed`` envelope and
    one candidate row, then expects the stats to report two received, one
    canonicalized, one parse-failed and one generated candidate.
    """
    db_path = tmp_path / 'brainstem.sqlite3'
    init_db(str(db_path))

    # The two seeded envelopes differ only in message, status and failure
    # reason, so a single parameterized statement covers both rows.
    insert_envelope_sql = """
        INSERT INTO raw_envelopes (
            tenant_id, source_type, timestamp, host, service, severity,
            asset_id, source_path, facility, message_raw,
            structured_fields_json, correlation_keys_json, metadata_json,
            canonicalization_status, failure_reason
        ) VALUES (
            'client-a', 'syslog', '2026-03-22T00:00:00Z',
            'fw-01', 'charon', 'info', '', '', '', ?, '{}', '{}', '{}',
            ?, ?
        )
    """
    envelope_rows = [
        # (message_raw, canonicalization_status, failure_reason)
        ('ok', 'canonicalized', None),  # None is stored as SQL NULL
        ('bad', 'parse_failed', 'message empty'),
    ]

    conn = sqlite3.connect(db_path)
    try:
        conn.executemany(insert_envelope_sql, envelope_rows)
        conn.execute(
            "INSERT INTO candidates (candidate_type, title, summary, score_total, score_breakdown_json, decision_band, source_signature_ids_json, source_event_ids_json, confidence, metadata_json) VALUES ('recurrence', 'x', 'y', 1.0, '{}', 'medium', '[]', '[]', 0.1, '{}')"
        )
        conn.commit()
    finally:
        conn.close()

    stats = get_ingest_stats(str(db_path))
    assert stats["received"] == 2
    assert stats["canonicalized"] == 1
    assert stats["parse_failed"] == 1
    assert stats["candidates_generated"] == 1
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def test_source_dimension_summaries(tmp_path: Path) -> None:
    """Check per-dimension counts from ``get_source_dimension_summaries``.

    Stores two syslog envelopes from ``fw-01`` and one logicmonitor envelope
    from ``lm-1``, then expects ``source_type`` entries ordered syslog (2)
    before logicmonitor (1), and matching counts for ``source_path`` and
    ``source_id`` regardless of order.
    """
    database = tmp_path / 'brainstem.sqlite3'
    init_db(str(database))

    vpn_drop = RawInputEnvelope(
        tenant_id='client-a',
        source_type='syslog',
        source_id='fw-01',
        source_name='edge-fw-01',
        timestamp='2026-03-22T00:00:01Z',
        message_raw='VPN tunnel dropped and recovered',
        source_path='/var/log/syslog',
        host='fw-01',
        service='charon',
    )
    ipsec_rekey = RawInputEnvelope(
        tenant_id='client-a',
        source_type='syslog',
        source_id='fw-01',
        source_name='edge-fw-01',
        timestamp='2026-03-22T00:00:02Z',
        message_raw='IPsec SA rekey succeeded',
        source_path='/var/log/syslog',
        host='fw-01',
        service='charon',
    )
    cpu_alert = RawInputEnvelope(
        tenant_id='client-a',
        source_type='logicmonitor',
        source_id='lm-1',
        source_name='edge-lm-01',
        timestamp='2026-03-22T00:00:03Z',
        message_raw='CPU usage high',
        source_path='/alerts',
        host='lm-01',
        service='logicmonitor',
    )
    store_raw_envelopes([vpn_drop, ipsec_rekey, cpu_alert], db_path=str(database))

    summary = get_source_dimension_summaries(str(database), limit=10)

    # source_type is checked positionally: the larger bucket comes first.
    top_type, second_type = summary['source_type'][0], summary['source_type'][1]
    assert top_type['value'] == "syslog"
    assert top_type['count'] == 2
    assert second_type['value'] == "logicmonitor"
    assert second_type['count'] == 1

    # The remaining dimensions are compared as value -> count mappings.
    path_counts = {entry['value']: entry['count'] for entry in summary['source_path']}
    assert path_counts == {
        '/alerts': 1,
        '/var/log/syslog': 2,
    }
    id_counts = {entry['value']: entry['count'] for entry in summary['source_id']}
    assert id_counts == {
        'fw-01': 2,
        'lm-1': 1,
    }
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
def test_list_recent_raw_envelopes_supports_status_filtering(tmp_path: Path) -> None:
    """Check ordering and ``status`` filtering of ``list_recent_raw_envelopes``.

    Stores three envelopes, marks them ``parse_failed``, ``canonicalized`` and
    ``unsupported`` respectively, then expects the unfiltered listing in
    reverse insertion order and the ``parse_failed`` listing to contain only
    the first envelope.
    """
    database = tmp_path / 'brainstem.sqlite3'
    init_db(str(database))

    envelopes = [
        RawInputEnvelope(
            tenant_id='client-a',
            source_type='syslog',
            timestamp=f'2026-03-22T00:00:0{second}Z',
            message_raw=message,
            host='fw-01',
            service='sshd',
        )
        for second, message in enumerate(('first', 'second', 'third'), start=1)
    ]
    raw_ids = store_raw_envelopes(envelopes, db_path=str(database))

    # Give each stored envelope a different canonicalization outcome.
    set_raw_envelope_status(raw_ids[0], 'parse_failed', db_path=str(database), failure_reason='empty message')
    set_raw_envelope_status(raw_ids[1], 'canonicalized', db_path=str(database))
    set_raw_envelope_status(raw_ids[2], 'unsupported', db_path=str(database), failure_reason='unsupported source')

    everything = list_recent_raw_envelopes(str(database), limit=10)
    listed_ids = [row['id'] for row in everything]
    assert listed_ids == [raw_ids[2], raw_ids[1], raw_ids[0]]

    only_parse_failed = list_recent_raw_envelopes(str(database), status='parse_failed', limit=10)
    filtered_ids = [row['id'] for row in only_parse_failed]
    assert filtered_ids == [raw_ids[0]]
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
def test_query_recent_failed_raw_envelopes_with_status_filter(tmp_path: Path) -> None:
    """Check ``list_recent_failed_raw_envelopes`` with and without ``status``.

    Of three stored envelopes, one is marked ``parse_failed``, one
    ``canonicalized`` and one ``unsupported``. The unfiltered failure listing
    must return the unsupported and parse-failed rows, newest first; the
    ``parse_failed`` filter must return only the first envelope.
    """
    database = tmp_path / 'brainstem.sqlite3'
    init_db(str(database))

    envelopes = [
        RawInputEnvelope(
            tenant_id="client-a",
            source_type="syslog",
            timestamp=f"2026-03-22T00:00:0{second}Z",
            message_raw=message,
            host="fw-01",
            service="sshd",
        )
        for second, message in enumerate(("first", "second", "third"), start=1)
    ]
    raw_ids = store_raw_envelopes(envelopes, str(database))

    # Two failure outcomes around one success, which must not be listed.
    set_raw_envelope_status(raw_ids[0], "parse_failed", db_path=str(database), failure_reason="empty message")
    set_raw_envelope_status(raw_ids[1], "canonicalized", db_path=str(database))
    set_raw_envelope_status(raw_ids[2], "unsupported", db_path=str(database), failure_reason="unsupported source")

    failures = list_recent_failed_raw_envelopes(str(database), limit=10)
    failure_ids = [row["id"] for row in failures]
    assert failure_ids == [raw_ids[2], raw_ids[0]]
    assert failures[0]["canonicalization_status"] == "unsupported"
    assert failures[1]["canonicalization_status"] == "parse_failed"

    only_parse_failed = list_recent_failed_raw_envelopes(str(database), status="parse_failed", limit=10)
    assert len(only_parse_failed) == 1
    assert only_parse_failed[0]["id"] == raw_ids[0]
|
|
272
|
+
|
|
273
|
+
|
|
274
|
+
def test_get_raw_envelope_by_id(tmp_path: Path) -> None:
    """Fetch a single stored envelope by id after marking it ``parse_failed``.

    Expects the returned row to exist and to expose the id, status and
    failure reason that were written.
    """
    database = tmp_path / 'brainstem.sqlite3'
    init_db(str(database))

    envelope = RawInputEnvelope(
        tenant_id="client-a",
        source_type="syslog",
        timestamp="2026-03-22T00:00:01Z",
        message_raw="single",
        host="fw-01",
        service="charon",
    )
    # Unpacking a one-element result also checks that exactly one id came back.
    (raw_id,) = store_raw_envelopes([envelope], str(database))
    set_raw_envelope_status(raw_id, "parse_failed", db_path=str(database), failure_reason="empty message")

    fetched = get_raw_envelope_by_id(raw_id, db_path=str(database))
    assert fetched is not None
    assert fetched["id"] == raw_id
    assert fetched["canonicalization_status"] == "parse_failed"
    assert fetched["failure_reason"] == "empty message"
|