@simbimbo/brainstem 0.0.2 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +37 -0
- package/README.md +25 -0
- package/brainstem/__init__.py +1 -1
- package/brainstem/adapters.py +120 -0
- package/brainstem/api.py +483 -23
- package/brainstem/config.py +70 -0
- package/brainstem/ingest.py +418 -33
- package/brainstem/interesting.py +56 -1
- package/brainstem/listener.py +175 -0
- package/brainstem/models.py +3 -0
- package/brainstem/recurrence.py +38 -1
- package/brainstem/source_drivers.py +150 -0
- package/brainstem/storage.py +547 -8
- package/docs/README.md +94 -0
- package/docs/adapters.md +97 -401
- package/docs/api.md +223 -278
- package/package.json +1 -1
- package/pyproject.toml +1 -1
- package/tests/test_adapters.py +94 -0
- package/tests/test_api.py +973 -0
- package/tests/test_canonicalization.py +8 -0
- package/tests/test_config.py +24 -0
- package/tests/test_file_ingest.py +77 -0
- package/tests/test_interesting.py +10 -0
- package/tests/test_listener.py +253 -0
- package/tests/test_recurrence.py +2 -0
- package/tests/test_source_drivers.py +95 -0
- package/tests/test_storage.py +370 -2
package/brainstem/storage.py
CHANGED
|
@@ -2,15 +2,61 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import json
|
|
4
4
|
import sqlite3
|
|
5
|
-
from dataclasses import asdict
|
|
6
5
|
from pathlib import Path
|
|
7
|
-
from typing import Iterable, List
|
|
6
|
+
from typing import Any, Iterable, List
|
|
8
7
|
|
|
9
|
-
from .models import Candidate, Event, Signature
|
|
8
|
+
from .models import Candidate, Event, RawInputEnvelope, Signature
|
|
9
|
+
from .config import resolve_default_db_path
|
|
10
10
|
|
|
11
11
|
|
|
12
12
|
def default_db_path() -> Path:
    """Return the default database location as a ``Path``.

    The location itself is decided by ``resolve_default_db_path()`` from the
    ``.config`` module; this function only wraps the result in ``Path``.
    """
    resolved = resolve_default_db_path()
    return Path(resolved)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
RAW_ENVELOPE_STATUSES = ("received", "canonicalized", "parse_failed", "unsupported")
|
|
17
|
+
RAW_ENVELOPE_FAILURE_STATUSES = ("parse_failed", "unsupported")
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _coerce_raw_envelope_id(value: Any) -> int | None:
    """Convert a loosely typed value into a raw-envelope id.

    Args:
        value: Candidate id; typically an ``int`` or a numeric string.

    Returns:
        The integer id, or ``None`` when *value* cannot be used as one.
        ``None`` is returned for booleans, for strings that are not made up
        solely of decimal digits once surrounding whitespace is removed
        (empty, signed, fractional, ...), and for every other type.
    """
    # bool is a subclass of int, so it has to be rejected before the int check.
    if isinstance(value, bool):
        return None
    if isinstance(value, int):
        return value
    if not isinstance(value, str):
        return None

    text = value.strip()
    # isdecimal() instead of isdigit(): isdigit() also accepts characters such
    # as "²" or "①" that int() cannot parse, which would raise ValueError here.
    if not text.isdecimal():
        return None
    try:
        return int(text)
    except ValueError:
        # For example a digit string longer than the interpreter's
        # int-conversion limit; treat it like any other unusable value.
        return None
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def _coerce_raw_envelope_id_list(raw_value: Any) -> List[int]:
    """Collect the usable raw-envelope ids from a list or tuple.

    Each element is passed through ``_coerce_raw_envelope_id``; elements it
    rejects are dropped and the remaining ids keep their input order.
    ``None`` and any value that is neither a ``list`` nor a ``tuple`` produce
    an empty list.
    """
    if not isinstance(raw_value, (list, tuple)):
        return []

    accepted: List[int] = []
    for element in raw_value:
        candidate = _coerce_raw_envelope_id(element)
        if candidate is not None:
            accepted.append(candidate)
    return accepted
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def extract_source_raw_envelope_ids(metadata_json: str | None) -> List[int]:
    """Read the ``source_raw_envelope_ids`` entry out of a JSON metadata string.

    Returns an empty list when *metadata_json* is empty or ``None``, is not
    valid JSON, or does not decode to a JSON object. Otherwise the value
    stored under ``"source_raw_envelope_ids"`` is filtered through
    ``_coerce_raw_envelope_id_list``.
    """
    if metadata_json:
        try:
            decoded = json.loads(metadata_json)
        except json.JSONDecodeError:
            decoded = None
        if isinstance(decoded, dict):
            return _coerce_raw_envelope_id_list(decoded.get("source_raw_envelope_ids"))
    return []
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def _validate_canonicalization_status(status: str) -> None:
    """Raise ``ValueError`` unless *status* is listed in ``RAW_ENVELOPE_STATUSES``."""
    if status in RAW_ENVELOPE_STATUSES:
        return
    raise ValueError(f"unsupported canonicalization_status: {status}")
|
|
14
60
|
|
|
15
61
|
|
|
16
62
|
def connect(db_path: str | None = None) -> sqlite3.Connection:
|
|
@@ -42,6 +88,27 @@ def init_db(db_path: str | None = None) -> None:
|
|
|
42
88
|
correlation_keys_json TEXT NOT NULL
|
|
43
89
|
);
|
|
44
90
|
|
|
91
|
+
CREATE TABLE IF NOT EXISTS raw_envelopes (
|
|
92
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
93
|
+
tenant_id TEXT NOT NULL,
|
|
94
|
+
source_type TEXT NOT NULL,
|
|
95
|
+
source_id TEXT,
|
|
96
|
+
source_name TEXT,
|
|
97
|
+
timestamp TEXT NOT NULL,
|
|
98
|
+
host TEXT,
|
|
99
|
+
service TEXT,
|
|
100
|
+
severity TEXT,
|
|
101
|
+
asset_id TEXT,
|
|
102
|
+
source_path TEXT,
|
|
103
|
+
facility TEXT,
|
|
104
|
+
message_raw TEXT NOT NULL,
|
|
105
|
+
structured_fields_json TEXT NOT NULL,
|
|
106
|
+
correlation_keys_json TEXT NOT NULL,
|
|
107
|
+
metadata_json TEXT NOT NULL,
|
|
108
|
+
canonicalization_status TEXT NOT NULL DEFAULT 'received',
|
|
109
|
+
failure_reason TEXT
|
|
110
|
+
);
|
|
111
|
+
|
|
45
112
|
CREATE TABLE IF NOT EXISTS signatures (
|
|
46
113
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
47
114
|
signature_key TEXT NOT NULL UNIQUE,
|
|
@@ -72,6 +139,234 @@ def init_db(db_path: str | None = None) -> None:
|
|
|
72
139
|
conn.close()
|
|
73
140
|
|
|
74
141
|
|
|
142
|
+
def store_raw_envelopes(raw_envelopes: Iterable[RawInputEnvelope], db_path: str | None = None) -> List[int]:
    """Insert raw envelopes into ``raw_envelopes`` and return their row ids.

    Every row is written with ``canonicalization_status`` set to
    ``"received"`` and a NULL ``failure_reason``. ``structured_fields``,
    ``correlation_keys`` and ``metadata`` are stored as JSON text
    (``ensure_ascii=False``). A single commit is issued after the last
    insert, and the connection is closed whether or not the inserts succeed.

    Returns:
        The ``lastrowid`` of each insert, in input order.
    """
    insert_sql = '''
        INSERT INTO raw_envelopes (
            tenant_id, source_type, source_id, source_name, timestamp, host, service, severity,
            asset_id, source_path, facility, message_raw,
            structured_fields_json, correlation_keys_json, metadata_json,
            canonicalization_status, failure_reason
        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    '''
    conn = connect(db_path)
    inserted_ids: List[int] = []
    try:
        for envelope in raw_envelopes:
            row_values = (
                envelope.tenant_id,
                envelope.source_type,
                envelope.source_id,
                envelope.source_name,
                envelope.timestamp,
                envelope.host,
                envelope.service,
                envelope.severity,
                envelope.asset_id,
                envelope.source_path,
                envelope.facility,
                envelope.message_raw,
                json.dumps(envelope.structured_fields, ensure_ascii=False),
                json.dumps(envelope.correlation_keys, ensure_ascii=False),
                json.dumps(envelope.metadata, ensure_ascii=False),
                "received",
                None,
            )
            inserted = conn.execute(insert_sql, row_values)
            inserted_ids.append(int(inserted.lastrowid))
        conn.commit()
    finally:
        conn.close()
    return inserted_ids
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
def set_raw_envelope_status(
    raw_envelope_id: int,
    status: str,
    db_path: str | None = None,
    *,
    failure_reason: str | None = None,
) -> None:
    """Overwrite the canonicalization status and failure reason of one envelope.

    *status* is checked with ``_validate_canonicalization_status`` before the
    database is opened, so an unknown status raises ``ValueError`` without
    touching it. ``failure_reason`` is always written, which means the
    default ``None`` clears any previously stored reason. No error is raised
    here when no row has the given id.
    """
    _validate_canonicalization_status(status)

    update_sql = '''
        UPDATE raw_envelopes
        SET canonicalization_status = ?, failure_reason = ?
        WHERE id = ?
    '''
    bound_values = (status, failure_reason, raw_envelope_id)

    conn = connect(db_path)
    try:
        conn.execute(update_sql, bound_values)
        conn.commit()
    finally:
        conn.close()
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
def get_raw_envelope_by_id(raw_envelope_id: int, db_path: str | None = None) -> sqlite3.Row | None:
    """Fetch the ``raw_envelopes`` row with the given id.

    Returns the result of ``fetchone()``: the matching row, or ``None`` when
    no row has that id. The connection is always closed before returning.
    """
    conn = connect(db_path)
    try:
        lookup = conn.execute(
            "SELECT * FROM raw_envelopes WHERE id = ?",
            (raw_envelope_id,),
        )
        found = lookup.fetchone()
    finally:
        conn.close()
    return found
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
def get_raw_envelopes_by_ids(
    raw_envelope_ids: Iterable[int | str | object],
    db_path: str | None = None,
) -> List[sqlite3.Row]:
    """Fetch the ``raw_envelopes`` rows whose ids appear in *raw_envelope_ids*.

    Args:
        raw_envelope_ids: Any iterable of candidate ids (list, tuple, set,
            generator, ...). Each item is filtered through
            ``_coerce_raw_envelope_id_list``; unusable items are dropped and
            duplicates are removed while keeping first-seen order.
        db_path: Database to query; passed through to ``connect``.

    Returns:
        The matching rows, or an empty list when no usable id was supplied
        (in which case the database is not opened). The query has no
        ``ORDER BY``, so the row order is whatever SQLite returns.
    """
    if raw_envelope_ids is None or isinstance(raw_envelope_ids, (str, bytes, dict)):
        # These inputs never yielded ids; do not start iterating their
        # characters or keys now.
        candidates: list[object] = []
    else:
        try:
            # The helper only understands list/tuple, so materialize other
            # iterables first; otherwise sets and generators silently match
            # nothing despite the Iterable annotation.
            candidates = list(raw_envelope_ids)
        except TypeError:
            # Not iterable at all (e.g. a bare int): same empty result as before.
            candidates = []

    ids = list(dict.fromkeys(_coerce_raw_envelope_id_list(candidates)))
    if not ids:
        return []

    conn = connect(db_path)
    try:
        # One "?" per id; the ids themselves are bound, never interpolated.
        placeholders = ",".join("?" * len(ids))
        return conn.execute(
            f"SELECT * FROM raw_envelopes WHERE id IN ({placeholders})",
            ids,
        ).fetchall()
    finally:
        conn.close()
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
def _recent_raw_envelopes_query(
    canonicalization_status: str | None,
    *,
    failures_only: bool,
    tenant_id: str | None = None,
    source_type: str | None = None,
    source_id: str | None = None,
    source_path: str | None = None,
) -> tuple[str, tuple[str, ...]]:
    """Build the WHERE clause and bind values for a ``raw_envelopes`` listing.

    Status filtering:
        * an explicit *canonicalization_status* is validated and matched
          exactly; it takes precedence over *failures_only*;
        * otherwise, with *failures_only* set, rows are limited to
          ``RAW_ENVELOPE_FAILURE_STATUSES``;
        * otherwise no status filter is applied.

    Each of *tenant_id*, *source_type*, *source_id* and *source_path* adds an
    equality filter when it is not ``None``.

    Returns:
        ``(where_clause, args)``. ``where_clause`` is ``""`` when there is
        nothing to filter on, else ``"WHERE ..."`` with the conditions joined
        by ``AND``; ``args`` holds the bind values in placeholder order.

    Raises:
        ValueError: If *canonicalization_status* is not a known status.
    """
    where_clauses: list[str] = []
    args: list[str] = []

    if canonicalization_status is not None:
        _validate_canonicalization_status(canonicalization_status)
        where_clauses.append("canonicalization_status = ?")
        args.append(canonicalization_status)
    elif failures_only:
        # Derive the placeholder count from the constant so the SQL cannot
        # fall out of step if a failure status is added or removed.
        placeholders = ", ".join("?" for _ in RAW_ENVELOPE_FAILURE_STATUSES)
        where_clauses.append(f"canonicalization_status IN ({placeholders})")
        args.extend(RAW_ENVELOPE_FAILURE_STATUSES)

    # Column names are fixed literals; only the values are caller-supplied
    # and those are always bound as parameters.
    equality_filters = (
        ("tenant_id", tenant_id),
        ("source_type", source_type),
        ("source_id", source_id),
        ("source_path", source_path),
    )
    for column, value in equality_filters:
        if value is not None:
            where_clauses.append(f"{column} = ?")
            args.append(value)

    where_clause = f"WHERE {' AND '.join(where_clauses)}" if where_clauses else ""
    return where_clause, tuple(args)
|
|
273
|
+
|
|
274
|
+
|
|
275
|
+
def list_recent_raw_envelopes(
    db_path: str | None = None,
    status: str | None = None,
    limit: int = 20,
    *,
    failures_only: bool = False,
    tenant_id: str | None = None,
    source_type: str | None = None,
    source_id: str | None = None,
    source_path: str | None = None,
) -> List[sqlite3.Row]:
    """List ``raw_envelopes`` rows, highest id first.

    The filter arguments are translated into a WHERE clause by
    ``_recent_raw_envelopes_query``, which raises ``ValueError`` for an
    unknown *status*. At most ``max(1, limit)`` rows are returned, so a
    zero or negative *limit* still yields one row when any row matches.
    """
    conn = connect(db_path)
    try:
        filter_sql, filter_args = _recent_raw_envelopes_query(
            status,
            failures_only=failures_only,
            tenant_id=tenant_id,
            source_type=source_type,
            source_id=source_id,
            source_path=source_path,
        )
        if filter_sql:
            filter_sql = f"{filter_sql} "
        sql = f"""
            SELECT * FROM raw_envelopes
            {filter_sql}
            ORDER BY id DESC
            LIMIT ?
            """
        bound_values = (*filter_args, max(1, limit))
        return conn.execute(sql, bound_values).fetchall()
    finally:
        conn.close()
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
def list_canonical_events(
    db_path: str | None = None,
    limit: int = 20,
    *,
    tenant_id: str | None = None,
    source_type: str | None = None,
    host: str | None = None,
    service: str | None = None,
    severity: str | None = None,
) -> List[sqlite3.Row]:
    """List ``raw_envelopes`` rows whose status is ``canonicalized``, highest id first.

    Each keyword filter that is not ``None`` adds an exact-match condition on
    the column of the same name. At most ``max(1, limit)`` rows are returned.
    """
    conn = connect(db_path)
    try:
        conditions = ["canonicalization_status = ?"]
        bound: List[str] = ["canonicalized"]

        optional_filters = (
            ("tenant_id = ?", tenant_id),
            ("source_type = ?", source_type),
            ("host = ?", host),
            ("service = ?", service),
            ("severity = ?", severity),
        )
        for condition, wanted in optional_filters:
            if wanted is not None:
                conditions.append(condition)
                bound.append(wanted)

        where_clause = " WHERE " + " AND ".join(conditions)
        sql = f"""
            SELECT * FROM raw_envelopes
            {where_clause}
            ORDER BY id DESC
            LIMIT ?
            """
        return conn.execute(sql, (*bound, max(1, limit))).fetchall()
    finally:
        conn.close()
|
|
354
|
+
|
|
355
|
+
|
|
356
|
+
def list_recent_failed_raw_envelopes(
    db_path: str | None = None,
    *,
    status: str | None = None,
    limit: int = 20,
) -> List[sqlite3.Row]:
    """List recent raw envelopes that failed canonicalization.

    Thin wrapper around ``list_recent_raw_envelopes``. With no *status* the
    listing is restricted to the failure statuses (``failures_only=True``).
    An explicit *status* is forwarded unchanged and ``failures_only`` is
    turned off, so the result is whatever that status selects.
    """
    restrict_to_failures = status is None
    return list_recent_raw_envelopes(
        db_path=db_path,
        status=status,
        limit=limit,
        failures_only=restrict_to_failures,
    )
|
|
368
|
+
|
|
369
|
+
|
|
75
370
|
def store_events(events: Iterable[Event], db_path: str | None = None) -> int:
|
|
76
371
|
conn = connect(db_path)
|
|
77
372
|
count = 0
|
|
@@ -107,11 +402,188 @@ def store_events(events: Iterable[Event], db_path: str | None = None) -> int:
|
|
|
107
402
|
conn.close()
|
|
108
403
|
|
|
109
404
|
|
|
405
|
+
SOURCE_SUMMARY_DIMENSIONS = (
|
|
406
|
+
"source_type",
|
|
407
|
+
"source_path",
|
|
408
|
+
"source_id",
|
|
409
|
+
"source_name",
|
|
410
|
+
"host",
|
|
411
|
+
"service",
|
|
412
|
+
)
|
|
413
|
+
|
|
414
|
+
|
|
415
|
+
def _summarize_raw_envelopes_by_dimension(
    conn: sqlite3.Connection,
    dimension: str,
    limit: int = 20,
) -> List[dict[str, int | str]]:
    """Count ``raw_envelopes`` rows per distinct value of one column.

    Rows whose *dimension* value is NULL or blank after trimming are ignored.
    Results are ordered by count (largest first), then by value, and capped
    at ``max(1, limit)`` entries. Rows are read by column name, so *conn*
    must return name-addressable rows.

    Raises:
        ValueError: If *dimension* is not in ``SOURCE_SUMMARY_DIMENSIONS``.
    """
    if dimension not in SOURCE_SUMMARY_DIMENSIONS:
        raise ValueError(f"unsupported dimension: {dimension}")

    # The column name is interpolated into the SQL text; that is only
    # acceptable because of the allow-list check above.
    sql = f"""
        SELECT {dimension} AS value, COUNT(*) AS count
        FROM raw_envelopes
        WHERE COALESCE(TRIM({dimension}), '') <> ''
        GROUP BY {dimension}
        ORDER BY COUNT(*) DESC, value ASC
        LIMIT ?
        """
    summary: List[dict[str, int | str]] = []
    for row in conn.execute(sql, (max(1, limit),)).fetchall():
        summary.append({"value": row["value"], "count": int(row["count"])})
    return summary
|
|
436
|
+
|
|
437
|
+
|
|
438
|
+
def get_source_dimension_summaries(
    db_path: str | None = None,
    *,
    limit: int = 20,
) -> dict[str, List[dict[str, int | str]]]:
    """Return per-dimension value counts for the stored raw envelopes.

    Runs ``init_db`` first, then opens a connection and delegates to
    ``_get_source_dimension_summaries_from_conn`` with the same *limit*.
    The connection is closed before returning.
    """
    init_db(db_path)
    conn = connect(db_path)
    try:
        summaries = _get_source_dimension_summaries_from_conn(conn, limit=limit)
    finally:
        conn.close()
    return summaries
|
|
449
|
+
|
|
450
|
+
|
|
451
|
+
def get_source_status_summaries(
    db_path: str | None = None,
    *,
    limit: int = 20,
    tenant_id: str | None = None,
    source_type: str | None = None,
    source_id: str | None = None,
    source_path: str | None = None,
) -> List[dict[str, Any]]:
    """Summarize raw envelopes per (tenant, source type, source id, source path).

    Runs ``init_db`` first. Each summary holds the total row count, the
    counts for the ``canonicalized``, ``parse_failed`` and ``unsupported``
    statuses, and the smallest and largest ``timestamp`` values of the group.
    Groups are ordered by ``last_seen_at`` then ``raw_count``, both
    descending, and capped at ``max(1, limit)``. Keyword filters that are not
    ``None`` restrict the rows by exact match. NULL or empty ``source_type``,
    ``source_id`` and ``source_path`` values are reported as ``""``.
    """
    init_db(db_path)
    conn = connect(db_path)
    try:
        select_sql = """
            SELECT
                tenant_id,
                source_type,
                source_id,
                source_path,
                COUNT(*) AS raw_count,
                SUM(CASE WHEN canonicalization_status = 'canonicalized' THEN 1 ELSE 0 END) AS canonicalized_count,
                SUM(CASE WHEN canonicalization_status = 'parse_failed' THEN 1 ELSE 0 END) AS parse_failed_count,
                SUM(CASE WHEN canonicalization_status = 'unsupported' THEN 1 ELSE 0 END) AS unsupported_count,
                MIN(timestamp) AS first_seen_at,
                MAX(timestamp) AS last_seen_at
            FROM raw_envelopes
            WHERE 1 = 1
        """
        grouping_sql = """
            GROUP BY tenant_id, source_type, source_id, source_path
            ORDER BY last_seen_at DESC, raw_count DESC
            LIMIT ?
        """

        filter_fragments: list[str] = []
        args: list[Any] = []
        optional_filters = (
            (" AND tenant_id = ?", tenant_id),
            (" AND source_type = ?", source_type),
            (" AND source_id = ?", source_id),
            (" AND source_path = ?", source_path),
        )
        for fragment, wanted in optional_filters:
            if wanted is not None:
                filter_fragments.append(fragment)
                args.append(wanted)
        args.append(max(1, limit))

        query = select_sql + "".join(filter_fragments) + grouping_sql

        summaries: List[dict[str, Any]] = []
        for row in conn.execute(query, args).fetchall():
            summaries.append(
                {
                    "tenant_id": row["tenant_id"],
                    "source_type": row["source_type"] or "",
                    "source_id": row["source_id"] or "",
                    "source_path": row["source_path"] or "",
                    "raw_count": int(row["raw_count"]),
                    "canonicalized_count": int(row["canonicalized_count"] or 0),
                    "parse_failed_count": int(row["parse_failed_count"] or 0),
                    "unsupported_count": int(row["unsupported_count"] or 0),
                    "first_seen_at": row["first_seen_at"],
                    "last_seen_at": row["last_seen_at"],
                }
            )
        return summaries
    finally:
        conn.close()
|
|
515
|
+
|
|
516
|
+
|
|
517
|
+
def _get_source_dimension_summaries_from_conn(
    conn: sqlite3.Connection,
    *,
    limit: int = 20,
) -> dict[str, List[dict[str, int | str]]]:
    """Build the value-count summary for every supported source dimension.

    Args:
        conn: Open connection to query; it is not closed here.
        limit: Maximum number of values reported per dimension.

    Returns:
        A dict with one key per entry of ``SOURCE_SUMMARY_DIMENSIONS``, in
        that order, each mapped to the result of
        ``_summarize_raw_envelopes_by_dimension`` for that column.
    """
    # Derive the keys from the allow-list rather than repeating them by hand,
    # so a dimension added to the constant cannot be forgotten here.
    return {
        dimension: _summarize_raw_envelopes_by_dimension(conn, dimension, limit=limit)
        for dimension in SOURCE_SUMMARY_DIMENSIONS
    }
|
|
530
|
+
|
|
531
|
+
|
|
532
|
+
def _query_count(conn: sqlite3.Connection, query: str, params: tuple[Any, ...] = ()) -> int:
    """Run a single-value query and return that value as an ``int``.

    Args:
        conn: Open connection to run the query on.
        query: SQL expected to produce one row with the count in its first
            column, e.g. ``SELECT COUNT(*) FROM ...``.
        params: Optional bind values for ``?`` placeholders in *query*, so
            callers do not have to inline literals into the SQL text.

    Returns:
        The first column of the first row as an integer, or ``0`` when the
        query yields no row or a NULL value.
    """
    row = conn.execute(query, params).fetchone()
    if row is None or row[0] is None:
        # No row (e.g. a filtered GROUP BY) or NULL (e.g. SUM over nothing):
        # report zero instead of failing on None.
        return 0
    return int(row[0])
|
|
534
|
+
|
|
535
|
+
|
|
536
|
+
def get_ingest_stats(db_path: str | None = None) -> dict[str, Any]:
    """Return ingest counters plus the per-dimension source summaries.

    Runs ``init_db`` first. ``received`` is the total number of
    ``raw_envelopes`` rows regardless of status; ``canonicalized``,
    ``parse_failed`` and ``unsupported`` count the rows in that status;
    ``candidates_generated`` is the row count of ``candidates``; and
    ``source_summaries`` comes from
    ``_get_source_dimension_summaries_from_conn`` with its default limit.
    """
    init_db(db_path)
    conn = connect(db_path)
    try:
        stats: dict[str, Any] = {
            "received": _query_count(conn, "SELECT COUNT(*) FROM raw_envelopes"),
        }
        # Fixed literal statuses only; nothing caller-supplied reaches the SQL text.
        for status_name in ("canonicalized", "parse_failed", "unsupported"):
            stats[status_name] = _query_count(
                conn,
                f"SELECT COUNT(*) FROM raw_envelopes WHERE canonicalization_status = '{status_name}'",
            )
        stats["candidates_generated"] = _query_count(conn, "SELECT COUNT(*) FROM candidates")
        stats["source_summaries"] = _get_source_dimension_summaries_from_conn(conn)
        return stats
    finally:
        conn.close()
|
|
559
|
+
|
|
560
|
+
|
|
110
561
|
def store_signatures(signatures: Iterable[Signature], db_path: str | None = None) -> int:
|
|
111
562
|
conn = connect(db_path)
|
|
112
563
|
count = 0
|
|
113
564
|
try:
|
|
114
565
|
for signature in signatures:
|
|
566
|
+
row = conn.execute(
|
|
567
|
+
"SELECT metadata_json FROM signatures WHERE signature_key = ?",
|
|
568
|
+
(signature.signature_key,),
|
|
569
|
+
).fetchone()
|
|
570
|
+
|
|
571
|
+
metadata = dict(signature.metadata)
|
|
572
|
+
raw_ids = _coerce_raw_envelope_id_list(metadata.get("source_raw_envelope_ids"))
|
|
573
|
+
if not raw_ids:
|
|
574
|
+
raw_id = _coerce_raw_envelope_id(metadata.get("source_raw_envelope_id"))
|
|
575
|
+
if raw_id is not None:
|
|
576
|
+
raw_ids = [raw_id]
|
|
577
|
+
metadata.pop("source_raw_envelope_id", None)
|
|
578
|
+
|
|
579
|
+
if row is not None:
|
|
580
|
+
existing_metadata = json.loads(row["metadata_json"] or "{}")
|
|
581
|
+
if not isinstance(existing_metadata, dict):
|
|
582
|
+
existing_metadata = {}
|
|
583
|
+
existing_raw_ids = _coerce_raw_envelope_id_list(existing_metadata.get("source_raw_envelope_ids"))
|
|
584
|
+
metadata = dict(existing_metadata) | dict(metadata)
|
|
585
|
+
metadata["source_raw_envelope_ids"] = sorted(set(existing_raw_ids + raw_ids))
|
|
586
|
+
|
|
115
587
|
conn.execute(
|
|
116
588
|
'''
|
|
117
589
|
INSERT INTO signatures (
|
|
@@ -127,7 +599,7 @@ def store_signatures(signatures: Iterable[Signature], db_path: str | None = None
|
|
|
127
599
|
signature.event_family,
|
|
128
600
|
signature.normalized_pattern,
|
|
129
601
|
signature.service,
|
|
130
|
-
json.dumps(
|
|
602
|
+
json.dumps(metadata, ensure_ascii=False),
|
|
131
603
|
),
|
|
132
604
|
)
|
|
133
605
|
count += 1
|
|
@@ -170,12 +642,79 @@ def store_candidates(candidates: Iterable[Candidate], db_path: str | None = None
|
|
|
170
642
|
conn.close()
|
|
171
643
|
|
|
172
644
|
|
|
173
|
-
def list_candidates(
    db_path: str | None = None,
    limit: int = 20,
    *,
    candidate_type: str | None = None,
    decision_band: str | None = None,
    min_score_total: float | None = None,
) -> List[sqlite3.Row]:
    """List stored candidates, best score first.

    *candidate_type* and *decision_band* filter by exact match, and
    *min_score_total* keeps rows with ``score_total`` at or above it; a
    filter left as ``None`` is not applied. Rows are ordered by
    ``score_total`` then ``id``, both descending, and at most
    ``max(1, limit)`` rows are returned.
    """
    conn = connect(db_path)
    try:
        conditions: List[str] = []
        bound: List[Any] = []

        optional_filters = (
            ("candidate_type = ?", candidate_type),
            ("decision_band = ?", decision_band),
            ("score_total >= ?", min_score_total),
        )
        for condition, wanted in optional_filters:
            if wanted is not None:
                conditions.append(condition)
                bound.append(wanted)

        where_clause = " WHERE " + " AND ".join(conditions) if conditions else ""

        return conn.execute(
            f'SELECT * FROM candidates{where_clause} ORDER BY score_total DESC, id DESC LIMIT ?',
            (*bound, max(1, limit)),
        ).fetchall()
    finally:
        conn.close()
|
|
679
|
+
|
|
680
|
+
|
|
681
|
+
def list_signatures(
|
|
682
|
+
db_path: str | None = None,
|
|
683
|
+
limit: int = 20,
|
|
684
|
+
*,
|
|
685
|
+
event_family: str | None = None,
|
|
686
|
+
service: str | None = None,
|
|
687
|
+
min_occurrence_count: int | None = None,
|
|
688
|
+
) -> List[sqlite3.Row]:
|
|
689
|
+
conn = connect(db_path)
|
|
690
|
+
try:
|
|
691
|
+
where_clauses: List[str] = []
|
|
692
|
+
args: List[Any] = []
|
|
693
|
+
|
|
694
|
+
if event_family is not None:
|
|
695
|
+
where_clauses.append("event_family = ?")
|
|
696
|
+
args.append(event_family)
|
|
697
|
+
if service is not None:
|
|
698
|
+
where_clauses.append("service = ?")
|
|
699
|
+
args.append(service)
|
|
700
|
+
if min_occurrence_count is not None:
|
|
701
|
+
where_clauses.append("occurrence_count >= ?")
|
|
702
|
+
args.append(min_occurrence_count)
|
|
703
|
+
|
|
704
|
+
where_clause = ""
|
|
705
|
+
if where_clauses:
|
|
706
|
+
where_clause = " WHERE " + " AND ".join(where_clauses)
|
|
707
|
+
|
|
708
|
+
rows = conn.execute(
|
|
709
|
+
f"""
|
|
710
|
+
SELECT
|
|
711
|
+
id, signature_key, event_family, normalized_pattern, service,
|
|
712
|
+
metadata_json, occurrence_count
|
|
713
|
+
FROM signatures{where_clause}
|
|
714
|
+
ORDER BY occurrence_count DESC, id DESC
|
|
715
|
+
LIMIT ?
|
|
716
|
+
""",
|
|
717
|
+
(*args, max(1, limit)),
|
|
179
718
|
).fetchall()
|
|
180
719
|
return rows
|
|
181
720
|
finally:
|
package/docs/README.md
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
# Runtime Examples
|
|
2
|
+
|
|
3
|
+
This page collects short, copy-paste examples for the implemented runtime paths: the HTTP API, the UDP listener, and file ingest.
|
|
4
|
+
|
|
5
|
+
## 0) Shared runtime settings
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
export BRAINSTEM_API_TOKEN=my-local-token # optional: set only if you want auth required
|
|
9
|
+
export BRAINSTEM_DB_PATH=/tmp/brainstem.sqlite3
|
|
10
|
+
```
|
|
11
|
+
|
|
12
|
+
`BRAINSTEM_API_TOKEN` is optional. If you do not set it, omit all `X-API-Token` headers in the API examples.
|
|
13
|
+
|
|
14
|
+
## 1) API entry point
|
|
15
|
+
|
|
16
|
+
```bash
|
|
17
|
+
# Starts the runtime API
|
|
18
|
+
python -m uvicorn brainstem.api:app --host 127.0.0.1 --port 8000
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
curl -s http://127.0.0.1:8000/healthz
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
## 2) UDP listener entry point
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
# Prints canonicalized events for each received datagram
|
|
29
|
+
python -m brainstem.listener --tenant demo-tenant --host 127.0.0.1 --port 5514 --source-path /var/log/syslog
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
printf 'Mar 22 03:10:00 fw-01 charon: IPsec SA rekey succeeded\n' | nc -u 127.0.0.1 5514
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
## 3) API ingest (syslog payload style)
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
curl -s -X POST http://127.0.0.1:8000/ingest/event \
|
|
40
|
+
-H "Content-Type: application/json" \
|
|
41
|
+
-H "X-API-Token: $BRAINSTEM_API_TOKEN" \
|
|
42
|
+
-d '{"tenant_id":"demo-tenant","source_type":"syslog","source_path":"/var/log/syslog","message_raw":"Mar 22 03:11:00 fw-01 charon: child SA rekey started"}'
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
## 4) API ingest for file source events
|
|
46
|
+
|
|
47
|
+
```bash
|
|
48
|
+
curl -s -X POST http://127.0.0.1:8000/ingest/batch \
|
|
49
|
+
-H "Content-Type: application/json" \
|
|
50
|
+
-H "X-API-Token: $BRAINSTEM_API_TOKEN" \
|
|
51
|
+
-d '{"threshold":2,"db_path":"/tmp/brainstem.sqlite3","events":[{"tenant_id":"demo-tenant","source_type":"file","source_path":"/tmp/manual.log","message_raw":"vpn tunnel dropped and recovered"}]}'
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
## 5) Runtime inspection endpoints (same db path)
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
curl -s "http://127.0.0.1:8000/ingest/recent?db_path=/tmp/brainstem.sqlite3&limit=5" \
|
|
58
|
+
-H "X-API-Token: $BRAINSTEM_API_TOKEN"
|
|
59
|
+
curl -s "http://127.0.0.1:8000/candidates?db_path=/tmp/brainstem.sqlite3&limit=5" \
|
|
60
|
+
-H "X-API-Token: $BRAINSTEM_API_TOKEN"
|
|
61
|
+
curl -s "http://127.0.0.1:8000/signatures?db_path=/tmp/brainstem.sqlite3&limit=5" \
|
|
62
|
+
-H "X-API-Token: $BRAINSTEM_API_TOKEN"
|
|
63
|
+
curl -s "http://127.0.0.1:8000/raw_envelopes?db_path=/tmp/brainstem.sqlite3&limit=5" \
|
|
64
|
+
-H "X-API-Token: $BRAINSTEM_API_TOKEN"
|
|
65
|
+
curl -s "http://127.0.0.1:8000/stats?db_path=/tmp/brainstem.sqlite3" \
|
|
66
|
+
-H "X-API-Token: $BRAINSTEM_API_TOKEN"
|
|
67
|
+
curl -s "http://127.0.0.1:8000/failures?db_path=/tmp/brainstem.sqlite3&limit=5" \
|
|
68
|
+
-H "X-API-Token: $BRAINSTEM_API_TOKEN"
|
|
69
|
+
curl -s "http://127.0.0.1:8000/sources?db_path=/tmp/brainstem.sqlite3&limit=5" \
|
|
70
|
+
-H "X-API-Token: $BRAINSTEM_API_TOKEN"
|
|
71
|
+
curl -s "http://127.0.0.1:8000/sources/status?db_path=/tmp/brainstem.sqlite3&limit=5" \
|
|
72
|
+
-H "X-API-Token: $BRAINSTEM_API_TOKEN"
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
## 6) Direct file ingest helper path
|
|
76
|
+
|
|
77
|
+
```bash
|
|
78
|
+
python - <<'PY'
|
|
79
|
+
from brainstem.ingest import run_ingest_file
|
|
80
|
+
|
|
81
|
+
result = run_ingest_file(
|
|
82
|
+
"tests/fixtures/sample_syslog.log",
|
|
83
|
+
tenant_id="demo-tenant",
|
|
84
|
+
threshold=2,
|
|
85
|
+
db_path="/tmp/brainstem.sqlite3",
|
|
86
|
+
)
|
|
87
|
+
print({
|
|
88
|
+
"events": len(result.events),
|
|
89
|
+
"signatures": len(result.signatures),
|
|
90
|
+
"candidates": len(result.candidates),
|
|
91
|
+
"parse_failed": result.parse_failed,
|
|
92
|
+
})
|
|
93
|
+
PY
|
|
94
|
+
```
|