@simbimbo/brainstem 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +63 -0
- package/README.md +99 -3
- package/brainstem/__init__.py +3 -0
- package/brainstem/api.py +131 -0
- package/brainstem/connectors/__init__.py +1 -0
- package/brainstem/connectors/logicmonitor.py +26 -0
- package/brainstem/connectors/types.py +16 -0
- package/brainstem/demo.py +64 -0
- package/brainstem/fingerprint.py +44 -0
- package/brainstem/ingest.py +101 -0
- package/brainstem/instrumentation.py +38 -0
- package/brainstem/interesting.py +62 -0
- package/brainstem/models.py +78 -0
- package/brainstem/recurrence.py +112 -0
- package/brainstem/scoring.py +38 -0
- package/brainstem/storage.py +182 -0
- package/docs/adapters.md +435 -0
- package/docs/api.md +380 -0
- package/docs/architecture.md +333 -0
- package/docs/connectors.md +66 -0
- package/docs/data-model.md +290 -0
- package/docs/design-governance.md +595 -0
- package/docs/mvp-flow.md +109 -0
- package/docs/roadmap.md +87 -0
- package/docs/scoring.md +424 -0
- package/docs/v0.0.1.md +277 -0
- package/docs/vision.md +85 -0
- package/package.json +6 -14
- package/pyproject.toml +18 -0
- package/tests/fixtures/sample_syslog.log +6 -0
- package/tests/test_api.py +72 -0
- package/tests/test_canonicalization.py +28 -0
- package/tests/test_demo.py +25 -0
- package/tests/test_fingerprint.py +22 -0
- package/tests/test_ingest.py +15 -0
- package/tests/test_instrumentation.py +16 -0
- package/tests/test_interesting.py +36 -0
- package/tests/test_logicmonitor.py +22 -0
- package/tests/test_recurrence.py +16 -0
- package/tests/test_scoring.py +21 -0
- package/tests/test_storage.py +26 -0
package/CHANGELOG.md
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
## 0.0.2 — 2026-03-22
|
|
4
|
+
|
|
5
|
+
First fully aligned public foundation release of **brAInstem**.
|
|
6
|
+
|
|
7
|
+
### Why 0.0.2
|
|
8
|
+
- `0.0.1` was already published to npm earlier as the first placeholder/bootstrap package version
|
|
9
|
+
- `0.0.2` is the first release where the local repo, documentation, canonical location, runtime foundation, and validation are being shipped together intentionally
|
|
10
|
+
|
|
11
|
+
### What this release adds/locks in
|
|
12
|
+
- canonical repo location at `~/brAInstem`
|
|
13
|
+
- design governance, `v0.0.1` scope, adapter contract, and attention scoring docs
|
|
14
|
+
- explicit `RawInputEnvelope` and `CanonicalEvent` foundation models
|
|
15
|
+
- canonicalization path for current syslog-like ingestion
|
|
16
|
+
- minimal FastAPI runtime with:
|
|
17
|
+
- `POST /ingest/event`
|
|
18
|
+
- `POST /ingest/batch`
|
|
19
|
+
- `GET /interesting`
|
|
20
|
+
- `GET /healthz`
|
|
21
|
+
- focused API and canonicalization tests
|
|
22
|
+
|
|
23
|
+
### Validation
|
|
24
|
+
- local test suite passed (`19 passed`)
|
|
25
|
+
- local end-to-end demo path executed successfully against sample syslog input
|
|
26
|
+
- minimal FastAPI runtime and canonicalization tests passed locally
|
|
27
|
+
|
|
28
|
+
## 0.0.1 — 2026-03-22
|
|
29
|
+
|
|
30
|
+
First public prototype release of **brAInstem**.
|
|
31
|
+
|
|
32
|
+
### What this release is
|
|
33
|
+
- an experimental, self-contained operational memory prototype for weak signals
|
|
34
|
+
- a proof that early event sources can feed one normalized internal stream
|
|
35
|
+
- a first cut at attention-oriented weak-signal discovery and operator-facing interesting items
|
|
36
|
+
|
|
37
|
+
### Included in 0.0.1
|
|
38
|
+
- syslog-like ingestion path
|
|
39
|
+
- event fingerprinting and recurrence candidate generation
|
|
40
|
+
- interpretable scoring with operator-facing decision/attention output
|
|
41
|
+
- SQLite persistence for events, signatures, and candidates
|
|
42
|
+
- local demo path for end-to-end validation
|
|
43
|
+
- initial LogicMonitor connector model/mapping work
|
|
44
|
+
- design governance docs covering:
|
|
45
|
+
- product thesis
|
|
46
|
+
- `v0.0.1` scope
|
|
47
|
+
- adapter/raw-envelope/canonical-event contract
|
|
48
|
+
- attention scoring model
|
|
49
|
+
|
|
50
|
+
### Not claimed in 0.0.1
|
|
51
|
+
- full universal intake apparatus
|
|
52
|
+
- production-grade always-on ingestion runtime
|
|
53
|
+
- mature multi-tenant MSP platform behavior
|
|
54
|
+
- complete discovery apparatus breadth (burst/spread/self-heal/precursor at full maturity)
|
|
55
|
+
- polished operator UI
|
|
56
|
+
|
|
57
|
+
### Validation
|
|
58
|
+
- local test suite passed (`19 passed`)
|
|
59
|
+
- local end-to-end demo path executed successfully against sample syslog input
|
|
60
|
+
- minimal FastAPI runtime and canonicalization tests passed locally
|
|
61
|
+
|
|
62
|
+
### Release framing
|
|
63
|
+
This release puts a truthful first stake in the ground for brAInstem as an operational memory runtime focused on weak signals and operator attention.
|
package/README.md
CHANGED
|
@@ -1,5 +1,101 @@
|
|
|
1
|
-
#
|
|
1
|
+
# brAInstem
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
**Operational memory for weak signals**
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
brAInstem is an always-on operational memory runtime for weak signals. Instead of treating memory as conversational context, brAInstem treats logs and operational events as raw operational experience that can be normalized into one canonical stream, assigned attention, clustered into patterns, and promoted into durable operational knowledge.
|
|
6
|
+
|
|
7
|
+
## One-line pitch
|
|
8
|
+
|
|
9
|
+
brAInstem helps MSPs and lean ops teams detect recurring, self-resolving, and quietly escalating issues before they become major incidents.
|
|
10
|
+
|
|
11
|
+
## The problem
|
|
12
|
+
|
|
13
|
+
Most operational pain never becomes a classic threshold alert.
|
|
14
|
+
|
|
15
|
+
It shows up as:
|
|
16
|
+
- recurring low-grade warnings
|
|
17
|
+
- brief self-healing failures
|
|
18
|
+
- cross-system weak signals
|
|
19
|
+
- near-misses that humans forget because there is too much noise
|
|
20
|
+
|
|
21
|
+
Traditional monitoring catches hard failures. brAInstem is designed to catch patterns that matter to humans before they become obvious outages.
|
|
22
|
+
|
|
23
|
+
## Core idea
|
|
24
|
+
|
|
25
|
+
Logs and events should not only be stored and searched. They should be:
|
|
26
|
+
1. ingested into a provenance-preserving raw envelope
|
|
27
|
+
2. normalized into one canonical event stream
|
|
28
|
+
3. assigned and updated with **attention** over time
|
|
29
|
+
4. compressed so most inconsequential noise is handled cheaply
|
|
30
|
+
5. promoted into operator-facing weak-signal outputs only when enough attention is earned
|
|
31
|
+
6. promoted later into incident memory, lessons, and runbook hints when justified
|
|
32
|
+
7. retrieved again when similar patterns recur
|
|
33
|
+
|
|
34
|
+
## Primary users
|
|
35
|
+
|
|
36
|
+
- MSP owners
|
|
37
|
+
- MSP technicians
|
|
38
|
+
- NOC teams
|
|
39
|
+
- SRE / infrastructure operators
|
|
40
|
+
- small security / ops teams dealing with alert fatigue and log blindness
|
|
41
|
+
|
|
42
|
+
## MVP promise
|
|
43
|
+
|
|
44
|
+
For a given tenant or environment, brAInstem should answer:
|
|
45
|
+
- What happened today that mattered but never alerted?
|
|
46
|
+
- What self-resolving issues are recurring?
|
|
47
|
+
- What patterns are likely to become tickets later?
|
|
48
|
+
- Have we seen this before?
|
|
49
|
+
- What happened right before the last similar incident?
|
|
50
|
+
|
|
51
|
+
## Relationship to ocmemog
|
|
52
|
+
|
|
53
|
+
Shared DNA:
|
|
54
|
+
- ingest -> candidate -> promotion pipeline
|
|
55
|
+
- compact retrieval
|
|
56
|
+
- provenance and explainability
|
|
57
|
+
- memory scoring and recurrence awareness
|
|
58
|
+
|
|
59
|
+
Different center of gravity:
|
|
60
|
+
- ocmemog = assistant memory and continuity
|
|
61
|
+
- brAInstem = operational event intelligence and weak-signal detection
|
|
62
|
+
|
|
63
|
+
## Initial scope
|
|
64
|
+
|
|
65
|
+
The long-term product direction is an always-on self-contained runtime with a robust input apparatus, a discovery apparatus, and operator-facing outputs.
|
|
66
|
+
|
|
67
|
+
For the first public prototype line, start with:
|
|
68
|
+
- a narrow but real ingestion story
|
|
69
|
+
- a canonical event stream
|
|
70
|
+
- attention-oriented weak-signal discovery
|
|
71
|
+
- operator-facing interesting items / digest output
|
|
72
|
+
- syslog-like events and LogicMonitor-shaped events as early proof sources
|
|
73
|
+
|
|
74
|
+
Delay until later:
|
|
75
|
+
- broad universal connector coverage
|
|
76
|
+
- mature syslog appliance behavior across every input mode
|
|
77
|
+
- full SIEM behavior
|
|
78
|
+
- broad compliance workflows
|
|
79
|
+
- generic observability replacement
|
|
80
|
+
- "chat with all your logs" as the primary story
|
|
81
|
+
|
|
82
|
+
## Proposed docs
|
|
83
|
+
|
|
84
|
+
- `docs/design-governance.md` — canonical product/design guardrails
|
|
85
|
+
- `docs/v0.0.1.md` — first release scope and acceptance criteria
|
|
86
|
+
- `docs/adapters.md` — intake, raw envelope, and canonical event contract
|
|
87
|
+
- `docs/vision.md`
|
|
88
|
+
- `docs/architecture.md`
|
|
89
|
+
- `docs/scoring.md` — attention scoring and routing model
|
|
90
|
+
- `docs/roadmap.md`
|
|
91
|
+
|
|
92
|
+
## Design governance
|
|
93
|
+
|
|
94
|
+
Before expanding scope, adding connectors, or making architecture claims, read:
|
|
95
|
+
- `docs/design-governance.md`
|
|
96
|
+
|
|
97
|
+
That document is the canonical governor for:
|
|
98
|
+
- what brAInstem is and is not
|
|
99
|
+
- how attention should work
|
|
100
|
+
- what belongs in `0.0.1`
|
|
101
|
+
- how the input apparatus, discovery apparatus, and operator outputs should evolve
|
package/brainstem/api.py
ADDED
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from datetime import datetime
|
|
4
|
+
from typing import Any, Dict, List, Optional
|
|
5
|
+
|
|
6
|
+
import json
|
|
7
|
+
from fastapi import FastAPI, HTTPException, Query
|
|
8
|
+
from fastapi.responses import JSONResponse
|
|
9
|
+
from pydantic import BaseModel, Field
|
|
10
|
+
|
|
11
|
+
from .ingest import canonicalize_raw_input_envelopes
|
|
12
|
+
from .interesting import interesting_items
|
|
13
|
+
from .models import Candidate, RawInputEnvelope
|
|
14
|
+
from .recurrence import build_recurrence_candidates
|
|
15
|
+
from .storage import init_db, list_candidates, store_candidates, store_events, store_signatures
|
|
16
|
+
from .ingest import signatures_for_events
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
# Single module-level ASGI application; the endpoint functions below register onto it.
app = FastAPI(title="brAInstem Runtime")
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class RawEnvelopeRequest(BaseModel):
    """Wire-format raw event payload accepted by the ingest endpoints.

    Mirrors the fields of the internal ``RawInputEnvelope`` model; only
    ``tenant_id``, ``source_type``, and ``message_raw`` are required.
    """

    tenant_id: str
    source_type: str
    message_raw: str
    # Optional ISO-8601 timestamp; when omitted, ingestion stamps arrival time.
    timestamp: Optional[str] = None
    host: str = ""
    service: str = ""
    severity: str = "info"
    asset_id: str = ""
    source_path: str = ""
    facility: str = ""
    # Free-form structured payloads; default_factory keeps each request isolated.
    structured_fields: Dict[str, Any] = Field(default_factory=dict)
    correlation_keys: Dict[str, Any] = Field(default_factory=dict)
    metadata: Dict[str, Any] = Field(default_factory=dict)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class IngestEventRequest(RawEnvelopeRequest):
    """Request body for ``POST /ingest/event`` — a single raw envelope."""

    pass
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class IngestBatchRequest(BaseModel):
    """Request body for ``POST /ingest/batch``."""

    events: List[RawEnvelopeRequest]
    # Minimum recurrence count before a candidate is emitted (validated >= 1).
    threshold: int = Field(default=2, ge=1)
    # Optional SQLite path; when omitted, results are not persisted.
    db_path: Optional[str] = None
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def _raw_envelope_from_request(payload: RawEnvelopeRequest) -> RawInputEnvelope:
    """Convert an API request model into the internal RawInputEnvelope.

    Missing timestamps are stamped with the UTC arrival time; mapping
    fields are shallow-copied so the envelope owns its own dicts.
    """
    fields = {
        "tenant_id": payload.tenant_id,
        "source_type": payload.source_type,
        "timestamp": payload.timestamp or datetime.utcnow().isoformat() + "Z",
        "message_raw": payload.message_raw,
        "host": payload.host,
        "service": payload.service,
        "severity": payload.severity,
        "asset_id": payload.asset_id,
        "source_path": payload.source_path,
        "facility": payload.facility,
        "structured_fields": dict(payload.structured_fields),
        "correlation_keys": dict(payload.correlation_keys),
        "metadata": dict(payload.metadata),
    }
    return RawInputEnvelope(**fields)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _candidate_from_row(row) -> Candidate:
    """Rehydrate a Candidate dataclass from a SQLite candidate row."""

    def decode(column: str, empty: str):
        # JSON columns may be NULL; fall back to an empty container literal.
        return json.loads(row[column] or empty)

    return Candidate(
        candidate_type=row["candidate_type"],
        title=row["title"],
        summary=row["summary"],
        score_total=float(row["score_total"]),
        score_breakdown=decode("score_breakdown_json", "{}"),
        decision_band=row["decision_band"],
        source_signature_ids=decode("source_signature_ids_json", "[]"),
        source_event_ids=decode("source_event_ids_json", "[]"),
        confidence=float(row["confidence"]),
        metadata=decode("metadata_json", "{}"),
    )
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def _run_ingest_batch(raw_events: List[RawInputEnvelope], *, threshold: int, db_path: Optional[str]) -> Dict[str, Any]:
    """Canonicalize a batch of raw envelopes, build recurrence candidates,
    optionally persist the results, and return an operator-facing summary.

    Args:
        raw_events: Raw envelopes to push through the pipeline.
        threshold: Minimum recurrence count for candidate emission.
        db_path: SQLite path; persistence is skipped when falsy.

    Returns:
        Summary dict with counts and the top interesting items.
    """
    events = canonicalize_raw_input_envelopes(raw_events)
    if not events:
        return {"ok": True, "event_count": 0, "signature_count": 0, "candidate_count": 0, "interesting_items": []}

    signatures = signatures_for_events(events)
    candidates = build_recurrence_candidates(events, signatures, threshold=threshold)
    if db_path:
        init_db(db_path)
        store_events(events, db_path)
        store_signatures(signatures, db_path)
        store_candidates(candidates, db_path)

    return {
        "ok": True,
        # events is guaranteed non-empty here (empty batches return above),
        # so the previous `events[0].tenant_id if events else ""` guard was dead.
        "tenant_id": events[0].tenant_id,
        "event_count": len(events),
        "signature_count": len({sig.signature_key for sig in signatures}),
        "candidate_count": len(candidates),
        # Previously `limit=max(1, 5)`, which always evaluates to 5.
        "interesting_items": interesting_items(candidates, limit=5),
    }
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
@app.post("/ingest/event")
def ingest_event(payload: IngestEventRequest, threshold: int = 2, db_path: Optional[str] = None) -> Dict[str, Any]:
    """Ingest one raw event and run the weak-signal pipeline on it.

    Raises:
        HTTPException: 422 when ``threshold`` is below 1.
    """
    if threshold < 1:
        raise HTTPException(status_code=422, detail="threshold must be >= 1")
    envelope = _raw_envelope_from_request(payload)
    return _run_ingest_batch([envelope], threshold=threshold, db_path=db_path)
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
@app.post("/ingest/batch")
def ingest_batch(payload: IngestBatchRequest) -> Dict[str, Any]:
    """Ingest a batch of raw events through the weak-signal pipeline."""
    envelopes = []
    for event in payload.events:
        envelopes.append(_raw_envelope_from_request(event))
    return _run_ingest_batch(envelopes, threshold=payload.threshold, db_path=payload.db_path)
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
@app.get("/interesting")
def get_interesting(
    limit: int = Query(default=5, ge=1),
    db_path: Optional[str] = None,
) -> Dict[str, Any]:
    """Return the top stored candidates rendered as interesting items.

    Without a ``db_path`` there is no persisted state to read, so the
    item list is empty.
    """
    if not db_path:
        return {"ok": True, "items": []}
    stored = [_candidate_from_row(row) for row in list_candidates(db_path=db_path, limit=limit)]
    return {"ok": True, "items": interesting_items(stored, limit=limit)}
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
@app.get("/healthz")
def healthz() -> Dict[str, Any]:
    """Liveness probe; always reports the service as up.

    Previously annotated ``Dict[str, str]`` while returning a
    ``JSONResponse`` whose payload contained a bool — the annotation and
    the actual response disagreed. Returning the plain dict produces the
    identical JSON body and lets the annotation match reality.
    """
    return {"ok": True, "status": "ok"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Connector entry points for brAInstem."""
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from .types import ConnectorEvent
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def map_logicmonitor_event(payload: dict, *, tenant_id: str) -> ConnectorEvent:
    """Map a LogicMonitor-shaped alert payload onto the shared ConnectorEvent shape.

    Falls back field by field: host from ``resource_name``, service from the
    datasource name (or the literal "logicmonitor"), severity to "info".
    """
    meta = payload.get("metadata") or {}
    resolved_host = payload.get("host") or payload.get("resource_name") or ""
    resolved_service = payload.get("service") or meta.get("datasource") or "logicmonitor"
    carried = {
        "alert_id": payload.get("alert_id"),
        "resource_id": payload.get("resource_id"),
        "datasource": meta.get("datasource"),
        "instance_name": meta.get("instance_name"),
        "acknowledged": meta.get("acknowledged"),
        "cleared_at": meta.get("cleared_at"),
    }
    return ConnectorEvent(
        tenant_id=tenant_id,
        source_type="logicmonitor",
        host=str(resolved_host),
        service=str(resolved_service),
        severity=str(payload.get("severity") or "info"),
        timestamp=str(payload.get("timestamp") or ""),
        message_raw=str(payload.get("message_raw") or payload.get("message") or ""),
        metadata=carried,
    )
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
from typing import Any, Dict
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
@dataclass
class ConnectorEvent:
    """Normalized event shape shared by all connector mappers.

    Connectors translate source-specific payloads into this structure
    before they enter the ingestion pipeline.
    """

    tenant_id: str
    source_type: str
    host: str
    service: str
    severity: str
    timestamp: str
    message_raw: str
    # Source-specific extras (alert ids, datasource names, ...) preserved verbatim.
    metadata: Dict[str, Any] = field(default_factory=dict)
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
import json
|
|
5
|
+
from dataclasses import asdict
|
|
6
|
+
from typing import Any, Dict
|
|
7
|
+
|
|
8
|
+
from .ingest import ingest_syslog_file, signatures_for_events
|
|
9
|
+
from .instrumentation import emit, span
|
|
10
|
+
from .interesting import interesting_items
|
|
11
|
+
from .recurrence import build_recurrence_candidates, digest_items
|
|
12
|
+
from .storage import init_db, store_candidates, store_events, store_signatures
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def run_syslog_demo(path: str, tenant_id: str, threshold: int = 2, db_path: str | None = None) -> Dict[str, Any]:
    """Run the end-to-end syslog weak-signal demo pipeline.

    Ingests a syslog-like file, fingerprints events, builds recurrence
    candidates, persists everything, and returns an operator-facing summary.

    Args:
        path: Path to a syslog-like input file.
        tenant_id: Tenant/environment identifier stamped on every event.
        threshold: Minimum recurrence count for candidate emission.
        db_path: Optional SQLite path; None uses the storage default.

    Returns:
        Summary payload with counts, digest, and interesting items.
    """
    with span("syslog_demo", path=path, tenant_id=tenant_id, threshold=threshold):
        init_db(db_path)
        events = ingest_syslog_file(path, tenant_id=tenant_id)
        emit("syslog_demo_events_loaded", count=len(events), path=path)
        signatures = signatures_for_events(events)
        emit("syslog_demo_signatures_built", count=len(signatures))
        candidates = build_recurrence_candidates(events, signatures, threshold=threshold)
        store_events(events, db_path)
        store_signatures(signatures, db_path)
        store_candidates(candidates, db_path)
        digest = digest_items(candidates)
        items = interesting_items(candidates, limit=5)
        # Distinct signature keys, computed once (was previously built twice).
        unique_signature_count = len({sig.signature_key for sig in signatures})
        payload = {
            "ok": True,
            "tenant_id": tenant_id,
            "event_count": len(events),
            "signature_count": unique_signature_count,
            "candidate_count": len(candidates),
            "canonical_stream": {
                "event_count": len(events),
                "signature_count": unique_signature_count,
            },
            "digest": digest,
            "interesting_items": items,
            "top_candidate": asdict(candidates[0]) if candidates else None,
        }
        emit(
            "syslog_demo_summary",
            event_count=payload["event_count"],
            signature_count=payload["signature_count"],
            candidate_count=payload["candidate_count"],
        )
        return payload
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def main() -> int:
    """CLI entry point: parse arguments, run the demo, print the summary JSON."""
    parser = argparse.ArgumentParser(description="Run the brAInstem syslog weak-signal demo.")
    parser.add_argument("path", help="Path to a syslog-like input file")
    parser.add_argument("--tenant", default="demo-tenant", help="Tenant/environment identifier")
    parser.add_argument("--threshold", type=int, default=2, help="Minimum recurrence count for candidate emission")
    parser.add_argument("--db-path", default=None, help="Optional SQLite path for persistent state")
    args = parser.parse_args()

    summary = run_syslog_demo(
        args.path,
        tenant_id=args.tenant,
        threshold=args.threshold,
        db_path=args.db_path,
    )
    print(json.dumps(summary, indent=2))
    return 0
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
# Allow direct execution (`python -m brainstem.demo <path>`), exiting with main()'s code.
if __name__ == "__main__":
    raise SystemExit(main())
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
|
|
5
|
+
from .models import Event, Signature
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
# Pre-compiled templating patterns: concrete IPs and numbers collapse to
# placeholders so recurring messages fingerprint to the same signature.
_WHITESPACE_RE = re.compile(r"\s+")
_IPV4_RE = re.compile(r"\b(?:\d{1,3}\.){3}\d{1,3}\b")
_NUMBER_RE = re.compile(r"\b\d+\b")


def normalize_message(message: str) -> str:
    """Lowercase *message* and replace IPv4 addresses, bare numbers, and
    whitespace runs with stable placeholders (``<ip>``, ``<n>``, single space).
    """
    lowered = (message or "").strip().lower()
    # Order matters: IPs first, otherwise the number pass would shred them.
    templated = _NUMBER_RE.sub("<n>", _IPV4_RE.sub("<ip>", lowered))
    return _WHITESPACE_RE.sub(" ", templated)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def event_family_for(event: Event) -> str:
    """Classify an event into a coarse family used in the signature key.

    Prefers the pre-computed ``message_normalized`` attribute; falls back
    to normalizing ``message_raw`` on the fly. Checks run in priority
    order: failure > service lifecycle > auth > generic.
    """
    text = getattr(event, "message_normalized", None) or normalize_message(event.message_raw)
    if "fail" in text or "error" in text:
        return "failure"
    if any(marker in text for marker in ("restart", "stopped", "started")):
        return "service_lifecycle"
    if "auth" in text or "login" in text:
        return "auth"
    return "generic"
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def fingerprint_event(event: Event) -> Signature:
    """Build the recurrence Signature for a canonical event.

    The signature key is ``family|service|normalized-pattern``; events
    lacking a service fall back to host-level grouping.
    """
    pattern = (
        getattr(event, "signature_input", None)
        or getattr(event, "message_normalized", None)
        or normalize_message(event.message_raw)
    )
    family = event_family_for(event)
    service_key = (event.service or "").strip().lower()
    host_key = (event.host or "").strip().lower()
    return Signature(
        signature_key=f"{family}|{service_key}|{pattern}",
        event_family=family,
        normalized_pattern=pattern,
        # Host stands in when the event carries no service name.
        service=service_key or host_key,
    )
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import asdict
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Iterable, List
|
|
7
|
+
|
|
8
|
+
from .fingerprint import fingerprint_event, normalize_message
|
|
9
|
+
from .models import CanonicalEvent, Event, RawInputEnvelope, Signature
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def parse_syslog_line(line: str, *, tenant_id: str, source_path: str = "") -> CanonicalEvent:
    """Parse one syslog-like line straight through to a CanonicalEvent."""
    envelope = parse_syslog_envelope(line, tenant_id=tenant_id, source_path=source_path)
    return canonicalize_raw_input_envelope(envelope)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def parse_syslog_envelope(line: str, *, tenant_id: str, source_path: str = "") -> RawInputEnvelope:
    """Parse a classic syslog-style line into a provenance-preserving envelope.

    Expects roughly ``Mon DD HH:MM:SS host service: message``. Lines with
    fewer than five whitespace-separated tokens keep the full text as the
    message with no host/service attribution. The original line is always
    preserved in ``metadata["raw_line"]``.
    """
    text = (line or "").rstrip("\n")
    host, service, message = "", "", text

    tokens = text.split()
    if len(tokens) >= 5:
        # tokens[0:3] are the "Mon DD HH:MM:SS" timestamp fields.
        host = tokens[3]
        rest = " ".join(tokens[4:])
        if ":" in rest:
            svc, _, msg = rest.partition(":")
            service = svc.strip()
            message = msg.strip() or rest.strip()
        else:
            message = rest.strip()

    return RawInputEnvelope(
        tenant_id=tenant_id,
        source_type="syslog",
        # Arrival time; the in-line syslog timestamp is not parsed yet.
        timestamp=datetime.utcnow().isoformat() + "Z",
        message_raw=message,
        host=host,
        service=service,
        source_path=source_path,
        metadata={"raw_line": text},
    )
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def parse_syslog_envelopes(lines: Iterable[str], *, tenant_id: str, source_path: str = "") -> List[RawInputEnvelope]:
    """Parse every non-blank line into a RawInputEnvelope, preserving order."""
    envelopes: List[RawInputEnvelope] = []
    for line in lines:
        if not str(line).strip():
            continue  # skip blank / whitespace-only lines
        envelopes.append(parse_syslog_envelope(line, tenant_id=tenant_id, source_path=source_path))
    return envelopes
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def canonicalize_raw_input_envelope(raw: RawInputEnvelope) -> CanonicalEvent:
    """Promote a raw envelope into the canonical event stream shape.

    Normalizes the message for fingerprinting and folds the envelope's
    metadata into ``ingest_metadata`` alongside canonicalization stamps.
    """
    normalized = normalize_message(raw.message_raw)

    carried = dict(raw.metadata or {})
    carried.setdefault("canonicalization_source", raw.source_type)
    carried["raw_input_seen"] = True
    # Envelope metadata is spread last, so source-provided keys win on collision.
    ingest_meta = {
        "canonicalized_at": datetime.utcnow().isoformat() + "Z",
        "source_timestamp": raw.timestamp,
        **carried,
    }

    return CanonicalEvent(
        tenant_id=raw.tenant_id,
        source_type=raw.source_type,
        timestamp=raw.timestamp,
        host=raw.host,
        service=raw.service,
        severity=raw.severity,
        asset_id=raw.asset_id,
        source_path=raw.source_path,
        facility=raw.facility,
        message_raw=raw.message_raw,
        structured_fields=dict(raw.structured_fields),
        correlation_keys=dict(raw.correlation_keys),
        message_normalized=normalized,
        signature_input=normalized,
        ingest_metadata=ingest_meta,
    )
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def canonicalize_raw_input_envelopes(events: Iterable[RawInputEnvelope]) -> List[CanonicalEvent]:
    """Canonicalize a batch of raw envelopes, preserving input order."""
    return list(map(canonicalize_raw_input_envelope, events))
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def ingest_syslog_lines(lines: Iterable[str], *, tenant_id: str, source_path: str = "") -> List[CanonicalEvent]:
    """Parse and canonicalize syslog-like lines in one step."""
    envelopes = parse_syslog_envelopes(lines, tenant_id=tenant_id, source_path=source_path)
    return canonicalize_raw_input_envelopes(envelopes)
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def ingest_syslog_file(path: str, *, tenant_id: str) -> List[CanonicalEvent]:
    """Read *path* and ingest its lines into canonical events.

    The previous return annotation said ``List[Event]``, but the pipeline
    (via ``ingest_syslog_lines``) produces ``List[CanonicalEvent]``; the
    annotation now matches the actual return type. Undecodable bytes are
    ignored rather than failing the whole file.
    """
    file_path = Path(path)
    lines = file_path.read_text(encoding="utf-8", errors="ignore").splitlines()
    return ingest_syslog_lines(lines, tenant_id=tenant_id, source_path=str(file_path))
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def signatures_for_events(events: Iterable[Event]) -> List[Signature]:
    """Fingerprint each event into its recurrence Signature, in order."""
    return list(map(fingerprint_event, events))
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def events_as_dicts(events: Iterable[Event]) -> List[dict]:
    """Serialize event dataclasses to plain dicts (e.g. for JSON output)."""
    return list(map(asdict, events))
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import sys
|
|
5
|
+
import time
|
|
6
|
+
from contextlib import contextmanager
|
|
7
|
+
from typing import Any, Dict, Iterator
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def emit(event: str, **fields: Any) -> None:
    """Write one structured JSON instrumentation record to stderr.

    The record carries the event name, a UTC timestamp, and any extra
    fields (which may override the defaults on key collision).
    """
    record: Dict[str, Any] = {"event": event}
    record["timestamp"] = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
    record.update(fields)
    print(json.dumps(record, ensure_ascii=False), file=sys.stderr)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@contextmanager
def span(event: str, **fields: Any) -> Iterator[None]:
    """Emit start and complete (or failed) records around a code block.

    Failure records carry the exception type and message; both outcomes
    include elapsed wall time in milliseconds. Exceptions propagate.
    """
    t0 = time.perf_counter()
    emit(f"{event}_start", **fields)

    def _elapsed_ms() -> float:
        return round((time.perf_counter() - t0) * 1000, 3)

    try:
        yield
    except Exception as exc:
        emit(
            f"{event}_failed",
            error_type=type(exc).__name__,
            error=str(exc),
            elapsed_ms=_elapsed_ms(),
            **fields,
        )
        raise
    emit(f"{event}_complete", elapsed_ms=_elapsed_ms(), **fields)
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Iterable, List, Dict, Any
|
|
4
|
+
|
|
5
|
+
from .models import Candidate
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def _attention_band(decision_band: str) -> str:
|
|
9
|
+
mapping = {
|
|
10
|
+
"ignore": "ignore_fast",
|
|
11
|
+
"watch": "background",
|
|
12
|
+
"review": "watch",
|
|
13
|
+
"urgent_human_review": "investigate",
|
|
14
|
+
"promote_to_incident_memory": "promote",
|
|
15
|
+
}
|
|
16
|
+
return mapping.get(decision_band, "watch")
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _why_it_matters(candidate: Candidate) -> str:
    """Render a one-line human explanation of a candidate's relevance.

    Combines recurrence count and service context (falling back to the
    candidate type) with an attention-level phrase.
    """
    meta = candidate.metadata or {}
    count = int(meta.get("count") or 0)
    service = str(meta.get("service") or "").strip()

    detail_parts = []
    if count:
        detail_parts.append(f"observed {count} times")
    if service:
        detail_parts.append(f"around {service}")
    detail = ", ".join(detail_parts) if detail_parts else candidate.candidate_type.replace("_", " ")

    # "ignore_fast" (and anything unmapped) falls through to the noise phrasing.
    level = {
        "promote": "has earned high operator attention",
        "investigate": "has earned active operator attention",
        "watch": "has earned watch-level attention",
        "background": "is worth keeping in the background",
    }.get(_attention_band(candidate.decision_band), "is low-attention noise")

    return f"{detail}; {level}."
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def interesting_items(candidates: Iterable[Candidate], *, limit: int = 5) -> List[Dict[str, Any]]:
    """Rank candidates and render the top ones as operator-facing items.

    Ordering is by (score_total, confidence) descending; at least one
    item is returned when any candidate exists, even if ``limit`` < 1.
    """
    ranked = sorted(candidates, key=lambda c: (c.score_total, c.confidence), reverse=True)
    rendered: List[Dict[str, Any]] = []
    for cand in ranked[: max(limit, 1)]:
        band = _attention_band(cand.decision_band)
        rendered.append(
            {
                "title": cand.title,
                "summary": cand.summary,
                "decision_band": cand.decision_band,
                "attention_band": band,
                "attention_score": cand.score_total,
                "score_total": cand.score_total,
                "confidence": cand.confidence,
                "why_it_matters": _why_it_matters(cand),
                "signals": dict(cand.score_breakdown),
                "metadata": dict(cand.metadata),
            }
        )
    return rendered
|