@simbimbo/brainstem 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. package/CHANGELOG.md +63 -0
  2. package/README.md +99 -3
  3. package/brainstem/__init__.py +3 -0
  4. package/brainstem/api.py +131 -0
  5. package/brainstem/connectors/__init__.py +1 -0
  6. package/brainstem/connectors/logicmonitor.py +26 -0
  7. package/brainstem/connectors/types.py +16 -0
  8. package/brainstem/demo.py +64 -0
  9. package/brainstem/fingerprint.py +44 -0
  10. package/brainstem/ingest.py +101 -0
  11. package/brainstem/instrumentation.py +38 -0
  12. package/brainstem/interesting.py +62 -0
  13. package/brainstem/models.py +78 -0
  14. package/brainstem/recurrence.py +112 -0
  15. package/brainstem/scoring.py +38 -0
  16. package/brainstem/storage.py +182 -0
  17. package/docs/adapters.md +435 -0
  18. package/docs/api.md +380 -0
  19. package/docs/architecture.md +333 -0
  20. package/docs/connectors.md +66 -0
  21. package/docs/data-model.md +290 -0
  22. package/docs/design-governance.md +595 -0
  23. package/docs/mvp-flow.md +109 -0
  24. package/docs/roadmap.md +87 -0
  25. package/docs/scoring.md +424 -0
  26. package/docs/v0.0.1.md +277 -0
  27. package/docs/vision.md +85 -0
  28. package/package.json +6 -14
  29. package/pyproject.toml +18 -0
  30. package/tests/fixtures/sample_syslog.log +6 -0
  31. package/tests/test_api.py +72 -0
  32. package/tests/test_canonicalization.py +28 -0
  33. package/tests/test_demo.py +25 -0
  34. package/tests/test_fingerprint.py +22 -0
  35. package/tests/test_ingest.py +15 -0
  36. package/tests/test_instrumentation.py +16 -0
  37. package/tests/test_interesting.py +36 -0
  38. package/tests/test_logicmonitor.py +22 -0
  39. package/tests/test_recurrence.py +16 -0
  40. package/tests/test_scoring.py +21 -0
  41. package/tests/test_storage.py +26 -0
package/CHANGELOG.md ADDED
@@ -0,0 +1,63 @@
1
+ # Changelog
2
+
3
+ ## 0.0.2 — 2026-03-22
4
+
5
+ First fully aligned public foundation release of **brAInstem**.
6
+
7
+ ### Why 0.0.2
8
+ - `0.0.1` was already published to npm earlier as the first placeholder/bootstrap package version
9
+ - `0.0.2` is the first release where the local repo, documentation, canonical location, runtime foundation, and validation are being shipped together intentionally
10
+
11
+ ### What this release adds/locks in
12
+ - canonical repo location at `~/brAInstem`
13
+ - design governance, `v0.0.1` scope, adapter contract, and attention scoring docs
14
+ - explicit `RawInputEnvelope` and `CanonicalEvent` foundation models
15
+ - canonicalization path for current syslog-like ingestion
16
+ - minimal FastAPI runtime with:
17
+ - `POST /ingest/event`
18
+ - `POST /ingest/batch`
19
+ - `GET /interesting`
20
+ - `GET /healthz`
21
+ - focused API and canonicalization tests
22
+
23
+ ### Validation
24
+ - local test suite passed (`19 passed`)
25
+ - local end-to-end demo path executed successfully against sample syslog input
26
+ - minimal FastAPI runtime and canonicalization tests passed locally
27
+
28
+ ## 0.0.1 — 2026-03-22
29
+
30
+ First public prototype release of **brAInstem**.
31
+
32
+ ### What this release is
33
+ - an experimental, self-contained operational memory prototype for weak signals
34
+ - a proof that early event sources can feed one normalized internal stream
35
+ - a first cut at attention-oriented weak-signal discovery and operator-facing interesting items
36
+
37
+ ### Included in 0.0.1
38
+ - syslog-like ingestion path
39
+ - event fingerprinting and recurrence candidate generation
40
+ - interpretable scoring with operator-facing decision/attention output
41
+ - SQLite persistence for events, signatures, and candidates
42
+ - local demo path for end-to-end validation
43
+ - initial LogicMonitor connector model/mapping work
44
+ - design governance docs covering:
45
+ - product thesis
46
+ - `v0.0.1` scope
47
+ - adapter/raw-envelope/canonical-event contract
48
+ - attention scoring model
49
+
50
+ ### Not claimed in 0.0.1
51
+ - full universal intake apparatus
52
+ - production-grade always-on ingestion runtime
53
+ - mature multi-tenant MSP platform behavior
54
+ - complete discovery apparatus breadth (burst/spread/self-heal/precursor at full maturity)
55
+ - polished operator UI
56
+
57
+ ### Validation
58
+ - local test suite passed (`19 passed`)
59
+ - local end-to-end demo path executed successfully against sample syslog input
60
+ - minimal FastAPI runtime and canonicalization tests passed locally
61
+
62
+ ### Release framing
63
+ This release puts a truthful first stake in the ground for brAInstem as an operational memory runtime focused on weak signals and operator attention.
package/README.md CHANGED
@@ -1,5 +1,101 @@
1
- # @simbimbo/brainstem
1
+ # brAInstem
2
2
 
3
- brAInstem is an operational memory engine for weak signals.
3
+ **Operational memory for weak signals**
4
4
 
5
- This package name is being reserved for the upcoming brAInstem project.
5
+ brAInstem is an always-on operational memory runtime for weak signals. Instead of treating memory as conversational context, brAInstem treats logs and operational events as raw operational experience that can be normalized into one canonical stream, assigned attention, clustered into patterns, and promoted into durable operational knowledge.
6
+
7
+ ## One-line pitch
8
+
9
+ brAInstem helps MSPs and lean ops teams detect recurring, self-resolving, and quietly escalating issues before they become major incidents.
10
+
11
+ ## The problem
12
+
13
+ Most operational pain never becomes a classic threshold alert.
14
+
15
+ It shows up as:
16
+ - recurring low-grade warnings
17
+ - brief self-healing failures
18
+ - cross-system weak signals
19
+ - near-misses that humans forget because there is too much noise
20
+
21
+ Traditional monitoring catches hard failures. brAInstem is designed to catch patterns that matter to humans before they become obvious outages.
22
+
23
+ ## Core idea
24
+
25
+ Logs and events should not only be stored and searched. They should be:
26
+ 1. ingested into a provenance-preserving raw envelope
27
+ 2. normalized into one canonical event stream
28
+ 3. assigned and updated with **attention** over time
29
+ 4. compressed so most inconsequential noise is handled cheaply
30
+ 5. promoted into operator-facing weak-signal outputs only when enough attention is earned
31
+ 6. promoted later into incident memory, lessons, and runbook hints when justified
32
+ 7. retrieved again when similar patterns recur
33
+
34
+ ## Primary users
35
+
36
+ - MSP owners
37
+ - MSP technicians
38
+ - NOC teams
39
+ - SRE / infrastructure operators
40
+ - small security / ops teams dealing with alert fatigue and log blindness
41
+
42
+ ## MVP promise
43
+
44
+ For a given tenant or environment, brAInstem should answer:
45
+ - What happened today that mattered but never alerted?
46
+ - What self-resolving issues are recurring?
47
+ - What patterns are likely to become tickets later?
48
+ - Have we seen this before?
49
+ - What happened right before the last similar incident?
50
+
51
+ ## Relationship to ocmemog
52
+
53
+ Shared DNA:
54
+ - ingest -> candidate -> promotion pipeline
55
+ - compact retrieval
56
+ - provenance and explainability
57
+ - memory scoring and recurrence awareness
58
+
59
+ Different center of gravity:
60
+ - ocmemog = assistant memory and continuity
61
+ - brAInstem = operational event intelligence and weak-signal detection
62
+
63
+ ## Initial scope
64
+
65
+ The long-term product direction is an always-on self-contained runtime with a robust input apparatus, a discovery apparatus, and operator-facing outputs.
66
+
67
+ For the first public prototype line, start with:
68
+ - a narrow but real ingestion story
69
+ - a canonical event stream
70
+ - attention-oriented weak-signal discovery
71
+ - operator-facing interesting items / digest output
72
+ - syslog-like events and LogicMonitor-shaped events as early proof sources
73
+
74
+ Delay until later:
75
+ - broad universal connector coverage
76
+ - mature syslog appliance behavior across every input mode
77
+ - full SIEM behavior
78
+ - broad compliance workflows
79
+ - generic observability replacement
80
+ - "chat with all your logs" as the primary story
81
+
82
+ ## Proposed docs
83
+
84
+ - `docs/design-governance.md` — canonical product/design guardrails
85
+ - `docs/v0.0.1.md` — first release scope and acceptance criteria
86
+ - `docs/adapters.md` — intake, raw envelope, and canonical event contract
87
+ - `docs/vision.md`
88
+ - `docs/architecture.md`
89
+ - `docs/scoring.md` — attention scoring and routing model
90
+ - `docs/roadmap.md`
91
+
92
+ ## Design governance
93
+
94
+ Before expanding scope, adding connectors, or making architecture claims, read:
95
+ - `docs/design-governance.md`
96
+
97
+ That document is the canonical governor for:
98
+ - what brAInstem is and is not
99
+ - how attention should work
100
+ - what belongs in `0.0.1`
101
+ - how the input apparatus, discovery apparatus, and operator outputs should evolve
@@ -0,0 +1,3 @@
1
+ """brAInstem — operational memory for weak signals."""
2
+
3
+ __version__ = "0.0.2"
@@ -0,0 +1,131 @@
1
+ from __future__ import annotations
2
+
3
+ from datetime import datetime
4
+ from typing import Any, Dict, List, Optional
5
+
6
+ import json
7
+ from fastapi import FastAPI, HTTPException, Query
8
+ from fastapi.responses import JSONResponse
9
+ from pydantic import BaseModel, Field
10
+
11
+ from .ingest import canonicalize_raw_input_envelopes
12
+ from .interesting import interesting_items
13
+ from .models import Candidate, RawInputEnvelope
14
+ from .recurrence import build_recurrence_candidates
15
+ from .storage import init_db, list_candidates, store_candidates, store_events, store_signatures
16
+ from .ingest import signatures_for_events
17
+
18
+
19
# Module-level FastAPI application exposing the ingest/interesting/health endpoints.
app = FastAPI(title="brAInstem Runtime")
20
+
21
+
22
class RawEnvelopeRequest(BaseModel):
    """HTTP request body mirroring the internal RawInputEnvelope.

    Only ``tenant_id``, ``source_type`` and ``message_raw`` are required;
    every other field defaults to an empty value so thin clients can post
    minimal payloads.
    """

    tenant_id: str
    source_type: str
    message_raw: str
    # When omitted, the server stamps the envelope with the current UTC time.
    timestamp: Optional[str] = None
    host: str = ""
    service: str = ""
    severity: str = "info"
    asset_id: str = ""
    source_path: str = ""
    facility: str = ""
    # Free-form provenance/enrichment payloads; copied into the envelope as-is.
    structured_fields: Dict[str, Any] = Field(default_factory=dict)
    correlation_keys: Dict[str, Any] = Field(default_factory=dict)
    metadata: Dict[str, Any] = Field(default_factory=dict)
36
+
37
+
38
class IngestEventRequest(RawEnvelopeRequest):
    """Single-event ingest payload; identical shape to RawEnvelopeRequest."""

    pass
40
+
41
+
42
class IngestBatchRequest(BaseModel):
    """Batch ingest payload: a list of raw envelopes plus run options."""

    events: List[RawEnvelopeRequest]
    # Minimum recurrence count before a candidate is emitted (must be >= 1).
    threshold: int = Field(default=2, ge=1)
    # Optional SQLite path; when set, results are persisted.
    db_path: Optional[str] = None
46
+
47
+
48
def _raw_envelope_from_request(payload: RawEnvelopeRequest) -> RawInputEnvelope:
    """Convert an HTTP request model into the internal raw envelope.

    A missing timestamp is stamped with the current UTC time so downstream
    canonicalization always sees a value; mutable containers are shallow-
    copied so the request model and the envelope never share state.
    """
    simple_fields = {
        name: getattr(payload, name)
        for name in (
            "tenant_id",
            "source_type",
            "message_raw",
            "host",
            "service",
            "severity",
            "asset_id",
            "source_path",
            "facility",
        )
    }
    return RawInputEnvelope(
        timestamp=payload.timestamp or datetime.utcnow().isoformat() + "Z",
        structured_fields=dict(payload.structured_fields),
        correlation_keys=dict(payload.correlation_keys),
        metadata=dict(payload.metadata),
        **simple_fields,
    )
64
+
65
+
66
def _candidate_from_row(row) -> Candidate:
    """Rehydrate a stored Candidate from a storage row (mapping-style access).

    JSON columns may be NULL in storage; each one falls back to an empty
    container literal before decoding.
    """
    def decoded(column: str, empty: str):
        # Helper: decode a JSON column, tolerating NULL/empty values.
        return json.loads(row[column] or empty)

    return Candidate(
        candidate_type=row["candidate_type"],
        title=row["title"],
        summary=row["summary"],
        score_total=float(row["score_total"]),
        score_breakdown=decoded("score_breakdown_json", "{}"),
        decision_band=row["decision_band"],
        source_signature_ids=decoded("source_signature_ids_json", "[]"),
        source_event_ids=decoded("source_event_ids_json", "[]"),
        confidence=float(row["confidence"]),
        metadata=decoded("metadata_json", "{}"),
    )
79
+
80
+
81
def _run_ingest_batch(raw_events: List[RawInputEnvelope], *, threshold: int, db_path: Optional[str]) -> Dict[str, Any]:
    """Canonicalize, fingerprint, score and (optionally) persist a batch.

    Parameters:
        raw_events: provenance-preserving envelopes to process.
        threshold: minimum recurrence count before a candidate is emitted.
        db_path: when set, events/signatures/candidates are persisted to SQLite.

    Returns a summary payload including up to five top interesting items.
    """
    events = canonicalize_raw_input_envelopes(raw_events)
    if not events:
        # Keep the payload shape consistent with the non-empty branch
        # (the original omitted "tenant_id" here).
        return {
            "ok": True,
            "tenant_id": "",
            "event_count": 0,
            "signature_count": 0,
            "candidate_count": 0,
            "interesting_items": [],
        }

    signatures = signatures_for_events(events)
    candidates = build_recurrence_candidates(events, signatures, threshold=threshold)
    if db_path:
        init_db(db_path)
        store_events(events, db_path)
        store_signatures(signatures, db_path)
        store_candidates(candidates, db_path)

    return {
        "ok": True,
        # `events` is guaranteed non-empty past the early return above, so the
        # original `events[0].tenant_id if events else ""` guard was dead code.
        "tenant_id": events[0].tenant_id,
        "event_count": len(events),
        "signature_count": len({sig.signature_key for sig in signatures}),
        "candidate_count": len(candidates),
        # Fixed preview size; the original `max(1, 5)` always evaluated to 5.
        "interesting_items": interesting_items(candidates, limit=5),
    }
102
+
103
+
104
+ @app.post("/ingest/event")
105
+ def ingest_event(payload: IngestEventRequest, threshold: int = 2, db_path: Optional[str] = None) -> Dict[str, Any]:
106
+ if threshold < 1:
107
+ raise HTTPException(status_code=422, detail="threshold must be >= 1")
108
+ return _run_ingest_batch([_raw_envelope_from_request(payload)], threshold=threshold, db_path=db_path)
109
+
110
+
111
+ @app.post("/ingest/batch")
112
+ def ingest_batch(payload: IngestBatchRequest) -> Dict[str, Any]:
113
+ raw_events = [_raw_envelope_from_request(event) for event in payload.events]
114
+ return _run_ingest_batch(raw_events, threshold=payload.threshold, db_path=payload.db_path)
115
+
116
+
117
+ @app.get("/interesting")
118
+ def get_interesting(
119
+ limit: int = Query(default=5, ge=1),
120
+ db_path: Optional[str] = None,
121
+ ) -> Dict[str, Any]:
122
+ if not db_path:
123
+ return {"ok": True, "items": []}
124
+ rows = list_candidates(db_path=db_path, limit=limit)
125
+ candidates = [_candidate_from_row(row) for row in rows]
126
+ return {"ok": True, "items": interesting_items(candidates, limit=limit)}
127
+
128
+
129
+ @app.get("/healthz")
130
+ def healthz() -> Dict[str, str]:
131
+ return JSONResponse(content={"ok": True, "status": "ok"})
@@ -0,0 +1 @@
1
+ """Connector entry points for brAInstem."""
@@ -0,0 +1,26 @@
1
+ from __future__ import annotations
2
+
3
+ from .types import ConnectorEvent
4
+
5
+
6
def map_logicmonitor_event(payload: dict, *, tenant_id: str) -> ConnectorEvent:
    """Map a LogicMonitor alert payload onto the connector event shape.

    Field-by-field fallbacks: host from ``host`` then ``resource_name``;
    service from ``service`` then the datasource name then the literal
    ``"logicmonitor"``; message from ``message_raw`` then ``message``.
    """
    extra = payload.get("metadata") or {}
    retained_metadata = {
        "alert_id": payload.get("alert_id"),
        "resource_id": payload.get("resource_id"),
        "datasource": extra.get("datasource"),
        "instance_name": extra.get("instance_name"),
        "acknowledged": extra.get("acknowledged"),
        "cleared_at": extra.get("cleared_at"),
    }
    return ConnectorEvent(
        tenant_id=tenant_id,
        source_type="logicmonitor",
        host=str(payload.get("host") or payload.get("resource_name") or ""),
        service=str(payload.get("service") or extra.get("datasource") or "logicmonitor"),
        severity=str(payload.get("severity") or "info"),
        timestamp=str(payload.get("timestamp") or ""),
        message_raw=str(payload.get("message_raw") or payload.get("message") or ""),
        metadata=retained_metadata,
    )
@@ -0,0 +1,16 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass, field
4
+ from typing import Any, Dict
5
+
6
+
7
@dataclass
class ConnectorEvent:
    """Normalized event shape emitted by source connectors.

    Timestamp and severity are plain strings; connectors are responsible
    for stringifying whatever the source payload provides.
    """

    tenant_id: str
    source_type: str
    host: str
    service: str
    severity: str
    timestamp: str
    message_raw: str
    # Connector-specific extras (alert ids, datasource names, ...).
    metadata: Dict[str, Any] = field(default_factory=dict)
@@ -0,0 +1,64 @@
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import json
5
+ from dataclasses import asdict
6
+ from typing import Any, Dict
7
+
8
+ from .ingest import ingest_syslog_file, signatures_for_events
9
+ from .instrumentation import emit, span
10
+ from .interesting import interesting_items
11
+ from .recurrence import build_recurrence_candidates, digest_items
12
+ from .storage import init_db, store_candidates, store_events, store_signatures
13
+
14
+
15
def run_syslog_demo(path: str, tenant_id: str, threshold: int = 2, db_path: str | None = None) -> Dict[str, Any]:
    """Run the end-to-end syslog demo: ingest, fingerprint, score, persist.

    Returns a JSON-serializable summary including the digest and the top
    interesting items; emits structured progress records throughout.
    """
    with span("syslog_demo", path=path, tenant_id=tenant_id, threshold=threshold):
        init_db(db_path)

        events = ingest_syslog_file(path, tenant_id=tenant_id)
        emit("syslog_demo_events_loaded", count=len(events), path=path)

        signatures = signatures_for_events(events)
        emit("syslog_demo_signatures_built", count=len(signatures))

        candidates = build_recurrence_candidates(events, signatures, threshold=threshold)

        # Persist everything before building the operator-facing output.
        store_events(events, db_path)
        store_signatures(signatures, db_path)
        store_candidates(candidates, db_path)

        # Computed once; the original built this set twice.
        unique_signatures = len({sig.signature_key for sig in signatures})
        payload = {
            "ok": True,
            "tenant_id": tenant_id,
            "event_count": len(events),
            "signature_count": unique_signatures,
            "candidate_count": len(candidates),
            "canonical_stream": {
                "event_count": len(events),
                "signature_count": unique_signatures,
            },
            "digest": digest_items(candidates),
            "interesting_items": interesting_items(candidates, limit=5),
            "top_candidate": asdict(candidates[0]) if candidates else None,
        }
        emit(
            "syslog_demo_summary",
            event_count=payload["event_count"],
            signature_count=payload["signature_count"],
            candidate_count=payload["candidate_count"],
        )
        return payload
49
+
50
+
51
def main() -> int:
    """CLI entry point for the syslog weak-signal demo."""
    cli = argparse.ArgumentParser(description="Run the brAInstem syslog weak-signal demo.")
    cli.add_argument("path", help="Path to a syslog-like input file")
    cli.add_argument("--tenant", default="demo-tenant", help="Tenant/environment identifier")
    cli.add_argument("--threshold", type=int, default=2, help="Minimum recurrence count for candidate emission")
    cli.add_argument("--db-path", default=None, help="Optional SQLite path for persistent state")
    options = cli.parse_args()

    summary = run_syslog_demo(
        options.path,
        tenant_id=options.tenant,
        threshold=options.threshold,
        db_path=options.db_path,
    )
    print(json.dumps(summary, indent=2))
    return 0
61
+
62
+
63
+ if __name__ == "__main__":
64
+ raise SystemExit(main())
@@ -0,0 +1,44 @@
1
+ from __future__ import annotations
2
+
3
+ import re
4
+
5
+ from .models import Event, Signature
6
+
7
+
8
# Compiled once at import time; reused for every normalized message.
_WHITESPACE_RE = re.compile(r"\s+")
_IPV4_RE = re.compile(r"\b(?:\d{1,3}\.){3}\d{1,3}\b")
_NUMBER_RE = re.compile(r"\b\d+\b")


def normalize_message(message: str) -> str:
    """Lower-case *message* and mask volatile tokens.

    IPv4 addresses become ``<ip>`` and standalone integers become ``<n>``
    so recurring messages that differ only in those tokens fingerprint to
    the same pattern; runs of whitespace collapse to single spaces.
    A falsy input (empty string / None) normalizes to "".
    """
    lowered = (message or "").strip().lower()
    masked = _NUMBER_RE.sub("<n>", _IPV4_RE.sub("<ip>", lowered))
    return _WHITESPACE_RE.sub(" ", masked)
19
+
20
+
21
def event_family_for(event: Event) -> str:
    """Classify an event into a coarse family from its normalized message.

    Keyword groups are checked in priority order, so e.g. "login failed"
    lands in "failure" rather than "auth". Events with no matching keyword
    fall back to "generic".
    """
    text = getattr(event, "message_normalized", None) or normalize_message(event.message_raw)
    keyword_families = (
        ("failure", ("fail", "error")),
        ("service_lifecycle", ("restart", "stopped", "started")),
        ("auth", ("auth", "login")),
    )
    for family, keywords in keyword_families:
        if any(keyword in text for keyword in keywords):
            return family
    return "generic"
31
+
32
+
33
def fingerprint_event(event: Event) -> Signature:
    """Build the stable Signature for *event*.

    Prefers a precomputed ``signature_input`` / ``message_normalized``
    attribute and only normalizes the raw message as a last resort.
    """
    pattern = (
        getattr(event, "signature_input", None)
        or getattr(event, "message_normalized", None)
        or normalize_message(event.message_raw)
    )
    family = event_family_for(event)
    service_key = (event.service or "").strip().lower()
    host_key = (event.host or "").strip().lower()
    return Signature(
        signature_key=f"{family}|{service_key}|{pattern}",
        event_family=family,
        normalized_pattern=pattern,
        # Fall back to the host when the event has no service name.
        service=service_key or host_key,
    )
@@ -0,0 +1,101 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import asdict
4
+ from datetime import datetime
5
+ from pathlib import Path
6
+ from typing import Iterable, List
7
+
8
+ from .fingerprint import fingerprint_event, normalize_message
9
+ from .models import CanonicalEvent, Event, RawInputEnvelope, Signature
10
+
11
+
12
def parse_syslog_line(line: str, *, tenant_id: str, source_path: str = "") -> CanonicalEvent:
    """Parse one syslog-like line straight through to a canonical event."""
    envelope = parse_syslog_envelope(line, tenant_id=tenant_id, source_path=source_path)
    return canonicalize_raw_input_envelope(envelope)
16
+
17
+
18
def parse_syslog_envelope(line: str, *, tenant_id: str, source_path: str = "") -> RawInputEnvelope:
    """Parse one classic syslog-shaped line into a raw envelope.

    Expected shape: ``Mon DD HH:MM:SS host service: message``. Lines that
    do not fit keep the whole text as the message with empty host/service.
    The source wall-clock time is not parsed; envelopes are stamped with
    the current UTC time and keep the raw line in metadata for provenance.
    """
    text = (line or "").rstrip("\n")
    host = service = ""
    message = text

    tokens = text.split()
    if len(tokens) >= 5:
        # Token 3 is the host in "Mon DD HH:MM:SS host ..." layouts.
        host = tokens[3]
        remainder = " ".join(tokens[4:])
        if ":" in remainder:
            prefix, _, tail = remainder.partition(":")
            service = prefix.strip()
            # An empty tail (trailing colon) falls back to the full remainder.
            message = tail.strip() or remainder.strip()
        else:
            message = remainder.strip()

    # NOTE(review): datetime.utcnow() is deprecated in 3.12+; kept to preserve
    # the existing "...Z" timestamp format.
    return RawInputEnvelope(
        tenant_id=tenant_id,
        source_type="syslog",
        timestamp=datetime.utcnow().isoformat() + "Z",
        message_raw=message,
        host=host,
        service=service,
        source_path=source_path,
        metadata={"raw_line": text},
    )
46
+
47
+
48
def parse_syslog_envelopes(lines: Iterable[str], *, tenant_id: str, source_path: str = "") -> List[RawInputEnvelope]:
    """Parse every non-blank line into a raw envelope, preserving order."""
    envelopes: List[RawInputEnvelope] = []
    for line in lines:
        if str(line).strip():
            envelopes.append(parse_syslog_envelope(line, tenant_id=tenant_id, source_path=source_path))
    return envelopes
50
+
51
+
52
def canonicalize_raw_input_envelope(raw: RawInputEnvelope) -> CanonicalEvent:
    """Promote a raw envelope into the canonical event stream.

    Adds a normalized message (also used as the signature input) and an
    ``ingest_metadata`` record describing when canonicalization happened,
    merged over the envelope's own metadata.
    """
    normalized = normalize_message(raw.message_raw)

    provenance = dict(raw.metadata or {})
    provenance.setdefault("canonicalization_source", raw.source_type)
    provenance["raw_input_seen"] = True
    # NOTE(review): envelope metadata keys can shadow "canonicalized_at" /
    # "source_timestamp" in this merge — original behavior kept as-is.
    ingest_metadata = {
        "canonicalized_at": datetime.utcnow().isoformat() + "Z",
        "source_timestamp": raw.timestamp,
        **provenance,
    }

    return CanonicalEvent(
        tenant_id=raw.tenant_id,
        source_type=raw.source_type,
        timestamp=raw.timestamp,
        host=raw.host,
        service=raw.service,
        severity=raw.severity,
        asset_id=raw.asset_id,
        source_path=raw.source_path,
        facility=raw.facility,
        message_raw=raw.message_raw,
        structured_fields=dict(raw.structured_fields),
        correlation_keys=dict(raw.correlation_keys),
        message_normalized=normalized,
        signature_input=normalized,
        ingest_metadata=ingest_metadata,
    )
78
+
79
+
80
def canonicalize_raw_input_envelopes(events: Iterable[RawInputEnvelope]) -> List[CanonicalEvent]:
    """Canonicalize a whole stream of raw envelopes, preserving order."""
    return list(map(canonicalize_raw_input_envelope, events))
82
+
83
+
84
def ingest_syslog_lines(lines: Iterable[str], *, tenant_id: str, source_path: str = "") -> List[CanonicalEvent]:
    """Parse and canonicalize syslog-like lines in one step."""
    envelopes = parse_syslog_envelopes(lines, tenant_id=tenant_id, source_path=source_path)
    return canonicalize_raw_input_envelopes(envelopes)
88
+
89
+
90
def ingest_syslog_file(path: str, *, tenant_id: str) -> List[Event]:
    """Read *path* (UTF-8, undecodable bytes ignored) and ingest its lines."""
    source = Path(path)
    # NOTE(review): the pipeline returns CanonicalEvent objects; confirm the
    # List[Event] annotation against models.py (Event may be a base/alias).
    raw_text = source.read_text(encoding="utf-8", errors="ignore")
    return ingest_syslog_lines(raw_text.splitlines(), tenant_id=tenant_id, source_path=str(source))
94
+
95
+
96
def signatures_for_events(events: Iterable[Event]) -> List[Signature]:
    """Fingerprint each event into its Signature, preserving order."""
    return list(map(fingerprint_event, events))
98
+
99
+
100
def events_as_dicts(events: Iterable[Event]) -> List[dict]:
    """Serialize dataclass events into plain dicts (e.g. for JSON output)."""
    return list(map(asdict, events))
@@ -0,0 +1,38 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import sys
5
+ import time
6
+ from contextlib import contextmanager
7
+ from typing import Any, Dict, Iterator
8
+
9
+
10
def emit(event: str, **fields: Any) -> None:
    """Write one structured JSON log record to stderr.

    Each record carries the event name, a UTC timestamp, and any extra
    keyword fields supplied by the caller (caller fields win on key clash,
    matching the original dict-merge behavior).
    """
    record: Dict[str, Any] = {
        "event": event,
        "timestamp": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
    }
    record.update(fields)
    print(json.dumps(record, ensure_ascii=False), file=sys.stderr)
17
+
18
+
19
@contextmanager
def span(event: str, **fields: Any) -> Iterator[None]:
    """Context manager emitting start/complete (or failed) records.

    Emits ``{event}_start`` on entry, then either ``{event}_failed`` (and
    re-raises the exception) or ``{event}_complete``, both carrying the
    elapsed time in milliseconds plus the caller's fields.
    """
    t0 = time.perf_counter()
    emit(f"{event}_start", **fields)

    def _elapsed_ms() -> float:
        # Milliseconds since the span started, rounded for log readability.
        return round((time.perf_counter() - t0) * 1000, 3)

    try:
        yield
    except Exception as exc:
        emit(
            f"{event}_failed",
            error_type=type(exc).__name__,
            error=str(exc),
            elapsed_ms=_elapsed_ms(),
            **fields,
        )
        raise
    else:
        emit(
            f"{event}_complete",
            elapsed_ms=_elapsed_ms(),
            **fields,
        )
@@ -0,0 +1,62 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Iterable, List, Dict, Any
4
+
5
+ from .models import Candidate
6
+
7
+
8
+ def _attention_band(decision_band: str) -> str:
9
+ mapping = {
10
+ "ignore": "ignore_fast",
11
+ "watch": "background",
12
+ "review": "watch",
13
+ "urgent_human_review": "investigate",
14
+ "promote_to_incident_memory": "promote",
15
+ }
16
+ return mapping.get(decision_band, "watch")
17
+
18
+
19
def _why_it_matters(candidate: Candidate) -> str:
    """Render a one-line operator explanation for an interesting item.

    Combines observed count / service context (when present in metadata)
    with a phrase describing the earned attention level.
    """
    meta = candidate.metadata or {}
    observed = int(meta.get("count") or 0)
    service_name = str(meta.get("service") or "").strip()

    fragments = []
    if observed:
        fragments.append(f"observed {observed} times")
    if service_name:
        fragments.append(f"around {service_name}")

    attention_phrases = {
        "promote": "has earned high operator attention",
        "investigate": "has earned active operator attention",
        "watch": "has earned watch-level attention",
        "background": "is worth keeping in the background",
    }
    # Any other band (e.g. "ignore_fast") falls through to the noise phrase.
    level = attention_phrases.get(
        _attention_band(candidate.decision_band),
        "is low-attention noise",
    )

    detail = ", ".join(fragments) if fragments else candidate.candidate_type.replace("_", " ")
    return f"{detail}; {level}."
41
+
42
+
43
def interesting_items(candidates: Iterable[Candidate], *, limit: int = 5) -> List[Dict[str, Any]]:
    """Rank candidates and shape the top *limit* as operator-facing items.

    Candidates are ordered by (score_total, confidence) descending; a limit
    below 1 is treated as 1.
    """
    ranked = sorted(candidates, key=lambda c: (c.score_total, c.confidence), reverse=True)
    top = ranked[: max(limit, 1)]

    def shape(candidate: Candidate) -> Dict[str, Any]:
        # One presentation dict per candidate; containers are copied so the
        # output never aliases candidate state.
        return {
            "title": candidate.title,
            "summary": candidate.summary,
            "decision_band": candidate.decision_band,
            "attention_band": _attention_band(candidate.decision_band),
            "attention_score": candidate.score_total,
            "score_total": candidate.score_total,
            "confidence": candidate.confidence,
            "why_it_matters": _why_it_matters(candidate),
            "signals": dict(candidate.score_breakdown),
            "metadata": dict(candidate.metadata),
        }

    return [shape(candidate) for candidate in top]