@simbimbo/brainstem 0.0.1 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. package/CHANGELOG.md +87 -0
  2. package/README.md +99 -3
  3. package/brainstem/__init__.py +3 -0
  4. package/brainstem/api.py +257 -0
  5. package/brainstem/connectors/__init__.py +1 -0
  6. package/brainstem/connectors/logicmonitor.py +26 -0
  7. package/brainstem/connectors/types.py +16 -0
  8. package/brainstem/demo.py +64 -0
  9. package/brainstem/fingerprint.py +44 -0
  10. package/brainstem/ingest.py +108 -0
  11. package/brainstem/instrumentation.py +38 -0
  12. package/brainstem/interesting.py +62 -0
  13. package/brainstem/models.py +80 -0
  14. package/brainstem/recurrence.py +112 -0
  15. package/brainstem/scoring.py +38 -0
  16. package/brainstem/storage.py +428 -0
  17. package/docs/adapters.md +435 -0
  18. package/docs/api.md +380 -0
  19. package/docs/architecture.md +333 -0
  20. package/docs/connectors.md +66 -0
  21. package/docs/data-model.md +290 -0
  22. package/docs/design-governance.md +595 -0
  23. package/docs/mvp-flow.md +109 -0
  24. package/docs/roadmap.md +87 -0
  25. package/docs/scoring.md +424 -0
  26. package/docs/v0.0.1.md +277 -0
  27. package/docs/vision.md +85 -0
  28. package/package.json +6 -14
  29. package/pyproject.toml +18 -0
  30. package/tests/fixtures/sample_syslog.log +6 -0
  31. package/tests/test_api.py +319 -0
  32. package/tests/test_canonicalization.py +28 -0
  33. package/tests/test_demo.py +25 -0
  34. package/tests/test_fingerprint.py +22 -0
  35. package/tests/test_ingest.py +15 -0
  36. package/tests/test_instrumentation.py +16 -0
  37. package/tests/test_interesting.py +36 -0
  38. package/tests/test_logicmonitor.py +22 -0
  39. package/tests/test_recurrence.py +16 -0
  40. package/tests/test_scoring.py +21 -0
  41. package/tests/test_storage.py +294 -0
package/CHANGELOG.md ADDED
@@ -0,0 +1,87 @@
1
+ # Changelog
2
+
3
+ ## 0.0.3 — 2026-03-22
4
+
5
+ Intake Foundation follow-up release for **brAInstem**.
6
+
7
+ ### Highlights
8
+ - persists `RawInputEnvelope` intake records to SQLite before canonicalization
9
+ - records canonicalization outcomes explicitly (`received`, `canonicalized`, `parse_failed`, `unsupported`)
10
+ - adds ingest accounting for:
11
+ - received
12
+ - canonicalized
13
+ - parse_failed
14
+ - candidates_generated
15
+ - adds runtime inspection endpoints for intake trust and observability:
16
+ - `GET /stats`
17
+ - `GET /failures`
18
+ - `GET /failures/{id}`
19
+ - `GET /ingest/recent`
20
+ - `GET /sources`
21
+ - adds storage/query helpers for recent raw envelopes, recent failures, and per-source summaries
22
+ - expands tests around raw-envelope persistence, failure inspection, source summaries, and stats
23
+
24
+ ### Validation
25
+ - local test suite passed (`26 passed`)
26
+
27
+ ## 0.0.2 — 2026-03-22
28
+
29
+ First fully aligned public foundation release of **brAInstem**.
30
+
31
+ ### Why 0.0.2
32
+ - `0.0.1` was already published to npm earlier as the first placeholder/bootstrap package version
33
+ - `0.0.2` is the first release where the local repo, documentation, canonical location, runtime foundation, and validation are being shipped together intentionally
34
+
35
+ ### What this release adds/locks in
36
+ - canonical repo location at `~/brAInstem`
37
+ - design governance, `v0.0.1` scope, adapter contract, and attention scoring docs
38
+ - explicit `RawInputEnvelope` and `CanonicalEvent` foundation models
39
+ - canonicalization path for current syslog-like ingestion
40
+ - minimal FastAPI runtime with:
41
+ - `POST /ingest/event`
42
+ - `POST /ingest/batch`
43
+ - `GET /interesting`
44
+ - `GET /healthz`
45
+ - focused API and canonicalization tests
46
+
47
+ ### Validation
48
+ - local test suite passed (`19 passed`)
49
+ - local end-to-end demo path executed successfully against sample syslog input
50
+ - minimal FastAPI runtime and canonicalization tests passed locally
51
+
52
+ ## 0.0.1 — 2026-03-22
53
+
54
+ First public prototype release of **brAInstem**.
55
+
56
+ ### What this release is
57
+ - an experimental, self-contained operational memory prototype for weak signals
58
+ - a proof that early event sources can feed one normalized internal stream
59
+ - a first cut at attention-oriented weak-signal discovery and operator-facing interesting items
60
+
61
+ ### Included in 0.0.1
62
+ - syslog-like ingestion path
63
+ - event fingerprinting and recurrence candidate generation
64
+ - interpretable scoring with operator-facing decision/attention output
65
+ - SQLite persistence for events, signatures, and candidates
66
+ - local demo path for end-to-end validation
67
+ - initial LogicMonitor connector model/mapping work
68
+ - design governance docs covering:
69
+ - product thesis
70
+ - `v0.0.1` scope
71
+ - adapter/raw-envelope/canonical-event contract
72
+ - attention scoring model
73
+
74
+ ### Not claimed in 0.0.1
75
+ - full universal intake apparatus
76
+ - production-grade always-on ingestion runtime
77
+ - mature multi-tenant MSP platform behavior
78
+ - complete discovery apparatus breadth (burst/spread/self-heal/precursor at full maturity)
79
+ - polished operator UI
80
+
81
+ ### Validation
82
+ - local test suite passed (`19 passed`)
83
+ - local end-to-end demo path executed successfully against sample syslog input
84
+ - minimal FastAPI runtime and canonicalization tests passed locally
85
+
86
+ ### Release framing
87
+ This release puts a truthful first stake in the ground for brAInstem as an operational memory runtime focused on weak signals and operator attention.
package/README.md CHANGED
@@ -1,5 +1,101 @@
1
- # @simbimbo/brainstem
1
+ # brAInstem
2
2
 
3
- brAInstem is an operational memory engine for weak signals.
3
+ **Operational memory for weak signals**
4
4
 
5
- This package name is being reserved for the upcoming brAInstem project.
5
+ brAInstem is an always-on operational memory runtime for weak signals. Instead of treating memory as conversational context, brAInstem treats logs and operational events as raw operational experience that can be normalized into one canonical stream, assigned attention, clustered into patterns, and promoted into durable operational knowledge.
6
+
7
+ ## One-line pitch
8
+
9
+ brAInstem helps MSPs and lean ops teams detect recurring, self-resolving, and quietly escalating issues before they become major incidents.
10
+
11
+ ## The problem
12
+
13
+ Most operational pain never becomes a classic threshold alert.
14
+
15
+ It shows up as:
16
+ - recurring low-grade warnings
17
+ - brief self-healing failures
18
+ - cross-system weak signals
19
+ - near-misses that humans forget because there is too much noise
20
+
21
+ Traditional monitoring catches hard failures. brAInstem is designed to catch patterns that matter to humans before they become obvious outages.
22
+
23
+ ## Core idea
24
+
25
+ Logs and events should not only be stored and searched. They should be:
26
+ 1. ingested into a provenance-preserving raw envelope
27
+ 2. normalized into one canonical event stream
28
+ 3. assigned and updated with **attention** over time
29
+ 4. compressed so most inconsequential noise is handled cheaply
30
+ 5. promoted into operator-facing weak-signal outputs only when enough attention is earned
31
+ 6. promoted later into incident memory, lessons, and runbook hints when justified
32
+ 7. retrieved again when similar patterns recur
33
+
34
+ ## Primary users
35
+
36
+ - MSP owners
37
+ - MSP technicians
38
+ - NOC teams
39
+ - SRE / infrastructure operators
40
+ - small security / ops teams dealing with alert fatigue and log blindness
41
+
42
+ ## MVP promise
43
+
44
+ For a given tenant or environment, brAInstem should answer:
45
+ - What happened today that mattered but never alerted?
46
+ - What self-resolving issues are recurring?
47
+ - What patterns are likely to become tickets later?
48
+ - Have we seen this before?
49
+ - What happened right before the last similar incident?
50
+
51
+ ## Relationship to ocmemog
52
+
53
+ Shared DNA:
54
+ - ingest -> candidate -> promotion pipeline
55
+ - compact retrieval
56
+ - provenance and explainability
57
+ - memory scoring and recurrence awareness
58
+
59
+ Different center of gravity:
60
+ - ocmemog = assistant memory and continuity
61
+ - brAInstem = operational event intelligence and weak-signal detection
62
+
63
+ ## Initial scope
64
+
65
+ The long-term product direction is an always-on self-contained runtime with a robust input apparatus, a discovery apparatus, and operator-facing outputs.
66
+
67
+ For the first public prototype line, start with:
68
+ - a narrow but real ingestion story
69
+ - a canonical event stream
70
+ - attention-oriented weak-signal discovery
71
+ - operator-facing interesting items / digest output
72
+ - syslog-like events and LogicMonitor-shaped events as early proof sources
73
+
74
+ Delay until later:
75
+ - broad universal connector coverage
76
+ - mature syslog appliance behavior across every input mode
77
+ - full SIEM behavior
78
+ - broad compliance workflows
79
+ - generic observability replacement
80
+ - "chat with all your logs" as the primary story
81
+
82
+ ## Proposed docs
83
+
84
+ - `docs/design-governance.md` — canonical product/design guardrails
85
+ - `docs/v0.0.1.md` — first release scope and acceptance criteria
86
+ - `docs/adapters.md` — intake, raw envelope, and canonical event contract
87
+ - `docs/vision.md`
88
+ - `docs/architecture.md`
89
+ - `docs/scoring.md` — attention scoring and routing model
90
+ - `docs/roadmap.md`
91
+
92
+ ## Design governance
93
+
94
+ Before expanding scope, adding connectors, or making architecture claims, read:
95
+ - `docs/design-governance.md`
96
+
97
+ That document is the canonical governor for:
98
+ - what brAInstem is and is not
99
+ - how attention should work
100
+ - what belongs in `0.0.1`
101
+ - how the input apparatus, discovery apparatus, and operator outputs should evolve
@@ -0,0 +1,3 @@
1
+ """brAInstem — operational memory for weak signals."""
2
+
3
+ __version__ = "0.0.3"
@@ -0,0 +1,257 @@
1
+ from __future__ import annotations
2
+
3
+ from datetime import datetime
4
+ from typing import Any, Dict, List, Optional
5
+
6
+ import json
7
+ from fastapi import FastAPI, HTTPException, Query
8
+ from fastapi.responses import JSONResponse
9
+ from pydantic import BaseModel, Field
10
+
11
+ from .ingest import canonicalize_raw_input_envelope
12
+ from .interesting import interesting_items
13
+ from .models import Candidate, RawInputEnvelope
14
+ from .recurrence import build_recurrence_candidates
15
+ from .storage import (
16
+ RAW_ENVELOPE_STATUSES,
17
+ get_ingest_stats,
18
+ init_db,
19
+ list_candidates,
20
+ get_raw_envelope_by_id,
21
+ get_source_dimension_summaries,
22
+ list_recent_failed_raw_envelopes,
23
+ list_recent_raw_envelopes,
24
+ set_raw_envelope_status,
25
+ store_candidates,
26
+ store_events,
27
+ store_raw_envelopes,
28
+ store_signatures,
29
+ )
30
+ from .ingest import signatures_for_events
31
+
32
+
33
+ app = FastAPI(title="brAInstem Runtime")
34
+
35
+
36
class RawEnvelopeRequest(BaseModel):
    # Request body describing a single raw input record. Each field is copied
    # one-to-one into a RawInputEnvelope by _raw_envelope_from_request().

    # Required fields (no default).
    tenant_id: str
    source_type: str
    source_id: str = ""
    source_name: str = ""
    message_raw: str
    # Optional; when missing or empty the conversion helper substitutes the
    # current UTC time rendered as an ISO string with a trailing "Z".
    timestamp: Optional[str] = None
    host: str = ""
    service: str = ""
    severity: str = "info"
    asset_id: str = ""
    source_path: str = ""
    facility: str = ""
    # Free-form mappings; default_factory gives every request its own dict.
    structured_fields: Dict[str, Any] = Field(default_factory=dict)
    correlation_keys: Dict[str, Any] = Field(default_factory=dict)
    metadata: Dict[str, Any] = Field(default_factory=dict)
52
+
53
+
54
class IngestEventRequest(RawEnvelopeRequest):
    # Body type of POST /ingest/event; adds no fields to RawEnvelopeRequest.
    pass
56
+
57
+
58
class IngestBatchRequest(BaseModel):
    # Body type of POST /ingest/batch.

    # Raw records to ingest together.
    events: List[RawEnvelopeRequest]
    # Forwarded as the `threshold` argument of build_recurrence_candidates();
    # validated to be >= 1.
    threshold: int = Field(default=2, ge=1)
    # When set, raw envelopes, events, signatures and candidates are stored at
    # this database path; when None/empty the batch is processed without storage.
    db_path: Optional[str] = None
62
+
63
+
64
def _raw_envelope_from_request(payload: RawEnvelopeRequest) -> RawInputEnvelope:
    """Convert a validated request model into a ``RawInputEnvelope``.

    Args:
        payload: The request body for one raw record.

    Returns:
        A ``RawInputEnvelope`` carrying the same field values. The three
        mapping fields are shallow-copied so the envelope does not share dict
        objects with the request model. A missing or empty ``timestamp`` is
        replaced with the current UTC time as ``<ISO-8601>Z``.
    """
    # Function-scope import: the module itself only imports ``datetime``.
    from datetime import timezone

    timestamp = payload.timestamp
    if not timestamp:
        # datetime.utcnow() is deprecated since Python 3.12. Taking an aware
        # UTC "now" and dropping tzinfo yields the same naive value, so the
        # rendered string keeps the original "<iso>Z" format (no "+00:00").
        timestamp = datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z"

    return RawInputEnvelope(
        tenant_id=payload.tenant_id,
        source_type=payload.source_type,
        source_id=payload.source_id,
        source_name=payload.source_name,
        timestamp=timestamp,
        message_raw=payload.message_raw,
        host=payload.host,
        service=payload.service,
        severity=payload.severity,
        asset_id=payload.asset_id,
        source_path=payload.source_path,
        facility=payload.facility,
        structured_fields=dict(payload.structured_fields),
        correlation_keys=dict(payload.correlation_keys),
        metadata=dict(payload.metadata),
    )
82
+
83
+
84
def _candidate_from_row(row) -> Candidate:
    """Rebuild a ``Candidate`` from a stored row.

    ``row`` must support lookup by column name. The ``*_json`` columns are
    decoded with ``json.loads``; a NULL/empty value falls back to an empty
    object or list, depending on the column.
    """

    def _decode(column: str, fallback: str):
        # Empty/NULL columns decode to the supplied empty JSON literal.
        return json.loads(row[column] or fallback)

    total_score = float(row["score_total"])
    breakdown = _decode("score_breakdown_json", "{}")
    signature_ids = _decode("source_signature_ids_json", "[]")
    event_ids = _decode("source_event_ids_json", "[]")
    confidence_value = float(row["confidence"])
    extra = _decode("metadata_json", "{}")

    return Candidate(
        candidate_type=row["candidate_type"],
        title=row["title"],
        summary=row["summary"],
        score_total=total_score,
        score_breakdown=breakdown,
        decision_band=row["decision_band"],
        source_signature_ids=signature_ids,
        source_event_ids=event_ids,
        confidence=confidence_value,
        metadata=extra,
    )
97
+
98
+
99
+ def _raw_envelope_from_row(row) -> Dict[str, Any]:
100
+ return {
101
+ "id": row["id"],
102
+ "tenant_id": row["tenant_id"],
103
+ "source_type": row["source_type"],
104
+ "source_id": row["source_id"],
105
+ "source_name": row["source_name"],
106
+ "timestamp": row["timestamp"],
107
+ "host": row["host"],
108
+ "service": row["service"],
109
+ "severity": row["severity"],
110
+ "asset_id": row["asset_id"],
111
+ "source_path": row["source_path"],
112
+ "facility": row["facility"],
113
+ "message_raw": row["message_raw"],
114
+ "structured_fields": json.loads(row["structured_fields_json"] or "{}"),
115
+ "correlation_keys": json.loads(row["correlation_keys_json"] or "{}"),
116
+ "metadata": json.loads(row["metadata_json"] or "{}"),
117
+ "canonicalization_status": row["canonicalization_status"],
118
+ "failure_reason": row["failure_reason"],
119
+ }
120
+
121
+
122
+ def _run_ingest_batch(raw_events: List[RawInputEnvelope], *, threshold: int, db_path: Optional[str]) -> Dict[str, Any]:
123
+ raw_envelope_ids: List[int] = []
124
+ if db_path:
125
+ init_db(db_path)
126
+ raw_envelope_ids = store_raw_envelopes(raw_events, db_path)
127
+
128
+ events = []
129
+ parse_failed = 0
130
+ for idx, raw_event in enumerate(raw_events):
131
+ raw_envelope_id = raw_envelope_ids[idx] if idx < len(raw_envelope_ids) else None
132
+ try:
133
+ canonical_event = canonicalize_raw_input_envelope(raw_event)
134
+ except Exception as exc:
135
+ parse_failed += 1
136
+ if raw_envelope_id is not None:
137
+ set_raw_envelope_status(
138
+ raw_envelope_id,
139
+ "parse_failed",
140
+ db_path=db_path,
141
+ failure_reason=str(exc),
142
+ )
143
+ continue
144
+ events.append(canonical_event)
145
+ if raw_envelope_id is not None:
146
+ set_raw_envelope_status(raw_envelope_id, "canonicalized", db_path=db_path)
147
+
148
+ if not events:
149
+ return {
150
+ "ok": True,
151
+ "event_count": 0,
152
+ "signature_count": 0,
153
+ "candidate_count": 0,
154
+ "parse_failed": parse_failed,
155
+ "interesting_items": [],
156
+ }
157
+
158
+ signatures = signatures_for_events(events)
159
+ candidates = build_recurrence_candidates(events, signatures, threshold=threshold)
160
+ if db_path:
161
+ store_events(events, db_path)
162
+ store_signatures(signatures, db_path)
163
+ store_candidates(candidates, db_path)
164
+
165
+ return {
166
+ "ok": True,
167
+ "tenant_id": events[0].tenant_id if events else "",
168
+ "event_count": len(events),
169
+ "signature_count": len({sig.signature_key for sig in signatures}),
170
+ "candidate_count": len(candidates),
171
+ "parse_failed": parse_failed,
172
+ "interesting_items": interesting_items(candidates, limit=max(1, 5)),
173
+ }
174
+
175
+
176
@app.post("/ingest/event")
def ingest_event(payload: IngestEventRequest, threshold: int = 2, db_path: Optional[str] = None) -> Dict[str, Any]:
    """Ingest one raw record by running it as a single-element batch.

    Raises:
        HTTPException: 422 when ``threshold`` is below 1.

    NOTE(review): ``db_path`` is taken from the request and handed to the
    storage layer as given — confirm this endpoint is only exposed to trusted
    callers.
    """
    if threshold < 1:
        raise HTTPException(status_code=422, detail="threshold must be >= 1")

    envelope = _raw_envelope_from_request(payload)
    return _run_ingest_batch([envelope], threshold=threshold, db_path=db_path)
181
+
182
+
183
@app.post("/ingest/batch")
def ingest_batch(payload: IngestBatchRequest) -> Dict[str, Any]:
    """Ingest every record in the request body as one batch."""
    envelopes = list(map(_raw_envelope_from_request, payload.events))
    return _run_ingest_batch(
        envelopes,
        threshold=payload.threshold,
        db_path=payload.db_path,
    )
187
+
188
+
189
@app.get("/interesting")
def get_interesting(
    limit: int = Query(default=5, ge=1),
    db_path: Optional[str] = None,
) -> Dict[str, Any]:
    """Return up to ``limit`` interesting items built from stored candidates.

    Without a ``db_path`` there is nothing to read, so an empty item list is
    returned.
    """
    items: List[Any] = []
    if db_path:
        stored_rows = list_candidates(db_path=db_path, limit=limit)
        stored_candidates = list(map(_candidate_from_row, stored_rows))
        items = interesting_items(stored_candidates, limit=limit)
    return {"ok": True, "items": items}
199
+
200
+
201
@app.get("/stats")
def get_stats(db_path: Optional[str] = None) -> Dict[str, Any]:
    """Return the ingest statistics reported by storage, merged after ``ok``."""
    response: Dict[str, Any] = {"ok": True}
    # Keys from the stats mapping are merged last, as with ``**`` unpacking.
    response.update(get_ingest_stats(db_path))
    return response
204
+
205
+
206
@app.get("/failures")
def get_failures(
    limit: int = Query(default=20, ge=1),
    status: Optional[str] = None,
    db_path: Optional[str] = None,
) -> Dict[str, Any]:
    """List recent failed raw envelopes, optionally filtered by status.

    Raises:
        HTTPException: 422 when ``status`` is given but is not one of
            ``RAW_ENVELOPE_STATUSES``.
    """
    status_accepted = status is None or status in RAW_ENVELOPE_STATUSES
    if not status_accepted:
        detail = f"invalid status '{status}'; expected one of: {', '.join(RAW_ENVELOPE_STATUSES)}"
        raise HTTPException(status_code=422, detail=detail)

    failed_rows = list_recent_failed_raw_envelopes(db_path=db_path, status=status, limit=limit)
    items = list(map(_raw_envelope_from_row, failed_rows))
    return {"ok": True, "items": items, "count": len(items), "status": status}
221
+
222
+
223
@app.get("/ingest/recent")
def get_ingest_recent(
    limit: int = Query(default=20, ge=1),
    status: Optional[str] = None,
    db_path: Optional[str] = None,
) -> Dict[str, Any]:
    """List recent raw envelopes of any outcome, optionally filtered by status.

    Raises:
        HTTPException: 422 when ``status`` is given but is not one of
            ``RAW_ENVELOPE_STATUSES``.
    """
    status_accepted = status is None or status in RAW_ENVELOPE_STATUSES
    if not status_accepted:
        detail = f"invalid status '{status}'; expected one of: {', '.join(RAW_ENVELOPE_STATUSES)}"
        raise HTTPException(status_code=422, detail=detail)

    recent_rows = list_recent_raw_envelopes(
        db_path=db_path,
        status=status,
        limit=limit,
        failures_only=False,
    )
    items = list(map(_raw_envelope_from_row, recent_rows))
    return {"ok": True, "items": items, "count": len(items), "status": status}
237
+
238
+
239
@app.get("/sources")
def get_sources(
    limit: int = Query(default=10, ge=1),
    db_path: Optional[str] = None,
) -> Dict[str, Any]:
    """Return the per-source summaries produced by the storage layer."""
    summaries = get_source_dimension_summaries(db_path=db_path, limit=limit)
    return {"ok": True, "items": summaries}
245
+
246
+
247
@app.get("/failures/{raw_envelope_id}")
def get_failure(raw_envelope_id: int, db_path: Optional[str] = None) -> Dict[str, Any]:
    """Return one stored raw envelope by id.

    Raises:
        HTTPException: 404 when no envelope with that id is found.
    """
    stored_row = get_raw_envelope_by_id(raw_envelope_id, db_path=db_path)
    if stored_row is not None:
        return {"ok": True, "item": _raw_envelope_from_row(stored_row)}
    raise HTTPException(status_code=404, detail="raw envelope not found")
253
+
254
+
255
@app.get("/healthz")
def healthz() -> JSONResponse:
    """Liveness probe: always responds with ``{"ok": true, "status": "ok"}``.

    The return annotation names the response class that is actually returned.
    The previous ``Dict[str, str]`` annotation did not describe the payload
    (``ok`` is a bool) and is treated as a response model by FastAPI versions
    that read return annotations.
    """
    return JSONResponse(content={"ok": True, "status": "ok"})
@@ -0,0 +1 @@
1
+ """Connector entry points for brAInstem."""
@@ -0,0 +1,26 @@
1
+ from __future__ import annotations
2
+
3
+ from .types import ConnectorEvent
4
+
5
+
6
def map_logicmonitor_event(payload: dict, *, tenant_id: str) -> ConnectorEvent:
    """Map a LogicMonitor-shaped payload dict onto a ``ConnectorEvent``.

    Fallbacks applied when a value is missing or falsy:
      * host: ``host``, then ``resource_name``, then ``""``
      * service: ``service``, then ``metadata.datasource``, then ``"logicmonitor"``
      * severity: ``"info"``
      * timestamp: ``""``
      * message_raw: ``message_raw``, then ``message``, then ``""``
    """
    lm_metadata = payload.get("metadata") or {}
    resolved_host = payload.get("host") or payload.get("resource_name") or ""
    resolved_service = payload.get("service") or lm_metadata.get("datasource") or "logicmonitor"
    raw_text = payload.get("message_raw") or payload.get("message") or ""

    # Top-level identifiers first, then the fields lifted out of the nested
    # metadata mapping; absent keys are carried as None.
    event_metadata = {
        "alert_id": payload.get("alert_id"),
        "resource_id": payload.get("resource_id"),
    }
    for key in ("datasource", "instance_name", "acknowledged", "cleared_at"):
        event_metadata[key] = lm_metadata.get(key)

    return ConnectorEvent(
        tenant_id=tenant_id,
        source_type="logicmonitor",
        host=str(resolved_host),
        service=str(resolved_service),
        severity=str(payload.get("severity") or "info"),
        timestamp=str(payload.get("timestamp") or ""),
        message_raw=str(raw_text),
        metadata=event_metadata,
    )
@@ -0,0 +1,16 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass, field
4
+ from typing import Any, Dict
5
+
6
+
7
@dataclass
class ConnectorEvent:
    """Event record produced by a connector mapping function.

    All fields except ``metadata`` are required strings; ``metadata`` defaults
    to a fresh empty dict for every instance.
    """

    tenant_id: str
    source_type: str
    host: str
    service: str
    severity: str
    timestamp: str
    message_raw: str
    # default_factory avoids sharing one dict between instances.
    metadata: Dict[str, Any] = field(default_factory=dict)
@@ -0,0 +1,64 @@
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import json
5
+ from dataclasses import asdict
6
+ from typing import Any, Dict
7
+
8
+ from .ingest import ingest_syslog_file, signatures_for_events
9
+ from .instrumentation import emit, span
10
+ from .interesting import interesting_items
11
+ from .recurrence import build_recurrence_candidates, digest_items
12
+ from .storage import init_db, store_candidates, store_events, store_signatures
13
+
14
+
15
def run_syslog_demo(path: str, tenant_id: str, threshold: int = 2, db_path: str | None = None) -> Dict[str, Any]:
    """Run the syslog demo: ingest a file, store the results, build a summary.

    Args:
        path: Path of the syslog-like input file.
        tenant_id: Tenant identifier passed to ingestion and echoed in the result.
        threshold: Forwarded to ``build_recurrence_candidates``.
        db_path: Passed unchanged to ``init_db`` and the ``store_*`` helpers.

    Returns:
        A summary dict with event/signature/candidate counts, the digest, the
        top five interesting items and the first candidate (or ``None``).
    """
    with span("syslog_demo", path=path, tenant_id=tenant_id, threshold=threshold):
        init_db(db_path)

        events = ingest_syslog_file(path, tenant_id=tenant_id)
        emit("syslog_demo_events_loaded", count=len(events), path=path)

        signatures = signatures_for_events(events)
        emit("syslog_demo_signatures_built", count=len(signatures))

        candidates = build_recurrence_candidates(events, signatures, threshold=threshold)
        store_events(events, db_path)
        store_signatures(signatures, db_path)
        store_candidates(candidates, db_path)

        digest = digest_items(candidates)
        items = interesting_items(candidates, limit=5)

        # Both the top-level and the nested counts report distinct signature keys.
        total_events = len(events)
        distinct_signatures = len({sig.signature_key for sig in signatures})
        first_candidate = asdict(candidates[0]) if candidates else None

        payload = {
            "ok": True,
            "tenant_id": tenant_id,
            "event_count": total_events,
            "signature_count": distinct_signatures,
            "candidate_count": len(candidates),
            "canonical_stream": {
                "event_count": total_events,
                "signature_count": distinct_signatures,
            },
            "digest": digest,
            "interesting_items": items,
            "top_candidate": first_candidate,
        }
        emit(
            "syslog_demo_summary",
            event_count=payload["event_count"],
            signature_count=payload["signature_count"],
            candidate_count=payload["candidate_count"],
        )
        return payload
49
+
50
+
51
def main() -> int:
    """Command-line entry point: run the demo and print the result as JSON.

    Returns:
        ``0`` once the summary has been printed.
    """
    cli = argparse.ArgumentParser(description="Run the brAInstem syslog weak-signal demo.")
    cli.add_argument("path", help="Path to a syslog-like input file")
    cli.add_argument("--tenant", default="demo-tenant", help="Tenant/environment identifier")
    cli.add_argument("--threshold", type=int, default=2, help="Minimum recurrence count for candidate emission")
    cli.add_argument("--db-path", default=None, help="Optional SQLite path for persistent state")
    options = cli.parse_args()

    summary = run_syslog_demo(
        options.path,
        tenant_id=options.tenant,
        threshold=options.threshold,
        db_path=options.db_path,
    )
    print(json.dumps(summary, indent=2))
    return 0
61
+
62
+
63
+ if __name__ == "__main__":
64
+ raise SystemExit(main())
@@ -0,0 +1,44 @@
1
+ from __future__ import annotations
2
+
3
+ import re
4
+
5
+ from .models import Event, Signature
6
+
7
+
8
_WHITESPACE_RE = re.compile(r"\s+")
_IPV4_RE = re.compile(r"\b(?:\d{1,3}\.){3}\d{1,3}\b")
_NUMBER_RE = re.compile(r"\b\d+\b")


def normalize_message(message: str) -> str:
    """Reduce a raw message to a stable, comparable pattern.

    The text is stripped and lower-cased, dotted-quad addresses become
    ``<ip>``, remaining standalone integers become ``<n>``, and every run of
    whitespace collapses to a single space. A falsy ``message`` yields ``""``.
    """
    # Order matters: addresses must be masked before the generic number rule
    # would otherwise rewrite their octets one by one.
    substitutions = (
        (_IPV4_RE, "<ip>"),
        (_NUMBER_RE, "<n>"),
        (_WHITESPACE_RE, " "),
    )
    cleaned = (message or "").strip().lower()
    for pattern, replacement in substitutions:
        cleaned = pattern.sub(replacement, cleaned)
    return cleaned
19
+
20
+
21
def event_family_for(event: Event) -> str:
    """Classify an event into a coarse family by substring matching.

    Uses ``event.message_normalized`` when present and truthy, otherwise the
    normalized form of ``event.message_raw``. Rules are checked in order and
    the first hit wins; anything unmatched is ``"generic"``.
    """
    text = getattr(event, "message_normalized", None) or normalize_message(event.message_raw)

    # (family, substrings) pairs in priority order.
    family_rules = (
        ("failure", ("fail", "error")),
        ("service_lifecycle", ("restart", "stopped", "started")),
        ("auth", ("auth", "login")),
    )
    for family, needles in family_rules:
        if any(needle in text for needle in needles):
            return family
    return "generic"
31
+
32
+
33
def fingerprint_event(event: Event) -> Signature:
    """Build the ``Signature`` that identifies an event's recurring pattern.

    The pattern text is the first truthy value among ``event.signature_input``,
    ``event.message_normalized`` and the normalized ``event.message_raw``. The
    signature key joins family, service and pattern with ``|``.
    """
    normalized = getattr(event, "signature_input", None)
    if not normalized:
        normalized = getattr(event, "message_normalized", None)
    if not normalized:
        normalized = normalize_message(event.message_raw)

    family = event_family_for(event)
    service = (event.service or "").strip().lower()
    host_fallback = (event.host or "").strip().lower()

    # NOTE(review): the key is built from `service` alone, while the
    # Signature's service falls back to the host — confirm this is intended.
    return Signature(
        signature_key=f"{family}|{service}|{normalized}",
        event_family=family,
        normalized_pattern=normalized,
        service=service if service else host_fallback,
    )