cortexdb-mcp 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,640 @@
1
+ """Proactive insights engine that analyzes CortexDB episodes to generate actionable intelligence.
2
+
3
+ Runs heuristic-based analysis over episodes and entities stored in CortexDB to
4
+ surface patterns such as incident spikes, new dependencies, knowledge gaps, and
5
+ deployment risks. All detection is done with simple temporal and co-occurrence
6
+ analysis -- no LLM calls on the hot path -- so insights can be generated in
7
+ sub-second time.
8
+
9
+ Typical output examples:
10
+
11
+ - "payments-service had 3 incidents this week (up from 0 last week)"
12
+ - "New dependency detected: checkout -> stripe-gateway-v2 (since March 10)"
13
+ - "Knowledge gap: nobody documented the billing reconciliation migration"
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ import hashlib
19
+ import logging
20
+ import time
21
+ from collections import Counter, defaultdict
22
+ from dataclasses import dataclass, field
23
+ from datetime import datetime, timedelta, timezone
24
+ from enum import Enum
25
+ from typing import Any
26
+
27
+ import httpx
28
+
29
+ logger = logging.getLogger("cortexdb_mcp.insights")
30
+
31
+
32
+ # ---------------------------------------------------------------------------
33
+ # Data model
34
+ # ---------------------------------------------------------------------------
35
+
36
+
37
class InsightType(str, Enum):
    """Categories of proactive insights the engine can produce.

    The ``str`` mixin makes every member an actual string equal to its value;
    ``Insight.to_dict`` serializes the member through ``.value``.
    """

    # Emitted by InsightsEngine.incident_spike_detection.
    incident_spike = "incident_spike"
    # Emitted by InsightsEngine.new_dependency_detection.
    new_dependency = "new_dependency"
    # Emitted by InsightsEngine.knowledge_gap_detection.
    knowledge_gap = "knowledge_gap"
    # NOTE(review): no generator visible in this file emits this type.
    ownership_change = "ownership_change"
    # Emitted by InsightsEngine.deployment_risk_assessment.
    deployment_risk = "deployment_risk"
    # Emitted by InsightsEngine.stale_documentation_detection.
    stale_documentation = "stale_documentation"
    # Emitted by InsightsEngine.recurring_issue_detection.
    recurring_issue = "recurring_issue"
    # NOTE(review): no generator visible in this file emits this type.
    team_bottleneck = "team_bottleneck"
48
+
49
+
50
class Severity(str, Enum):
    """Severity level for an insight.

    The ``str`` mixin makes every member an actual string equal to its value;
    ``Insight.to_dict`` serializes the member through ``.value``.
    """

    # Informational only; used for newly detected dependencies.
    info = "info"
    # Default level for most findings in this module.
    warning = "warning"
    # Used when a generator's own count or ratio threshold is reached.
    critical = "critical"
56
+
57
+
58
@dataclass
class Insight:
    """One actionable finding produced by the insights engine.

    Attributes
    ----------
    id:
        Short identifier for the insight (the generators build it with
        ``_make_id``).
    insight_type:
        Category of the finding.
    title:
        One-line headline.
    description:
        Human-readable explanation of the finding.
    severity:
        How urgent the finding is.
    entities:
        Names of the entities the finding is about.
    evidence:
        Identifiers of the episodes that support the finding.
    generated_at:
        Moment the insight was produced.
    confidence:
        Score assigned by the generator that produced the insight.
    """

    id: str
    insight_type: InsightType
    title: str
    description: str
    severity: Severity
    entities: list[str]
    evidence: list[str]
    generated_at: datetime
    confidence: float

    def to_dict(self) -> dict[str, Any]:
        """Return a JSON-friendly mapping of this insight.

        Enum members are reduced to their ``.value`` and the timestamp to an
        ISO-8601 string; all other fields are passed through unchanged.
        """
        payload: dict[str, Any] = dict(
            id=self.id,
            insight_type=self.insight_type.value,
            title=self.title,
            description=self.description,
            severity=self.severity.value,
            entities=self.entities,
            evidence=self.evidence,
            generated_at=self.generated_at.isoformat(),
            confidence=self.confidence,
        )
        return payload
85
+
86
+
87
+ # ---------------------------------------------------------------------------
88
+ # Helpers
89
+ # ---------------------------------------------------------------------------
90
+
91
+
92
+ def _make_id(*parts: str) -> str:
93
+ """Produce a deterministic short insight ID from constituent parts."""
94
+ raw = ":".join(parts)
95
+ return "ins_" + hashlib.sha256(raw.encode()).hexdigest()[:12]
96
+
97
+
98
+ def _now() -> datetime:
99
+ """Return the current UTC time."""
100
+ return datetime.now(timezone.utc)
101
+
102
+
103
def _week_ago(weeks: int = 1) -> datetime:
    """Return the UTC datetime lying ``weeks`` weeks in the past (default: one)."""
    offset = timedelta(weeks=weeks)
    return _now() - offset
106
+
107
+
108
+ # ---------------------------------------------------------------------------
109
+ # Engine
110
+ # ---------------------------------------------------------------------------
111
+
112
+
113
+ class InsightsEngine:
114
+ """Analyze CortexDB episodes and generate proactive insights.
115
+
116
+ Parameters
117
+ ----------
118
+ cortex_url:
119
+ Base URL of the CortexDB HTTP API.
120
+ api_key:
121
+ Optional bearer token for authenticated access.
122
+ tenant_id:
123
+ Tenant scope used when querying CortexDB.
124
+ """
125
+
126
+ # Episode types used in heuristics.
127
+ _INCIDENT_TYPES = {"incident", "alert", "outage", "error", "failure"}
128
+ _DOCUMENT_TYPES = {"document", "doc", "runbook", "wiki", "documentation", "readme"}
129
+ _DEPLOY_TYPES = {"deploy", "deployment", "release", "rollout"}
130
+ _DEPENDENCY_TYPES = {"dependency", "integration", "api_call", "import"}
131
+
132
+ def __init__(
133
+ self,
134
+ cortex_url: str = "http://localhost:3141",
135
+ api_key: str | None = None,
136
+ tenant_id: str | None = None,
137
+ ) -> None:
138
+ self.cortex_url = cortex_url.rstrip("/")
139
+ self.api_key = api_key
140
+ self.tenant_id = tenant_id
141
+
142
+ # -- HTTP helpers -------------------------------------------------------
143
+
144
+ def _headers(self) -> dict[str, str]:
145
+ """Build HTTP headers for CortexDB requests."""
146
+ headers: dict[str, str] = {"Content-Type": "application/json"}
147
+ if self.api_key:
148
+ headers["Authorization"] = f"Bearer {self.api_key}"
149
+ return headers
150
+
151
async def _get(self, path: str, params: dict[str, Any] | None = None) -> Any:
    """Issue a GET against CortexDB and return the decoded JSON body.

    A fresh ``httpx.AsyncClient`` with a 30 second timeout is opened for the
    call and closed afterwards. ``raise_for_status`` is invoked before the
    body is decoded, so error statuses surface as exceptions to the caller.
    """
    query = params if params else {}
    async with httpx.AsyncClient(
        base_url=self.cortex_url,
        headers=self._headers(),
        timeout=30.0,
    ) as session:
        response = await session.get(path, params=query)
        response.raise_for_status()
        return response.json()
161
+
162
+ async def _fetch_episodes(
163
+ self,
164
+ *,
165
+ episode_type: str | None = None,
166
+ since: datetime | None = None,
167
+ until: datetime | None = None,
168
+ ) -> list[dict[str, Any]]:
169
+ """Fetch episodes from CortexDB with optional type and time filters.
170
+
171
+ Returns a list of episode dicts as returned by the ``GET /v1/episodes``
172
+ endpoint.
173
+ """
174
+ params: dict[str, Any] = {}
175
+ if episode_type is not None:
176
+ params["episode_type"] = episode_type
177
+ if since is not None:
178
+ params["since"] = since.isoformat()
179
+ if until is not None:
180
+ params["until"] = until.isoformat()
181
+ if self.tenant_id is not None:
182
+ params["tenant_id"] = self.tenant_id
183
+
184
+ try:
185
+ data = await self._get("/v1/episodes", params=params)
186
+ except (httpx.HTTPStatusError, httpx.RequestError) as exc:
187
+ logger.warning("Failed to fetch episodes: %s", exc)
188
+ return []
189
+
190
+ if isinstance(data, list):
191
+ return data
192
+ return data.get("episodes", data.get("items", []))
193
+
194
+ async def _fetch_entities(self) -> list[dict[str, Any]]:
195
+ """Fetch the entity list from CortexDB."""
196
+ params: dict[str, Any] = {}
197
+ if self.tenant_id is not None:
198
+ params["tenant_id"] = self.tenant_id
199
+
200
+ try:
201
+ data = await self._get("/v1/entities", params=params)
202
+ except (httpx.HTTPStatusError, httpx.RequestError) as exc:
203
+ logger.warning("Failed to fetch entities: %s", exc)
204
+ return []
205
+
206
+ if isinstance(data, list):
207
+ return data
208
+ return data.get("entities", data.get("items", []))
209
+
210
+ # -- Utility extractors -------------------------------------------------
211
+
212
+ @staticmethod
213
+ def _extract_entities(episode: dict[str, Any]) -> list[str]:
214
+ """Return the list of entity names mentioned in an episode."""
215
+ entities = episode.get("entities", [])
216
+ if isinstance(entities, list):
217
+ return [
218
+ e.get("name", e) if isinstance(e, dict) else str(e)
219
+ for e in entities
220
+ ]
221
+ return []
222
+
223
+ @staticmethod
224
+ def _episode_type(episode: dict[str, Any]) -> str:
225
+ """Return the normalised episode type string."""
226
+ return (episode.get("episode_type") or episode.get("type") or "").lower()
227
+
228
+ @staticmethod
229
+ def _episode_time(episode: dict[str, Any]) -> datetime | None:
230
+ """Parse the episode timestamp into a datetime, or None on failure."""
231
+ raw = episode.get("timestamp") or episode.get("created_at") or episode.get("occurred_at")
232
+ if raw is None:
233
+ return None
234
+ if isinstance(raw, datetime):
235
+ return raw
236
+ try:
237
+ return datetime.fromisoformat(str(raw).replace("Z", "+00:00"))
238
+ except (ValueError, TypeError):
239
+ return None
240
+
241
+ @staticmethod
242
+ def _episode_id(episode: dict[str, Any]) -> str:
243
+ """Return a stable identifier for an episode."""
244
+ return str(
245
+ episode.get("episode_id")
246
+ or episode.get("event_id")
247
+ or episode.get("id")
248
+ or "unknown"
249
+ )
250
+
251
+ # -- Insight generators -------------------------------------------------
252
+
253
+ async def generate_all(self) -> list[Insight]:
254
+ """Run every insight generator and return the combined results."""
255
+ results: list[Insight] = []
256
+ generators = [
257
+ self.incident_spike_detection,
258
+ self.new_dependency_detection,
259
+ self.knowledge_gap_detection,
260
+ self.deployment_risk_assessment,
261
+ self.stale_documentation_detection,
262
+ self.recurring_issue_detection,
263
+ ]
264
+ for gen in generators:
265
+ try:
266
+ insights = await gen()
267
+ results.extend(insights)
268
+ except Exception:
269
+ logger.exception("Insight generator %s failed", gen.__name__)
270
+ return results
271
+
272
+ # 1. Incident spike -------------------------------------------------------
273
+
274
async def incident_spike_detection(self) -> list[Insight]:
    """Compare per-service incident counts for this week against last week.

    "This week" is the 7 days ending now and "last week" the 7 days before
    that. A service whose count went up yields a warning. It becomes
    critical when the count at least tripled, or when it went from zero to
    three or more.

    Only incident-type episodes with a parseable timestamp are counted, and
    an episode counts once per service even if it lists that service more
    than once. The evidence for an insight is exactly the set of this-week
    episodes that were counted for the service, gathered in the same pass.
    """
    now = _now()
    this_week_start = now - timedelta(weeks=1)
    last_week_start = now - timedelta(weeks=2)

    all_episodes = await self._fetch_episodes(since=last_week_start)

    this_week: Counter[str] = Counter()
    last_week: Counter[str] = Counter()
    this_week_evidence: dict[str, list[str]] = defaultdict(list)

    for ep in all_episodes:
        if self._episode_type(ep) not in self._INCIDENT_TYPES:
            continue
        ts = self._episode_time(ep)
        if ts is None or ts < last_week_start:
            continue
        is_current = ts >= this_week_start
        eid = self._episode_id(ep)
        # dict.fromkeys de-duplicates while keeping first-seen order.
        for entity in dict.fromkeys(self._extract_entities(ep)):
            if is_current:
                this_week[entity] += 1
                this_week_evidence[entity].append(eid)
            else:
                last_week[entity] += 1

    insights: list[Insight] = []
    # A service seen only last week can never be a spike, so iterating the
    # current week's services is sufficient.
    for svc in sorted(this_week):
        cur = this_week[svc]
        prev = last_week[svc]
        if cur <= prev:
            continue

        if prev == 0:
            severity = Severity.critical if cur >= 3 else Severity.warning
            desc = (
                f"{svc} had {cur} incident(s) this week (up from 0 last week)."
            )
        else:
            ratio = cur / prev
            severity = Severity.critical if ratio >= 3 else Severity.warning
            desc = (
                f"{svc} had {cur} incident(s) this week "
                f"(up from {prev} last week, {ratio:.1f}x increase)."
            )

        insights.append(
            Insight(
                id=_make_id("incident_spike", svc, str(cur)),
                insight_type=InsightType.incident_spike,
                title=f"Incident spike: {svc}",
                description=desc,
                severity=severity,
                entities=[svc],
                evidence=list(this_week_evidence[svc]),
                generated_at=now,
                confidence=0.85,
            )
        )

    return insights
346
+
347
+ # 2. New dependency detection ---------------------------------------------
348
+
349
async def new_dependency_detection(self) -> list[Insight]:
    """Find entity pairs that first appear together in the last 7 days.

    Episodes from the last 90 days are fetched. A pair of entities mentioned
    by the same episode counts as a co-occurrence; pairs seen in the last
    7 days but in no older episode of that 90-day window are reported.
    Episodes without a parseable timestamp are ignored.

    The earliest recent sighting of each pair is tracked during the single
    pass over the episodes, so no rescan is needed per reported pair.
    """
    now = _now()
    recent_cutoff = now - timedelta(days=7)
    older_cutoff = now - timedelta(days=90)

    all_episodes = await self._fetch_episodes(since=older_cutoff)

    recent_pairs: dict[tuple[str, str], list[str]] = defaultdict(list)
    first_seen: dict[tuple[str, str], datetime] = {}
    older_pairs: set[tuple[str, str]] = set()

    for ep in all_episodes:
        ts = self._episode_time(ep)
        if ts is None:
            continue
        # Sorting makes (a, b) and (b, a) the same key.
        entities = sorted(set(self._extract_entities(ep)))
        is_recent = ts >= recent_cutoff
        eid = self._episode_id(ep)
        for idx, left in enumerate(entities):
            for right in entities[idx + 1:]:
                pair = (left, right)
                if not is_recent:
                    older_pairs.add(pair)
                    continue
                recent_pairs[pair].append(eid)
                if pair not in first_seen or ts < first_seen[pair]:
                    first_seen[pair] = ts

    insights: list[Insight] = []
    for pair, evidence in sorted(recent_pairs.items()):
        if pair in older_pairs:
            continue
        a, b = pair
        first_ts = first_seen.get(pair)

        since_str = first_ts.strftime("%B %d") if first_ts else "recently"
        insights.append(
            Insight(
                id=_make_id("new_dep", a, b),
                insight_type=InsightType.new_dependency,
                title=f"New dependency detected: {a} -> {b}",
                description=(
                    f"New dependency detected: {a} -> {b} (since {since_str}). "
                    f"These entities were never seen together before the last 7 days."
                ),
                severity=Severity.info,
                entities=[a, b],
                evidence=evidence,
                generated_at=now,
                confidence=0.7,
            )
        )

    return insights
409
+
410
+ # 3. Knowledge gap detection ----------------------------------------------
411
+
412
async def knowledge_gap_detection(self) -> list[Insight]:
    """Flag services with repeated incidents and no documentation episodes.

    Looks at the last 30 days of episodes. A service mentioned by at least
    two incident-type episodes and by no document-type episode is reported:
    critical from five incidents upwards, warning otherwise. Services are
    reported from most to fewest incidents.
    """
    now = _now()
    window_start = now - timedelta(days=30)
    all_episodes = await self._fetch_episodes(since=window_start)

    incident_counts: Counter[str] = Counter()
    doc_counts: Counter[str] = Counter()

    for ep in all_episodes:
        kind = self._episode_type(ep)
        is_incident = kind in self._INCIDENT_TYPES
        is_doc = kind in self._DOCUMENT_TYPES
        for name in self._extract_entities(ep):
            if is_incident:
                incident_counts[name] += 1
            if is_doc:
                doc_counts[name] += 1

    insights: list[Insight] = []
    # most_common() is a stable descending sort on the count.
    for svc, incidents in incident_counts.most_common():
        if incidents < 2 or doc_counts[svc] != 0:
            continue

        level = Severity.warning
        if incidents >= 5:
            level = Severity.critical

        supporting: list[str] = []
        for ep in all_episodes:
            if self._episode_type(ep) not in self._INCIDENT_TYPES:
                continue
            if svc in self._extract_entities(ep):
                supporting.append(self._episode_id(ep))

        insights.append(
            Insight(
                id=_make_id("knowledge_gap", svc),
                insight_type=InsightType.knowledge_gap,
                title=f"Knowledge gap: {svc}",
                description=(
                    f"Knowledge gap: nobody documented {svc}. "
                    f"It had {incidents} incident(s) in the last 30 days "
                    f"but 0 documentation episodes."
                ),
                severity=level,
                entities=[svc],
                evidence=supporting,
                generated_at=now,
                confidence=0.75,
            )
        )

    return insights
463
+
464
+ # 4. Deployment risk assessment -------------------------------------------
465
+
466
async def deployment_risk_assessment(self) -> list[Insight]:
    """Flag services that were both deployed and hit by incidents recently.

    Looks at the last 7 days of episodes. Any service mentioned by at least
    one deploy-type episode and at least one incident-type episode is
    reported: critical from three incidents upwards, warning otherwise.
    The evidence lists the deploy episode IDs followed by the incident ones.
    """
    now = _now()
    window_start = now - timedelta(days=7)
    all_episodes = await self._fetch_episodes(since=window_start)

    deploy_ids: dict[str, list[str]] = defaultdict(list)
    incident_ids: dict[str, list[str]] = defaultdict(list)

    for ep in all_episodes:
        kind = self._episode_type(ep)
        eid = self._episode_id(ep)
        names = self._extract_entities(ep)
        if kind in self._DEPLOY_TYPES:
            for name in names:
                deploy_ids[name].append(eid)
        if kind in self._INCIDENT_TYPES:
            for name in names:
                incident_ids[name].append(eid)

    insights: list[Insight] = []
    for svc in sorted(deploy_ids.keys() & incident_ids.keys()):
        deploys = deploy_ids[svc]
        incidents = incident_ids[svc]
        deploy_count = len(deploys)
        incident_count = len(incidents)

        level = Severity.warning
        if incident_count >= 3:
            level = Severity.critical

        insights.append(
            Insight(
                id=_make_id("deploy_risk", svc),
                insight_type=InsightType.deployment_risk,
                title=f"Deployment risk: {svc}",
                description=(
                    f"{svc} had {deploy_count} deployment(s) and "
                    f"{incident_count} incident(s) in the last 7 days. "
                    f"Recent deploys may be correlated with failures."
                ),
                severity=level,
                entities=[svc],
                evidence=deploys + incidents,
                generated_at=now,
                confidence=0.8,
            )
        )

    return insights
513
+
514
+ # 5. Stale documentation detection ----------------------------------------
515
+
516
async def stale_documentation_detection(self) -> list[Insight]:
    """Flag active services whose newest documentation is over 90 days old.

    Episodes from the last 365 days are fetched. A service is "active" when
    any episode from the last 30 days mentions it. For each active service
    the most recent document-type episode is looked up; if it is older than
    90 days a warning is emitted, with that episode's ID as evidence.

    Services with no document episode in the fetched window are skipped
    here. Episodes without a parseable timestamp are ignored.
    """
    now = _now()
    stale_threshold = now - timedelta(days=90)
    active_since = now - timedelta(days=30)

    # Fetch a wide window to capture old docs.
    all_episodes = await self._fetch_episodes(since=now - timedelta(days=365))

    # Service name -> (timestamp, episode ID) of its newest document episode.
    latest_doc: dict[str, tuple[datetime, str]] = {}
    active_services: set[str] = set()

    for ep in all_episodes:
        ep_type = self._episode_type(ep)
        ts = self._episode_time(ep)
        if ts is None:
            continue
        is_doc = ep_type in self._DOCUMENT_TYPES
        eid = self._episode_id(ep)
        for entity in self._extract_entities(ep):
            if ts >= active_since:
                active_services.add(entity)
            if is_doc:
                known = latest_doc.get(entity)
                if known is None or ts > known[0]:
                    latest_doc[entity] = (ts, eid)

    insights: list[Insight] = []
    for svc in sorted(active_services):
        newest = latest_doc.get(svc)
        if newest is None:
            continue  # knowledge_gap_detection handles missing docs
        last_doc, doc_episode_id = newest
        if last_doc >= stale_threshold:
            continue
        days_stale = (now - last_doc).days
        insights.append(
            Insight(
                id=_make_id("stale_doc", svc),
                insight_type=InsightType.stale_documentation,
                title=f"Stale documentation: {svc}",
                description=(
                    f"Documentation for {svc} was last updated {days_stale} days ago. "
                    f"The service is still active -- consider refreshing its docs."
                ),
                severity=Severity.warning,
                entities=[svc],
                # Point the reader at the document that has gone stale.
                evidence=[doc_episode_id],
                generated_at=now,
                confidence=0.65,
            )
        )

    return insights
570
+
571
+ # 6. Recurring issue detection --------------------------------------------
572
+
573
async def recurring_issue_detection(self) -> list[Insight]:
    """Find incident patterns that keep repeating for the same service.

    Looks at the last 30 days of incident-type episodes. For every service
    with three or more incidents, the text under ``"content"`` (or
    ``"description"``) is lower-cased and split on whitespace, and words of
    three characters or fewer are dropped. Words present in at least half
    of the service's incidents (and in no fewer than two) are treated as
    the recurring pattern. Up to five of them are named in the insight.

    Keywords are ranked by frequency and then alphabetically, so the
    description and the insight ID derived from it are the same on every
    run for the same data. An episode counts once per service even if it
    lists that service more than once.
    """
    now = _now()
    since = now - timedelta(days=30)
    all_episodes = await self._fetch_episodes(since=since)

    service_incidents: dict[str, list[dict[str, Any]]] = defaultdict(list)

    for ep in all_episodes:
        if self._episode_type(ep) not in self._INCIDENT_TYPES:
            continue
        # dict.fromkeys de-duplicates while keeping first-seen order.
        for entity in dict.fromkeys(self._extract_entities(ep)):
            service_incidents[entity].append(ep)

    insights: list[Insight] = []
    for svc, incidents in sorted(service_incidents.items()):
        if len(incidents) < 3:
            continue

        # Count, per word, how many incidents mention it. Each incident
        # contributes a word at most once because a set is counted.
        token_counts: Counter[str] = Counter()
        for ep in incidents:
            # str() guards against non-string content values.
            content = str(ep.get("content") or ep.get("description") or "").lower()
            token_counts.update({tok for tok in content.split() if len(tok) > 3})

        # "At least half" rounds up: 3 of 5 incidents, not 2 of 5.
        threshold = max(2, (len(incidents) + 1) // 2)
        # The alphabetical tie-break keeps the order independent of set
        # iteration order, which varies between interpreter runs.
        ranked = sorted(token_counts.items(), key=lambda item: (-item[1], item[0]))
        common = [tok for tok, count in ranked[:10] if count >= threshold]

        if not common:
            continue

        evidence = [self._episode_id(ep) for ep in incidents]
        pattern_hint = ", ".join(common[:5])
        insights.append(
            Insight(
                id=_make_id("recurring", svc, pattern_hint),
                insight_type=InsightType.recurring_issue,
                title=f"Recurring issue: {svc}",
                description=(
                    f"{svc} has {len(incidents)} similar incidents in the last 30 days. "
                    f"Common keywords: {pattern_hint}."
                ),
                severity=Severity.warning,
                entities=[svc],
                evidence=evidence,
                generated_at=now,
                confidence=0.6,
            )
        )

    return insights