cortexdb-mcp 0.3.1__tar.gz → 0.3.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cortexdb-mcp
3
- Version: 0.3.1
3
+ Version: 0.3.2
4
4
  Summary: MCP Server for CortexDB — expose memory operations to AI agents
5
5
  License-Expression: MIT
6
6
  Requires-Python: >=3.10
@@ -1,3 +1,3 @@
1
1
  """CortexDB MCP Server -- expose CortexDB memory operations to AI agents via MCP."""
2
2
 
3
- __version__ = "0.3.1"
3
+ __version__ = "0.3.2"
@@ -0,0 +1,480 @@
1
+ """Proactive insights engine over real CortexDB v1 layers.
2
+
3
+ Generates actionable intelligence from what a memory layer actually observes —
4
+ the **events**, **facts**, and **beliefs** the v1 API exposes. No LLM calls on
5
+ the hot path: every insight comes from simple counting and temporal analysis,
6
+ so ``get_insights`` returns in sub-second time.
7
+
8
+ The signals are memory-native (not the SRE/ops episode model an earlier draft
9
+ assumed — v1 episodes are session clusters with no ``entities``/``type`` and
10
+ were never populated, so those heuristics produced nothing):
11
+
12
+ - **Contradictions** — beliefs CortexDB itself flagged ``stance="contradicted"``
13
+ (conflicting evidence stored about the same topic). The standout signal.
14
+ - **Low-confidence knowledge** — uncertain / weakly-supported beliefs.
15
+ - **Key entities** — the subjects facts cluster around.
16
+ - **Activity spikes** — total event volume at least double the week before (or with no prior-week baseline), with a per-source breakdown when ``source:`` labels exist.
17
+ - **New sources** — a ``source:`` that first appeared in the last 7 days.
18
+
19
+ Each insight carries its evidence (event/fact/belief ids) so a caller can drill
20
+ in via the other tools.
21
+ """
22
+
23
+ from __future__ import annotations
24
+
25
+ import hashlib
26
+ import logging
27
+ from collections import Counter
28
+ from dataclasses import dataclass
29
+ from datetime import datetime, timedelta, timezone
30
+ from enum import Enum
31
+ from typing import Any
32
+
33
+ import httpx
34
+
35
+ from cortexdb_mcp.render import (
36
+ belief_statement,
37
+ is_synthetic_subject,
38
+ typed_value_text,
39
+ )
40
+
41
# Module-level logger; fetch failures and generator failures are reported here.
logger = logging.getLogger("cortexdb_mcp.insights")

# How many records to pull per layer when analyzing a scope.
# Sent as the ``limit`` query parameter on every layer request.
_FETCH_LIMIT = 500
45
+
46
+
47
+ # ---------------------------------------------------------------------------
48
+ # Data model
49
+ # ---------------------------------------------------------------------------
50
+
51
+
52
class InsightType(str, Enum):
    """Categories of proactive insights the engine can produce.

    Subclasses ``str`` so each member compares equal to its plain string value;
    ``Insight.to_dict`` serializes a member through ``.value``.
    """

    # A belief whose ``stance`` is ``"contradicted"``.
    contradiction = "contradiction"
    # Aggregate of beliefs that are ``"uncertain"`` or at/below the engine's
    # low-confidence threshold.
    low_confidence = "low_confidence"
    # Summary of the subjects that facts cluster around.
    key_entity = "key_entity"
    # This week's event volume has no prior-week baseline or is at least
    # double the prior week's.
    activity_spike = "activity_spike"
    # An event ``source:`` label whose earliest event is within the last 7 days.
    new_source = "new_source"
    # Weekly event volume, reported when it does not qualify as a spike.
    recent_activity = "recent_activity"
61
+
62
+
63
class Severity(str, Enum):
    """Severity level for an insight.

    Subclasses ``str``; ``Insight.to_dict`` serializes a member through
    ``.value``.
    """

    # Informational summaries (key entities, new sources, ordinary activity).
    info = "info"
    # Needs attention (e.g. contradictions, spikes over a prior-week baseline).
    warning = "warning"
    # Only assigned to contradicted beliefs held with confidence >= 0.8.
    critical = "critical"
69
+
70
+
71
@dataclass
class Insight:
    """One actionable finding emitted by the insights engine.

    Attributes are plain data: a deterministic ``id``, the category and
    severity enums, human-readable ``title`` / ``description``, the names of
    the ``entities`` involved, the ids of the supporting records
    (``evidence``), the generation time, and a ``confidence`` score.
    """

    id: str
    insight_type: InsightType
    title: str
    description: str
    severity: Severity
    entities: list[str]
    evidence: list[str]
    generated_at: datetime
    confidence: float

    def to_dict(self) -> dict[str, Any]:
        """Return the insight as a JSON-safe dictionary.

        Enum fields are flattened to their string values and ``generated_at``
        to an ISO-8601 string; every other field is passed through unchanged.
        """
        payload: dict[str, Any] = {"id": self.id}
        payload["insight_type"] = self.insight_type.value
        payload.update(title=self.title, description=self.description)
        payload["severity"] = self.severity.value
        payload.update(entities=self.entities, evidence=self.evidence)
        payload["generated_at"] = self.generated_at.isoformat()
        payload["confidence"] = self.confidence
        return payload
98
+
99
+
100
+ # ---------------------------------------------------------------------------
101
+ # Helpers
102
+ # ---------------------------------------------------------------------------
103
+
104
+
105
+ def _make_id(*parts: str) -> str:
106
+ """Produce a deterministic short insight ID from constituent parts."""
107
+ raw = ":".join(parts)
108
+ return "ins_" + hashlib.sha256(raw.encode()).hexdigest()[:12]
109
+
110
+
111
+ def _now() -> datetime:
112
+ """Return the current UTC time."""
113
+ return datetime.now(timezone.utc)
114
+
115
+
116
+ def _parse_ts(raw: Any) -> datetime | None:
117
+ """Parse an ISO-8601 timestamp (with trailing Z) into an aware datetime."""
118
+ if raw is None:
119
+ return None
120
+ if isinstance(raw, datetime):
121
+ return raw if raw.tzinfo else raw.replace(tzinfo=timezone.utc)
122
+ try:
123
+ dt = datetime.fromisoformat(str(raw).replace("Z", "+00:00"))
124
+ except (ValueError, TypeError):
125
+ return None
126
+ return dt if dt.tzinfo else dt.replace(tzinfo=timezone.utc)
127
+
128
+
129
def _event_time(ev: dict[str, Any]) -> datetime | None:
    """Observation time of a v1 event.

    Reads ``context.observed_at`` and falls back to ``context.recorded_at``
    when the former is missing or empty. Returns ``None`` when neither value
    parses as a timestamp.
    """
    context = ev.get("context") or {}
    raw_stamp = context.get("observed_at")
    if not raw_stamp:
        raw_stamp = context.get("recorded_at")
    return _parse_ts(raw_stamp)
133
+
134
+
135
+ def _event_labels(ev: dict[str, Any]) -> list[str]:
136
+ return (ev.get("context") or {}).get("labels") or []
137
+
138
+
139
+ def _label_value(labels: list[str], prefix: str) -> str | None:
140
+ """Return the value of the first ``<prefix>:<value>`` label, if any."""
141
+ for label in labels:
142
+ if label.startswith(prefix + ":"):
143
+ return label[len(prefix) + 1:]
144
+ return None
145
+
146
+
147
+ def _event_id(ev: dict[str, Any]) -> str:
148
+ return str(ev.get("id") or ev.get("event_id") or "unknown")
149
+
150
+
151
+ # ---------------------------------------------------------------------------
152
+ # Engine
153
+ # ---------------------------------------------------------------------------
154
+
155
+
156
class InsightsEngine:
    """Analyze a CortexDB scope's events / facts / beliefs and surface insights.

    Parameters
    ----------
    cortex_url:
        Base URL of the CortexDB v1 HTTP API (a trailing ``/`` is stripped).
    api_key:
        Optional bearer token for authenticated access.
    actor:
        Actor id sent as ``X-Cortex-Actor`` (required by the v1 actor check).
    scope:
        Scope path to analyze. ``tenant_id`` is accepted as a legacy alias.
    tenant_id:
        Legacy alias for ``scope``; only used when ``scope`` is not given.
    """

    # Confidence at or below which a (non-contradicted) belief is "low".
    _LOW_CONFIDENCE = 0.6
    # Minimum facts before the "key entities" summary is worth emitting.
    _MIN_FACTS_FOR_ENTITIES = 3
    # Minimum events in a window before an activity spike is worth flagging.
    _MIN_SPIKE_VOLUME = 3

    def __init__(
        self,
        cortex_url: str = "https://api-v1.cortexdb.ai",
        api_key: str | None = None,
        actor: str | None = None,
        scope: str | None = None,
        tenant_id: str | None = None,
    ) -> None:
        self.cortex_url = cortex_url.rstrip("/")
        self.api_key = api_key
        self.actor = actor
        self.scope = scope or tenant_id

    # -- HTTP helpers -------------------------------------------------------

    def _headers(self) -> dict[str, str]:
        """Build HTTP headers for CortexDB requests.

        The auth and actor headers are only included when configured.
        """
        headers: dict[str, str] = {"Content-Type": "application/json"}
        if self.api_key:
            headers["Authorization"] = f"Bearer {self.api_key}"
        if self.actor:
            headers["X-Cortex-Actor"] = self.actor
        return headers

    @staticmethod
    def _items_from_payload(data: Any) -> list[dict[str, Any]]:
        """Normalize a decoded JSON response body into a list of record dicts.

        Accepts either a bare JSON array or an object with an ``items`` array.
        Any other shape (``null``, a string, ``"items": null``, ...) yields an
        empty list, and non-object entries are dropped, so the generators can
        rely on iterating dictionaries.
        """
        if isinstance(data, dict):
            data = data.get("items")
        if not isinstance(data, list):
            return []
        return [item for item in data if isinstance(item, dict)]

    async def _get_items(self, path: str) -> list[dict[str, Any]]:
        """GET ``path`` scoped to ``self.scope`` and return its ``items`` list.

        Network/HTTP errors, undecodable bodies, and unexpected payload shapes
        degrade to an empty list (errors are logged, never raised).
        """
        params: dict[str, Any] = {"limit": str(_FETCH_LIMIT)}
        if self.scope:
            params["scope"] = self.scope
        try:
            async with httpx.AsyncClient(
                base_url=self.cortex_url, headers=self._headers(), timeout=30.0
            ) as client:
                resp = await client.get(path, params=params)
                resp.raise_for_status()
                data = resp.json()
        except (httpx.HTTPStatusError, httpx.RequestError, ValueError) as exc:
            logger.warning("Failed to fetch %s: %s", path, exc)
            return []
        return self._items_from_payload(data)

    # -- Orchestration ------------------------------------------------------

    async def generate_all(self) -> list[Insight]:
        """Fetch the scope's layers once and run every generator over them.

        A generator that raises is logged and skipped; the insights from the
        remaining generators are still returned.
        """
        events = await self._get_items("/v1/events")
        facts = await self._get_items("/v1/facts")
        beliefs = await self._get_items("/v1/beliefs")

        results: list[Insight] = []
        generators = (
            self._gen_contradictions,
            self._gen_low_confidence,
            self._gen_key_entities,
            self._gen_activity,
            self._gen_new_sources,
        )
        for gen in generators:
            try:
                results.extend(gen(events, facts, beliefs))
            except Exception:  # noqa: BLE001 - one bad generator must not sink all
                logger.exception("Insight generator %s failed", gen.__name__)
        return results

    # -- Generators (pure functions of the fetched layers) ------------------

    def _gen_contradictions(
        self,
        events: list[dict[str, Any]],
        facts: list[dict[str, Any]],
        beliefs: list[dict[str, Any]],
    ) -> list[Insight]:
        """One insight per belief CortexDB flagged ``contradicted`` — the system
        holds conflicting evidence about that statement.

        Severity is ``critical`` when the belief's confidence is >= 0.8 and
        ``warning`` otherwise; a missing/non-numeric confidence counts as 0.5.
        Evidence lists the ids of the belief's ``supports`` entries.
        """
        now = _now()
        out: list[Insight] = []
        for b in beliefs:
            if b.get("stance") != "contradicted":
                continue
            statement = belief_statement(b) or "(unnamed claim)"
            conf = b.get("confidence")
            conf = conf if isinstance(conf, (int, float)) else 0.5
            severity = Severity.critical if conf >= 0.8 else Severity.warning
            evidence = [
                str(s.get("id"))
                for s in (b.get("supports") or [])
                if isinstance(s, dict) and s.get("id")
            ]
            out.append(
                Insight(
                    id=_make_id("contradiction", statement),
                    insight_type=InsightType.contradiction,
                    title=f"Contradiction: {statement[:80]}",
                    description=(
                        f'CortexDB holds conflicting evidence about: '
                        f'"{statement}". Review the supporting memories '
                        f'to resolve which is current.'
                    ),
                    severity=severity,
                    entities=[],
                    evidence=evidence,
                    generated_at=now,
                    confidence=0.9,
                )
            )
        return out

    def _gen_low_confidence(
        self,
        events: list[dict[str, Any]],
        facts: list[dict[str, Any]],
        beliefs: list[dict[str, Any]],
    ) -> list[Insight]:
        """One aggregated insight covering uncertain / weakly-supported beliefs.

        A belief qualifies when its stance is ``uncertain`` or its confidence
        is at or below ``_LOW_CONFIDENCE``; a missing/non-numeric confidence
        counts as 1.0. The three lowest-confidence statements are quoted as
        examples. Returns an empty list when nothing qualifies.
        """
        now = _now()
        weak: list[tuple[float, str]] = []
        for b in beliefs:
            if b.get("stance") == "contradicted":
                continue  # already covered with higher priority
            conf = b.get("confidence")
            conf = conf if isinstance(conf, (int, float)) else 1.0
            if b.get("stance") == "uncertain" or conf <= self._LOW_CONFIDENCE:
                weak.append((conf, belief_statement(b) or "(unnamed claim)"))
        if not weak:
            return []
        weak.sort(key=lambda x: x[0])
        examples = "; ".join(s for _, s in weak[:3])
        severity = Severity.warning if len(weak) >= 5 else Severity.info
        return [
            Insight(
                id=_make_id("low_confidence", str(len(weak))),
                insight_type=InsightType.low_confidence,
                title=f"{len(weak)} low-confidence belief(s)",
                description=(
                    f"{len(weak)} belief(s) are uncertain or weakly supported and "
                    f"may need confirmation. Examples: {examples}."
                ),
                severity=severity,
                entities=[],
                evidence=[],
                generated_at=now,
                confidence=0.7,
            )
        ]

    def _gen_key_entities(
        self,
        events: list[dict[str, Any]],
        facts: list[dict[str, Any]],
        beliefs: list[dict[str, Any]],
    ) -> list[Insight]:
        """Summarize the entities facts cluster around (real subjects only).

        Facts with a synthetic placeholder subject or an unnamed subject are
        ignored. Nothing is emitted until at least
        ``_MIN_FACTS_FOR_ENTITIES`` facts have been counted.
        """
        now = _now()
        counts: Counter[str] = Counter()
        for f in facts:
            subject = f.get("subject", {})
            if is_synthetic_subject(subject):
                continue
            name = typed_value_text(subject)
            if name:
                counts[name] += 1
        if not counts or sum(counts.values()) < self._MIN_FACTS_FOR_ENTITIES:
            return []
        top = counts.most_common(5)
        listing = ", ".join(f"{name} ({n})" for name, n in top)
        return [
            Insight(
                id=_make_id("key_entity", *[name for name, _ in top]),
                insight_type=InsightType.key_entity,
                title=f"Key entities: {', '.join(name for name, _ in top[:3])}",
                description=(
                    f"Knowledge is concentrated around {len(counts)} entity/entities. "
                    f"Most referenced: {listing}."
                ),
                severity=Severity.info,
                entities=[name for name, _ in top],
                evidence=[],
                generated_at=now,
                confidence=0.75,
            )
        ]

    def _gen_activity(
        self,
        events: list[dict[str, Any]],
        facts: list[dict[str, Any]],
        beliefs: list[dict[str, Any]],
    ) -> list[Insight]:
        """Recent-volume trend (this week vs last). Decided on total volume so it
        works even when events carry no ``source:`` label; a per-source
        breakdown is added to the description when labels are present.

        Events without a parseable timestamp are ignored. Nothing is emitted
        when fewer than ``_MIN_SPIKE_VOLUME`` events fall in the last 7 days.
        Evidence is capped at the first 20 event ids of the current week.
        """
        now = _now()
        this_start = now - timedelta(days=7)
        last_start = now - timedelta(days=14)

        this_total = last_total = 0
        by_source: Counter[str] = Counter()
        evidence: list[str] = []
        for ev in events:
            ts = _event_time(ev)
            if ts is None:
                continue
            if ts >= this_start:
                this_total += 1
                if len(evidence) < 20:
                    evidence.append(_event_id(ev))
                source = _label_value(_event_labels(ev), "source")
                if source:
                    by_source[source] += 1
            elif ts >= last_start:
                last_total += 1

        if this_total < self._MIN_SPIKE_VOLUME:
            return []

        detail = ""
        if by_source:
            top = ", ".join(f"{s} ({n})" for s, n in by_source.most_common(3))
            detail = f" Top sources: {top}."

        # A spike is "at least double last week", or any qualifying volume
        # when there is no prior-week baseline to compare against.
        is_spike = last_total == 0 or this_total >= 2 * last_total
        if is_spike and last_total > 0:
            insight_type = InsightType.activity_spike
            severity = Severity.warning
            title = f"Activity spike: {this_total} memories this week"
            desc = (
                f"{this_total} memory/memories captured in the last 7 days, up "
                f"from {last_total} ({this_total / last_total:.1f}x) the week before."
                f"{detail}"
            )
        elif is_spike:  # no prior-week baseline
            insight_type = InsightType.activity_spike
            severity = Severity.info
            title = f"Activity spike: {this_total} memories this week"
            desc = (
                f"{this_total} memory/memories captured in the last 7 days "
                f"(none the week before).{detail}"
            )
        else:
            insight_type = InsightType.recent_activity
            severity = Severity.info
            title = f"{this_total} memories captured this week"
            desc = (
                f"{this_total} memory/memories captured in the last 7 days "
                f"(vs {last_total} the week before).{detail}"
            )

        return [
            Insight(
                id=_make_id(insight_type.value, str(this_total), str(last_total)),
                insight_type=insight_type,
                title=title,
                description=desc,
                severity=severity,
                entities=list(by_source),
                evidence=evidence,
                generated_at=now,
                confidence=0.65,
            )
        ]

    def _gen_new_sources(
        self,
        events: list[dict[str, Any]],
        facts: list[dict[str, Any]],
        beliefs: list[dict[str, Any]],
    ) -> list[Insight]:
        """Flag a ``source:`` whose earliest event is within the last 7 days.

        "Earliest" is computed over the fetched events only (at most
        ``_FETCH_LIMIT`` per request). Events without a parseable timestamp or
        without a non-empty ``source:`` label are ignored, matching how the
        activity generator treats empty sources.
        """
        now = _now()
        cutoff = now - timedelta(days=7)
        first_seen: dict[str, datetime] = {}
        for ev in events:
            ts = _event_time(ev)
            source = _label_value(_event_labels(ev), "source")
            # ``not source`` also rejects an empty ``source:`` label, which
            # would otherwise surface as a nameless "new data source".
            if ts is None or not source:
                continue
            if source not in first_seen or ts < first_seen[source]:
                first_seen[source] = ts

        out: list[Insight] = []
        for source, first in sorted(first_seen.items()):
            if first < cutoff:
                continue
            out.append(
                Insight(
                    id=_make_id("new_source", source),
                    insight_type=InsightType.new_source,
                    title=f"New data source: {source}",
                    description=(
                        f'A new source "{source}" started feeding memories on '
                        f"{first.strftime('%B %d')}. It had no events before the last 7 days."
                    ),
                    severity=Severity.info,
                    entities=[source],
                    evidence=[],
                    generated_at=now,
                    confidence=0.65,
                )
            )
        return out
@@ -0,0 +1,103 @@
1
+ """Render helpers for v1 CortexDB response shapes.
2
+
3
+ Single source of truth for projecting the v1 wire shapes into readable text,
4
+ shared by the MCP tools (``server.py``) and the insights engine
5
+ (``insights.py``).
6
+
7
+ Ground truth (verified against a live ``/v1/recall``): the StratifiedPack
8
+ returns recalled *events* inside ``context_block`` (NOT ``layers.events`` —
9
+ that layer is empty on the synthesized-recall path), while ``layers`` carries
10
+ the derived ``facts`` / ``beliefs`` / ``episodes``. A v1 Fact serializes its
11
+ triple FLAT — ``subject`` / ``predicate`` / ``object`` are top-level keys, with
12
+ no ``triple`` wrapper — and ``subject`` / ``object`` are tagged ``TypedValue``
13
+ objects: ``{"type":"entity","id":...,"name":...}`` or
14
+ ``{"type":"literal","datatype":...,"value":...}``.
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ import re
20
+ from typing import Any
21
+
22
# A recalled memory line in context_block looks like:
#   "[2026-06-27 21:48 UTC] [user] Project Nimbus integrates with Slack."
# (the leading timestamp may be followed by one or more [..] tag groups).
# _MEM_LINE captures the bracketed timestamp and everything after it;
# _LEADING_TAGS matches the run of [..] groups at the start of that remainder.
_MEM_LINE = re.compile(r"\[([^\]]*UTC[^\]]*)\]\s*(.*)")
_LEADING_TAGS = re.compile(r"^(?:\[[^\]]*\]\s*)+")


def memories_from_context(context_block: str) -> list[tuple[str, str]]:
    """Split a recall ``context_block`` into ``(timestamp, text)`` pairs.

    Memories are separated by ``---`` rules and each starts with a
    ``[<ts> UTC] [<role>] `` header. Blank lines and dash-only separators are
    ignored, as are headers with no text after their tags (left behind by
    facts/beliefs that have no event text). A line without a timestamp header
    is kept as content with an empty timestamp.
    """
    memories: list[tuple[str, str]] = []
    for line in map(str.strip, (context_block or "").splitlines()):
        # Nothing left once dashes are removed: blank line or ``---`` rule.
        if not line.strip("-"):
            continue
        header = _MEM_LINE.match(line)
        if header is None:
            # A bare line with no timestamp header is still content worth showing.
            memories.append(("", line))
            continue
        stamp, body = header.groups()
        body = _LEADING_TAGS.sub("", body).strip()  # drop [role]/[local] tags
        if body:
            memories.append((stamp, body))
    return memories
50
+
51
+
52
def typed_value_text(tv: Any) -> str:
    """Render a v1 ``TypedValue`` ({type:entity|concept|literal, ...}) to text.

    Entities and concepts prefer their ``name`` then ``id``; literals use
    ``value``. A literal whose ``value`` is missing or ``None`` renders as an
    empty string (never the text ``"None"``), so callers that fall back on an
    empty result behave correctly. Non-dict inputs are stringified, with
    ``None`` rendering as an empty string.
    """
    if tv is None:
        return ""
    if not isinstance(tv, dict):
        return str(tv)
    kind = tv.get("type")
    if kind in ("entity", "concept"):
        return str(tv.get("name") or tv.get("id") or "")
    if kind == "literal":
        value = tv.get("value")
        # Falsy-but-real literals such as 0 or False must still be rendered.
        return "" if value is None else str(value)
    # Unknown/legacy shape: best-effort.
    return str(tv.get("value") or tv.get("name") or tv.get("id") or "")
64
+
65
+
66
def is_synthetic_subject(subject: Any) -> bool:
    """Report whether ``subject`` is the projector's placeholder subject.

    Statement-level facts get a subject id of the form
    ``ent_fact_subject_<id>``; these are not real named entities and should
    not be surfaced as such. ``subject`` may be a ``TypedValue`` dict (its
    ``id`` is inspected) or a bare id.
    """
    raw_id = subject.get("id", "") if isinstance(subject, dict) else (subject or "")
    return str(raw_id).startswith("ent_fact_subject_")
75
+
76
+
77
def fact_text(f: dict[str, Any]) -> str:
    """Render a v1 fact (flat triple) as a single readable line.

    Statement-level facts — an empty or ``states`` predicate, or a synthetic
    placeholder subject — render as just the object text. Real triples render
    as ``subject predicate object`` with empty parts omitted.
    """
    subj = f.get("subject", {})
    rendered_object = typed_value_text(f.get("object", {}))
    verb = f.get("predicate", "") or ""
    statement_level = verb in ("", "states") or is_synthetic_subject(subj)
    if statement_level:
        return rendered_object
    pieces = [typed_value_text(subj), verb, rendered_object]
    return " ".join(filter(None, pieces))
88
+
89
+
90
def belief_statement(b: dict[str, Any]) -> str:
    """Return the human-readable statement a belief asserts.

    Uses the rendered ``claim.object`` when it is non-empty and otherwise
    falls back to rendering the whole claim as a fact. The claim subject is an
    internal topic key and is deliberately not used on the primary path.
    """
    claim = b.get("claim", {}) or {}
    direct = typed_value_text(claim.get("object", {}))
    if direct:
        return direct
    return fact_text(claim)
95
+
96
+
97
def belief_text(b: dict[str, Any]) -> str:
    """Render a v1 belief to ``[stance] claim (confidence NN%)``.

    A missing or null ``stance`` renders as ``?``. The confidence suffix is
    only added for real numbers; booleans and other types are ignored rather
    than being formatted as a percentage.
    """
    statement = belief_statement(b)
    # ``or "?"`` also covers an explicit JSON null, which would otherwise
    # print as "[None]".
    stance = b.get("stance") or "?"
    conf = b.get("confidence")
    is_number = isinstance(conf, (int, float)) and not isinstance(conf, bool)
    conf_s = f" (confidence {conf:.0%})" if is_number else ""
    return f"[{stance}] {statement}{conf_s}"