PyPI - cortexdb-mcp - Versions diffs - 0.3.1__tar.gz → 0.3.2__tar.gz - Mend

cortexdb-mcp 0.3.1__tar.gz → 0.3.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17) hide show

{cortexdb_mcp-0.3.1 → cortexdb_mcp-0.3.2}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: cortexdb-mcp
-Version: 0.3.1
+Version: 0.3.2
 Summary: MCP Server for CortexDB — expose memory operations to AI agents
 License-Expression: MIT
 Requires-Python: >=3.10

{cortexdb_mcp-0.3.1 → cortexdb_mcp-0.3.2}/cortexdb_mcp/__init__.py RENAMED Viewed

@@ -1,3 +1,3 @@
 """CortexDB MCP Server -- expose CortexDB memory operations to AI agents via MCP."""
-__version__ = "0.3.1"
+__version__ = "0.3.2"

cortexdb_mcp-0.3.2/cortexdb_mcp/insights.py ADDED Viewed

@@ -0,0 +1,480 @@
+"""Proactive insights engine over real CortexDB v1 layers.
+Generates actionable intelligence from what a memory layer actually observes —
+the **events**, **facts**, and **beliefs** the v1 API exposes. No LLM calls on
+the hot path: every insight comes from simple counting and temporal analysis,
+so ``get_insights`` returns in sub-second time.
+The signals are memory-native (not the SRE/ops episode model an earlier draft
+assumed — v1 episodes are session clusters with no ``entities``/``type`` and
+were never populated, so those heuristics produced nothing):
+- **Contradictions** — beliefs CortexDB itself flagged ``stance="contradicted"``
+  (conflicting evidence stored about the same topic). The standout signal.
+- **Low-confidence knowledge** — uncertain / weakly-supported beliefs.
+- **Key entities** — the subjects facts cluster around.
+- **Activity spikes** — sources sending markedly more this week than last.
+- **New sources** — a ``source:`` that first appeared in the last 7 days.
+Each insight carries its evidence (event/fact/belief ids) so a caller can drill
+in via the other tools.
+"""
+from __future__ import annotations
+import hashlib
+import logging
+from collections import Counter
+from dataclasses import dataclass
+from datetime import datetime, timedelta, timezone
+from enum import Enum
+from typing import Any
+import httpx
+from cortexdb_mcp.render import (
+    belief_statement,
+    is_synthetic_subject,
+    typed_value_text,
+)
+# Module logger; fetch failures and generator crashes are reported through it.
+logger = logging.getLogger("cortexdb_mcp.insights")
+# How many records to pull per layer when analyzing a scope. Sent as the
+# ``limit`` query parameter; each layer is requested once, so at most this many
+# events / facts / beliefs are ever analyzed.
+_FETCH_LIMIT = 500
+# ---------------------------------------------------------------------------
+# Data model
+# ---------------------------------------------------------------------------
+class InsightType(str, Enum):
+    """Categories of proactive insights the engine can produce."""
+    # A belief whose ``stance`` is "contradicted".
+    contradiction = "contradiction"
+    # Aggregate over beliefs that are "uncertain" or at/below the low-confidence threshold.
+    low_confidence = "low_confidence"
+    # Summary of the subjects that facts are most often about.
+    key_entity = "key_entity"
+    # This week's event count is at least double last week's, or last week had none.
+    activity_spike = "activity_spike"
+    # A ``source:`` label whose earliest fetched event is within the last 7 days.
+    new_source = "new_source"
+    # This week's event count reported when it does not qualify as a spike.
+    recent_activity = "recent_activity"
+class Severity(str, Enum):
+    """Severity level for an insight; serialized through its string value."""
+    # Default level for summaries and non-urgent observations.
+    info = "info"
+    # Used for contradictions, many weak beliefs, and spikes with a prior-week baseline.
+    warning = "warning"
+    # Only assigned to contradicted beliefs whose own confidence is at least 0.8.
+    critical = "critical"
+@dataclass
+class Insight:
+    """One finding produced by the insights engine.
+    ``entities`` names what the finding is about, ``evidence`` lists record ids
+    a caller can look up, and ``confidence`` is the engine's own score for the
+    finding."""
+    id: str
+    insight_type: InsightType
+    title: str
+    description: str
+    severity: Severity
+    entities: list[str]
+    evidence: list[str]
+    generated_at: datetime
+    confidence: float
+    def to_dict(self) -> dict[str, Any]:
+        """Return a plain dictionary with enums and the timestamp as strings."""
+        payload: dict[str, Any] = {"id": self.id}
+        # Enum members are flattened to their string values.
+        payload["insight_type"] = self.insight_type.value
+        payload["title"] = self.title
+        payload["description"] = self.description
+        payload["severity"] = self.severity.value
+        payload["entities"] = self.entities
+        payload["evidence"] = self.evidence
+        # ISO-8601 text keeps the timestamp JSON-friendly.
+        payload["generated_at"] = self.generated_at.isoformat()
+        payload["confidence"] = self.confidence
+        return payload
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+def _make_id(*parts: str) -> str:
+    """Derive a stable ``ins_``-prefixed identifier from the given parts.
+    The parts are joined with ``:`` and hashed with SHA-256; the first 12 hex
+    digits of the digest follow the prefix."""
+    digest = hashlib.sha256(":".join(parts).encode()).hexdigest()
+    return "ins_" + digest[:12]
+def _now() -> datetime:
+    """Current moment as a timezone-aware UTC datetime."""
+    return datetime.now(tz=timezone.utc)
+def _parse_ts(raw: Any) -> datetime | None:
+    """Parse an ISO-8601 timestamp into a timezone-aware datetime.
+    Accepts ``None`` (returns ``None``), a ``datetime`` (naive values are
+    assumed to be UTC) or anything whose ``str()`` is ISO-8601 text. A trailing
+    ``Z``/``z`` is read as UTC, and fractional seconds of any length are
+    normalized to six digits so the result does not depend on which
+    ``fromisoformat`` implementation is running. Unparseable input returns
+    ``None`` instead of raising."""
+    if raw is None:
+        return None
+    if isinstance(raw, datetime):
+        return raw if raw.tzinfo else raw.replace(tzinfo=timezone.utc)
+    text = str(raw).strip()
+    # Only a *trailing* designator means UTC; other "Z" characters are left alone.
+    if text[-1:] in ("Z", "z"):
+        text = text[:-1] + "+00:00"
+    # Older ``fromisoformat`` versions only accept 3 or 6 fractional digits, so
+    # pad short fractions and truncate long (e.g. nanosecond) ones to 6.
+    head, dot, tail = text.partition(".")
+    if dot:
+        digit_count = len(tail) - len(tail.lstrip("0123456789"))
+        if digit_count:
+            fraction = tail[:digit_count][:6].ljust(6, "0")
+            text = f"{head}.{fraction}{tail[digit_count:]}"
+    try:
+        dt = datetime.fromisoformat(text)
+    except (ValueError, TypeError):
+        return None
+    return dt if dt.tzinfo else dt.replace(tzinfo=timezone.utc)
+def _event_time(ev: dict[str, Any]) -> datetime | None:
+    """Timestamp of a v1 event, read from its ``context``.
+    ``observed_at`` is used when set; otherwise ``recorded_at``. The chosen
+    value is handed to ``_parse_ts``, so the result may be ``None``."""
+    context = ev.get("context") or {}
+    stamp = context.get("observed_at") or context.get("recorded_at")
+    return _parse_ts(stamp)
+def _event_labels(ev: dict[str, Any]) -> list[str]:
+    """Labels stored under an event's ``context``; an empty list when absent."""
+    context = ev.get("context") or {}
+    labels = context.get("labels")
+    return labels if labels else []
+def _label_value(labels: list[str], prefix: str) -> str | None:
+    """Return the value of the first ``<prefix>:<value>`` label, if any.
+    Labels come from decoded JSON, so entries that are not strings are skipped
+    rather than raising. Returns ``None`` when no label carries the prefix."""
+    marker = prefix + ":"
+    for label in labels:
+        if isinstance(label, str) and label.startswith(marker):
+            return label[len(marker):]
+    return None
+def _event_id(ev: dict[str, Any]) -> str:
+    """Identifier of an event as text: ``id``, else ``event_id``, else ``"unknown"``."""
+    for key in ("id", "event_id"):
+        value = ev.get(key)
+        if value:
+            return str(value)
+    return "unknown"
+# ---------------------------------------------------------------------------
+# Engine
+# ---------------------------------------------------------------------------
+class InsightsEngine:
+    """Analyze a CortexDB scope's events / facts / beliefs and surface insights.
+    Parameters
+    ----------
+    cortex_url:
+        Base URL of the CortexDB v1 HTTP API. Trailing slashes are stripped.
+    api_key:
+        Optional bearer token for authenticated access.
+    actor:
+        Actor id sent as ``X-Cortex-Actor`` (required by the v1 actor check).
+    scope:
+        Scope path to analyze. ``tenant_id`` is accepted as a legacy alias and
+        is only used when ``scope`` is not given.
+    """
+    # Confidence at or below which a (non-contradicted) belief is "low".
+    _LOW_CONFIDENCE = 0.6
+    # Minimum number of counted fact subjects before the "key entities"
+    # summary is worth emitting.
+    _MIN_FACTS_FOR_ENTITIES = 3
+    # Minimum events in the last 7 days before any activity insight (spike or
+    # plain recent-activity) is emitted.
+    _MIN_SPIKE_VOLUME = 3
+    def __init__(
+        self,
+        cortex_url: str = "https://api-v1.cortexdb.ai",
+        api_key: str | None = None,
+        actor: str | None = None,
+        scope: str | None = None,
+        tenant_id: str | None = None,
+    ) -> None:
+        """Store connection settings on the instance.
+        Trailing ``/`` characters on ``cortex_url`` are dropped, and
+        ``tenant_id`` is only consulted when ``scope`` is empty or ``None``."""
+        base = cortex_url.rstrip("/")
+        effective_scope = scope if scope else tenant_id
+        self.cortex_url = base
+        self.api_key = api_key
+        self.actor = actor
+        self.scope = effective_scope
+    # -- HTTP helpers -------------------------------------------------------
+    def _headers(self) -> dict[str, str]:
+        """Assemble request headers: JSON content type plus optional auth/actor.
+        ``Authorization`` and ``X-Cortex-Actor`` are only present when the
+        corresponding setting is non-empty."""
+        optional = {
+            "Authorization": f"Bearer {self.api_key}" if self.api_key else None,
+            "X-Cortex-Actor": self.actor if self.actor else None,
+        }
+        headers: dict[str, str] = {"Content-Type": "application/json"}
+        headers.update({key: value for key, value in optional.items() if value is not None})
+        return headers
+    async def _get_items(self, path: str) -> list[dict[str, Any]]:
+        """GET ``path`` scoped to ``self.scope`` and return its records.
+        The body may be a bare JSON array or an object carrying an ``items``
+        array. Network/HTTP errors, undecodable bodies and unexpected payload
+        shapes all degrade to an empty list (logged, never raised), and array
+        entries that are not JSON objects are dropped so callers can rely on
+        ``dict`` records."""
+        params: dict[str, Any] = {"limit": str(_FETCH_LIMIT)}
+        if self.scope:
+            params["scope"] = self.scope
+        try:
+            async with httpx.AsyncClient(
+                base_url=self.cortex_url, headers=self._headers(), timeout=30.0
+            ) as client:
+                resp = await client.get(path, params=params)
+                resp.raise_for_status()
+                data = resp.json()
+        except (httpx.HTTPStatusError, httpx.RequestError, ValueError) as exc:
+            logger.warning("Failed to fetch %s: %s", path, exc)
+            return []
+        # An object response keeps its records under "items"; anything else is
+        # treated as the candidate record list itself.
+        items = data.get("items") if isinstance(data, dict) else data
+        if items is None:
+            # Missing key, ``"items": null`` or a ``null`` body: nothing to analyze.
+            return []
+        if not isinstance(items, list):
+            logger.warning(
+                "Unexpected payload shape from %s: %s", path, type(items).__name__
+            )
+            return []
+        return [item for item in items if isinstance(item, dict)]
+    # -- Orchestration ------------------------------------------------------
+    async def generate_all(self) -> list[Insight]:
+        """Load events, facts and beliefs, then collect every generator's output.
+        Each layer is requested exactly once, one after another. A generator
+        that raises is logged with its traceback and skipped; the remaining
+        generators still run."""
+        events = await self._get_items("/v1/events")
+        facts = await self._get_items("/v1/facts")
+        beliefs = await self._get_items("/v1/beliefs")
+        collected: list[Insight] = []
+        for generator in (
+            self._gen_contradictions,
+            self._gen_low_confidence,
+            self._gen_key_entities,
+            self._gen_activity,
+            self._gen_new_sources,
+        ):
+            try:
+                collected += generator(events, facts, beliefs)
+            except Exception:  # noqa: BLE001 - one bad generator must not sink all
+                logger.exception("Insight generator %s failed", generator.__name__)
+        return collected
+    # -- Generators (pure functions of the fetched layers) ------------------
+    def _gen_contradictions(
+        self,
+        events: list[dict[str, Any]],
+        facts: list[dict[str, Any]],
+        beliefs: list[dict[str, Any]],
+    ) -> list[Insight]:
+        """Emit one insight for every belief whose ``stance`` is ``"contradicted"``.
+        Severity is critical when the belief's own confidence is at least 0.8
+        (a missing or non-numeric confidence counts as 0.5), otherwise warning.
+        Evidence lists the ids found in the belief's ``supports`` entries.
+        ``events`` and ``facts`` are accepted for a uniform generator signature
+        and are not read."""
+        now = _now()
+        insights: list[Insight] = []
+        contradicted = (b for b in beliefs if b.get("stance") == "contradicted")
+        for belief in contradicted:
+            statement = belief_statement(belief) or "(unnamed claim)"
+            raw_conf = belief.get("confidence")
+            belief_conf = raw_conf if isinstance(raw_conf, (int, float)) else 0.5
+            if belief_conf >= 0.8:
+                severity = Severity.critical
+            else:
+                severity = Severity.warning
+            supports = belief.get("supports") or []
+            # Keep only support entries that are objects with a usable id.
+            evidence = [
+                str(entry.get("id"))
+                for entry in supports
+                if isinstance(entry, dict) and entry.get("id")
+            ]
+            description = (
+                f'CortexDB holds conflicting evidence about: '
+                f'"{statement}". Review the supporting memories '
+                f'to resolve which is current.'
+            )
+            insight = Insight(
+                id=_make_id("contradiction", statement),
+                insight_type=InsightType.contradiction,
+                title=f"Contradiction: {statement[:80]}",
+                description=description,
+                severity=severity,
+                entities=[],
+                evidence=evidence,
+                generated_at=now,
+                confidence=0.9,
+            )
+            insights.append(insight)
+        return insights
+    def _gen_low_confidence(
+        self,
+        events: list[dict[str, Any]],
+        facts: list[dict[str, Any]],
+        beliefs: list[dict[str, Any]],
+    ) -> list[Insight]:
+        """Summarize uncertain / weakly-supported beliefs in a single insight.
+        A belief qualifies when its stance is ``"uncertain"`` or its confidence
+        is at or below ``_LOW_CONFIDENCE``; contradicted beliefs are left out. A
+        missing or non-numeric confidence counts as 1.0. Returns an empty list
+        when nothing qualifies. The three lowest-confidence statements are
+        quoted as examples."""
+        now = _now()
+        weak: list[tuple[float, str]] = []
+        for belief in beliefs:
+            stance = belief.get("stance")
+            if stance == "contradicted":
+                continue  # already covered with higher priority
+            raw_conf = belief.get("confidence")
+            score = raw_conf if isinstance(raw_conf, (int, float)) else 1.0
+            if not (stance == "uncertain" or score <= self._LOW_CONFIDENCE):
+                continue
+            weak.append((score, belief_statement(belief) or "(unnamed claim)"))
+        if not weak:
+            return []
+        # Lowest confidence first; ties keep their original order.
+        weak = sorted(weak, key=lambda pair: pair[0])
+        examples = "; ".join(text for _, text in weak[:3])
+        if len(weak) >= 5:
+            severity = Severity.warning
+        else:
+            severity = Severity.info
+        summary = Insight(
+            id=_make_id("low_confidence", str(len(weak))),
+            insight_type=InsightType.low_confidence,
+            title=f"{len(weak)} low-confidence belief(s)",
+            description=(
+                f"{len(weak)} belief(s) are uncertain or weakly supported and "
+                f"may need confirmation. Examples: {examples}."
+            ),
+            severity=severity,
+            entities=[],
+            evidence=[],
+            generated_at=now,
+            confidence=0.7,
+        )
+        return [summary]
+    def _gen_key_entities(
+        self,
+        events: list[dict[str, Any]],
+        facts: list[dict[str, Any]],
+        beliefs: list[dict[str, Any]],
+    ) -> list[Insight]:
+        """Report the subjects that facts are most often about.
+        Synthetic placeholder subjects and subjects that render to empty text
+        are not counted. Nothing is emitted when fewer than
+        ``_MIN_FACTS_FOR_ENTITIES`` facts were counted. The insight lists up to
+        five subjects and titles itself after the top three."""
+        now = _now()
+        subjects = (fact.get("subject", {}) for fact in facts)
+        rendered = (
+            typed_value_text(subject)
+            for subject in subjects
+            if not is_synthetic_subject(subject)
+        )
+        counts: Counter[str] = Counter(text for text in rendered if text)
+        if not counts or sum(counts.values()) < self._MIN_FACTS_FOR_ENTITIES:
+            return []
+        top = counts.most_common(5)
+        top_names = [name for name, _ in top]
+        listing = ", ".join(f"{name} ({n})" for name, n in top)
+        summary = Insight(
+            id=_make_id("key_entity", *top_names),
+            insight_type=InsightType.key_entity,
+            title=f"Key entities: {', '.join(name for name, _ in top[:3])}",
+            description=(
+                f"Knowledge is concentrated around {len(counts)} entity/entities. "
+                f"Most referenced: {listing}."
+            ),
+            severity=Severity.info,
+            entities=top_names,
+            evidence=[],
+            generated_at=now,
+            confidence=0.75,
+        )
+        return [summary]
+    def _gen_activity(
+        self,
+        events: list[dict[str, Any]],
+        facts: list[dict[str, Any]],
+        beliefs: list[dict[str, Any]],
+    ) -> list[Insight]:
+        """Compare event volume of the last 7 days with the 7 days before.
+        The decision uses total counts, so it works without ``source:`` labels;
+        when labels exist the three busiest sources are appended to the
+        description. Events without a parseable time are ignored. Nothing is
+        emitted below ``_MIN_SPIKE_VOLUME`` events this week. Otherwise the
+        result is a spike (no prior-week events, or at least double) or a plain
+        recent-activity note. Evidence holds at most 20 event ids."""
+        now = _now()
+        week = timedelta(days=7)
+        this_start, last_start = now - week, now - 2 * week
+        this_total = 0
+        last_total = 0
+        evidence: list[str] = []
+        by_source: Counter[str] = Counter()
+        for ev in events:
+            ts = _event_time(ev)
+            if ts is None:
+                continue
+            if ts < this_start:
+                # Older than this week: only the previous week is counted.
+                if ts >= last_start:
+                    last_total += 1
+                continue
+            this_total += 1
+            if len(evidence) < 20:
+                evidence.append(_event_id(ev))
+            source = _label_value(_event_labels(ev), "source")
+            if source:
+                by_source[source] += 1
+        if this_total < self._MIN_SPIKE_VOLUME:
+            return []
+        detail = ""
+        if by_source:
+            top = ", ".join(f"{s} ({n})" for s, n in by_source.most_common(3))
+            detail = f" Top sources: {top}."
+        if last_total == 0:  # no prior-week baseline
+            insight_type = InsightType.activity_spike
+            severity = Severity.info
+            title = f"Activity spike: {this_total} memories this week"
+            desc = (
+                f"{this_total} memory/memories captured in the last 7 days "
+                f"(none the week before).{detail}"
+            )
+        elif this_total >= 2 * last_total:
+            insight_type = InsightType.activity_spike
+            severity = Severity.warning
+            title = f"Activity spike: {this_total} memories this week"
+            desc = (
+                f"{this_total} memory/memories captured in the last 7 days, up "
+                f"from {last_total} ({this_total / last_total:.1f}x) the week before."
+                f"{detail}"
+            )
+        else:
+            insight_type = InsightType.recent_activity
+            severity = Severity.info
+            title = f"{this_total} memories captured this week"
+            desc = (
+                f"{this_total} memory/memories captured in the last 7 days "
+                f"(vs {last_total} the week before).{detail}"
+            )
+        summary = Insight(
+            id=_make_id(insight_type.value, str(this_total), str(last_total)),
+            insight_type=insight_type,
+            title=title,
+            description=desc,
+            severity=severity,
+            entities=list(by_source),
+            evidence=evidence,
+            generated_at=now,
+            confidence=0.65,
+        )
+        return [summary]
+    def _gen_new_sources(
+        self,
+        events: list[dict[str, Any]],
+        facts: list[dict[str, Any]],
+        beliefs: list[dict[str, Any]],
+    ) -> list[Insight]:
+        """Report each ``source:`` label first seen within the last 7 days.
+        Only the events passed in are considered: a source is "new" when the
+        earliest of its events here is no older than 7 days. Events lacking a
+        parseable time or a ``source:`` label are ignored. Insights are ordered
+        by source name."""
+        now = _now()
+        cutoff = now - timedelta(days=7)
+        earliest: dict[str, datetime] = {}
+        for ev in events:
+            seen_at = _event_time(ev)
+            origin = _label_value(_event_labels(ev), "source")
+            if seen_at is None or origin is None:
+                continue
+            known = earliest.get(origin)
+            if known is None or seen_at < known:
+                earliest[origin] = seen_at
+        return [
+            Insight(
+                id=_make_id("new_source", source),
+                insight_type=InsightType.new_source,
+                title=f"New data source: {source}",
+                description=(
+                    f'A new source "{source}" started feeding memories on '
+                    f"{first.strftime('%B %d')}. It had no events before the last 7 days."
+                ),
+                severity=Severity.info,
+                entities=[source],
+                evidence=[],
+                generated_at=now,
+                confidence=0.65,
+            )
+            for source, first in sorted(earliest.items())
+            if first >= cutoff
+        ]

cortexdb_mcp-0.3.2/cortexdb_mcp/render.py ADDED Viewed

@@ -0,0 +1,103 @@
+"""Render helpers for v1 CortexDB response shapes.
+Single source of truth for projecting the v1 wire shapes into readable text,
+shared by the MCP tools (``server.py``) and the insights engine
+(``insights.py``).
+Ground truth (verified against a live ``/v1/recall``): the StratifiedPack
+returns recalled *events* inside ``context_block`` (NOT ``layers.events`` —
+that layer is empty on the synthesized-recall path), while ``layers`` carries
+the derived ``facts`` / ``beliefs`` / ``episodes``. A v1 Fact serializes its
+triple FLAT — ``subject`` / ``predicate`` / ``object`` are top-level keys, with
+no ``triple`` wrapper — and ``subject`` / ``object`` are tagged ``TypedValue``
+objects: ``{"type":"entity","id":...,"name":...}`` or
+``{"type":"literal","datatype":...,"value":...}``.
+"""
+from __future__ import annotations
+import re
+from typing import Any
+# A recalled memory line in context_block looks like:
+#   "[2026-06-27 21:48 UTC] [user] Project Nimbus integrates with Slack."
+# (the leading timestamp may be followed by one or more [..] tag groups).
+# Group 1 is the bracketed text containing "UTC"; group 2 is the rest of the line.
+_MEM_LINE = re.compile(r"\[([^\]]*UTC[^\]]*)\]\s*(.*)")
+# A run of one or more leading [..] groups, each with optional trailing whitespace.
+_LEADING_TAGS = re.compile(r"^(?:\[[^\]]*\]\s*)+")
+def memories_from_context(context_block: str) -> list[tuple[str, str]]:
+    """Split a recall ``context_block`` into ``(timestamp, text)`` pairs.
+    Lines that are blank or consist only of ``-`` are separators and are
+    dropped. A line opening with a ``[<ts> UTC]`` header yields its timestamp
+    and the text left after removing any ``[..]`` tags that follow; it is
+    dropped if no text remains. Any other line is kept whole with an empty
+    timestamp."""
+    pairs: list[tuple[str, str]] = []
+    stripped_lines = (raw.strip() for raw in (context_block or "").splitlines())
+    for line in stripped_lines:
+        # Empty once dashes are removed means blank or a ``---`` rule.
+        if not line.strip("-"):
+            continue
+        header = _MEM_LINE.match(line)
+        if header is None:
+            # A bare line with no timestamp header is still content worth showing.
+            pairs.append(("", line))
+            continue
+        text = _LEADING_TAGS.sub("", header.group(2)).strip()  # drop [role]/[local] tags
+        if text:
+            pairs.append((header.group(1), text))
+    return pairs
+def typed_value_text(tv: Any) -> str:
+    """Render a v1 ``TypedValue`` ({type:entity|concept|literal, ...}) to text.
+    Entities and concepts prefer their ``name`` then ``id``; literals use
+    ``value``, with a missing or ``null`` value rendered as an empty string
+    (never the text ``"None"``). Non-dict input is stringified as-is, with
+    ``None`` becoming ``""``. Dicts of an unknown ``type`` fall back to the
+    first non-empty of ``value`` / ``name`` / ``id``."""
+    if tv is None:
+        return ""
+    if not isinstance(tv, dict):
+        return str(tv)
+    kind = tv.get("type")
+    if kind in ("entity", "concept"):
+        return str(tv.get("name") or tv.get("id") or "")
+    if kind == "literal":
+        value = tv.get("value")
+        # Falsy literals such as 0 or False are real values; only null is absent.
+        return "" if value is None else str(value)
+    # Unknown/legacy shape: best-effort.
+    return str(tv.get("value") or tv.get("name") or tv.get("id") or "")
+def is_synthetic_subject(subject: Any) -> bool:
+    """Tell whether ``subject`` is the projector's placeholder for a
+    statement-level fact, i.e. its id starts with ``ent_fact_subject_``.
+    Accepts either a subject dict (its ``id`` is checked) or a bare id value.
+    Such placeholders are not real named entities and should not be surfaced
+    as such."""
+    raw_id = subject.get("id", "") if isinstance(subject, dict) else (subject or "")
+    return str(raw_id).startswith("ent_fact_subject_")
+def fact_text(f: dict[str, Any]) -> str:
+    """Render a v1 fact (flat triple) as one readable line.
+    When the predicate is empty or ``states``, or the subject is a synthetic
+    placeholder, only the object text is returned. Otherwise the non-empty
+    parts of ``subject predicate object`` are joined with single spaces."""
+    subject = f.get("subject", {})
+    predicate = f.get("predicate", "") or ""
+    rendered_object = typed_value_text(f.get("object", {}))
+    statement_level = predicate in ("", "states") or is_synthetic_subject(subject)
+    if statement_level:
+        return rendered_object
+    pieces = [typed_value_text(subject), predicate, rendered_object]
+    return " ".join(filter(None, pieces))
+def belief_statement(b: dict[str, Any]) -> str:
+    """Human-readable statement asserted by a belief.
+    The text of ``claim.object`` is used when it is non-empty; otherwise the
+    claim is rendered like a fact. The claim subject is an internal topic key
+    and is not consulted for the primary text."""
+    claim = b.get("claim", {}) or {}
+    direct = typed_value_text(claim.get("object", {}))
+    if direct:
+        return direct
+    return fact_text(claim)
+def belief_text(b: dict[str, Any]) -> str:
+    """Format a v1 belief as ``[stance] claim (confidence NN%)``.
+    The stance falls back to ``?`` when absent, and the confidence suffix is
+    omitted unless the confidence is numeric."""
+    statement = belief_statement(b)
+    stance = b.get("stance", "?")
+    conf = b.get("confidence")
+    conf_s = ""
+    if isinstance(conf, (int, float)):
+        conf_s = f" (confidence {conf:.0%})"
+    return f"[{stance}] {statement}{conf_s}"

cortexdb-mcp 0.3.1__tar.gz → 0.3.2__tar.gz

cortexdb-mcp 0.3.1__tar.gz → 0.3.2__tar.gz