mcal-ai 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,539 @@
1
+ """
2
+ Goal-Aware Retriever
3
+
4
+ Retrieves memories based on goal relevance, not just semantic similarity.
5
+ This is Pillar 3 of MCAL: Predictive/Goal-Aware Context Retrieval.
6
+
7
+ Key capabilities:
8
+ - Multi-factor relevance scoring (semantic + goal + decision + recency)
9
+ - Goal alignment scoring for memories
10
+ - Decision impact tracking
11
+ - Adaptive retrieval based on current objectives
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import logging
17
+ from datetime import datetime, timedelta, timezone
18
+ from typing import Optional, Protocol
19
+
20
+ import numpy as np
21
+
22
+ from .models import (
23
+ DecisionTrail,
24
+ IntentNode,
25
+ Memory,
26
+ MemoryType,
27
+ RetrievalConfig,
28
+ RetrievalResult,
29
+ )
30
+
31
+
32
+ def _utc_now() -> datetime:
33
+ """Return current UTC time (timezone-aware)."""
34
+ return datetime.now(timezone.utc)
35
+
36
+
37
+ logger = logging.getLogger(__name__)
38
+
39
+
40
+ # =============================================================================
41
+ # Embedding Client Protocol
42
+ # =============================================================================
43
+
44
class EmbeddingClient(Protocol):
    """Structural interface for embedding providers.

    Any object exposing matching async ``embed``/``embed_batch`` methods
    satisfies this protocol; no subclassing is required.
    """

    async def embed(self, text: str) -> list[float]:
        """Return the embedding vector for a single text."""
        ...

    async def embed_batch(self, texts: list[str]) -> list[list[float]]:
        """Return one embedding vector per input text."""
        ...
54
+
55
+
56
class LLMClient(Protocol):
    """Structural interface for LLM completion providers."""

    async def complete(self, prompt: str, system: Optional[str] = None) -> str:
        """Generate a completion for ``prompt``, optionally guided by a ``system`` message."""
        ...
62
+
63
+
64
+ # =============================================================================
65
+ # Prompts
66
+ # =============================================================================
67
+
68
+ GOAL_ALIGNMENT_SYSTEM = """You are an expert at assessing relevance between memories and goals.
69
+
70
+ Given a memory and a goal, rate how relevant the memory is to achieving that goal.
71
+ Consider:
72
+ - Direct relevance: Does the memory directly help with the goal?
73
+ - Indirect relevance: Does the memory provide context needed for the goal?
74
+ - Dependency: Is this memory required before the goal can be achieved?
75
+
76
+ Output a score from 0.0 to 1.0 and brief explanation."""
77
+
78
+ GOAL_ALIGNMENT_PROMPT = """Rate the relevance of this memory to this goal.
79
+
80
+ MEMORY: {memory}
81
+
82
+ GOAL: {goal}
83
+
84
+ Output as JSON:
85
+ {{
86
+ "score": 0.0-1.0,
87
+ "reason": "brief explanation"
88
+ }}
89
+
90
+ Output ONLY valid JSON."""
91
+
92
+
93
+ # =============================================================================
94
+ # Goal-Aware Retriever
95
+ # =============================================================================
96
+
97
class GoalRetriever:
    """
    Retrieves memories based on goal relevance, not just semantic similarity.

    The key insight: traditional RAG asks "what's similar to this query?"
    Goal-aware retrieval asks "what's needed to achieve this objective?"

    Scoring formula:
        Score = α × semantic_similarity
              + β × goal_alignment
              + γ × recency_decay
              + δ × reference_frequency
              + ε × decision_impact
              + ζ × user_importance

    Usage:
        retriever = GoalRetriever(embedding_client, llm_client)

        # Add memories
        await retriever.add_memory(memory)

        # Retrieve with goal awareness
        results = await retriever.retrieve(
            query="What should we focus on next?",
            active_goals=current_goals,
            decision_trail=recent_decisions
        )
    """

    def __init__(
        self,
        embedding_client: EmbeddingClient,
        llm_client: Optional[LLMClient] = None,
        config: Optional[RetrievalConfig] = None
    ):
        """
        Initialize the retriever.

        Args:
            embedding_client: Client for generating embeddings
            llm_client: Optional LLM client for goal alignment scoring
            config: Retrieval configuration (defaults to ``RetrievalConfig()``)
        """
        self.embedding_client = embedding_client
        self.llm_client = llm_client
        self.config = config or RetrievalConfig()

        # Memory storage: id -> Memory, plus a parallel ndarray cache of
        # embeddings so scoring doesn't re-convert lists on every query.
        self.memories: dict[str, Memory] = {}
        self._embeddings: dict[str, np.ndarray] = {}

        # Reverse indices for fast lookup
        self._decision_memories: dict[str, list[str]] = {}  # decision_id -> memory_ids
        self._goal_memories: dict[str, list[str]] = {}  # goal_id -> memory_ids

    async def add_memory(self, memory: Memory) -> str:
        """
        Add a memory to the store, embedding it first if needed.

        Args:
            memory: Memory to add (mutated in place if it lacks an embedding)

        Returns:
            Memory ID
        """
        # Generate embedding if not present
        if memory.embedding is None:
            memory.embedding = await self.embedding_client.embed(memory.content)

        # Store memory and cache its embedding as an array
        self.memories[memory.id] = memory
        self._embeddings[memory.id] = np.array(memory.embedding)

        # Update the decision/goal reverse indices
        if memory.decision_trail_id:
            self._decision_memories.setdefault(memory.decision_trail_id, []).append(memory.id)
        if memory.intent_node_id:
            self._goal_memories.setdefault(memory.intent_node_id, []).append(memory.id)

        return memory.id

    async def retrieve(
        self,
        query: str,
        active_goals: Optional[list[IntentNode]] = None,
        decision_trail: Optional[list[DecisionTrail]] = None,
        config: Optional[RetrievalConfig] = None
    ) -> list[RetrievalResult]:
        """
        Retrieve memories using multi-factor relevance scoring.

        Args:
            query: Query string
            active_goals: Currently active goals for goal alignment
            decision_trail: Recent decisions for decision impact scoring
            config: Override default retrieval config

        Returns:
            RetrievalResults sorted by descending relevance, truncated to
            ``config.max_results``
        """
        config = config or self.config

        if not self.memories:
            return []

        # Get query embedding
        query_embedding = np.array(await self.embedding_client.embed(query))

        # Score all memories
        results = []
        for memory in self.memories.values():
            # Apply type filter
            if config.memory_types and memory.type not in config.memory_types:
                continue

            # Calculate component scores
            scores = await self._calculate_scores(
                memory=memory,
                query=query,
                query_embedding=query_embedding,
                active_goals=active_goals,
                decision_trail=decision_trail,
                config=config
            )

            # Calculate weighted total
            total_score = (
                config.semantic_weight * scores["semantic"] +
                config.goal_alignment_weight * scores["goal_alignment"] +
                config.recency_weight * scores["recency"] +
                config.reference_weight * scores["reference"] +
                config.decision_impact_weight * scores["decision_impact"]
            )

            # Flat bonus for user-marked items (the ζ term; currently fixed)
            if memory.user_marked:
                total_score += 0.1

            if total_score >= config.min_score:
                results.append(RetrievalResult(
                    memory=memory,
                    score=total_score,
                    score_breakdown=scores
                ))

        # Sort by score and limit
        results.sort(key=lambda r: r.score, reverse=True)
        results = results[:config.max_results]

        # BUGFIX: only mark memories that are actually returned as accessed.
        # Previously every candidate was touched on every call (even filtered
        # or below-threshold ones), which reset last_accessed store-wide and
        # destroyed the recency signal while inflating reference counts.
        for result in results:
            result.memory.access()

        logger.info("Retrieved %d memories for query", len(results))

        return results

    async def retrieve_for_goal(
        self,
        goal: IntentNode,
        config: Optional[RetrievalConfig] = None
    ) -> list[RetrievalResult]:
        """
        Retrieve all memories relevant to a specific goal.

        Args:
            goal: Goal to retrieve memories for
            config: Retrieval configuration

        Returns:
            Memories relevant to the goal
        """
        return await self.retrieve(
            query=goal.content,
            active_goals=[goal],
            config=config
        )

    async def _calculate_scores(
        self,
        memory: Memory,
        query: str,
        query_embedding: np.ndarray,
        active_goals: Optional[list[IntentNode]],
        decision_trail: Optional[list[DecisionTrail]],
        config: RetrievalConfig
    ) -> dict[str, float]:
        """Calculate all component scores for a memory.

        Returns a dict with keys: semantic, goal_alignment, recency,
        reference, decision_impact (each in [0, 1]).
        """
        scores = {}

        # 1. Semantic similarity (cosine similarity against the query)
        memory_embedding = self._embeddings.get(memory.id)
        if memory_embedding is not None:
            scores["semantic"] = self._cosine_similarity(query_embedding, memory_embedding)
        else:
            scores["semantic"] = 0.0

        # 2. Goal alignment (LLM-scored when a client is available)
        if active_goals and self.llm_client:
            scores["goal_alignment"] = await self._compute_goal_alignment(
                memory, active_goals
            )
        else:
            # Fast heuristic: check if memory is linked to any active goal
            goal_ids = [g.id for g in (active_goals or [])]
            if memory.intent_node_id and memory.intent_node_id in goal_ids:
                scores["goal_alignment"] = 0.8
            else:
                scores["goal_alignment"] = scores["semantic"] * 0.5  # Fallback

        # 3. Recency decay
        scores["recency"] = self._compute_recency_score(memory)

        # 4. Reference frequency
        scores["reference"] = self._compute_reference_score(memory)

        # 5. Decision impact
        if decision_trail:
            scores["decision_impact"] = self._compute_decision_impact(
                memory, decision_trail
            )
        else:
            scores["decision_impact"] = 0.0

        return scores

    async def _compute_goal_alignment(
        self,
        memory: Memory,
        goals: list[IntentNode]
    ) -> float:
        """Compute goal alignment score using the LLM.

        Best-effort: returns 0.0 when the client is missing, the response
        is unparseable, or the call fails.
        """
        import json  # local import kept so this block stands alone

        if not self.llm_client or not goals:
            return 0.0

        # For efficiency, only compute for top goal (could be extended)
        top_goal = goals[0]

        prompt = GOAL_ALIGNMENT_PROMPT.format(
            memory=memory.content,
            goal=top_goal.content
        )

        try:
            response = await self.llm_client.complete(
                prompt, system=GOAL_ALIGNMENT_SYSTEM
            )

            # Strip optional markdown code fences before parsing JSON
            response = response.strip()
            if response.startswith("```"):
                response = response.split("```")[1]
            if response.startswith("json"):
                response = response[4:]

            data = json.loads(response)
            # Clamp: the prompt asks for 0.0-1.0 but the model may not comply
            return max(0.0, min(1.0, float(data.get("score", 0.0))))

        except Exception as e:
            logger.warning("Failed to compute goal alignment: %s", e)
            return 0.0

    def _compute_recency_score(self, memory: Memory) -> float:
        """
        Compute recency score with decay.

        Uses exponential decay: score = e^(-λt)
        where t is days since last access and λ is the decay rate.
        """
        last = memory.last_accessed
        if last.tzinfo is None:
            # Defensive: treat naive timestamps as UTC so the aware/naive
            # subtraction below cannot raise TypeError.
            last = last.replace(tzinfo=timezone.utc)
        days_old = (datetime.now(timezone.utc) - last).total_seconds() / 86400
        decay_rate = 0.1  # Configurable
        return float(np.exp(-decay_rate * days_old))

    def _compute_reference_score(self, memory: Memory) -> float:
        """
        Compute score based on reference frequency.

        Uses log scaling to prevent high-frequency items from dominating;
        saturates at 1.0 (around e^5 - 1 references).
        """
        if memory.reference_count == 0:
            return 0.0
        return min(1.0, float(np.log1p(memory.reference_count)) / 5.0)

    def _compute_decision_impact(
        self,
        memory: Memory,
        decisions: list[DecisionTrail]
    ) -> float:
        """
        Compute how much this memory impacted decisions.

        A memory that was cited as evidence in decisions is more valuable;
        returns the fraction of decisions citing it, capped at 1.0.
        """
        impact_count = 0
        for decision in decisions:
            # Check if memory is in evidence (matched by turn id)
            for evidence in decision.evidence:
                if evidence.turn_id and memory.turn_id == evidence.turn_id:
                    impact_count += 1
                    break

        if impact_count == 0:
            return 0.0

        return min(1.0, impact_count / len(decisions))

    def _cosine_similarity(self, a: np.ndarray, b: np.ndarray) -> float:
        """Compute cosine similarity between two vectors (0.0 for zero vectors)."""
        norm_a = np.linalg.norm(a)
        norm_b = np.linalg.norm(b)

        if norm_a == 0 or norm_b == 0:
            return 0.0

        return float(np.dot(a, b) / (norm_a * norm_b))

    def get_memories_for_decision(self, decision_id: str) -> list[Memory]:
        """Get all memories linked to a decision (unknown ids yield [])."""
        memory_ids = self._decision_memories.get(decision_id, [])
        return [self.memories[mid] for mid in memory_ids if mid in self.memories]

    def get_memories_for_goal(self, goal_id: str) -> list[Memory]:
        """Get all memories linked to a goal (unknown ids yield [])."""
        memory_ids = self._goal_memories.get(goal_id, [])
        return [self.memories[mid] for mid in memory_ids if mid in self.memories]
428
+
429
+
430
+ # =============================================================================
431
+ # Context Assembler
432
+ # =============================================================================
433
+
434
class ContextAssembler:
    """
    Assembles context from retrieved memories for LLM input.

    Handles:
    - Token budget management (approximate: ~4 characters per token)
    - Priority ordering (goals, then decisions, then memories)
    - Format optimization
    """

    # Rough chars-per-token estimate used for budget enforcement.
    _CHARS_PER_TOKEN = 4

    def __init__(self, max_tokens: int = 4000):
        """
        Initialize the context assembler.

        Args:
            max_tokens: Maximum tokens for assembled context (approximate,
                enforced with a ~4 chars/token heuristic)
        """
        self.max_tokens = max_tokens

    def assemble(
        self,
        retrieved: list[RetrievalResult],
        active_goals: Optional[list[IntentNode]] = None,
        decisions: Optional[list[DecisionTrail]] = None,
        include_goals: bool = True,
        include_decisions: bool = True
    ) -> str:
        """
        Assemble context from retrieved memories and structured data.

        Args:
            retrieved: Retrieved memory results
            active_goals: Active goals to include
            decisions: Relevant decisions to include
            include_goals: Whether to include goal summary
            include_decisions: Whether to include decision summary

        Returns:
            Assembled context string, truncated to the token budget
        """
        sections = []

        # 1. Active goals section
        if include_goals and active_goals:
            sections.append(("ACTIVE GOALS", self._format_goals(active_goals)))

        # 2. Relevant decisions section
        if include_decisions and decisions:
            sections.append(("KEY DECISIONS", self._format_decisions(decisions)))

        # 3. Retrieved memories section
        if retrieved:
            sections.append(("RELEVANT CONTEXT", self._format_memories(retrieved)))

        # Assemble with headers
        output = []
        for header, content in sections:
            output.append(f"### {header} ###")
            output.append(content)
            output.append("")

        text = "\n".join(output)

        # BUGFIX: max_tokens was accepted but never enforced, so the
        # assembled context could grow without bound. Enforce the budget
        # with a coarse character-based estimate.
        budget_chars = self.max_tokens * self._CHARS_PER_TOKEN
        if len(text) > budget_chars:
            text = text[:budget_chars]

        return text

    def _format_goals(self, goals: list[IntentNode]) -> str:
        """Format goals for context, one bullet per goal with a status marker."""
        lines = []
        for goal in goals:
            status_marker = {
                "active": "🔵",
                "completed": "✅",
                "pending": "⏳"
            }.get(goal.status.value, "")

            lines.append(f"- {status_marker} [{goal.type.value}] {goal.content}")

        return "\n".join(lines)

    def _format_decisions(self, decisions: list[DecisionTrail]) -> str:
        """Format decisions for context (limited to the first five)."""
        lines = []
        for decision in decisions[:5]:  # Limit to top 5
            lines.append(f"DECISION: {decision.decision}")
            lines.append(f"  Rationale: {decision.rationale}")
            if decision.alternatives:
                alt_names = [a.option for a in decision.alternatives[:3]]
                lines.append(f"  Alternatives considered: {', '.join(alt_names)}")
            lines.append("")

        return "\n".join(lines)

    def _format_memories(self, results: list[RetrievalResult]) -> str:
        """Format retrieved memories for context, annotated with relevance."""
        lines = []
        for result in results:
            memory = result.memory

            lines.append(f"[{memory.type.value}] (relevance: {result.score:.2f})")
            lines.append(f"  {memory.content}")
            lines.append("")

        return "\n".join(lines)