alma_memory-0.5.1-py3-none-any.whl → alma_memory-0.7.0-py3-none-any.whl
- alma/__init__.py +296 -226
- alma/compression/__init__.py +33 -0
- alma/compression/pipeline.py +980 -0
- alma/confidence/__init__.py +47 -47
- alma/confidence/engine.py +540 -540
- alma/confidence/types.py +351 -351
- alma/config/loader.py +157 -157
- alma/consolidation/__init__.py +23 -23
- alma/consolidation/engine.py +678 -678
- alma/consolidation/prompts.py +84 -84
- alma/core.py +1189 -430
- alma/domains/__init__.py +30 -30
- alma/domains/factory.py +359 -359
- alma/domains/schemas.py +448 -448
- alma/domains/types.py +272 -272
- alma/events/__init__.py +75 -75
- alma/events/emitter.py +285 -284
- alma/events/storage_mixin.py +246 -246
- alma/events/types.py +126 -126
- alma/events/webhook.py +425 -425
- alma/exceptions.py +49 -49
- alma/extraction/__init__.py +31 -31
- alma/extraction/auto_learner.py +265 -265
- alma/extraction/extractor.py +420 -420
- alma/graph/__init__.py +106 -106
- alma/graph/backends/__init__.py +32 -32
- alma/graph/backends/kuzu.py +624 -624
- alma/graph/backends/memgraph.py +432 -432
- alma/graph/backends/memory.py +236 -236
- alma/graph/backends/neo4j.py +417 -417
- alma/graph/base.py +159 -159
- alma/graph/extraction.py +198 -198
- alma/graph/store.py +860 -860
- alma/harness/__init__.py +35 -35
- alma/harness/base.py +386 -386
- alma/harness/domains.py +705 -705
- alma/initializer/__init__.py +37 -37
- alma/initializer/initializer.py +418 -418
- alma/initializer/types.py +250 -250
- alma/integration/__init__.py +62 -62
- alma/integration/claude_agents.py +444 -444
- alma/integration/helena.py +423 -423
- alma/integration/victor.py +471 -471
- alma/learning/__init__.py +101 -86
- alma/learning/decay.py +878 -0
- alma/learning/forgetting.py +1446 -1446
- alma/learning/heuristic_extractor.py +390 -390
- alma/learning/protocols.py +374 -374
- alma/learning/validation.py +346 -346
- alma/mcp/__init__.py +123 -45
- alma/mcp/__main__.py +156 -156
- alma/mcp/resources.py +122 -122
- alma/mcp/server.py +955 -591
- alma/mcp/tools.py +3254 -509
- alma/observability/__init__.py +91 -84
- alma/observability/config.py +302 -302
- alma/observability/guidelines.py +170 -0
- alma/observability/logging.py +424 -424
- alma/observability/metrics.py +583 -583
- alma/observability/tracing.py +440 -440
- alma/progress/__init__.py +21 -21
- alma/progress/tracker.py +607 -607
- alma/progress/types.py +250 -250
- alma/retrieval/__init__.py +134 -53
- alma/retrieval/budget.py +525 -0
- alma/retrieval/cache.py +1304 -1061
- alma/retrieval/embeddings.py +202 -202
- alma/retrieval/engine.py +850 -427
- alma/retrieval/modes.py +365 -0
- alma/retrieval/progressive.py +560 -0
- alma/retrieval/scoring.py +344 -344
- alma/retrieval/trust_scoring.py +637 -0
- alma/retrieval/verification.py +797 -0
- alma/session/__init__.py +19 -19
- alma/session/manager.py +442 -399
- alma/session/types.py +288 -288
- alma/storage/__init__.py +101 -90
- alma/storage/archive.py +233 -0
- alma/storage/azure_cosmos.py +1259 -1259
- alma/storage/base.py +1083 -583
- alma/storage/chroma.py +1443 -1443
- alma/storage/constants.py +103 -103
- alma/storage/file_based.py +614 -614
- alma/storage/migrations/__init__.py +21 -21
- alma/storage/migrations/base.py +321 -321
- alma/storage/migrations/runner.py +323 -323
- alma/storage/migrations/version_stores.py +337 -337
- alma/storage/migrations/versions/__init__.py +11 -11
- alma/storage/migrations/versions/v1_0_0.py +373 -373
- alma/storage/migrations/versions/v1_1_0_workflow_context.py +551 -0
- alma/storage/pinecone.py +1080 -1080
- alma/storage/postgresql.py +1948 -1559
- alma/storage/qdrant.py +1306 -1306
- alma/storage/sqlite_local.py +3041 -1457
- alma/testing/__init__.py +46 -46
- alma/testing/factories.py +301 -301
- alma/testing/mocks.py +389 -389
- alma/types.py +292 -264
- alma/utils/__init__.py +19 -0
- alma/utils/tokenizer.py +521 -0
- alma/workflow/__init__.py +83 -0
- alma/workflow/artifacts.py +170 -0
- alma/workflow/checkpoint.py +311 -0
- alma/workflow/context.py +228 -0
- alma/workflow/outcomes.py +189 -0
- alma/workflow/reducers.py +393 -0
- {alma_memory-0.5.1.dist-info → alma_memory-0.7.0.dist-info}/METADATA +210 -72
- alma_memory-0.7.0.dist-info/RECORD +112 -0
- alma_memory-0.5.1.dist-info/RECORD +0 -93
- {alma_memory-0.5.1.dist-info → alma_memory-0.7.0.dist-info}/WHEEL +0 -0
- {alma_memory-0.5.1.dist-info → alma_memory-0.7.0.dist-info}/top_level.txt +0 -0
alma/extraction/extractor.py
CHANGED
@@ -1,420 +1,420 @@

````python
"""
ALMA Fact Extraction Module.

LLM-powered extraction of facts, preferences, and learnings from conversations.
This bridges the gap between Mem0's automatic extraction and ALMA's explicit learning.
"""

import logging
from abc import ABC, abstractmethod
from dataclasses import dataclass
from enum import Enum
from typing import Any, Dict, List, Optional

logger = logging.getLogger(__name__)


class FactType(Enum):
    """Types of facts that can be extracted from conversations."""

    HEURISTIC = "heuristic"  # Strategy that worked
    ANTI_PATTERN = "anti_pattern"  # What NOT to do
    PREFERENCE = "preference"  # User preference
    DOMAIN_KNOWLEDGE = "domain_knowledge"  # Factual information
    OUTCOME = "outcome"  # Task result


@dataclass
class ExtractedFact:
    """A fact extracted from conversation."""

    fact_type: FactType
    content: str
    confidence: float  # 0.0 to 1.0
    source_text: str  # Original text this was extracted from
    metadata: Dict[str, Any] = None

    # For heuristics/anti-patterns
    condition: Optional[str] = None  # When does this apply?
    strategy: Optional[str] = None  # What to do?

    # For preferences
    category: Optional[str] = None

    # For domain knowledge
    domain: Optional[str] = None


@dataclass
class ExtractionResult:
    """Result of fact extraction from a conversation."""

    facts: List[ExtractedFact]
    raw_response: str  # LLM's raw response for debugging
    tokens_used: int
    extraction_time_ms: int


class FactExtractor(ABC):
    """Abstract base class for fact extraction."""

    @abstractmethod
    def extract(
        self,
        messages: List[Dict[str, str]],
        agent_context: Optional[str] = None,
        existing_facts: Optional[List[str]] = None,
    ) -> ExtractionResult:
        """
        Extract facts from a conversation.

        Args:
            messages: List of {"role": "user"|"assistant", "content": "..."}
            agent_context: Optional context about the agent's domain
            existing_facts: Optional list of already-known facts to avoid duplicates

        Returns:
            ExtractionResult with extracted facts
        """
        pass


class LLMFactExtractor(FactExtractor):
    """
    LLM-powered fact extraction.

    Uses structured prompting to extract facts, preferences, and learnings
    from conversations. Supports OpenAI, Anthropic, and local models.
    """

    EXTRACTION_PROMPT = """You are a fact extraction system for an AI agent memory architecture.

Analyze the following conversation and extract facts worth remembering.

IMPORTANT: Only extract facts that are:
1. Specific and actionable (not vague observations)
2. Likely to be useful in future similar situations
3. Not already in the existing facts list

Categorize each fact as one of:
- HEURISTIC: A strategy or approach that worked well
- ANTI_PATTERN: Something that failed or should be avoided
- PREFERENCE: A user preference or constraint
- DOMAIN_KNOWLEDGE: A factual piece of information about the domain
- OUTCOME: The result of a specific task

For HEURISTIC and ANTI_PATTERN, also extract:
- condition: When does this apply?
- strategy: What to do (or not do)?

For PREFERENCE, extract:
- category: What type of preference (communication, code_style, workflow, etc.)

For DOMAIN_KNOWLEDGE, extract:
- domain: What knowledge domain this belongs to

{agent_context}

{existing_facts_section}

CONVERSATION:
{conversation}

Respond in JSON format:
```json
{{
  "facts": [
    {{
      "fact_type": "HEURISTIC|ANTI_PATTERN|PREFERENCE|DOMAIN_KNOWLEDGE|OUTCOME",
      "content": "The main fact statement",
      "confidence": 0.0-1.0,
      "condition": "optional - when this applies",
      "strategy": "optional - what to do",
      "category": "optional - preference category",
      "domain": "optional - knowledge domain"
    }}
  ]
}}
```

If no facts worth extracting, return: {{"facts": []}}
"""

    def __init__(
        self,
        provider: str = "openai",
        model: str = "gpt-4o-mini",
        api_key: Optional[str] = None,
        temperature: float = 0.1,
    ):
        """
        Initialize LLM fact extractor.

        Args:
            provider: "openai", "anthropic", or "local"
            model: Model name/identifier
            api_key: API key (or use environment variable)
            temperature: LLM temperature for extraction
        """
        self.provider = provider
        self.model = model
        self.api_key = api_key
        self.temperature = temperature
        self._client = None

    def _get_client(self):
        """Lazy initialization of LLM client."""
        if self._client is None:
            if self.provider == "openai":
                from openai import OpenAI

                self._client = OpenAI(api_key=self.api_key)
            elif self.provider == "anthropic":
                from anthropic import Anthropic

                self._client = Anthropic(api_key=self.api_key)
            else:
                raise ValueError(f"Unsupported provider: {self.provider}")
        return self._client

    def extract(
        self,
        messages: List[Dict[str, str]],
        agent_context: Optional[str] = None,
        existing_facts: Optional[List[str]] = None,
    ) -> ExtractionResult:
        """Extract facts from conversation using LLM."""
        import time

        start_time = time.time()

        # Format conversation
        conversation = "\n".join(
            f"{msg['role'].upper()}: {msg['content']}" for msg in messages
        )

        # Build prompt
        agent_context_section = ""
        if agent_context:
            agent_context_section = f"\nAGENT CONTEXT:\n{agent_context}\n"

        existing_facts_section = ""
        if existing_facts:
            facts_list = "\n".join(f"- {f}" for f in existing_facts)
            existing_facts_section = (
                f"\nEXISTING FACTS (do not duplicate):\n{facts_list}\n"
            )

        prompt = self.EXTRACTION_PROMPT.format(
            agent_context=agent_context_section,
            existing_facts_section=existing_facts_section,
            conversation=conversation,
        )

        # Call LLM
        client = self._get_client()
        tokens_used = 0

        if self.provider == "openai":
            response = client.chat.completions.create(
                model=self.model,
                messages=[{"role": "user", "content": prompt}],
                temperature=self.temperature,
            )
            raw_response = response.choices[0].message.content
            tokens_used = response.usage.total_tokens if response.usage else 0

        elif self.provider == "anthropic":
            response = client.messages.create(
                model=self.model,
                max_tokens=2000,
                messages=[{"role": "user", "content": prompt}],
            )
            raw_response = response.content[0].text
            tokens_used = response.usage.input_tokens + response.usage.output_tokens

        # Parse response
        facts = self._parse_response(raw_response, conversation)

        extraction_time_ms = int((time.time() - start_time) * 1000)

        return ExtractionResult(
            facts=facts,
            raw_response=raw_response,
            tokens_used=tokens_used,
            extraction_time_ms=extraction_time_ms,
        )

    def _parse_response(
        self,
        raw_response: str,
        source_text: str,
    ) -> List[ExtractedFact]:
        """Parse LLM response into ExtractedFact objects."""
        import json
        import re

        # Extract JSON from response (handle markdown code blocks)
        json_match = re.search(r"```json\s*(.*?)\s*```", raw_response, re.DOTALL)
        if json_match:
            json_str = json_match.group(1)
        else:
            # Try to find raw JSON
            json_match = re.search(r"\{.*\}", raw_response, re.DOTALL)
            if json_match:
                json_str = json_match.group(0)
            else:
                logger.warning(
                    f"Could not parse JSON from response: {raw_response[:200]}"
                )
                return []

        try:
            data = json.loads(json_str)
        except json.JSONDecodeError as e:
            logger.warning(f"JSON parse error: {e}")
            return []

        facts = []
        for item in data.get("facts", []):
            try:
                fact_type = FactType[item["fact_type"].upper()]
                facts.append(
                    ExtractedFact(
                        fact_type=fact_type,
                        content=item["content"],
                        confidence=float(item.get("confidence", 0.7)),
                        source_text=source_text[:500],  # Truncate for storage
                        condition=item.get("condition"),
                        strategy=item.get("strategy"),
                        category=item.get("category"),
                        domain=item.get("domain"),
                    )
                )
            except (KeyError, ValueError) as e:
                logger.warning(f"Could not parse fact: {item}, error: {e}")
                continue

        return facts


class RuleBasedExtractor(FactExtractor):
    """
    Rule-based fact extraction for offline/free usage.

    Uses pattern matching and heuristics instead of LLM calls.
    Less accurate but free and fast.
    """

    # Patterns that indicate different fact types
    HEURISTIC_PATTERNS = [
        r"(?:worked|succeeded|fixed|solved|helped).*(?:by|using|with)",
        r"(?:better|best|good)\s+(?:to|approach|way|strategy)",
        r"(?:should|always|recommend).*(?:use|try|do)",
    ]

    ANTI_PATTERN_PATTERNS = [
        r"(?:don't|do not|never|avoid).*(?:use|do|try)",
        r"(?:failed|broke|caused|error).*(?:because|when|due)",
        r"(?:bad|wrong|incorrect)\s+(?:to|approach|way)",
    ]

    PREFERENCE_PATTERNS = [
        r"(?:i|user)\s+(?:prefer|like|want|need)",
        r"(?:always|never).*(?:for me|i want)",
    ]

    def extract(
        self,
        messages: List[Dict[str, str]],
        agent_context: Optional[str] = None,
        existing_facts: Optional[List[str]] = None,
    ) -> ExtractionResult:
        """Extract facts using pattern matching."""
        import re
        import time

        start_time = time.time()
        facts = []

        for msg in messages:
            content = msg["content"].lower()

            # Check for heuristics
            for pattern in self.HEURISTIC_PATTERNS:
                if re.search(pattern, content, re.IGNORECASE):
                    facts.append(
                        ExtractedFact(
                            fact_type=FactType.HEURISTIC,
                            content=msg["content"][:200],
                            confidence=0.5,  # Lower confidence for rule-based
                            source_text=msg["content"],
                        )
                    )
                    break

            # Check for anti-patterns
            for pattern in self.ANTI_PATTERN_PATTERNS:
                if re.search(pattern, content, re.IGNORECASE):
                    facts.append(
                        ExtractedFact(
                            fact_type=FactType.ANTI_PATTERN,
                            content=msg["content"][:200],
                            confidence=0.5,
                            source_text=msg["content"],
                        )
                    )
                    break

            # Check for preferences
            for pattern in self.PREFERENCE_PATTERNS:
                if re.search(pattern, content, re.IGNORECASE):
                    facts.append(
                        ExtractedFact(
                            fact_type=FactType.PREFERENCE,
                            content=msg["content"][:200],
                            confidence=0.5,
                            source_text=msg["content"],
                        )
                    )
                    break

        extraction_time_ms = int((time.time() - start_time) * 1000)

        return ExtractionResult(
            facts=facts,
            raw_response="rule-based extraction",
            tokens_used=0,
            extraction_time_ms=extraction_time_ms,
        )


def create_extractor(
    provider: str = "auto",
    **kwargs,
) -> FactExtractor:
    """
    Factory function to create appropriate extractor.

    Args:
        provider: "openai", "anthropic", "local", "rule-based", or "auto"
        **kwargs: Additional arguments for the extractor

    Returns:
        Configured FactExtractor instance
    """
    if provider == "auto":
        # Try to use LLM if API key is available
        import os

        if os.environ.get("OPENAI_API_KEY"):
            provider = "openai"
        elif os.environ.get("ANTHROPIC_API_KEY"):
            provider = "anthropic"
        else:
            provider = "rule-based"

    if provider == "rule-based":
        return RuleBasedExtractor()
    else:
        return LLMFactExtractor(provider=provider, **kwargs)
````
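
For orientation, a minimal usage sketch built only from the names defined in this file (`create_extractor`, `FactExtractor.extract`, `ExtractionResult`); the sample conversation is illustrative, not from the package:

```python
from alma.extraction.extractor import create_extractor

# "auto" resolves to "openai" or "anthropic" when the matching API key is set
# in the environment, otherwise falls back to the offline RuleBasedExtractor.
extractor = create_extractor(provider="auto")

result = extractor.extract(
    messages=[
        {"role": "user", "content": "I prefer short answers with code samples."},
        {"role": "assistant", "content": "Understood. Last time it helped to pin numpy with pip."},
    ],
    agent_context="Coding assistant for Python projects",
)

# Without an API key, the rule-based patterns match a PREFERENCE fact
# ("i prefer ...") and a HEURISTIC fact ("helped ... with"), each at 0.5 confidence.
for fact in result.facts:
    print(fact.fact_type.value, fact.confidence, fact.content)
print(f"tokens={result.tokens_used}, took {result.extraction_time_ms} ms")
```

Because both backends return the same `ExtractionResult` contract, callers do not need to know which extractor actually ran.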