alma-memory 0.5.0-py3-none-any.whl → 0.7.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- alma/__init__.py +296 -194
- alma/compression/__init__.py +33 -0
- alma/compression/pipeline.py +980 -0
- alma/confidence/__init__.py +47 -47
- alma/confidence/engine.py +540 -540
- alma/confidence/types.py +351 -351
- alma/config/loader.py +157 -157
- alma/consolidation/__init__.py +23 -23
- alma/consolidation/engine.py +678 -678
- alma/consolidation/prompts.py +84 -84
- alma/core.py +1189 -322
- alma/domains/__init__.py +30 -30
- alma/domains/factory.py +359 -359
- alma/domains/schemas.py +448 -448
- alma/domains/types.py +272 -272
- alma/events/__init__.py +75 -75
- alma/events/emitter.py +285 -284
- alma/events/storage_mixin.py +246 -246
- alma/events/types.py +126 -126
- alma/events/webhook.py +425 -425
- alma/exceptions.py +49 -49
- alma/extraction/__init__.py +31 -31
- alma/extraction/auto_learner.py +265 -264
- alma/extraction/extractor.py +420 -420
- alma/graph/__init__.py +106 -81
- alma/graph/backends/__init__.py +32 -18
- alma/graph/backends/kuzu.py +624 -0
- alma/graph/backends/memgraph.py +432 -0
- alma/graph/backends/memory.py +236 -236
- alma/graph/backends/neo4j.py +417 -417
- alma/graph/base.py +159 -159
- alma/graph/extraction.py +198 -198
- alma/graph/store.py +860 -860
- alma/harness/__init__.py +35 -35
- alma/harness/base.py +386 -386
- alma/harness/domains.py +705 -705
- alma/initializer/__init__.py +37 -37
- alma/initializer/initializer.py +418 -418
- alma/initializer/types.py +250 -250
- alma/integration/__init__.py +62 -62
- alma/integration/claude_agents.py +444 -432
- alma/integration/helena.py +423 -423
- alma/integration/victor.py +471 -471
- alma/learning/__init__.py +101 -86
- alma/learning/decay.py +878 -0
- alma/learning/forgetting.py +1446 -1446
- alma/learning/heuristic_extractor.py +390 -390
- alma/learning/protocols.py +374 -374
- alma/learning/validation.py +346 -346
- alma/mcp/__init__.py +123 -45
- alma/mcp/__main__.py +156 -156
- alma/mcp/resources.py +122 -122
- alma/mcp/server.py +955 -591
- alma/mcp/tools.py +3254 -511
- alma/observability/__init__.py +91 -0
- alma/observability/config.py +302 -0
- alma/observability/guidelines.py +170 -0
- alma/observability/logging.py +424 -0
- alma/observability/metrics.py +583 -0
- alma/observability/tracing.py +440 -0
- alma/progress/__init__.py +21 -21
- alma/progress/tracker.py +607 -607
- alma/progress/types.py +250 -250
- alma/retrieval/__init__.py +134 -53
- alma/retrieval/budget.py +525 -0
- alma/retrieval/cache.py +1304 -1061
- alma/retrieval/embeddings.py +202 -202
- alma/retrieval/engine.py +850 -366
- alma/retrieval/modes.py +365 -0
- alma/retrieval/progressive.py +560 -0
- alma/retrieval/scoring.py +344 -344
- alma/retrieval/trust_scoring.py +637 -0
- alma/retrieval/verification.py +797 -0
- alma/session/__init__.py +19 -19
- alma/session/manager.py +442 -399
- alma/session/types.py +288 -288
- alma/storage/__init__.py +101 -61
- alma/storage/archive.py +233 -0
- alma/storage/azure_cosmos.py +1259 -1048
- alma/storage/base.py +1083 -525
- alma/storage/chroma.py +1443 -1443
- alma/storage/constants.py +103 -0
- alma/storage/file_based.py +614 -619
- alma/storage/migrations/__init__.py +21 -0
- alma/storage/migrations/base.py +321 -0
- alma/storage/migrations/runner.py +323 -0
- alma/storage/migrations/version_stores.py +337 -0
- alma/storage/migrations/versions/__init__.py +11 -0
- alma/storage/migrations/versions/v1_0_0.py +373 -0
- alma/storage/migrations/versions/v1_1_0_workflow_context.py +551 -0
- alma/storage/pinecone.py +1080 -1080
- alma/storage/postgresql.py +1948 -1452
- alma/storage/qdrant.py +1306 -1306
- alma/storage/sqlite_local.py +3041 -1358
- alma/testing/__init__.py +46 -0
- alma/testing/factories.py +301 -0
- alma/testing/mocks.py +389 -0
- alma/types.py +292 -264
- alma/utils/__init__.py +19 -0
- alma/utils/tokenizer.py +521 -0
- alma/workflow/__init__.py +83 -0
- alma/workflow/artifacts.py +170 -0
- alma/workflow/checkpoint.py +311 -0
- alma/workflow/context.py +228 -0
- alma/workflow/outcomes.py +189 -0
- alma/workflow/reducers.py +393 -0
- {alma_memory-0.5.0.dist-info → alma_memory-0.7.0.dist-info}/METADATA +244 -72
- alma_memory-0.7.0.dist-info/RECORD +112 -0
- alma_memory-0.5.0.dist-info/RECORD +0 -76
- {alma_memory-0.5.0.dist-info → alma_memory-0.7.0.dist-info}/WHEEL +0 -0
- {alma_memory-0.5.0.dist-info → alma_memory-0.7.0.dist-info}/top_level.txt +0 -0
alma/utils/tokenizer.py
ADDED
@@ -0,0 +1,521 @@
"""
ALMA Token Estimation Module.

Provides accurate token counting using tiktoken for OpenAI models
and configurable token budgets per model type.

This module addresses Issue #11 (LOW-001): Token Estimation is Rough.
"""

import logging
from dataclasses import dataclass
from enum import Enum
from typing import TYPE_CHECKING, Dict, Optional

if TYPE_CHECKING:
    import tiktoken

logger = logging.getLogger(__name__)


class ModelFamily(Enum):
    """Model families with different tokenization schemes."""

    GPT4 = "gpt4"  # GPT-4, GPT-4 Turbo, GPT-4o
    GPT35 = "gpt35"  # GPT-3.5 Turbo
    CLAUDE = "claude"  # Claude 3.x models
    GEMINI = "gemini"  # Google Gemini models
    LLAMA = "llama"  # Meta Llama models
    MISTRAL = "mistral"  # Mistral models
    LOCAL = "local"  # Local/open-source models
    UNKNOWN = "unknown"  # Fallback


@dataclass
class ModelTokenBudget:
    """
    Token budget configuration for a model.

    Attributes:
        context_window: Maximum context window size for the model
        memory_budget: Recommended tokens to allocate for ALMA memories
        response_reserve: Tokens to reserve for model response
        safety_margin: Additional safety margin (percentage, 0.0-1.0)
    """

    context_window: int
    memory_budget: int
    response_reserve: int = 4096
    safety_margin: float = 0.1

    @property
    def effective_memory_budget(self) -> int:
        """Calculate effective memory budget after safety margin."""
        return int(self.memory_budget * (1 - self.safety_margin))


# Default token budgets per model
DEFAULT_TOKEN_BUDGETS: Dict[str, ModelTokenBudget] = {
    # OpenAI GPT-4 family
    "gpt-4": ModelTokenBudget(
        context_window=8192,
        memory_budget=2000,
        response_reserve=2048,
    ),
    "gpt-4-32k": ModelTokenBudget(
        context_window=32768,
        memory_budget=4000,
        response_reserve=4096,
    ),
    "gpt-4-turbo": ModelTokenBudget(
        context_window=128000,
        memory_budget=8000,
        response_reserve=4096,
    ),
    "gpt-4o": ModelTokenBudget(
        context_window=128000,
        memory_budget=8000,
        response_reserve=4096,
    ),
    "gpt-4o-mini": ModelTokenBudget(
        context_window=128000,
        memory_budget=8000,
        response_reserve=4096,
    ),
    # OpenAI GPT-3.5 family
    "gpt-3.5-turbo": ModelTokenBudget(
        context_window=16385,
        memory_budget=2000,
        response_reserve=2048,
    ),
    "gpt-3.5-turbo-16k": ModelTokenBudget(
        context_window=16385,
        memory_budget=4000,
        response_reserve=4096,
    ),
    # Anthropic Claude family
    "claude-3-opus": ModelTokenBudget(
        context_window=200000,
        memory_budget=10000,
        response_reserve=4096,
    ),
    "claude-3-sonnet": ModelTokenBudget(
        context_window=200000,
        memory_budget=8000,
        response_reserve=4096,
    ),
    "claude-3-haiku": ModelTokenBudget(
        context_window=200000,
        memory_budget=6000,
        response_reserve=4096,
    ),
    "claude-3.5-sonnet": ModelTokenBudget(
        context_window=200000,
        memory_budget=8000,
        response_reserve=4096,
    ),
    "claude-3.5-haiku": ModelTokenBudget(
        context_window=200000,
        memory_budget=6000,
        response_reserve=4096,
    ),
    # Google Gemini family
    "gemini-pro": ModelTokenBudget(
        context_window=32768,
        memory_budget=4000,
        response_reserve=4096,
    ),
    "gemini-1.5-pro": ModelTokenBudget(
        context_window=1000000,
        memory_budget=10000,
        response_reserve=8192,
    ),
    "gemini-1.5-flash": ModelTokenBudget(
        context_window=1000000,
        memory_budget=8000,
        response_reserve=8192,
    ),
    # Local/open-source models (conservative defaults)
    "llama-2-7b": ModelTokenBudget(
        context_window=4096,
        memory_budget=1000,
        response_reserve=1024,
    ),
    "llama-2-70b": ModelTokenBudget(
        context_window=4096,
        memory_budget=1000,
        response_reserve=1024,
    ),
    "llama-3-8b": ModelTokenBudget(
        context_window=8192,
        memory_budget=2000,
        response_reserve=2048,
    ),
    "llama-3-70b": ModelTokenBudget(
        context_window=8192,
        memory_budget=2000,
        response_reserve=2048,
    ),
    "mistral-7b": ModelTokenBudget(
        context_window=8192,
        memory_budget=2000,
        response_reserve=2048,
    ),
    "mixtral-8x7b": ModelTokenBudget(
        context_window=32768,
        memory_budget=4000,
        response_reserve=4096,
    ),
    # Default fallback
    "default": ModelTokenBudget(
        context_window=8192,
        memory_budget=2000,
        response_reserve=2048,
    ),
}


class TokenEstimator:
    """
    Accurate token estimation using tiktoken for OpenAI-compatible tokenization.

    For non-OpenAI models, uses model-specific approximations based on
    documented token-to-character ratios.

    Usage:
        estimator = TokenEstimator(model="gpt-4")
        token_count = estimator.count_tokens("Hello, world!")
        budget = estimator.get_token_budget()
    """

    # Tiktoken encoding cache
    _encoding_cache: Dict[str, "tiktoken.Encoding"] = {}  # type: ignore

    # Approximate tokens-per-character ratios for fallback estimation
    # These are based on documented model characteristics
    TOKENS_PER_CHAR_RATIOS: Dict[ModelFamily, float] = {
        ModelFamily.GPT4: 0.25,  # ~4 chars per token on average
        ModelFamily.GPT35: 0.25,
        ModelFamily.CLAUDE: 0.28,  # Claude tends to be slightly more token-dense
        ModelFamily.GEMINI: 0.25,
        ModelFamily.LLAMA: 0.27,  # Llama tokenizer is similar to GPT
        ModelFamily.MISTRAL: 0.27,
        ModelFamily.LOCAL: 0.25,
        ModelFamily.UNKNOWN: 0.25,
    }

    def __init__(
        self,
        model: str = "gpt-4",
        custom_budget: Optional[ModelTokenBudget] = None,
    ):
        """
        Initialize token estimator.

        Args:
            model: Model name (e.g., "gpt-4", "claude-3-sonnet", "llama-3-8b")
            custom_budget: Optional custom token budget to override defaults
        """
        self.model = model.lower()
        self.model_family = self._detect_model_family(self.model)
        self._tiktoken_available = self._check_tiktoken()
        self._encoding = self._get_encoding() if self._tiktoken_available else None
        self._custom_budget = custom_budget

    def _check_tiktoken(self) -> bool:
        """Check if tiktoken is available."""
        try:
            import tiktoken  # noqa: F401

            return True
        except ImportError:
            logger.debug("tiktoken not available, using approximate token estimation")
            return False

    def _detect_model_family(self, model: str) -> ModelFamily:
        """Detect the model family from model name."""
        model_lower = model.lower()

        if any(x in model_lower for x in ["gpt-4", "gpt4"]):
            return ModelFamily.GPT4
        elif any(x in model_lower for x in ["gpt-3.5", "gpt35"]):
            return ModelFamily.GPT35
        elif "claude" in model_lower:
            return ModelFamily.CLAUDE
        elif "gemini" in model_lower:
            return ModelFamily.GEMINI
        elif "llama" in model_lower:
            return ModelFamily.LLAMA
        elif "mistral" in model_lower or "mixtral" in model_lower:
            return ModelFamily.MISTRAL
        else:
            return ModelFamily.UNKNOWN

    def _get_encoding(self) -> Optional["tiktoken.Encoding"]:  # type: ignore
        """Get tiktoken encoding for the model."""
        if not self._tiktoken_available:
            return None

        import tiktoken

        # Map model families to tiktoken encodings
        encoding_map = {
            ModelFamily.GPT4: "cl100k_base",
            ModelFamily.GPT35: "cl100k_base",
            ModelFamily.CLAUDE: "cl100k_base",  # Claude uses similar tokenization
            ModelFamily.GEMINI: "cl100k_base",  # Approximate
            ModelFamily.LLAMA: "cl100k_base",  # Approximate
            ModelFamily.MISTRAL: "cl100k_base",  # Approximate
            ModelFamily.LOCAL: "cl100k_base",
            ModelFamily.UNKNOWN: "cl100k_base",
        }

        encoding_name = encoding_map.get(self.model_family, "cl100k_base")

        # Use cached encoding if available
        if encoding_name not in self._encoding_cache:
            try:
                self._encoding_cache[encoding_name] = tiktoken.get_encoding(
                    encoding_name
                )
            except Exception as e:
                logger.warning(f"Failed to get tiktoken encoding: {e}")
                return None

        return self._encoding_cache[encoding_name]

    def count_tokens(self, text: str) -> int:
        """
        Count tokens in text using tiktoken or fallback estimation.

        Args:
            text: Text to count tokens for

        Returns:
            Estimated token count
        """
        if not text:
            return 0

        # Use tiktoken if available
        if self._encoding is not None:
            try:
                return len(self._encoding.encode(text))
            except Exception as e:
                logger.debug(f"tiktoken encoding failed, using fallback: {e}")

        # Fallback: character-based estimation
        ratio = self.TOKENS_PER_CHAR_RATIOS.get(self.model_family, 0.25)
        return int(len(text) * ratio)

    def count_tokens_for_messages(
        self,
        messages: list[dict[str, str]],
    ) -> int:
        """
        Count tokens for a list of messages (chat format).

        Accounts for message formatting overhead.

        Args:
            messages: List of message dicts with "role" and "content" keys

        Returns:
            Estimated token count including formatting overhead
        """
        total = 0

        # Per-message overhead varies by model
        # GPT-4/3.5: ~4 tokens per message for formatting
        # Claude: ~3 tokens per message
        overhead_per_message = (
            4 if self.model_family in (ModelFamily.GPT4, ModelFamily.GPT35) else 3
        )

        for message in messages:
            content = message.get("content", "")
            total += self.count_tokens(content)
            total += overhead_per_message

        # Add reply priming overhead
        total += 3

        return total

    def get_token_budget(self) -> ModelTokenBudget:
        """
        Get the token budget for the current model.

        Returns custom budget if set, otherwise returns default for model.
        """
        if self._custom_budget:
            return self._custom_budget

        # Try exact model match first
        if self.model in DEFAULT_TOKEN_BUDGETS:
            return DEFAULT_TOKEN_BUDGETS[self.model]

        # Try partial matches - prefer longer key matches
        best_match = None
        best_match_len = 0

        for key, budget in DEFAULT_TOKEN_BUDGETS.items():
            if key == "default":
                continue
            if key in self.model:
                if len(key) > best_match_len:
                    best_match = budget
                    best_match_len = len(key)
            elif self.model in key:
                if len(self.model) > best_match_len:
                    best_match = budget
                    best_match_len = len(self.model)

        if best_match:
            return best_match

        # Return default
        return DEFAULT_TOKEN_BUDGETS["default"]

    def truncate_to_token_limit(
        self,
        text: str,
        max_tokens: int,
        suffix: str = "\n[truncated]",
    ) -> str:
        """
        Truncate text to fit within a token limit.

        Args:
            text: Text to truncate
            max_tokens: Maximum tokens allowed
            suffix: Suffix to append if truncated

        Returns:
            Truncated text with suffix if it exceeded the limit
        """
        current_tokens = self.count_tokens(text)

        if current_tokens <= max_tokens:
            return text

        # Reserve tokens for suffix
        suffix_tokens = self.count_tokens(suffix)
        target_tokens = max_tokens - suffix_tokens

        if target_tokens <= 0:
            return suffix

        # Binary search for the right truncation point
        if self._encoding is not None:
            try:
                tokens = self._encoding.encode(text)
                truncated_tokens = tokens[:target_tokens]
                return self._encoding.decode(truncated_tokens) + suffix
            except Exception:
                pass

        # Fallback: character-based truncation
        ratio = self.TOKENS_PER_CHAR_RATIOS.get(self.model_family, 0.25)
        target_chars = int(target_tokens / ratio)
        return text[:target_chars] + suffix

    def estimate_remaining_budget(
        self,
        used_tokens: int,
        include_response_reserve: bool = True,
    ) -> int:
        """
        Estimate remaining token budget for memories.

        Args:
            used_tokens: Tokens already used in context
            include_response_reserve: Whether to subtract response reserve

        Returns:
            Remaining tokens available for memories
        """
        budget = self.get_token_budget()
        available = budget.context_window - used_tokens

        if include_response_reserve:
            available -= budget.response_reserve

        # Apply safety margin
        available = int(available * (1 - budget.safety_margin))

        return max(0, min(available, budget.effective_memory_budget))


def get_token_estimator(
    model: str = "gpt-4",
    custom_budget: Optional[ModelTokenBudget] = None,
) -> TokenEstimator:
    """
    Factory function to create a TokenEstimator.

    Args:
        model: Model name
        custom_budget: Optional custom token budget

    Returns:
        Configured TokenEstimator instance
    """
    return TokenEstimator(model=model, custom_budget=custom_budget)


def get_default_token_budget(model: str = "gpt-4") -> ModelTokenBudget:
    """
    Get the default token budget for a model.

    Args:
        model: Model name

    Returns:
        Token budget configuration
    """
    model_lower = model.lower()

    # Try exact match
    if model_lower in DEFAULT_TOKEN_BUDGETS:
        return DEFAULT_TOKEN_BUDGETS[model_lower]

    # Try partial match - prefer longer key matches to avoid e.g. "gpt-4" matching "gpt-4o"
    best_match = None
    best_match_len = 0

    for key, budget in DEFAULT_TOKEN_BUDGETS.items():
        if key == "default":
            continue
        if key in model_lower:
            if len(key) > best_match_len:
                best_match = budget
                best_match_len = len(key)
        elif model_lower in key:
            if len(model_lower) > best_match_len:
                best_match = budget
                best_match_len = len(model_lower)

    if best_match:
        return best_match

    return DEFAULT_TOKEN_BUDGETS["default"]


def estimate_tokens_simple(text: str) -> int:
    """
    Simple token estimation without model context.

    Uses the standard ~4 characters per token approximation.
    For more accurate estimation, use TokenEstimator.

    Args:
        text: Text to estimate tokens for

    Returns:
        Approximate token count
    """
    if not text:
        return 0
    # Standard approximation: 1 token ~ 4 characters
    return max(1, len(text) // 4)
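
A minimal usage sketch of the new tokenizer module, using only the classes and signatures visible in the hunk above; the model choice and sample strings are illustrative, not taken from the package:

from alma.utils.tokenizer import TokenEstimator, estimate_tokens_simple

# Counting and budgeting against a known model entry ("gpt-4o" is in DEFAULT_TOKEN_BUDGETS).
estimator = TokenEstimator(model="gpt-4o")
text = "Summarize the retrieved memories for the current task."

tokens = estimator.count_tokens(text)       # tiktoken count, or ~0.25 tokens/char fallback
budget = estimator.get_token_budget()       # ModelTokenBudget(context_window=128000, ...)
remaining = estimator.estimate_remaining_budget(used_tokens=1500)

# Clip an oversized memory block; the "\n[truncated]" suffix is appended only when clipping occurs.
clipped = estimator.truncate_to_token_limit(text * 100, max_tokens=200)

# Quick model-agnostic estimate (~4 characters per token).
approx = estimate_tokens_simple(text)

Note that when tiktoken is not installed the estimator silently degrades to the per-family character ratios shown in TOKENS_PER_CHAR_RATIOS.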
alma/workflow/__init__.py
ADDED
@@ -0,0 +1,83 @@
"""
ALMA Workflow Module.

Provides workflow context, checkpointing, state management, and artifact
linking for integration with workflow orchestration systems like AGtestari.

Sprint 1 Task 1.7
"""

# Context and scoping
# Artifact linking
from alma.workflow.artifacts import (
    ArtifactRef,
    ArtifactType,
    link_artifact,
)

# Checkpoints for crash recovery
from alma.workflow.checkpoint import (
    DEFAULT_MAX_STATE_SIZE,
    Checkpoint,
    CheckpointManager,
)
from alma.workflow.context import (
    RetrievalScope,
    WorkflowContext,
)

# Workflow outcomes for learning
from alma.workflow.outcomes import (
    WorkflowOutcome,
    WorkflowResult,
)

# State reducers for parallel merge
from alma.workflow.reducers import (
    BUILTIN_REDUCERS,
    AppendReducer,
    FirstValueReducer,
    LastValueReducer,
    MaxReducer,
    MergeDictReducer,
    MinReducer,
    ReducerConfig,
    StateMerger,
    StateReducer,
    SumReducer,
    UnionReducer,
    get_reducer,
    merge_states,
)

__all__ = [
    # Context
    "RetrievalScope",
    "WorkflowContext",
    # Checkpoints
    "Checkpoint",
    "CheckpointManager",
    "DEFAULT_MAX_STATE_SIZE",
    # Outcomes
    "WorkflowOutcome",
    "WorkflowResult",
    # Artifacts
    "ArtifactRef",
    "ArtifactType",
    "link_artifact",
    # Reducers
    "StateReducer",
    "AppendReducer",
    "MergeDictReducer",
    "LastValueReducer",
    "FirstValueReducer",
    "SumReducer",
    "MaxReducer",
    "MinReducer",
    "UnionReducer",
    "ReducerConfig",
    "StateMerger",
    "get_reducer",
    "merge_states",
    "BUILTIN_REDUCERS",
]
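
For orientation, a consumer-side sketch of the surface this __init__ re-exports; only the names come from the hunk above, and nothing is assumed about their signatures or behavior:

# These imports mirror the __all__ list above; how the objects are constructed
# and called is defined in the alma.workflow submodules, which are not shown in this hunk.
from alma.workflow import (
    CheckpointManager,
    RetrievalScope,
    WorkflowContext,
    WorkflowOutcome,
    merge_states,
)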