PyPI - keep-skill - Versions diffs - 0.1.0__py3-none-any.whl → 0.3.0__py3-none-any.whl - Mend

keep-skill 0.1.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) [hide / show]

keep/__init__.py +3 -6
keep/api.py +1052 -145
keep/cli.py +705 -132
keep/config.py +172 -41
keep/context.py +1 -125
keep/document_store.py +908 -0
keep/errors.py +33 -0
keep/indexing.py +1 -1
keep/logging_config.py +34 -3
keep/paths.py +81 -17
keep/pending_summaries.py +52 -40
keep/providers/embedding_cache.py +59 -46
keep/providers/embeddings.py +43 -13
keep/providers/mlx.py +23 -21
keep/store.py +169 -25
keep_skill-0.3.0.dist-info/METADATA +218 -0
keep_skill-0.3.0.dist-info/RECORD +28 -0
keep_skill-0.1.0.dist-info/METADATA +0 -290
keep_skill-0.1.0.dist-info/RECORD +0 -26
{keep_skill-0.1.0.dist-info → keep_skill-0.3.0.dist-info}/WHEEL +0 -0
{keep_skill-0.1.0.dist-info → keep_skill-0.3.0.dist-info}/entry_points.txt +0 -0
{keep_skill-0.1.0.dist-info → keep_skill-0.3.0.dist-info}/licenses/LICENSE +0 -0

keep/config.py CHANGED Viewed

@@ -11,7 +11,7 @@ import tomllib
 from dataclasses import dataclass, field
 from datetime import datetime, timezone
 from pathlib import Path
-from typing import Any
+from typing import Any, Optional
 # tomli_w for writing TOML (tomllib is read-only)
 try:
@@ -21,7 +21,7 @@ except ImportError:
 CONFIG_FILENAME = "keep.toml"
-CONFIG_VERSION = 1
+CONFIG_VERSION = 3  # Bumped for document versioning support
 @dataclass
@@ -31,23 +31,72 @@ class ProviderConfig:
     params: dict[str, Any] = field(default_factory=dict)
+@dataclass
+class EmbeddingIdentity:
+    """
+    Identity of an embedding model for compatibility checking.
+    Two embeddings are compatible only if they have the same identity.
+    Different models, even with the same dimension, produce incompatible vectors.
+    """
+    provider: str  # e.g., "sentence-transformers", "openai"
+    model: str     # e.g., "all-MiniLM-L6-v2", "text-embedding-3-small"
+    dimension: int # e.g., 384, 1536
+    @property
+    def key(self) -> str:
+        """
+        Short key for collection naming.
+        Format: {provider}_{model_slug}
+        e.g., "st_MiniLM_L6_v2", "openai_3_small"
+        """
+        # Simplify model name for use in collection names
+        model_slug = self.model.replace("-", "_").replace(".", "_")
+        # Remove common prefixes
+        for prefix in ["all_", "text_embedding_"]:
+            if model_slug.lower().startswith(prefix):
+                model_slug = model_slug[len(prefix):]
+        # Shorten provider names
+        provider_short = {
+            "sentence-transformers": "st",
+            "openai": "openai",
+            "gemini": "gemini",
+            "ollama": "ollama",
+        }.get(self.provider, self.provider[:6])
+        return f"{provider_short}_{model_slug}"
 @dataclass
 class StoreConfig:
     """Complete store configuration."""
-    path: Path
+    path: Path  # Store path (where data lives)
+    config_dir: Optional[Path] = None  # Where config was loaded from (may differ from path)
+    store_path: Optional[str] = None  # Explicit store.path from config file (raw string)
     version: int = CONFIG_VERSION
     created: str = field(default_factory=lambda: datetime.now(timezone.utc).isoformat())
     # Provider configurations
     embedding: ProviderConfig = field(default_factory=lambda: ProviderConfig("sentence-transformers"))
     summarization: ProviderConfig = field(default_factory=lambda: ProviderConfig("truncate"))
     document: ProviderConfig = field(default_factory=lambda: ProviderConfig("composite"))
+    # Embedding identity (set after first use, used for validation)
+    embedding_identity: Optional[EmbeddingIdentity] = None
+    # Default tags applied to all update/remember operations
+    default_tags: dict[str, str] = field(default_factory=dict)
+    # Maximum length for summaries (used for smart remember and validation)
+    max_summary_length: int = 500
     @property
     def config_path(self) -> Path:
         """Path to the TOML config file."""
-        return self.path / CONFIG_FILENAME
+        config_location = self.config_dir if self.config_dir else self.path
+        return config_location / CONFIG_FILENAME
     def exists(self) -> bool:
         """Check if config file exists."""
         return self.config_path.exists()
@@ -182,9 +231,13 @@ def detect_default_providers() -> dict[str, ProviderConfig]:
                     params["model"] = ms_model
                 embedding_provider = ProviderConfig("gemini", params)
-    # Fall back to sentence-transformers (local, always works)
+    # Fall back to local embedding (prefer MPS-accelerated on Apple Silicon)
     if embedding_provider is None:
-        embedding_provider = ProviderConfig("sentence-transformers")
+        if is_apple_silicon:
+            # Use sentence-transformers with MPS acceleration (no auth required)
+            embedding_provider = ProviderConfig("mlx", {"model": "all-MiniLM-L6-v2"})
+        else:
+            embedding_provider = ProviderConfig("sentence-transformers")
     providers["embedding"] = embedding_provider
@@ -225,99 +278,177 @@ def detect_default_providers() -> dict[str, ProviderConfig]:
     return providers
-def create_default_config(store_path: Path) -> StoreConfig:
-    """Create a new config with auto-detected defaults."""
+def create_default_config(config_dir: Path, store_path: Optional[Path] = None) -> StoreConfig:
+    """
+    Create a new config with auto-detected defaults.
+    Args:
+        config_dir: Directory where keep.toml will be saved
+        store_path: Optional explicit store location (if different from config_dir)
+    """
     providers = detect_default_providers()
+    # If store_path is provided and different from config_dir, record it
+    store_path_str = None
+    actual_store = config_dir
+    if store_path and store_path.resolve() != config_dir.resolve():
+        store_path_str = str(store_path)
+        actual_store = store_path
     return StoreConfig(
-        path=store_path,
+        path=actual_store,
+        config_dir=config_dir,
+        store_path=store_path_str,
         embedding=providers["embedding"],
         summarization=providers["summarization"],
         document=providers["document"],
     )
-def load_config(store_path: Path) -> StoreConfig:
+def load_config(config_dir: Path) -> StoreConfig:
     """
-    Load configuration from a store directory.
+    Load configuration from a config directory.
+    The config_dir is where keep.toml lives. The actual store location
+    may be different if store.path is set in the config.
+    Args:
+        config_dir: Directory containing keep.toml
     Raises:
         FileNotFoundError: If config doesn't exist
         ValueError: If config is invalid
     """
-    config_path = store_path / CONFIG_FILENAME
+    config_path = config_dir / CONFIG_FILENAME
     if not config_path.exists():
         raise FileNotFoundError(f"Config not found: {config_path}")
     with open(config_path, "rb") as f:
         data = tomllib.load(f)
     # Validate version
     version = data.get("store", {}).get("version", 1)
     if version > CONFIG_VERSION:
         raise ValueError(f"Config version {version} is newer than supported ({CONFIG_VERSION})")
+    # Parse store.path - explicit store location
+    store_path_str = data.get("store", {}).get("path")
+    if store_path_str:
+        actual_store = Path(store_path_str).expanduser().resolve()
+    else:
+        actual_store = config_dir  # Backwards compat: store is at config location
     # Parse provider configs
     def parse_provider(section: dict) -> ProviderConfig:
         return ProviderConfig(
             name=section.get("name", ""),
             params={k: v for k, v in section.items() if k != "name"},
         )
+    # Parse default tags (filter out system tags)
+    raw_tags = data.get("tags", {})
+    default_tags = {k: str(v) for k, v in raw_tags.items()
+                    if not k.startswith("_")}
+    # Parse max_summary_length (default 500)
+    max_summary_length = data.get("store", {}).get("max_summary_length", 500)
     return StoreConfig(
-        path=store_path,
+        path=actual_store,
+        config_dir=config_dir,
+        store_path=store_path_str,
         version=version,
         created=data.get("store", {}).get("created", ""),
         embedding=parse_provider(data.get("embedding", {"name": "sentence-transformers"})),
         summarization=parse_provider(data.get("summarization", {"name": "truncate"})),
         document=parse_provider(data.get("document", {"name": "composite"})),
+        embedding_identity=parse_embedding_identity(data.get("embedding_identity")),
+        default_tags=default_tags,
+        max_summary_length=max_summary_length,
     )
+def parse_embedding_identity(data: dict | None) -> EmbeddingIdentity | None:
+    """Parse embedding identity from config data."""
+    if data is None:
+        return None
+    provider = data.get("provider")
+    model = data.get("model")
+    dimension = data.get("dimension")
+    if provider and model and dimension:
+        return EmbeddingIdentity(provider=provider, model=model, dimension=dimension)
+    return None
 def save_config(config: StoreConfig) -> None:
     """
-    Save configuration to the store directory.
+    Save configuration to the config directory.
     Creates the directory if it doesn't exist.
     """
     if tomli_w is None:
         raise RuntimeError("tomli_w is required to save config. Install with: pip install tomli-w")
-    # Ensure directory exists
-    config.path.mkdir(parents=True, exist_ok=True)
+    # Ensure config directory exists
+    config_location = config.config_dir if config.config_dir else config.path
+    config_location.mkdir(parents=True, exist_ok=True)
     # Build TOML structure
     def provider_to_dict(p: ProviderConfig) -> dict:
         d = {"name": p.name}
         d.update(p.params)
         return d
+    store_section: dict[str, Any] = {
+        "version": config.version,
+        "created": config.created,
+    }
+    # Only write store.path if explicitly set (not default)
+    if config.store_path:
+        store_section["path"] = config.store_path
+    # Only write max_summary_length if not default
+    if config.max_summary_length != 500:
+        store_section["max_summary_length"] = config.max_summary_length
     data = {
-        "store": {
-            "version": config.version,
-            "created": config.created,
-        },
+        "store": store_section,
         "embedding": provider_to_dict(config.embedding),
         "summarization": provider_to_dict(config.summarization),
         "document": provider_to_dict(config.document),
     }
+    # Add embedding identity if set
+    if config.embedding_identity:
+        data["embedding_identity"] = {
+            "provider": config.embedding_identity.provider,
+            "model": config.embedding_identity.model,
+            "dimension": config.embedding_identity.dimension,
+        }
+    # Add default tags if set
+    if config.default_tags:
+        data["tags"] = config.default_tags
     with open(config.config_path, "wb") as f:
         tomli_w.dump(data, f)
-def load_or_create_config(store_path: Path) -> StoreConfig:
+def load_or_create_config(config_dir: Path, store_path: Optional[Path] = None) -> StoreConfig:
     """
     Load existing config or create a new one with defaults.
     This is the main entry point for config management.
+    Args:
+        config_dir: Directory containing (or to contain) keep.toml
+        store_path: Optional explicit store location (for new configs only)
     """
-    config_path = store_path / CONFIG_FILENAME
+    config_path = config_dir / CONFIG_FILENAME
     if config_path.exists():
-        return load_config(store_path)
+        return load_config(config_dir)
     else:
-        config = create_default_config(store_path)
+        config = create_default_config(config_dir, store_path)
         save_config(config)
         return config

keep/context.py CHANGED Viewed

@@ -1,127 +1,3 @@
 """
-Working context and top-of-mind retrieval.
-This module provides hierarchical context management for efficient
-"what are we working on?" queries with O(log(log(N))) retrieval.
+Context module - placeholder for future routing functionality.
 """
-from dataclasses import dataclass, field
-from datetime import datetime, timezone
-from typing import Any, Optional
-@dataclass
-class WorkingContext:
-    """
-    The current working context — a high-level summary of active work.
-    This is the "Level 3" summary that any agent can read to instantly
-    understand what's being worked on.
-    Attributes:
-        summary: Natural language description of current focus
-        active_items: IDs of items currently being worked with
-        topics: Active topic/domain tags
-        updated: When context was last updated
-        session_id: Current session identifier
-        metadata: Additional context-specific data (arbitrary structure)
-    """
-    summary: str
-    active_items: list[str] = field(default_factory=list)
-    topics: list[str] = field(default_factory=list)
-    updated: str = field(default_factory=lambda: datetime.now(timezone.utc).isoformat())
-    session_id: Optional[str] = None
-    metadata: dict[str, Any] = field(default_factory=dict)
-@dataclass
-class TopicSummary:
-    """
-    A summary of items within a topic cluster (Level 2).
-    Topics aggregate related items and provide a mid-level
-    overview without retrieving all underlying items.
-    Attributes:
-        topic: Topic identifier (tag value)
-        summary: Generated summary of topic contents
-        item_count: Number of items in this topic
-        key_items: IDs of the most important items in the topic
-        subtopics: Child topics if hierarchical
-        updated: When topic summary was last regenerated
-    """
-    topic: str
-    summary: str
-    item_count: int
-    key_items: list[str] = field(default_factory=list)
-    subtopics: list[str] = field(default_factory=list)
-    updated: str = field(default_factory=lambda: datetime.now(timezone.utc).isoformat())
-@dataclass
-class RoutingContext:
-    """
-    Describes how items are routed between private and shared stores.
-    This document lives at a well-known location in the shared store.
-    The facade reads it to make routing decisions. The private store
-    is physically separate and invisible from the shared store.
-    Attributes:
-        summary: Natural language description of the privacy model
-        private_patterns: Tag patterns that route to private store (each pattern is dict[str, str])
-        private_store_path: Location of the private store (if local)
-        updated: When routing was last modified
-        metadata: Additional routing configuration
-    """
-    summary: str = "Items tagged for private/draft visibility route to a separate store."
-    private_patterns: list[dict[str, str]] = field(default_factory=lambda: [
-        {"_visibility": "draft"},
-        {"_visibility": "private"},
-        {"_for": "self"},
-    ])
-    private_store_path: Optional[str] = None  # Resolved at init; None = default location
-    updated: str = field(default_factory=lambda: datetime.now(timezone.utc).isoformat())
-    metadata: dict[str, Any] = field(default_factory=dict)
-# Well-known item ID for the routing context document
-ROUTING_CONTEXT_ID = "_system:routing"
-# Reserved system tags for context management (stored with items)
-CONTEXT_TAGS = {
-    "_session": "Session that last touched this item",
-    "_topic": "Primary topic classification",
-    "_level": "Hierarchy level (0=source, 1=cluster, 2=topic, 3=context)",
-    "_summarizes": "IDs of items this item summarizes (for hierarchy)",
-}
-# Relevance scoring is computed at query time, NOT stored.
-# This preserves agility between broad exploration and focused work.
-# Score factors:
-#   - semantic similarity to query/hint
-#   - recency (time decay)
-#   - topic overlap with current WorkingContext.topics
-#   - session affinity (same session = boost)
-# The weighting of these factors can vary by retrieval mode.
-def generate_session_id() -> str:
-    """Generate a unique session identifier."""
-    import uuid
-    date = datetime.now(timezone.utc).strftime("%Y-%m-%d")
-    short_uuid = uuid.uuid4().hex[:8]
-    return f"{date}:{short_uuid}"
-def matches_private_pattern(tags: dict[str, str], patterns: list[dict[str, str]]) -> bool:
-    """
-    Check if an item's tags match any private routing pattern.
-    A pattern matches if ALL its key-value pairs are present in tags.
-    """
-    for pattern in patterns:
-        if all(tags.get(k) == v for k, v in pattern.items()):
-            return True
-    return False

keep-skill 0.1.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

keep-skill 0.1.0__py3-none-any.whl → 0.3.0__py3-none-any.whl