PyPI - keep-skill - Versions diffs - 0.2.0__py3-none-any.whl → 0.4.1__py3-none-any.whl - Mend

keep-skill 0.2.0 (py3-none-any.whl) → 0.4.1 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14)

keep/__init__.py +1 -1
keep/api.py +325 -11
keep/cli.py +453 -83
keep/config.py +2 -2
keep/document_store.py +351 -12
keep/pending_summaries.py +6 -0
keep/providers/embedding_cache.py +6 -0
keep/store.py +128 -11
keep_skill-0.4.1.dist-info/METADATA +219 -0
{keep_skill-0.2.0.dist-info → keep_skill-0.4.1.dist-info}/RECORD +13 -13
keep_skill-0.2.0.dist-info/METADATA +0 -304
{keep_skill-0.2.0.dist-info → keep_skill-0.4.1.dist-info}/WHEEL +0 -0
{keep_skill-0.2.0.dist-info → keep_skill-0.4.1.dist-info}/entry_points.txt +0 -0
{keep_skill-0.2.0.dist-info → keep_skill-0.4.1.dist-info}/licenses/LICENSE +0 -0

keep/__init__.py CHANGED Viewed

@@ -40,7 +40,7 @@ if not os.environ.get("KEEP_VERBOSE"):
 from .api import Keeper, NOWDOC_ID
 from .types import Item, filter_non_system_tags, SYSTEM_TAG_PREFIX
-__version__ = "0.2.0"
+__version__ = "0.4.1"
 __all__ = [
     "Keeper",
     "Item",

keep/api.py CHANGED Viewed

@@ -114,6 +114,7 @@ from .providers.base import (
     SummarizationProvider,
 )
 from .providers.embedding_cache import CachingEmbeddingProvider
+from .document_store import VersionInfo
 from .store import ChromaStore
 from .types import Item, filter_non_system_tags, SYSTEM_TAG_PREFIX
@@ -192,6 +193,25 @@ def _content_hash(content: str) -> str:
     return hashlib.sha256(content.encode("utf-8")).hexdigest()
+def _text_content_id(content: str) -> str:
+    """
+    Generate a content-addressed ID for text updates.
+    This makes text updates versioned by content:
+    - `keep update "my note"` → ID = _text:{hash[:12]}
+    - `keep update "my note" -t status=done` → same ID, new version
+    - `keep update "different note"` → different ID
+    Args:
+        content: The text content
+    Returns:
+        Content-addressed ID in format _text:{hash[:12]}
+    """
+    content_hash = hashlib.sha256(content.encode("utf-8")).hexdigest()[:12]
+    return f"_text:{content_hash}"
 class Keeper:
     """
     Semantic memory keeper - persistent storage with similarity search.
@@ -516,14 +536,20 @@ class Keeper:
         if doc.content_type:
             merged_tags["_content_type"] = doc.content_type
+        # Get existing doc info for versioning before upsert
+        old_doc = self._document_store.get(coll, id)
         # Dual-write: document store (canonical) + ChromaDB (embedding index)
-        self._document_store.upsert(
+        # DocumentStore.upsert now returns (record, content_changed) and archives old version
+        doc_record, content_changed = self._document_store.upsert(
             collection=coll,
             id=id,
             summary=final_summary,
             tags=merged_tags,
             content_hash=new_hash,
         )
+        # Store embedding for current version
         self._store.upsert(
             collection=coll,
             id=id,
@@ -532,6 +558,23 @@ class Keeper:
             tags=merged_tags,
         )
+        # If content changed and we archived a version, also store versioned embedding
+        # Skip if content hash is same (only tags/summary changed)
+        if old_doc is not None and content_changed:
+            # Get the version number that was just archived
+            version_count = self._document_store.version_count(coll, id)
+            if version_count > 0:
+                # Re-embed the old content for the archived version
+                old_embedding = self._get_embedding_provider().embed(old_doc.summary)
+                self._store.upsert_version(
+                    collection=coll,
+                    id=id,
+                    version=version_count,
+                    embedding=old_embedding,
+                    summary=old_doc.summary,
+                    tags=old_doc.tags,
+                )
         # Spawn background processor if lazy (only if summary wasn't user-provided and content changed)
         if lazy and summary is None and not content_unchanged:
             self._spawn_processor()
@@ -671,14 +714,20 @@ class Keeper:
         # Add system tags
         merged_tags["_source"] = "inline"
+        # Get existing doc info for versioning before upsert
+        old_doc = self._document_store.get(coll, id)
         # Dual-write: document store (canonical) + ChromaDB (embedding index)
-        self._document_store.upsert(
+        # DocumentStore.upsert now returns (record, content_changed) and archives old version
+        doc_record, content_changed = self._document_store.upsert(
             collection=coll,
             id=id,
             summary=final_summary,
             tags=merged_tags,
             content_hash=new_hash,
         )
+        # Store embedding for current version
         self._store.upsert(
             collection=coll,
             id=id,
@@ -687,6 +736,23 @@ class Keeper:
             tags=merged_tags,
         )
+        # If content changed and we archived a version, also store versioned embedding
+        # Skip if content hash is same (only tags/summary changed)
+        if old_doc is not None and content_changed:
+            # Get the version number that was just archived
+            version_count = self._document_store.version_count(coll, id)
+            if version_count > 0:
+                # Re-embed the old content for the archived version
+                old_embedding = self._get_embedding_provider().embed(old_doc.summary)
+                self._store.upsert_version(
+                    collection=coll,
+                    id=id,
+                    version=version_count,
+                    embedding=old_embedding,
+                    summary=old_doc.summary,
+                    tags=old_doc.tags,
+                )
         # Spawn background processor if lazy and content was queued (only if content changed)
         if lazy and summary is None and len(content) > max_len and not content_unchanged:
             self._spawn_processor()
@@ -836,7 +902,66 @@ class Keeper:
             items = _filter_by_date(items, since)
         return items[:limit]
+    def get_similar_for_display(
+        self,
+        id: str,
+        *,
+        limit: int = 3,
+        collection: Optional[str] = None
+    ) -> list[Item]:
+        """
+        Find similar items for frontmatter display using stored embedding.
+        Optimized for display: uses stored embedding (no re-embedding),
+        filters to distinct base documents, excludes source document versions.
+        Args:
+            id: ID of item to find similar items for
+            limit: Maximum results to return
+            collection: Target collection
+        Returns:
+            List of similar items, one per unique base document
+        """
+        coll = self._resolve_collection(collection)
+        # Get the stored embedding (no re-embedding)
+        embedding = self._store.get_embedding(coll, id)
+        if embedding is None:
+            return []
+        # Fetch more than needed to account for version filtering
+        fetch_limit = limit * 3
+        results = self._store.query_embedding(coll, embedding, limit=fetch_limit)
+        # Convert to Items
+        items = [r.to_item() for r in results]
+        # Extract base ID of source document
+        source_base_id = id.split("@v")[0] if "@v" in id else id
+        # Filter to distinct base IDs, excluding source document
+        seen_base_ids: set[str] = set()
+        filtered: list[Item] = []
+        for item in items:
+            # Get base ID from tags or parse from ID
+            base_id = item.tags.get("_base_id", item.id.split("@v")[0] if "@v" in item.id else item.id)
+            # Skip versions of source document
+            if base_id == source_base_id:
+                continue
+            # Keep only first version of each document
+            if base_id not in seen_base_ids:
+                seen_base_ids.add(base_id)
+                filtered.append(item)
+                if len(filtered) >= limit:
+                    break
+        return filtered
     def query_fulltext(
         self,
         query: str,
@@ -993,7 +1118,95 @@ class Keeper:
         if result is None:
             return None
         return result.to_item()
+    def get_version(
+        self,
+        id: str,
+        offset: int = 0,
+        *,
+        collection: Optional[str] = None,
+    ) -> Optional[Item]:
+        """
+        Get a specific version of a document by offset.
+        Offset semantics:
+        - 0 = current version
+        - 1 = previous version
+        - 2 = two versions ago
+        - etc.
+        Args:
+            id: Document identifier
+            offset: Version offset (0=current, 1=previous, etc.)
+            collection: Target collection
+        Returns:
+            Item if found, None if version doesn't exist
+        """
+        coll = self._resolve_collection(collection)
+        if offset == 0:
+            # Current version
+            return self.get(id, collection=collection)
+        # Get archived version
+        version_info = self._document_store.get_version(coll, id, offset)
+        if version_info is None:
+            return None
+        return Item(
+            id=id,
+            summary=version_info.summary,
+            tags=version_info.tags,
+        )
+    def list_versions(
+        self,
+        id: str,
+        limit: int = 10,
+        *,
+        collection: Optional[str] = None,
+    ) -> list[VersionInfo]:
+        """
+        List version history for a document.
+        Returns versions in reverse chronological order (newest archived first).
+        Does not include the current version.
+        Args:
+            id: Document identifier
+            limit: Maximum versions to return
+            collection: Target collection
+        Returns:
+            List of VersionInfo, newest archived first
+        """
+        coll = self._resolve_collection(collection)
+        return self._document_store.list_versions(coll, id, limit)
+    def get_version_nav(
+        self,
+        id: str,
+        current_version: Optional[int] = None,
+        limit: int = 3,
+        *,
+        collection: Optional[str] = None,
+    ) -> dict[str, list[VersionInfo]]:
+        """
+        Get version navigation info (prev/next) for display.
+        Args:
+            id: Document identifier
+            current_version: The version being viewed (None = current/live version)
+            limit: Max previous versions to return when viewing current
+            collection: Target collection
+        Returns:
+            Dict with 'prev' and optionally 'next' lists of VersionInfo.
+        """
+        coll = self._resolve_collection(collection)
+        return self._document_store.get_version_nav(coll, id, current_version, limit)
     def exists(self, id: str, *, collection: Optional[str] = None) -> bool:
         """
         Check if an item exists in the store.
@@ -1002,16 +1215,28 @@ class Keeper:
         # Check document store first, then ChromaDB
         return self._document_store.exists(coll, id) or self._store.exists(coll, id)
-    def delete(self, id: str, *, collection: Optional[str] = None) -> bool:
+    def delete(
+        self,
+        id: str,
+        *,
+        collection: Optional[str] = None,
+        delete_versions: bool = True,
+    ) -> bool:
         """
         Delete an item from both stores.
-        Returns True if item existed and was deleted.
+        Args:
+            id: Document identifier
+            collection: Target collection
+            delete_versions: If True, also delete version history
+        Returns:
+            True if item existed and was deleted.
         """
         coll = self._resolve_collection(collection)
-        # Delete from both stores
-        doc_deleted = self._document_store.delete(coll, id)
-        chroma_deleted = self._store.delete(coll, id)
+        # Delete from both stores (including versions)
+        doc_deleted = self._document_store.delete(coll, id, delete_versions=delete_versions)
+        chroma_deleted = self._store.delete(coll, id, delete_versions=delete_versions)
         return doc_deleted or chroma_deleted
     # -------------------------------------------------------------------------
@@ -1157,7 +1382,7 @@ class Keeper:
     def count(self, *, collection: Optional[str] = None) -> int:
         """
         Count items in a collection.
         Returns count from document store if available, else ChromaDB.
         """
         coll = self._resolve_collection(collection)
@@ -1165,7 +1390,36 @@ class Keeper:
         if doc_count > 0:
             return doc_count
         return self._store.count(coll)
+    def list_recent(
+        self,
+        limit: int = 10,
+        *,
+        collection: Optional[str] = None,
+    ) -> list[Item]:
+        """
+        List recent items ordered by update time.
+        Args:
+            limit: Maximum number to return (default 10)
+            collection: Collection to query (uses default if not specified)
+        Returns:
+            List of Items, most recently updated first
+        """
+        coll = self._resolve_collection(collection)
+        records = self._document_store.list_recent(coll, limit)
+        return [
+            Item(
+                id=rec.id,
+                summary=rec.summary,
+                tags=rec.tags,
+                score=None,
+            )
+            for rec in records
+        ]
     def embedding_cache_stats(self) -> dict:
         """
         Get embedding cache statistics.
@@ -1307,6 +1561,66 @@ class Keeper:
             logger.warning("Failed to spawn background processor: %s", e)
             return False
+    def reconcile(
+        self,
+        collection: Optional[str] = None,
+        fix: bool = False,
+    ) -> dict:
+        """
+        Check and optionally fix consistency between DocumentStore and ChromaDB.
+        Detects:
+        - Documents in DocumentStore missing from ChromaDB (not searchable)
+        - Documents in ChromaDB missing from DocumentStore (orphaned embeddings)
+        Args:
+            collection: Collection to check (None = default collection)
+            fix: If True, re-index documents missing from ChromaDB
+        Returns:
+            Dict with 'missing_from_chroma', 'orphaned_in_chroma', 'fixed' counts
+        """
+        coll = self._resolve_collection(collection)
+        # Get IDs from both stores
+        doc_ids = set(self._document_store.list_ids(coll))
+        chroma_ids = set(self._store.list_ids(coll))
+        missing_from_chroma = doc_ids - chroma_ids
+        orphaned_in_chroma = chroma_ids - doc_ids
+        fixed = 0
+        if fix and missing_from_chroma:
+            for doc_id in missing_from_chroma:
+                try:
+                    # Re-fetch and re-index
+                    doc_record = self._document_store.get(coll, doc_id)
+                    if doc_record:
+                        # Fetch original content
+                        doc = self._document_provider.fetch(doc_id)
+                        embedding = self._get_embedding_provider().embed(doc.content)
+                        # Write to ChromaDB
+                        self._store.upsert(
+                            collection=coll,
+                            id=doc_id,
+                            embedding=embedding,
+                            summary=doc_record.summary,
+                            tags=doc_record.tags,
+                        )
+                        fixed += 1
+                        logger.info("Reconciled: %s", doc_id)
+                except Exception as e:
+                    logger.warning("Failed to reconcile %s: %s", doc_id, e)
+        return {
+            "missing_from_chroma": len(missing_from_chroma),
+            "orphaned_in_chroma": len(orphaned_in_chroma),
+            "fixed": fixed,
+            "missing_ids": list(missing_from_chroma) if missing_from_chroma else [],
+            "orphaned_ids": list(orphaned_in_chroma) if orphaned_in_chroma else [],
+        }
     def close(self) -> None:
         """
         Close resources (embedding cache connection, pending queue, etc.).

keep-skill 0.2.0__py3-none-any.whl → 0.4.1__py3-none-any.whl

keep-skill 0.2.0 (py3-none-any.whl) → 0.4.1 (py3-none-any.whl)